From b642730be93149baa7556e5791393168ab396175 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 15 Feb 2008 17:35:24 +0900 Subject: Code reorganization: move files into their places. This is in a separate commit to ensure renames are properly preserved. --- src/gallium/drivers/cell/Makefile | 12 + src/gallium/drivers/cell/common.h | 220 +++ src/gallium/drivers/cell/ppu/Makefile | 76 + src/gallium/drivers/cell/ppu/cell_batch.c | 217 +++ src/gallium/drivers/cell/ppu/cell_batch.h | 58 + src/gallium/drivers/cell/ppu/cell_clear.c | 76 + src/gallium/drivers/cell/ppu/cell_clear.h | 43 + src/gallium/drivers/cell/ppu/cell_context.c | 287 +++ src/gallium/drivers/cell/ppu/cell_context.h | 135 ++ src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 164 ++ src/gallium/drivers/cell/ppu/cell_draw_arrays.h | 42 + src/gallium/drivers/cell/ppu/cell_flush.c | 84 + src/gallium/drivers/cell/ppu/cell_flush.h | 38 + src/gallium/drivers/cell/ppu/cell_render.c | 210 +++ src/gallium/drivers/cell/ppu/cell_render.h | 39 + src/gallium/drivers/cell/ppu/cell_spu.c | 155 ++ src/gallium/drivers/cell/ppu/cell_spu.h | 82 + src/gallium/drivers/cell/ppu/cell_state.h | 115 ++ src/gallium/drivers/cell/ppu/cell_state_blend.c | 109 ++ src/gallium/drivers/cell/ppu/cell_state_clip.c | 84 + src/gallium/drivers/cell/ppu/cell_state_derived.c | 192 ++ src/gallium/drivers/cell/ppu/cell_state_emit.c | 103 ++ src/gallium/drivers/cell/ppu/cell_state_emit.h | 36 + src/gallium/drivers/cell/ppu/cell_state_fs.c | 171 ++ .../drivers/cell/ppu/cell_state_rasterizer.c | 106 ++ src/gallium/drivers/cell/ppu/cell_state_sampler.c | 84 + src/gallium/drivers/cell/ppu/cell_state_surface.c | 71 + src/gallium/drivers/cell/ppu/cell_state_vertex.c | 63 + src/gallium/drivers/cell/ppu/cell_surface.c | 179 ++ src/gallium/drivers/cell/ppu/cell_surface.h | 42 + src/gallium/drivers/cell/ppu/cell_texture.c | 252 +++ src/gallium/drivers/cell/ppu/cell_texture.h | 80 + src/gallium/drivers/cell/ppu/cell_vbuf.c | 294 +++ src/gallium/drivers/cell/ppu/cell_vbuf.h | 38 + src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 120 ++ src/gallium/drivers/cell/ppu/cell_winsys.c | 40 + src/gallium/drivers/cell/ppu/cell_winsys.h | 50 + src/gallium/drivers/cell/spu/Makefile | 72 + src/gallium/drivers/cell/spu/spu_blend.c | 62 + src/gallium/drivers/cell/spu/spu_blend.h | 37 + src/gallium/drivers/cell/spu/spu_colorpack.h | 110 ++ src/gallium/drivers/cell/spu/spu_exec.c | 1948 ++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_exec.h | 172 ++ src/gallium/drivers/cell/spu/spu_main.c | 567 ++++++ src/gallium/drivers/cell/spu/spu_main.h | 177 ++ src/gallium/drivers/cell/spu/spu_render.c | 301 +++ src/gallium/drivers/cell/spu/spu_render.h | 38 + src/gallium/drivers/cell/spu/spu_texture.c | 217 +++ src/gallium/drivers/cell/spu/spu_texture.h | 47 + src/gallium/drivers/cell/spu/spu_tile.c | 83 + src/gallium/drivers/cell/spu/spu_tile.h | 73 + src/gallium/drivers/cell/spu/spu_tri.c | 926 ++++++++++ src/gallium/drivers/cell/spu/spu_tri.h | 37 + src/gallium/drivers/cell/spu/spu_util.c | 165 ++ src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 673 +++++++ src/gallium/drivers/cell/spu/spu_vertex_shader.c | 231 +++ src/gallium/drivers/cell/spu/spu_vertex_shader.h | 63 + src/gallium/drivers/cell/spu/spu_ztest.h | 135 ++ src/gallium/drivers/failover/Makefile | 21 + src/gallium/drivers/failover/fo_context.c | 155 ++ src/gallium/drivers/failover/fo_context.h | 114 ++ src/gallium/drivers/failover/fo_state.c | 457 +++++ src/gallium/drivers/failover/fo_state_emit.c | 137 ++ src/gallium/drivers/failover/fo_winsys.h | 45 + src/gallium/drivers/i915simple/Makefile | 38 + src/gallium/drivers/i915simple/SConscript | 29 + src/gallium/drivers/i915simple/i915_batch.h | 54 + src/gallium/drivers/i915simple/i915_blit.c | 162 ++ src/gallium/drivers/i915simple/i915_blit.h | 55 + src/gallium/drivers/i915simple/i915_clear.c | 47 + src/gallium/drivers/i915simple/i915_context.c | 320 ++++ src/gallium/drivers/i915simple/i915_context.h | 304 +++ src/gallium/drivers/i915simple/i915_debug.c | 901 +++++++++ src/gallium/drivers/i915simple/i915_debug.h | 117 ++ src/gallium/drivers/i915simple/i915_debug_fp.c | 366 ++++ src/gallium/drivers/i915simple/i915_flush.c | 81 + src/gallium/drivers/i915simple/i915_fpc.h | 213 +++ src/gallium/drivers/i915simple/i915_fpc_emit.c | 375 ++++ .../drivers/i915simple/i915_fpc_translate.c | 1135 ++++++++++++ src/gallium/drivers/i915simple/i915_prim_emit.c | 215 +++ src/gallium/drivers/i915simple/i915_prim_vbuf.c | 254 +++ src/gallium/drivers/i915simple/i915_reg.h | 978 ++++++++++ src/gallium/drivers/i915simple/i915_state.c | 694 +++++++ src/gallium/drivers/i915simple/i915_state.h | 50 + .../drivers/i915simple/i915_state_derived.c | 177 ++ .../drivers/i915simple/i915_state_dynamic.c | 308 ++++ src/gallium/drivers/i915simple/i915_state_emit.c | 374 ++++ .../drivers/i915simple/i915_state_immediate.c | 221 +++ .../drivers/i915simple/i915_state_inlines.h | 230 +++ .../drivers/i915simple/i915_state_sampler.c | 231 +++ src/gallium/drivers/i915simple/i915_strings.c | 83 + src/gallium/drivers/i915simple/i915_surface.c | 191 ++ src/gallium/drivers/i915simple/i915_texture.c | 536 ++++++ src/gallium/drivers/i915simple/i915_texture.h | 17 + src/gallium/drivers/i915simple/i915_winsys.h | 115 ++ src/gallium/drivers/i965simple/Makefile | 66 + src/gallium/drivers/i965simple/SConscript | 55 + src/gallium/drivers/i965simple/brw_batch.h | 59 + src/gallium/drivers/i965simple/brw_blit.c | 218 +++ src/gallium/drivers/i965simple/brw_blit.h | 33 + src/gallium/drivers/i965simple/brw_cc.c | 269 +++ src/gallium/drivers/i965simple/brw_clip.c | 206 +++ src/gallium/drivers/i965simple/brw_clip.h | 170 ++ src/gallium/drivers/i965simple/brw_clip_line.c | 245 +++ src/gallium/drivers/i965simple/brw_clip_point.c | 47 + src/gallium/drivers/i965simple/brw_clip_state.c | 92 + src/gallium/drivers/i965simple/brw_clip_tri.c | 566 ++++++ src/gallium/drivers/i965simple/brw_clip_unfilled.c | 477 +++++ src/gallium/drivers/i965simple/brw_clip_util.c | 351 ++++ src/gallium/drivers/i965simple/brw_context.c | 245 +++ src/gallium/drivers/i965simple/brw_context.h | 690 +++++++ src/gallium/drivers/i965simple/brw_curbe.c | 368 ++++ src/gallium/drivers/i965simple/brw_defines.h | 852 +++++++++ src/gallium/drivers/i965simple/brw_draw.c | 239 +++ src/gallium/drivers/i965simple/brw_draw.h | 55 + src/gallium/drivers/i965simple/brw_draw_upload.c | 299 +++ src/gallium/drivers/i965simple/brw_eu.c | 130 ++ src/gallium/drivers/i965simple/brw_eu.h | 888 +++++++++ src/gallium/drivers/i965simple/brw_eu_debug.c | 90 + src/gallium/drivers/i965simple/brw_eu_emit.c | 1080 +++++++++++ src/gallium/drivers/i965simple/brw_eu_util.c | 126 ++ src/gallium/drivers/i965simple/brw_flush.c | 80 + src/gallium/drivers/i965simple/brw_gs.c | 196 ++ src/gallium/drivers/i965simple/brw_gs.h | 75 + src/gallium/drivers/i965simple/brw_gs_emit.c | 148 ++ src/gallium/drivers/i965simple/brw_gs_state.c | 89 + src/gallium/drivers/i965simple/brw_misc_state.c | 486 +++++ src/gallium/drivers/i965simple/brw_reg.h | 76 + src/gallium/drivers/i965simple/brw_sf.c | 351 ++++ src/gallium/drivers/i965simple/brw_sf.h | 122 ++ src/gallium/drivers/i965simple/brw_sf_emit.c | 382 ++++ src/gallium/drivers/i965simple/brw_sf_state.c | 180 ++ src/gallium/drivers/i965simple/brw_shader_info.c | 49 + src/gallium/drivers/i965simple/brw_state.c | 424 +++++ src/gallium/drivers/i965simple/brw_state.h | 158 ++ src/gallium/drivers/i965simple/brw_state_batch.c | 113 ++ src/gallium/drivers/i965simple/brw_state_cache.c | 443 +++++ src/gallium/drivers/i965simple/brw_state_pool.c | 137 ++ src/gallium/drivers/i965simple/brw_state_upload.c | 202 ++ src/gallium/drivers/i965simple/brw_strings.c | 72 + src/gallium/drivers/i965simple/brw_structs.h | 1348 ++++++++++++++ src/gallium/drivers/i965simple/brw_surface.c | 210 +++ src/gallium/drivers/i965simple/brw_tex_layout.c | 353 ++++ src/gallium/drivers/i965simple/brw_tex_layout.h | 15 + src/gallium/drivers/i965simple/brw_urb.c | 186 ++ src/gallium/drivers/i965simple/brw_util.c | 104 ++ src/gallium/drivers/i965simple/brw_util.h | 43 + src/gallium/drivers/i965simple/brw_vs.c | 120 ++ src/gallium/drivers/i965simple/brw_vs.h | 82 + src/gallium/drivers/i965simple/brw_vs_emit.c | 1332 +++++++++++++ src/gallium/drivers/i965simple/brw_vs_state.c | 102 + src/gallium/drivers/i965simple/brw_winsys.h | 205 ++ src/gallium/drivers/i965simple/brw_wm.c | 210 +++ src/gallium/drivers/i965simple/brw_wm.h | 142 ++ src/gallium/drivers/i965simple/brw_wm_decl.c | 383 ++++ src/gallium/drivers/i965simple/brw_wm_glsl.c | 1079 +++++++++++ src/gallium/drivers/i965simple/brw_wm_iz.c | 214 +++ .../drivers/i965simple/brw_wm_sampler_state.c | 273 +++ src/gallium/drivers/i965simple/brw_wm_state.c | 194 ++ .../drivers/i965simple/brw_wm_surface_state.c | 304 +++ src/gallium/drivers/softpipe/Makefile | 50 + src/gallium/drivers/softpipe/SConscript | 42 + src/gallium/drivers/softpipe/sp_clear.c | 73 + src/gallium/drivers/softpipe/sp_clear.h | 43 + src/gallium/drivers/softpipe/sp_context.c | 333 ++++ src/gallium/drivers/softpipe/sp_context.h | 152 ++ src/gallium/drivers/softpipe/sp_draw_arrays.c | 164 ++ src/gallium/drivers/softpipe/sp_flush.c | 76 + src/gallium/drivers/softpipe/sp_flush.h | 35 + src/gallium/drivers/softpipe/sp_headers.h | 82 + src/gallium/drivers/softpipe/sp_prim_setup.c | 1247 +++++++++++++ src/gallium/drivers/softpipe/sp_prim_setup.h | 79 + src/gallium/drivers/softpipe/sp_prim_vbuf.c | 221 +++ src/gallium/drivers/softpipe/sp_prim_vbuf.h | 38 + src/gallium/drivers/softpipe/sp_quad.c | 118 ++ src/gallium/drivers/softpipe/sp_quad.h | 70 + src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 108 ++ src/gallium/drivers/softpipe/sp_quad_blend.c | 749 ++++++++ src/gallium/drivers/softpipe/sp_quad_bufloop.c | 72 + src/gallium/drivers/softpipe/sp_quad_colormask.c | 110 ++ src/gallium/drivers/softpipe/sp_quad_coverage.c | 88 + src/gallium/drivers/softpipe/sp_quad_depth_test.c | 276 +++ src/gallium/drivers/softpipe/sp_quad_earlyz.c | 88 + src/gallium/drivers/softpipe/sp_quad_fs.c | 390 ++++ src/gallium/drivers/softpipe/sp_quad_occlusion.c | 85 + src/gallium/drivers/softpipe/sp_quad_output.c | 90 + src/gallium/drivers/softpipe/sp_quad_stencil.c | 352 ++++ src/gallium/drivers/softpipe/sp_quad_stipple.c | 94 + src/gallium/drivers/softpipe/sp_query.c | 107 ++ src/gallium/drivers/softpipe/sp_query.h | 39 + src/gallium/drivers/softpipe/sp_state.h | 187 ++ src/gallium/drivers/softpipe/sp_state_blend.c | 98 + src/gallium/drivers/softpipe/sp_state_clip.c | 83 + src/gallium/drivers/softpipe/sp_state_derived.c | 235 +++ src/gallium/drivers/softpipe/sp_state_fs.c | 179 ++ src/gallium/drivers/softpipe/sp_state_rasterizer.c | 62 + src/gallium/drivers/softpipe/sp_state_sampler.c | 93 + src/gallium/drivers/softpipe/sp_state_surface.c | 109 ++ src/gallium/drivers/softpipe/sp_state_vertex.c | 64 + src/gallium/drivers/softpipe/sp_surface.c | 159 ++ src/gallium/drivers/softpipe/sp_surface.h | 42 + src/gallium/drivers/softpipe/sp_tex_sample.c | 916 +++++++++ src/gallium/drivers/softpipe/sp_tex_sample.h | 17 + src/gallium/drivers/softpipe/sp_texture.c | 166 ++ src/gallium/drivers/softpipe/sp_texture.h | 71 + src/gallium/drivers/softpipe/sp_tile_cache.c | 585 ++++++ src/gallium/drivers/softpipe/sp_tile_cache.h | 104 ++ src/gallium/drivers/softpipe/sp_winsys.h | 57 + 208 files changed, 47397 insertions(+) create mode 100644 src/gallium/drivers/cell/Makefile create mode 100644 src/gallium/drivers/cell/common.h create mode 100644 src/gallium/drivers/cell/ppu/Makefile create mode 100644 src/gallium/drivers/cell/ppu/cell_batch.c create mode 100644 src/gallium/drivers/cell/ppu/cell_batch.h create mode 100644 src/gallium/drivers/cell/ppu/cell_clear.c create mode 100644 src/gallium/drivers/cell/ppu/cell_clear.h create mode 100644 src/gallium/drivers/cell/ppu/cell_context.c create mode 100644 src/gallium/drivers/cell/ppu/cell_context.h create mode 100644 src/gallium/drivers/cell/ppu/cell_draw_arrays.c create mode 100644 src/gallium/drivers/cell/ppu/cell_draw_arrays.h create mode 100644 src/gallium/drivers/cell/ppu/cell_flush.c create mode 100644 src/gallium/drivers/cell/ppu/cell_flush.h create mode 100644 src/gallium/drivers/cell/ppu/cell_render.c create mode 100644 src/gallium/drivers/cell/ppu/cell_render.h create mode 100644 src/gallium/drivers/cell/ppu/cell_spu.c create mode 100644 src/gallium/drivers/cell/ppu/cell_spu.h create mode 100644 src/gallium/drivers/cell/ppu/cell_state.h create mode 100644 src/gallium/drivers/cell/ppu/cell_state_blend.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_clip.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_derived.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_emit.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_emit.h create mode 100644 src/gallium/drivers/cell/ppu/cell_state_fs.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_rasterizer.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_sampler.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_surface.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_vertex.c create mode 100644 src/gallium/drivers/cell/ppu/cell_surface.c create mode 100644 src/gallium/drivers/cell/ppu/cell_surface.h create mode 100644 src/gallium/drivers/cell/ppu/cell_texture.c create mode 100644 src/gallium/drivers/cell/ppu/cell_texture.h create mode 100644 src/gallium/drivers/cell/ppu/cell_vbuf.c create mode 100644 src/gallium/drivers/cell/ppu/cell_vbuf.h create mode 100644 src/gallium/drivers/cell/ppu/cell_vertex_shader.c create mode 100644 src/gallium/drivers/cell/ppu/cell_winsys.c create mode 100644 src/gallium/drivers/cell/ppu/cell_winsys.h create mode 100644 src/gallium/drivers/cell/spu/Makefile create mode 100644 src/gallium/drivers/cell/spu/spu_blend.c create mode 100644 src/gallium/drivers/cell/spu/spu_blend.h create mode 100644 src/gallium/drivers/cell/spu/spu_colorpack.h create mode 100644 src/gallium/drivers/cell/spu/spu_exec.c create mode 100644 src/gallium/drivers/cell/spu/spu_exec.h create mode 100644 src/gallium/drivers/cell/spu/spu_main.c create mode 100644 src/gallium/drivers/cell/spu/spu_main.h create mode 100644 src/gallium/drivers/cell/spu/spu_render.c create mode 100644 src/gallium/drivers/cell/spu/spu_render.h create mode 100644 src/gallium/drivers/cell/spu/spu_texture.c create mode 100644 src/gallium/drivers/cell/spu/spu_texture.h create mode 100644 src/gallium/drivers/cell/spu/spu_tile.c create mode 100644 src/gallium/drivers/cell/spu/spu_tile.h create mode 100644 src/gallium/drivers/cell/spu/spu_tri.c create mode 100644 src/gallium/drivers/cell/spu/spu_tri.h create mode 100644 src/gallium/drivers/cell/spu/spu_util.c create mode 100644 src/gallium/drivers/cell/spu/spu_vertex_fetch.c create mode 100644 src/gallium/drivers/cell/spu/spu_vertex_shader.c create mode 100644 src/gallium/drivers/cell/spu/spu_vertex_shader.h create mode 100644 src/gallium/drivers/cell/spu/spu_ztest.h create mode 100644 src/gallium/drivers/failover/Makefile create mode 100644 src/gallium/drivers/failover/fo_context.c create mode 100644 src/gallium/drivers/failover/fo_context.h create mode 100644 src/gallium/drivers/failover/fo_state.c create mode 100644 src/gallium/drivers/failover/fo_state_emit.c create mode 100644 src/gallium/drivers/failover/fo_winsys.h create mode 100644 src/gallium/drivers/i915simple/Makefile create mode 100644 src/gallium/drivers/i915simple/SConscript create mode 100644 src/gallium/drivers/i915simple/i915_batch.h create mode 100644 src/gallium/drivers/i915simple/i915_blit.c create mode 100644 src/gallium/drivers/i915simple/i915_blit.h create mode 100644 src/gallium/drivers/i915simple/i915_clear.c create mode 100644 src/gallium/drivers/i915simple/i915_context.c create mode 100644 src/gallium/drivers/i915simple/i915_context.h create mode 100644 src/gallium/drivers/i915simple/i915_debug.c create mode 100644 src/gallium/drivers/i915simple/i915_debug.h create mode 100644 src/gallium/drivers/i915simple/i915_debug_fp.c create mode 100644 src/gallium/drivers/i915simple/i915_flush.c create mode 100644 src/gallium/drivers/i915simple/i915_fpc.h create mode 100644 src/gallium/drivers/i915simple/i915_fpc_emit.c create mode 100644 src/gallium/drivers/i915simple/i915_fpc_translate.c create mode 100644 src/gallium/drivers/i915simple/i915_prim_emit.c create mode 100644 src/gallium/drivers/i915simple/i915_prim_vbuf.c create mode 100644 src/gallium/drivers/i915simple/i915_reg.h create mode 100644 src/gallium/drivers/i915simple/i915_state.c create mode 100644 src/gallium/drivers/i915simple/i915_state.h create mode 100644 src/gallium/drivers/i915simple/i915_state_derived.c create mode 100644 src/gallium/drivers/i915simple/i915_state_dynamic.c create mode 100644 src/gallium/drivers/i915simple/i915_state_emit.c create mode 100644 src/gallium/drivers/i915simple/i915_state_immediate.c create mode 100644 src/gallium/drivers/i915simple/i915_state_inlines.h create mode 100644 src/gallium/drivers/i915simple/i915_state_sampler.c create mode 100644 src/gallium/drivers/i915simple/i915_strings.c create mode 100644 src/gallium/drivers/i915simple/i915_surface.c create mode 100644 src/gallium/drivers/i915simple/i915_texture.c create mode 100644 src/gallium/drivers/i915simple/i915_texture.h create mode 100644 src/gallium/drivers/i915simple/i915_winsys.h create mode 100644 src/gallium/drivers/i965simple/Makefile create mode 100644 src/gallium/drivers/i965simple/SConscript create mode 100644 src/gallium/drivers/i965simple/brw_batch.h create mode 100644 src/gallium/drivers/i965simple/brw_blit.c create mode 100644 src/gallium/drivers/i965simple/brw_blit.h create mode 100644 src/gallium/drivers/i965simple/brw_cc.c create mode 100644 src/gallium/drivers/i965simple/brw_clip.c create mode 100644 src/gallium/drivers/i965simple/brw_clip.h create mode 100644 src/gallium/drivers/i965simple/brw_clip_line.c create mode 100644 src/gallium/drivers/i965simple/brw_clip_point.c create mode 100644 src/gallium/drivers/i965simple/brw_clip_state.c create mode 100644 src/gallium/drivers/i965simple/brw_clip_tri.c create mode 100644 src/gallium/drivers/i965simple/brw_clip_unfilled.c create mode 100644 src/gallium/drivers/i965simple/brw_clip_util.c create mode 100644 src/gallium/drivers/i965simple/brw_context.c create mode 100644 src/gallium/drivers/i965simple/brw_context.h create mode 100644 src/gallium/drivers/i965simple/brw_curbe.c create mode 100644 src/gallium/drivers/i965simple/brw_defines.h create mode 100644 src/gallium/drivers/i965simple/brw_draw.c create mode 100644 src/gallium/drivers/i965simple/brw_draw.h create mode 100644 src/gallium/drivers/i965simple/brw_draw_upload.c create mode 100644 src/gallium/drivers/i965simple/brw_eu.c create mode 100644 src/gallium/drivers/i965simple/brw_eu.h create mode 100644 src/gallium/drivers/i965simple/brw_eu_debug.c create mode 100644 src/gallium/drivers/i965simple/brw_eu_emit.c create mode 100644 src/gallium/drivers/i965simple/brw_eu_util.c create mode 100644 src/gallium/drivers/i965simple/brw_flush.c create mode 100644 src/gallium/drivers/i965simple/brw_gs.c create mode 100644 src/gallium/drivers/i965simple/brw_gs.h create mode 100644 src/gallium/drivers/i965simple/brw_gs_emit.c create mode 100644 src/gallium/drivers/i965simple/brw_gs_state.c create mode 100644 src/gallium/drivers/i965simple/brw_misc_state.c create mode 100644 src/gallium/drivers/i965simple/brw_reg.h create mode 100644 src/gallium/drivers/i965simple/brw_sf.c create mode 100644 src/gallium/drivers/i965simple/brw_sf.h create mode 100644 src/gallium/drivers/i965simple/brw_sf_emit.c create mode 100644 src/gallium/drivers/i965simple/brw_sf_state.c create mode 100644 src/gallium/drivers/i965simple/brw_shader_info.c create mode 100644 src/gallium/drivers/i965simple/brw_state.c create mode 100644 src/gallium/drivers/i965simple/brw_state.h create mode 100644 src/gallium/drivers/i965simple/brw_state_batch.c create mode 100644 src/gallium/drivers/i965simple/brw_state_cache.c create mode 100644 src/gallium/drivers/i965simple/brw_state_pool.c create mode 100644 src/gallium/drivers/i965simple/brw_state_upload.c create mode 100644 src/gallium/drivers/i965simple/brw_strings.c create mode 100644 src/gallium/drivers/i965simple/brw_structs.h create mode 100644 src/gallium/drivers/i965simple/brw_surface.c create mode 100644 src/gallium/drivers/i965simple/brw_tex_layout.c create mode 100644 src/gallium/drivers/i965simple/brw_tex_layout.h create mode 100644 src/gallium/drivers/i965simple/brw_urb.c create mode 100644 src/gallium/drivers/i965simple/brw_util.c create mode 100644 src/gallium/drivers/i965simple/brw_util.h create mode 100644 src/gallium/drivers/i965simple/brw_vs.c create mode 100644 src/gallium/drivers/i965simple/brw_vs.h create mode 100644 src/gallium/drivers/i965simple/brw_vs_emit.c create mode 100644 src/gallium/drivers/i965simple/brw_vs_state.c create mode 100644 src/gallium/drivers/i965simple/brw_winsys.h create mode 100644 src/gallium/drivers/i965simple/brw_wm.c create mode 100644 src/gallium/drivers/i965simple/brw_wm.h create mode 100644 src/gallium/drivers/i965simple/brw_wm_decl.c create mode 100644 src/gallium/drivers/i965simple/brw_wm_glsl.c create mode 100644 src/gallium/drivers/i965simple/brw_wm_iz.c create mode 100644 src/gallium/drivers/i965simple/brw_wm_sampler_state.c create mode 100644 src/gallium/drivers/i965simple/brw_wm_state.c create mode 100644 src/gallium/drivers/i965simple/brw_wm_surface_state.c create mode 100644 src/gallium/drivers/softpipe/Makefile create mode 100644 src/gallium/drivers/softpipe/SConscript create mode 100644 src/gallium/drivers/softpipe/sp_clear.c create mode 100644 src/gallium/drivers/softpipe/sp_clear.h create mode 100644 src/gallium/drivers/softpipe/sp_context.c create mode 100644 src/gallium/drivers/softpipe/sp_context.h create mode 100644 src/gallium/drivers/softpipe/sp_draw_arrays.c create mode 100644 src/gallium/drivers/softpipe/sp_flush.c create mode 100644 src/gallium/drivers/softpipe/sp_flush.h create mode 100644 src/gallium/drivers/softpipe/sp_headers.h create mode 100644 src/gallium/drivers/softpipe/sp_prim_setup.c create mode 100644 src/gallium/drivers/softpipe/sp_prim_setup.h create mode 100644 src/gallium/drivers/softpipe/sp_prim_vbuf.c create mode 100644 src/gallium/drivers/softpipe/sp_prim_vbuf.h create mode 100644 src/gallium/drivers/softpipe/sp_quad.c create mode 100644 src/gallium/drivers/softpipe/sp_quad.h create mode 100644 src/gallium/drivers/softpipe/sp_quad_alpha_test.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_blend.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_bufloop.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_colormask.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_coverage.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_depth_test.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_earlyz.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_fs.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_occlusion.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_output.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_stencil.c create mode 100644 src/gallium/drivers/softpipe/sp_quad_stipple.c create mode 100644 src/gallium/drivers/softpipe/sp_query.c create mode 100644 src/gallium/drivers/softpipe/sp_query.h create mode 100644 src/gallium/drivers/softpipe/sp_state.h create mode 100644 src/gallium/drivers/softpipe/sp_state_blend.c create mode 100644 src/gallium/drivers/softpipe/sp_state_clip.c create mode 100644 src/gallium/drivers/softpipe/sp_state_derived.c create mode 100644 src/gallium/drivers/softpipe/sp_state_fs.c create mode 100644 src/gallium/drivers/softpipe/sp_state_rasterizer.c create mode 100644 src/gallium/drivers/softpipe/sp_state_sampler.c create mode 100644 src/gallium/drivers/softpipe/sp_state_surface.c create mode 100644 src/gallium/drivers/softpipe/sp_state_vertex.c create mode 100644 src/gallium/drivers/softpipe/sp_surface.c create mode 100644 src/gallium/drivers/softpipe/sp_surface.h create mode 100644 src/gallium/drivers/softpipe/sp_tex_sample.c create mode 100644 src/gallium/drivers/softpipe/sp_tex_sample.h create mode 100644 src/gallium/drivers/softpipe/sp_texture.c create mode 100644 src/gallium/drivers/softpipe/sp_texture.h create mode 100644 src/gallium/drivers/softpipe/sp_tile_cache.c create mode 100644 src/gallium/drivers/softpipe/sp_tile_cache.h create mode 100644 src/gallium/drivers/softpipe/sp_winsys.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile new file mode 100644 index 00000000000..47aef7b05f6 --- /dev/null +++ b/src/gallium/drivers/cell/Makefile @@ -0,0 +1,12 @@ +# Cell Gallium driver Makefile + + +default: + ( cd spu ; make ) + ( cd ppu ; make ) + + + +clean: + ( cd spu ; make clean ) + ( cd ppu ; make clean ) diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h new file mode 100644 index 00000000000..4de514c3586 --- /dev/null +++ b/src/gallium/drivers/cell/common.h @@ -0,0 +1,220 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Types and tokens which are common to the SPU and PPU code. + */ + + +#ifndef CELL_COMMON_H +#define CELL_COMMON_H + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "pipe/p_format.h" + + +/** The standard assert macro doesn't seem to work reliably */ +#define ASSERT(x) \ + if (!(x)) { \ + ubyte *p = NULL; \ + fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \ + __FILE__, __LINE__, __FUNCTION__, #x); \ + *p = 0; \ + exit(1); \ + } + + +/** for sanity checking */ +#define ASSERT_ALIGN16(ptr) \ + ASSERT((((unsigned long) (ptr)) & 0xf) == 0); + + +/** round up value to next multiple of 4 */ +#define ROUNDUP4(k) (((k) + 0x3) & ~0x3) + +/** round up value to next multiple of 8 */ +#define ROUNDUP8(k) (((k) + 0x7) & ~0x7) + +/** round up value to next multiple of 16 */ +#define ROUNDUP16(k) (((k) + 0xf) & ~0xf) + + +#define CELL_MAX_SPUS 6 + +#define TILE_SIZE 32 + + +/** + * The low byte of a mailbox word contains the command opcode. + * Remaining higher bytes are command specific. + */ +#define CELL_CMD_OPCODE_MASK 0xff + +#define CELL_CMD_EXIT 1 +#define CELL_CMD_CLEAR_SURFACE 2 +#define CELL_CMD_FINISH 3 +#define CELL_CMD_RENDER 4 +#define CELL_CMD_BATCH 5 +#define CELL_CMD_RELEASE_VERTS 6 +#define CELL_CMD_STATE_FRAMEBUFFER 10 +#define CELL_CMD_STATE_DEPTH_STENCIL 11 +#define CELL_CMD_STATE_SAMPLER 12 +#define CELL_CMD_STATE_TEXTURE 13 +#define CELL_CMD_STATE_VERTEX_INFO 14 +#define CELL_CMD_STATE_VIEWPORT 15 +#define CELL_CMD_STATE_VS_ARRAY_INFO 16 +#define CELL_CMD_STATE_BLEND 17 +#define CELL_CMD_VS_EXECUTE 18 + + +#define CELL_NUM_BUFFERS 4 +#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ + +#define CELL_BUFFER_STATUS_FREE 10 +#define CELL_BUFFER_STATUS_USED 20 + + + +/** + * Tell SPUs about the framebuffer size, location + */ +struct cell_command_framebuffer +{ + uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */ + int width, height; + void *color_start, *depth_start; + enum pipe_format color_format, depth_format; +}; + + +/** + * Clear framebuffer to the given value/color. + */ +struct cell_command_clear_surface +{ + uint64_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ + uint surface; /**< Temporary: 0=color, 1=Z */ + uint value; +}; + + +/** + * Array info used by the vertex shader's vertex puller. + */ +struct cell_array_info +{ + uint64_t base; /**< Base address of the 0th element. */ + uint attr; /**< Attribute that this state is for. */ + uint pitch; /**< Byte pitch from one entry to the next. */ + uint format; /**< Pipe format of each entry. */ +} ALIGN16_ATTRIB; + + +struct cell_shader_info +{ + unsigned num_outputs; + + uint64_t declarations; + unsigned num_declarations; + uint64_t instructions; + unsigned num_instructions; + uint64_t uniforms; + uint64_t immediates; + unsigned num_immediates; +} ALIGN16_ATTRIB; + + +#define SPU_VERTS_PER_BATCH 64 +struct cell_command_vs +{ + uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */ + struct cell_shader_info shader; + unsigned num_elts; + unsigned elts[SPU_VERTS_PER_BATCH]; + uint64_t vOut[SPU_VERTS_PER_BATCH]; + float plane[12][4]; + unsigned nr_planes; + unsigned nr_attrs; +} ALIGN16_ATTRIB; + + +struct cell_command_render +{ + uint64_t opcode; /**< CELL_CMD_RENDER */ + uint prim_type; /**< PIPE_PRIM_x */ + uint num_verts; + uint vertex_size; /**< bytes per vertex */ + uint num_indexes; + uint vertex_buf; /**< which cell->buffer[] contains the vertex data */ + float xmin, ymin, xmax, ymax; /* XXX another dummy field */ + uint min_index; + boolean inline_verts; +}; + + +struct cell_command_release_verts +{ + uint64_t opcode; /**< CELL_CMD_RELEASE_VERTS */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +struct cell_command_texture +{ + void *start; /**< Address in main memory */ + uint width, height; +}; + + +/** XXX unions don't seem to work */ +/* XXX this should go away; all commands should be placed in batch buffers */ +struct cell_command +{ +#if 0 + struct cell_command_framebuffer fb; + struct cell_command_clear_surface clear; + struct cell_command_render render; +#endif + struct cell_command_vs vs; +} ALIGN16_ATTRIB; + + +/** This is the object passed to spe_create_thread() */ +struct cell_init_info +{ + unsigned id; + unsigned num_spus; + struct cell_command *cmd; + + /** Buffers for command batches, vertex/index data */ + ubyte *buffers[CELL_NUM_BUFFERS]; + uint *buffer_status; /**< points at cell_context->buffer_status */ +} ALIGN16_ATTRIB; + + +#endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile new file mode 100644 index 00000000000..50060f5cd30 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -0,0 +1,76 @@ +# Gallium3D Cell driver: PPU code + +# This makefile builds the g3dcell.a library which gets pulled into +# the main libGL.so library + + +TOP = ../../../../.. +include $(TOP)/configs/linux-cell + + +#PROG = gl4 + +CELL_LIB = libcell.a + +SPU_CODE_MODULE = ../spu/g3d_spu.a + + +SOURCES = \ + cell_batch.c \ + cell_clear.c \ + cell_context.c \ + cell_draw_arrays.c \ + cell_flush.c \ + cell_state_blend.c \ + cell_state_clip.c \ + cell_state_derived.c \ + cell_state_emit.c \ + cell_state_fs.c \ + cell_state_rasterizer.c \ + cell_state_sampler.c \ + cell_state_surface.c \ + cell_state_vertex.c \ + cell_spu.c \ + cell_surface.c \ + cell_texture.c \ + cell_vbuf.c \ + cell_vertex_shader.c \ + cell_winsys.c + + +OBJECTS = $(SOURCES:.c=.o) \ + +INCLUDE_DIRS = -I$(TOP)/src/mesa + + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + + +default: $(CELL_LIB) + + +$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) +# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) + ar -ru $(CELL_LIB) $(OBJECTS) + +#$(PROG): $(PPU_OBJECTS) +# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS) + + + +clean: + rm -f *.o *~ $(CELL_LIB) + + + +depend: $(SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + + + diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c new file mode 100644 index 00000000000..f45e5f25b64 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -0,0 +1,217 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_spu.h" + + + +uint +cell_get_empty_buffer(struct cell_context *cell) +{ + uint buf = 0, tries = 0; + + /* Find a buffer that's marked as free by all SPUs */ + while (1) { + uint spu, num_free = 0; + + for (spu = 0; spu < cell->num_spus; spu++) { + if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) { + num_free++; + + if (num_free == cell->num_spus) { + /* found a free buffer, now mark status as used */ + for (spu = 0; spu < cell->num_spus; spu++) { + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + } + /* + printf("PPU: ALLOC BUFFER %u\n", buf); + */ + return buf; + } + } + else { + break; + } + } + + /* try next buf */ + buf = (buf + 1) % CELL_NUM_BUFFERS; + + tries++; + if (tries == 100) { + /* + printf("PPU WAITING for buffer...\n"); + */ + } + } +} + + +void +cell_batch_flush(struct cell_context *cell) +{ + static boolean flushing = FALSE; + uint batch = cell->cur_batch; + const uint size = cell->buffer_size[batch]; + uint spu, cmd_word; + + assert(!flushing); + + if (size == 0) + return; + + flushing = TRUE; + + assert(batch < CELL_NUM_BUFFERS); + + /* + printf("cell_batch_dispatch: buf %u at %p, size %u\n", + batch, &cell->batch_buffer[batch][0], size); + */ + + /* + * Build "BATCH" command and sent to all SPUs. + */ + cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); + + for (spu = 0; spu < cell->num_spus; spu++) { + assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED); + send_mbox_message(cell_global.spe_contexts[spu], cmd_word); + } + + /* When the SPUs are done copying the buffer into their locals stores + * they'll write a BUFFER_STATUS_FREE message into the buffer_status[] + * array indicating that the PPU can re-use the buffer. + */ + + batch = cell_get_empty_buffer(cell); + + cell->buffer_size[batch] = 0; /* empty */ + cell->cur_batch = batch; + + flushing = FALSE; +} + + +uint +cell_batch_free_space(const struct cell_context *cell) +{ + uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; + return free; +} + + +/** + * Append data to current batch. + */ +void +cell_batch_append(struct cell_context *cell, const void *data, uint bytes) +{ + uint size; + + ASSERT(bytes % 8 == 0); + ASSERT(bytes <= CELL_BUFFER_SIZE); + ASSERT(cell->cur_batch >= 0); + +#ifdef ASSERT + { + uint spu; + for (spu = 0; spu < cell->num_spus; spu++) { + ASSERT(cell->buffer_status[spu][cell->cur_batch][0] + == CELL_BUFFER_STATUS_USED); + } + } +#endif + + size = cell->buffer_size[cell->cur_batch]; + + if (size + bytes > CELL_BUFFER_SIZE) { + cell_batch_flush(cell); + size = 0; + } + + ASSERT(size + bytes <= CELL_BUFFER_SIZE); + + memcpy(cell->buffer[cell->cur_batch] + size, data, bytes); + + cell->buffer_size[cell->cur_batch] = size + bytes; +} + + +void * +cell_batch_alloc(struct cell_context *cell, uint bytes) +{ + return cell_batch_alloc_aligned(cell, bytes, 1); +} + + +void * +cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, + uint alignment) +{ + void *pos; + uint size, padbytes; + + ASSERT(bytes % 8 == 0); + ASSERT(bytes <= CELL_BUFFER_SIZE); + ASSERT(alignment > 0); + ASSERT(cell->cur_batch >= 0); + +#ifdef ASSERT + { + uint spu; + for (spu = 0; spu < cell->num_spus; spu++) { + ASSERT(cell->buffer_status[spu][cell->cur_batch][0] + == CELL_BUFFER_STATUS_USED); + } + } +#endif + + size = cell->buffer_size[cell->cur_batch]; + + padbytes = (alignment - (size % alignment)) % alignment; + + if (padbytes + size + bytes > CELL_BUFFER_SIZE) { + cell_batch_flush(cell); + size = 0; + } + else { + size += padbytes; + } + + ASSERT(size % alignment == 0); + ASSERT(size + bytes <= CELL_BUFFER_SIZE); + + pos = (void *) (cell->buffer[cell->cur_batch] + size); + + cell->buffer_size[cell->cur_batch] = size + bytes; + + return pos; +} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h new file mode 100644 index 00000000000..a6eee0a8b18 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.h @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_BATCH_H +#define CELL_BATCH_H + +#include "pipe/p_compiler.h" + + +struct cell_context; + + +extern uint +cell_get_empty_buffer(struct cell_context *cell); + +extern void +cell_batch_flush(struct cell_context *cell); + +extern uint +cell_batch_free_space(const struct cell_context *cell); + +extern void +cell_batch_append(struct cell_context *cell, const void *data, uint bytes); + +extern void * +cell_batch_alloc(struct cell_context *cell, uint bytes); + +extern void * +cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, + uint alignment); + + +#endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c new file mode 100644 index 00000000000..07b908eec59 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + +#include <stdio.h> +#include <assert.h> +#include <stdint.h> +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/cell/common.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" + + +void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct cell_context *cell = cell_context(pipe); + uint surfIndex; + + if (cell->dirty) + cell_update_derived(cell); + + + if (!cell->cbuf_map[0]) + cell->cbuf_map[0] = pipe_surface_map(ps); + + if (ps == cell->framebuffer.zsbuf) { + surfIndex = 1; + } + else { + surfIndex = 0; + } + + + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) + cell_batch_alloc(cell, sizeof(*clr)); + clr->opcode = CELL_CMD_CLEAR_SURFACE; + clr->surface = surfIndex; + clr->value = clearValue; + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h new file mode 100644 index 00000000000..ff47d43f4cd --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_CLEAR_H +#define CELL_CLEAR_H + + +struct pipe_context; +struct pipe_surface; + + +extern void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + + +#endif /* CELL_CLEAR_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c new file mode 100644 index 00000000000..bbe1fd7a111 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -0,0 +1,287 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + + +#include <stdio.h> + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/cell/common.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_flush.h" +#include "cell_render.h" +#include "cell_state.h" +#include "cell_surface.h" +#include "cell_spu.h" +#include "cell_texture.h" +#include "cell_vbuf.h" + + + +static boolean +cell_is_format_supported( struct pipe_context *pipe, + enum pipe_format format, uint type ) +{ + /*struct cell_context *cell = cell_context( pipe );*/ + + switch (type) { + case PIPE_TEXTURE: + /* cell supports all texture formats, XXX for now anyway */ + return TRUE; + case PIPE_SURFACE: + /* cell supports all (off-screen) surface formats, XXX for now */ + return TRUE; + default: + assert(0); + return FALSE; + } +} + + +static int cell_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + +static float cell_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + default: + return 0; + } +} + + +static const char * +cell_get_name( struct pipe_context *pipe ) +{ + return "Cell"; +} + +static const char * +cell_get_vendor( struct pipe_context *pipe ) +{ + return "Tungsten Graphics, Inc."; +} + + + +static void +cell_destroy_context( struct pipe_context *pipe ) +{ + struct cell_context *cell = cell_context(pipe); + + cell_spu_exit(cell); + + align_free(cell); +} + + +static struct draw_context * +cell_draw_create(struct cell_context *cell) +{ + struct draw_context *draw = draw_create(); + + if (getenv("GALLIUM_CELL_VS")) { + /* plug in SPU-based vertex transformation code */ + draw->shader_queue_flush = cell_vertex_shader_queue_flush; + draw->driver_private = cell; + } + + return draw; +} + + +struct pipe_context * +cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) +{ + struct cell_context *cell; + uint spu, buf; + + /* some fields need to be 16-byte aligned, so align the whole object */ + cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); + if (!cell) + return NULL; + + memset(cell, 0, sizeof(*cell)); + + cell->winsys = cws; + cell->pipe.winsys = winsys; + cell->pipe.destroy = cell_destroy_context; + + /* queries */ + cell->pipe.is_format_supported = cell_is_format_supported; + cell->pipe.get_name = cell_get_name; + cell->pipe.get_vendor = cell_get_vendor; + cell->pipe.get_param = cell_get_param; + cell->pipe.get_paramf = cell_get_paramf; + + + /* state setters */ + cell->pipe.create_blend_state = cell_create_blend_state; + cell->pipe.bind_blend_state = cell_bind_blend_state; + cell->pipe.delete_blend_state = cell_delete_blend_state; + + cell->pipe.create_sampler_state = cell_create_sampler_state; + cell->pipe.bind_sampler_state = cell_bind_sampler_state; + cell->pipe.delete_sampler_state = cell_delete_sampler_state; + + cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; + cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; + cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; + + cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; + cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; + cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; + + cell->pipe.create_fs_state = cell_create_fs_state; + cell->pipe.bind_fs_state = cell_bind_fs_state; + cell->pipe.delete_fs_state = cell_delete_fs_state; + + cell->pipe.create_vs_state = cell_create_vs_state; + cell->pipe.bind_vs_state = cell_bind_vs_state; + cell->pipe.delete_vs_state = cell_delete_vs_state; + + cell->pipe.set_blend_color = cell_set_blend_color; + cell->pipe.set_clip_state = cell_set_clip_state; + cell->pipe.set_constant_buffer = cell_set_constant_buffer; + + cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; + + cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; + cell->pipe.set_scissor_state = cell_set_scissor_state; + cell->pipe.set_viewport_state = cell_set_viewport_state; + + cell->pipe.set_vertex_buffer = cell_set_vertex_buffer; + cell->pipe.set_vertex_element = cell_set_vertex_element; + + cell->pipe.draw_arrays = cell_draw_arrays; + cell->pipe.draw_elements = cell_draw_elements; + + cell->pipe.clear = cell_clear_surface; + cell->pipe.flush = cell_flush; + + /* textures */ + cell->pipe.texture_create = cell_texture_create; + cell->pipe.texture_release = cell_texture_release; + cell->pipe.get_tex_surface = cell_get_tex_surface; + + cell->pipe.set_sampler_texture = cell_set_sampler_texture; + +#if 0 + cell->pipe.begin_query = cell_begin_query; + cell->pipe.end_query = cell_end_query; + cell->pipe.wait_query = cell_wait_query; +#endif + + cell_init_surface_functions(cell); + + cell->draw = cell_draw_create(cell); + + cell_init_vbuf(cell); + draw_set_rasterize_stage(cell->draw, cell->vbuf); + + /* + * SPU stuff + */ + cell->num_spus = 6; + + cell_start_spus(cell); + + /* init command, vertex/index buffer info */ + for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { + cell->buffer_size[buf] = 0; + + /* init batch buffer status values, + * mark 0th buffer as used, rest as free. + */ + for (spu = 0; spu < cell->num_spus; spu++) { + if (buf == 0) + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + else + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; + } + } + + return &cell->pipe; +} diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h new file mode 100644 index 00000000000..3b63419b5eb --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_CONTEXT_H +#define CELL_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/draw/draw_vertex.h" +#include "pipe/draw/draw_vbuf.h" +#include "cell_winsys.h" +#include "pipe/cell/common.h" + + +struct cell_vbuf_render; + +struct cell_vertex_shader_state +{ + struct pipe_shader_state shader; + void *draw_data; +}; + + +struct cell_fragment_shader_state +{ + struct pipe_shader_state shader; + void *data; +}; + + +struct cell_context +{ + struct pipe_context pipe; + + struct cell_winsys *winsys; + + const struct pipe_blend_state *blend; + const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_rasterizer_state *rasterizer; + const struct cell_vertex_shader_state *vs; + const struct cell_fragment_shader_state *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[2]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct cell_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + + ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; + ubyte *zsbuf_map; + + struct pipe_surface *tex_surf; + uint *tex_map; + + uint dirty; + + /** The primitive drawing context */ + struct draw_context *draw; + struct draw_stage *render_stage; + + /** For post-transformed vertex buffering: */ + struct cell_vbuf_render *vbuf_render; + struct draw_stage *vbuf; + + struct vertex_info vertex_info; + + /** Mapped constant buffers */ + void *mapped_constants[PIPE_SHADER_TYPES]; + + + uint num_spus; + + /** Buffers for command batches, vertex/index data */ + uint buffer_size[CELL_NUM_BUFFERS]; + ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + + int cur_batch; /**< which buffer is being filled w/ commands */ + + /** [4] to ensure 16-byte alignment for each status word */ + uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + +}; + + + + +static INLINE struct cell_context * +cell_context(struct pipe_context *pipe) +{ + return (struct cell_context *) pipe; +} + + +extern struct pipe_context * +cell_create_context(struct pipe_winsys *ws, struct cell_winsys *cws); + +extern void +cell_vertex_shader_queue_flush(struct draw_context *draw); + + + + +#endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c new file mode 100644 index 00000000000..717cd8370f9 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_state.h" + +#include "pipe/draw/draw_context.h" + + + +static void +cell_map_constant_buffers(struct cell_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + } + + draw_set_mapped_constant_buffer(sp->draw, + sp->mapped_constants[PIPE_SHADER_VERTEX]); +} + +static void +cell_unmap_constant_buffers(struct cell_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + ws->buffer_unmap(ws, sp->constants[i].buffer); + sp->mapped_constants[i] = NULL; + } +} + + +boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return cell_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? + */ +boolean +cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct cell_context *sp = cell_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + /* first, check that the primitive is not malformed. It is the + * state tracker's responsibility to do send only correctly formed + * primitives down. It currently isn't doing that though... + */ +#if 1 + count = draw_trim_prim( mode, count ); +#else + if (!draw_validate_prim( mode, count )) + assert(0); +#endif + + if (sp->dirty) + cell_update_derived( sp ); + +#if 0 + cell_map_surfaces(sp); +#endif + cell_map_constant_buffers(sp); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (sp->vertex_buffer[i].buffer) { + void *buf = pipe->winsys->buffer_map(pipe->winsys, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + /* draw! */ + draw_arrays(draw, mode, start, count); + + /* + * unmap vertex/index buffers - will cause draw module to flush + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (sp->vertex_buffer[i].buffer) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); + } + } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + } + + /* Note: leave drawing surfaces mapped */ + cell_unmap_constant_buffers(sp); + + return TRUE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h new file mode 100644 index 00000000000..d5df4aa05f8 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_DRAW_ARRAYS_H +#define CELL_DRAW_ARRAYS_H + + +boolean cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +boolean cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); + + + +#endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c new file mode 100644 index 00000000000..f62bc4650ce --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_render.h" +#include "pipe/draw/draw_context.h" + + +void +cell_flush(struct pipe_context *pipe, unsigned flags) +{ + struct cell_context *cell = cell_context(pipe); + + if (flags & PIPE_FLUSH_SWAPBUFFERS) + flags |= PIPE_FLUSH_WAIT; + + draw_flush( cell->draw ); + cell_flush_int(pipe, flags); +} + + +/** internal flush */ +void +cell_flush_int(struct pipe_context *pipe, unsigned flags) +{ + static boolean flushing = FALSE; /* recursion catcher */ + struct cell_context *cell = cell_context(pipe); + uint i; + + ASSERT(!flushing); + flushing = TRUE; + + if (flags & PIPE_FLUSH_WAIT) { + uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t)); + *cmd = CELL_CMD_FINISH; + } + + cell_batch_flush(cell); + +#if 0 + /* Send CMD_FINISH to all SPUs */ + for (i = 0; i < cell->num_spus; i++) { + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); + } +#endif + + if (flags & PIPE_FLUSH_WAIT) { + /* Wait for ack */ + for (i = 0; i < cell->num_spus; i++) { + uint k = wait_mbox_message(cell_global.spe_contexts[i]); + assert(k == CELL_CMD_FINISH); + } + } + + flushing = FALSE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h new file mode 100644 index 00000000000..eda351b1cbc --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_FLUSH +#define CELL_FLUSH + +extern void +cell_flush(struct pipe_context *pipe, unsigned flags); + +extern void +cell_flush_int(struct pipe_context *pipe, unsigned flags); + +#endif diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c new file mode 100644 index 00000000000..4ab277a4b24 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -0,0 +1,210 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Last stage of 'draw' pipeline: send tris to SPUs. + * \author Brian Paul + */ + +#include "cell_context.h" +#include "cell_render.h" +#include "cell_spu.h" +#include "pipe/p_util.h" +#include "pipe/draw/draw_private.h" + + +struct render_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct cell_context *cell; +}; + + +static INLINE struct render_stage * +render_stage(struct draw_stage *stage) +{ + return (struct render_stage *) stage; +} + + +static void render_begin( struct draw_stage *stage ) +{ +#if 0 + struct render_stage *render = render_stage(stage); + struct cell_context *sp = render->cell; + const struct pipe_shader_state *fs = &render->cell->fs->shader; + render->quad.nr_attrs = render->cell->nr_frag_attrs; + + render->firstFpInput = fs->input_semantic_name[0]; + + sp->quad.first->begin(sp->quad.first); +#endif +} + + +static void render_end( struct draw_stage *stage ) +{ +} + + +static void reset_stipple_counter( struct draw_stage *stage ) +{ + struct render_stage *render = render_stage(stage); + /*render->cell->line_stipple_counter = 0;*/ +} + + +static void +render_point(struct draw_stage *stage, struct prim_header *prim) +{ +} + + +static void +render_line(struct draw_stage *stage, struct prim_header *prim) +{ +} + + +/** Write a vertex into the prim buffer */ +static void +save_vertex(struct cell_prim_buffer *buf, uint pos, + const struct vertex_header *vert) +{ + uint attr, j; + + for (attr = 0; attr < 2; attr++) { + for (j = 0; j < 4; j++) { + buf->vertex[pos][attr][j] = vert->data[attr][j]; + } + } + + /* update bounding box */ + if (vert->data[0][0] < buf->xmin) + buf->xmin = vert->data[0][0]; + if (vert->data[0][0] > buf->xmax) + buf->xmax = vert->data[0][0]; + if (vert->data[0][1] < buf->ymin) + buf->ymin = vert->data[0][1]; + if (vert->data[0][1] > buf->ymax) + buf->ymax = vert->data[0][1]; +} + + +static void +render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + struct render_stage *rs = render_stage(stage); + struct cell_context *cell = rs->cell; + struct cell_prim_buffer *buf = &cell->prim_buffer; + uint i; + + if (buf->num_verts + 3 > CELL_MAX_VERTS) { + cell_flush_prim_buffer(cell); + } + + i = buf->num_verts; + assert(i+2 <= CELL_MAX_VERTS); + save_vertex(buf, i+0, prim->v[0]); + save_vertex(buf, i+1, prim->v[1]); + save_vertex(buf, i+2, prim->v[2]); + buf->num_verts += 3; +} + + +/** + * Send the a RENDER command to all SPUs to have them render the prims + * in the current prim_buffer. + */ +void +cell_flush_prim_buffer(struct cell_context *cell) +{ + uint i; + + if (cell->prim_buffer.num_verts == 0) + return; + + for (i = 0; i < cell->num_spus; i++) { + struct cell_command_render *render = &cell_global.command[i].render; + render->prim_type = PIPE_PRIM_TRIANGLES; + render->num_verts = cell->prim_buffer.num_verts; + render->vertex_size = cell->vertex_info->size * 4; + render->xmin = cell->prim_buffer.xmin; + render->ymin = cell->prim_buffer.ymin; + render->xmax = cell->prim_buffer.xmax; + render->ymax = cell->prim_buffer.ymax; + render->vertex_data = &cell->prim_buffer.vertex; + ASSERT_ALIGN16(render->vertex_data); + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER); + } + + cell->prim_buffer.num_verts = 0; + + cell->prim_buffer.xmin = 1e100; + cell->prim_buffer.ymin = 1e100; + cell->prim_buffer.xmax = -1e100; + cell->prim_buffer.ymax = -1e100; + + /* XXX temporary, need to double-buffer the prim buffer until we get + * a real command buffer/list system. + */ + cell_flush(&cell->pipe, 0x0); +} + + + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new draw/render stage. This will be plugged into the + * draw module as the last pipeline stage. + */ +struct draw_stage *cell_draw_render_stage( struct cell_context *cell ) +{ + struct render_stage *render = CALLOC_STRUCT(render_stage); + + render->cell = cell; + render->stage.draw = cell->draw; + render->stage.begin = render_begin; + render->stage.point = render_point; + render->stage.line = render_line; + render->stage.tri = render_tri; + render->stage.end = render_end; + render->stage.reset_stipple_counter = reset_stipple_counter; + render->stage.destroy = render_destroy; + + /* + render->quad.coef = render->coef; + render->quad.posCoef = &render->posCoef; + */ + + return &render->stage; +} diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h new file mode 100644 index 00000000000..826dcbafeba --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_RENDER_H +#define CELL_RENDER_H + +struct cell_context; +struct draw_stage; + +extern void +cell_flush_prim_buffer(struct cell_context *cell); + +extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); + +#endif /* CELL_RENDER_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c new file mode 100644 index 00000000000..7c83a47e574 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -0,0 +1,155 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <pthread.h> + +#include "cell_spu.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/cell/common.h" + + +/* +helpful headers: +/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h +*/ + + +struct cell_global_info cell_global; + + +/** + * Write a 1-word message to the given SPE mailbox. + */ +void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg) +{ + spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING); +} + + +/** + * Wait for a 1-word message to arrive in given mailbox. + */ +uint +wait_mbox_message(spe_context_ptr_t ctx) +{ + do { + unsigned data; + int count = spe_out_mbox_read(ctx, &data, 1); + + if (count == 1) { + return data; + } + + if (count < 0) { + /* error */ ; + } + } while (1); +} + + +static void *cell_thread_function(void *arg) +{ + struct cell_init_info *init = (struct cell_init_info *) arg; + unsigned entry = SPE_DEFAULT_ENTRY; + + ASSERT_ALIGN16(init); + + if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0, + init, NULL, NULL) < 0) { + fprintf(stderr, "spe_context_run() failed\n"); + exit(1); + } + + pthread_exit(NULL); +} + + +/** + * Create the SPU threads + */ +void +cell_start_spus(struct cell_context *cell) +{ + uint i, j; + + assert(cell->num_spus <= MAX_SPUS); + + ASSERT_ALIGN16(&cell_global.command[0]); + ASSERT_ALIGN16(&cell_global.command[1]); + + ASSERT_ALIGN16(&cell_global.inits[0]); + ASSERT_ALIGN16(&cell_global.inits[1]); + + for (i = 0; i < cell->num_spus; i++) { + cell_global.inits[i].id = i; + cell_global.inits[i].num_spus = cell->num_spus; + cell_global.inits[i].cmd = &cell_global.command[i]; + for (j = 0; j < CELL_NUM_BUFFERS; j++) { + cell_global.inits[i].buffers[j] = cell->buffer[j]; + } + cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; + + cell_global.spe_contexts[i] = spe_context_create(0, NULL); + if (!cell_global.spe_contexts[i]) { + fprintf(stderr, "spe_context_create() failed\n"); + exit(1); + } + + if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { + fprintf(stderr, "spe_program_load() failed\n"); + exit(1); + } + + pthread_create(&cell_global.spe_threads[i], NULL, &cell_thread_function, + &cell_global.inits[i]); + } +} + + +/** + * Tell all the SPUs to stop/exit. + */ +void +cell_spu_exit(struct cell_context *cell) +{ + uint i; + + for (i = 0; i < cell->num_spus; i++) { + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT); + } + + /* wait for threads to exit */ + for (i = 0; i < cell->num_spus; i++) { + void *value; + pthread_join(cell_global.spe_threads[i], &value); + cell_global.spe_threads[i] = 0; + cell_global.spe_contexts[i] = 0; + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h new file mode 100644 index 00000000000..19eff94f967 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_SPU +#define CELL_SPU + + +#include <libspe2.h> +#include <libmisc.h> +#include "pipe/cell/common.h" + +#include "cell_context.h" + + +#define MAX_SPUS 8 + +/** + * Global vars, for now anyway. + */ +struct cell_global_info +{ + /** + * SPU/SPE handles, etc + */ + spe_context_ptr_t spe_contexts[MAX_SPUS]; + pthread_t spe_threads[MAX_SPUS]; + + /** + * Data sent to SPUs + */ + struct cell_init_info inits[MAX_SPUS]; + struct cell_command command[MAX_SPUS]; +}; + + +extern struct cell_global_info cell_global; + + +/** This is the handle for the actual SPE code */ +extern spe_program_handle_t g3d_spu; + + +extern void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg); + +extern uint +wait_mbox_message(spe_context_ptr_t ctx); + + +extern void +cell_start_spus(struct cell_context *cell); + + +extern void +cell_spu_exit(struct cell_context *cell); + + +#endif /* CELL_SPU */ diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h new file mode 100644 index 00000000000..3a71ba14fa8 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -0,0 +1,115 @@ + + +#ifndef CELL_STATE_H +#define CELL_STATE_H + + +#define CELL_NEW_VIEWPORT 0x1 +#define CELL_NEW_RASTERIZER 0x2 +#define CELL_NEW_FS 0x4 +#define CELL_NEW_BLEND 0x8 +#define CELL_NEW_CLIP 0x10 +#define CELL_NEW_SCISSOR 0x20 +#define CELL_NEW_STIPPLE 0x40 +#define CELL_NEW_FRAMEBUFFER 0x80 +#define CELL_NEW_ALPHA_TEST 0x100 +#define CELL_NEW_DEPTH_STENCIL 0x200 +#define CELL_NEW_SAMPLER 0x400 +#define CELL_NEW_TEXTURE 0x800 +#define CELL_NEW_VERTEX 0x1000 +#define CELL_NEW_VS 0x2000 +#define CELL_NEW_CONSTANTS 0x4000 +#define CELL_NEW_VERTEX_INFO 0x8000 + + + +extern void +cell_set_framebuffer_state( struct pipe_context *, + const struct pipe_framebuffer_state * ); + + + +extern void * +cell_create_blend_state(struct pipe_context *, const struct pipe_blend_state *); +extern void cell_bind_blend_state(struct pipe_context *, void *); +extern void cell_delete_blend_state(struct pipe_context *, void *); + +extern void cell_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ); + + +void * +cell_create_sampler_state(struct pipe_context *, + const struct pipe_sampler_state *); + +extern void +cell_bind_sampler_state(struct pipe_context *, unsigned, void *); + +extern void +cell_delete_sampler_state(struct pipe_context *, void *); + + +extern void * +cell_create_depth_stencil_alpha_state(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); + +extern void +cell_bind_depth_stencil_alpha_state(struct pipe_context *, void *); + +extern void +cell_delete_depth_stencil_alpha_state(struct pipe_context *, void *); + + +void *cell_create_fs_state(struct pipe_context *, + const struct pipe_shader_state *); +void cell_bind_fs_state(struct pipe_context *, void *); +void cell_delete_fs_state(struct pipe_context *, void *); +void *cell_create_vs_state(struct pipe_context *, + const struct pipe_shader_state *); +void cell_bind_vs_state(struct pipe_context *, void *); +void cell_delete_vs_state(struct pipe_context *, void *); + + +void * +cell_create_rasterizer_state(struct pipe_context *, + const struct pipe_rasterizer_state *); +void cell_bind_rasterizer_state(struct pipe_context *, void *); +void cell_delete_rasterizer_state(struct pipe_context *, void *); + + +void cell_set_clip_state( struct pipe_context *, + const struct pipe_clip_state * ); + +void cell_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + +void cell_set_polygon_stipple( struct pipe_context *, + const struct pipe_poly_stipple * ); + +void +cell_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, + struct pipe_texture *texture); + +void cell_set_scissor_state( struct pipe_context *, + const struct pipe_scissor_state * ); + +void cell_set_texture_state( struct pipe_context *, + unsigned unit, struct pipe_texture * ); + +void cell_set_vertex_element(struct pipe_context *, + unsigned index, + const struct pipe_vertex_element *); + +void cell_set_vertex_buffer(struct pipe_context *, + unsigned index, + const struct pipe_vertex_buffer *); + +void cell_set_viewport_state( struct pipe_context *, + const struct pipe_viewport_state * ); + + +void cell_update_derived( struct cell_context *softpipe ); + +#endif diff --git a/src/gallium/drivers/cell/ppu/cell_state_blend.c b/src/gallium/drivers/cell/ppu/cell_state_blend.c new file mode 100644 index 00000000000..4fc60548c85 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_blend.c @@ -0,0 +1,109 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" + + + +void * +cell_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + return mem_dup(blend, sizeof(*blend)); +} + + +void +cell_bind_blend_state(struct pipe_context *pipe, void *blend) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend = (const struct pipe_blend_state *)blend; + + cell->dirty |= CELL_NEW_BLEND; +} + + +void +cell_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + + +void +cell_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend_color = *blend_color; + + cell->dirty |= CELL_NEW_BLEND; +} + + + + +void * +cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + + +void +cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->depth_stencil + = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; + + cell->dirty |= CELL_NEW_DEPTH_STENCIL; +} + + +void +cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) +{ + FREE(depth); +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_clip.c b/src/gallium/drivers/cell/ppu/cell_state_clip.c new file mode 100644 index 00000000000..4f43665941d --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_clip.c @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "cell_context.h" +#include "cell_state.h" +#include "pipe/draw/draw_context.h" + + +void cell_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(cell->draw, clip); +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +void cell_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct cell_context *cell = cell_context(pipe); + + cell->viewport = *viewport; /* struct copy */ + cell->dirty |= CELL_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(cell->draw, viewport); + + /* Using tnl/ and vf/ modules is temporary while getting started. + * Full pipe will have vertex shader, vertex fetch of its own. + */ +} + + +void cell_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->scissor, scissor, sizeof(*scissor) ); + cell->dirty |= CELL_NEW_SCISSOR; +} + + +void cell_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); + cell->dirty |= CELL_NEW_STIPPLE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c new file mode 100644 index 00000000000..56daf5dfde0 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -0,0 +1,192 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_vertex.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_state.h" +#include "cell_state_emit.h" + + +static int +find_vs_output(const struct pipe_shader_state *vs, + uint semantic_name, + uint semantic_index) +{ + uint i; + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == semantic_name && + vs->output_semantic_index[i] == semantic_index) + return i; + } + return -1; +} + + +/** + * Determine how to map vertex program outputs to fragment program inputs. + * Basically, this will be used when computing the triangle interpolation + * coefficients from the post-transform vertex attributes. + */ +static void +calculate_vertex_layout( struct cell_context *cell ) +{ + const struct pipe_shader_state *vs = &cell->vs->shader; + const struct pipe_shader_state *fs = &cell->fs->shader; + const enum interp_mode colorInterp + = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo = &cell->vertex_info; + uint i; + int src; + +#if 0 + if (cell->vbuf) { + /* if using the post-transform vertex buffer, tell draw_vbuf to + * simply emit the whole post-xform vertex as-is: + */ + struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf; + vinfo_vbuf->num_attribs = 0; + draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); + vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4; + } +#endif + + /* reset vinfo */ + vinfo->num_attribs = 0; + + /* we always want to emit vertex pos */ + src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + + + /* + * Loop over fragment shader inputs, searching for the matching output + * from the vertex shader. + */ + for (i = 0; i < fs->num_inputs; i++) { + switch (fs->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + /* already done above */ + break; + + case TGSI_SEMANTIC_COLOR: + src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, + fs->input_semantic_index[i]); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + break; + + case TGSI_SEMANTIC_FOG: + src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); +#if 1 + if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ + src = 0; +#endif + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + break; + + case TGSI_SEMANTIC_GENERIC: + /* this includes texcoords and varying vars */ + src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, + fs->input_semantic_index[i]); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + default: + assert(0); + } + } + + draw_compute_vertex_size(vinfo); + + /* XXX only signal this if format really changes */ + cell->dirty |= CELL_NEW_VERTEX_INFO; +} + + +#if 0 +/** + * Recompute cliprect from scissor bounds, scissor enable and surface size. + */ +static void +compute_cliprect(struct cell_context *sp) +{ + unsigned surfWidth, surfHeight; + + if (sp->framebuffer.num_cbufs > 0) { + surfWidth = sp->framebuffer.cbufs[0]->width; + surfHeight = sp->framebuffer.cbufs[0]->height; + } + else { + /* no surface? */ + surfWidth = sp->scissor.maxx; + surfHeight = sp->scissor.maxy; + } + + if (sp->rasterizer->scissor) { + /* clip to scissor rect */ + sp->cliprect.minx = MAX2(sp->scissor.minx, 0); + sp->cliprect.miny = MAX2(sp->scissor.miny, 0); + sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); + sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); + } + else { + /* clip to surface bounds */ + sp->cliprect.minx = 0; + sp->cliprect.miny = 0; + sp->cliprect.maxx = surfWidth; + sp->cliprect.maxy = surfHeight; + } +} +#endif + + + +void cell_update_derived( struct cell_context *cell ) +{ + if (cell->dirty & (CELL_NEW_RASTERIZER | + CELL_NEW_FS | + CELL_NEW_VS)) + calculate_vertex_layout( cell ); + +#if 0 + if (cell->dirty & (CELL_NEW_SCISSOR | + CELL_NEW_DEPTH_STENCIL_ALPHA | + CELL_NEW_FRAMEBUFFER)) + compute_cliprect(cell); +#endif + + cell_emit_state(cell); + + cell->dirty = 0; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c new file mode 100644 index 00000000000..5d2a7864493 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -0,0 +1,103 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "cell_context.h" +#include "cell_state.h" +#include "cell_state_emit.h" +#include "cell_batch.h" +#include "cell_texture.h" + + +static void +emit_state_cmd(struct cell_context *cell, uint cmd, + const void *state, uint state_size) +{ + uint64_t *dst = (uint64_t *) + cell_batch_alloc(cell, ROUNDUP8(sizeof(uint64_t) + state_size)); + *dst = cmd; + memcpy(dst + 1, state, state_size); +} + + + +void +cell_emit_state(struct cell_context *cell) +{ + if (cell->dirty & CELL_NEW_FRAMEBUFFER) { + struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; + struct pipe_surface *zbuf = cell->framebuffer.zsbuf; + struct cell_command_framebuffer *fb + = cell_batch_alloc(cell, sizeof(*fb)); + fb->opcode = CELL_CMD_STATE_FRAMEBUFFER; + fb->color_start = cell->cbuf_map[0]; + fb->color_format = cbuf->format; + fb->depth_start = cell->zsbuf_map; + fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE; + fb->width = cell->framebuffer.cbufs[0]->width; + fb->height = cell->framebuffer.cbufs[0]->height; + } + + if (cell->dirty & CELL_NEW_BLEND) { + emit_state_cmd(cell, CELL_CMD_STATE_BLEND, + cell->blend, + sizeof(struct pipe_blend_state)); + } + + if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { + emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, + cell->depth_stencil, + sizeof(struct pipe_depth_stencil_alpha_state)); + } + + if (cell->dirty & CELL_NEW_SAMPLER) { + emit_state_cmd(cell, CELL_CMD_STATE_SAMPLER, + cell->sampler[0], sizeof(struct pipe_sampler_state)); + } + + if (cell->dirty & CELL_NEW_TEXTURE) { + struct cell_command_texture texture; + if (cell->texture[0]) { + texture.start = cell->texture[0]->tiled_data; + texture.width = cell->texture[0]->base.width[0]; + texture.height = cell->texture[0]->base.height[0]; + } + else { + texture.start = NULL; + texture.width = 0; + texture.height = 0; + } + + emit_state_cmd(cell, CELL_CMD_STATE_TEXTURE, + &texture, sizeof(struct cell_command_texture)); + } + + if (cell->dirty & CELL_NEW_VERTEX_INFO) { + emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, + &cell->vertex_info, sizeof(struct vertex_info)); + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h new file mode 100644 index 00000000000..59f8affe8d3 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_STATE_EMIT_H +#define CELL_STATE_EMIT_H + + +extern void +cell_emit_state(struct cell_context *cell); + + +#endif /* CELL_STATE_EMIT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_fs.c new file mode 100644 index 00000000000..3f46a87d189 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_fs.c @@ -0,0 +1,171 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/draw/draw_context.h" +#if 0 +#include "pipe/p_shader_tokens.h" +#include "pipe/llvm/gallivm.h" +#include "pipe/tgsi/util/tgsi_dump.h" +#include "pipe/tgsi/exec/tgsi_sse2.h" +#endif + +#include "cell_context.h" +#include "cell_state.h" + + +void * +cell_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + /*struct cell_context *cell = cell_context(pipe);*/ + struct cell_fragment_shader_state *state; + + state = CALLOC_STRUCT(cell_fragment_shader_state); + if (!state) + return NULL; + + state->shader = *templ; + +#if 0 + if (cell->dump_fs) { + tgsi_dump(state->shader.tokens, 0); + } + +#if defined(__i386__) || defined(__386__) + if (cell->use_sse) { + x86_init_func( &state->sse2_program ); + tgsi_emit_sse2_fs( state->shader.tokens, &state->sse2_program ); + } +#endif + +#ifdef MESA_LLVM + state->llvm_prog = 0; + if (!gallivm_global_cpu_engine()) { + gallivm_cpu_engine_create(state->llvm_prog); + } + else + gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); +#endif +#endif + + return state; +} + + +void +cell_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->fs = (struct cell_fragment_shader_state *) fs; + + cell->dirty |= CELL_NEW_FS; +} + + +void +cell_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct cell_fragment_shader_state *state = + (struct cell_fragment_shader_state *) fs; + + FREE( state ); +} + + +void * +cell_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct cell_context *cell = cell_context(pipe); + struct cell_vertex_shader_state *state; + + state = CALLOC_STRUCT(cell_vertex_shader_state); + if (!state) + return NULL; + + state->shader = *templ; + + state->draw_data = draw_create_vertex_shader(cell->draw, &state->shader); + if (state->draw_data == NULL) { + FREE( state ); + return NULL; + } + + return state; +} + + +void +cell_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->vs = (const struct cell_vertex_shader_state *) vs; + + draw_bind_vertex_shader(cell->draw, cell->vs->draw_data); + + cell->dirty |= CELL_NEW_VS; +} + + +void +cell_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + + struct cell_vertex_shader_state *state = + (struct cell_vertex_shader_state *) vs; + + draw_delete_vertex_shader(cell->draw, state->draw_data); + FREE( state ); +} + + +void +cell_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct cell_context *cell = cell_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* note: reference counting */ + pipe_buffer_reference(ws, + &cell->constants[shader].buffer, + buf->buffer); + cell->constants[shader].size = buf->size; + + cell->dirty |= CELL_NEW_CONSTANTS; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c new file mode 100644 index 00000000000..d8128ece54d --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c @@ -0,0 +1,106 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" + + + +struct spu_rasterizer_state +{ + unsigned flatshade:1; +#if 0 + unsigned light_twoside:1; + unsigned front_winding:2; /**< PIPE_WINDING_x */ + unsigned cull_mode:2; /**< PIPE_WINDING_x */ + unsigned fill_cw:2; /**< PIPE_POLYGON_MODE_x */ + unsigned fill_ccw:2; /**< PIPE_POLYGON_MODE_x */ + unsigned offset_cw:1; + unsigned offset_ccw:1; +#endif + unsigned scissor:1; + unsigned poly_smooth:1; + unsigned poly_stipple_enable:1; + unsigned point_smooth:1; +#if 0 + unsigned point_sprite:1; + unsigned point_size_per_vertex:1; /**< size computed in vertex shader */ +#endif + unsigned multisample:1; /* XXX maybe more ms state in future */ + unsigned line_smooth:1; + unsigned line_stipple_enable:1; + unsigned line_stipple_factor:8; /**< [1..256] actually */ + unsigned line_stipple_pattern:16; +#if 0 + unsigned bypass_clipping:1; +#endif + unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ + + float line_width; + float point_size; /**< used when no per-vertex size */ +#if 0 + float offset_units; + float offset_scale; + ubyte sprite_coord_mode[PIPE_MAX_SHADER_OUTPUTS]; /**< PIPE_SPRITE_COORD_ */ +#endif +}; + + + +void * +cell_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *setup) +{ + struct pipe_rasterizer_state *state + = MALLOC(sizeof(struct pipe_rasterizer_state)); + memcpy(state, setup, sizeof(struct pipe_rasterizer_state)); + return state; +} + + +void +cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass-through to draw module */ + draw_set_rasterizer_state(cell->draw, setup); + + cell->rasterizer = (struct pipe_rasterizer_state *)setup; + + cell->dirty |= CELL_NEW_RASTERIZER; +} + + +void +cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) +{ + FREE(rasterizer); +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_sampler.c b/src/gallium/drivers/cell/ppu/cell_state_sampler.c new file mode 100644 index 00000000000..ade6cc8338e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_sampler.c @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + +void * +cell_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + return mem_dup(sampler, sizeof(*sampler)); +} + +void +cell_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + assert(unit < PIPE_MAX_SAMPLERS); + cell->sampler[unit] = (struct pipe_sampler_state *)sampler; + + cell->dirty |= CELL_NEW_SAMPLER; +} + + +void +cell_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + + +void +cell_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, + struct pipe_texture *texture) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->texture[sampler] = texture; + + cell_update_texture_mapping(cell); + + cell->dirty |= CELL_NEW_TEXTURE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_surface.c b/src/gallium/drivers/cell/ppu/cell_state_surface.c new file mode 100644 index 00000000000..287610b76b9 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_surface.c @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_inlines.h" +#include "cell_context.h" +#include "cell_state.h" + + +void +cell_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct cell_context *cell = cell_context(pipe); + + if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { + struct pipe_surface *csurf = fb->cbufs[0]; + struct pipe_surface *zsurf = fb->zsbuf; + uint i; + + /* unmap old surfaces */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) { + pipe_surface_unmap(cell->framebuffer.cbufs[i]); + cell->cbuf_map[i] = NULL; + } + } + + if (cell->framebuffer.zsbuf && cell->zsbuf_map) { + pipe_surface_unmap(cell->framebuffer.zsbuf); + cell->zsbuf_map = NULL; + } + + /* update my state */ + cell->framebuffer = *fb; + + /* map new surfaces */ + if (csurf) + cell->cbuf_map[0] = pipe_surface_map(csurf); + + if (zsurf) + cell->zsbuf_map = pipe_surface_map(zsurf); + + cell->dirty |= CELL_NEW_FRAMEBUFFER; + } +} + diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c new file mode 100644 index 00000000000..0f01e920f95 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "cell_context.h" +#include "cell_state.h" + +#include "pipe/draw/draw_context.h" + + +void +cell_set_vertex_element(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *attrib) +{ + struct cell_context *cell = cell_context(pipe); + assert(index < PIPE_ATTRIB_MAX); + cell->vertex_element[index] = *attrib; /* struct copy */ + cell->dirty |= CELL_NEW_VERTEX; + + draw_set_vertex_element(cell->draw, index, attrib); +} + + +void +cell_set_vertex_buffer(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer) +{ + struct cell_context *cell = cell_context(pipe); + assert(index < PIPE_ATTRIB_MAX); + cell->vertex_buffer[index] = *buffer; /* struct copy */ + cell->dirty |= CELL_NEW_VERTEX; + + draw_set_vertex_buffer(cell->draw, index, buffer); +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c new file mode 100644 index 00000000000..fca93e47424 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -0,0 +1,179 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/util/p_tile.h" +#include "cell_context.h" +#include "cell_surface.h" + + +/* Upload data to a rectangular sub-region. Lots of choices how to do this: + * + * - memcpy by span to current destination + * - upload data as new buffer and blit + * + * Currently always memcpy. + */ +static void +cell_surface_data(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + const void *src, unsigned src_pitch, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + pipe_copy_rect(pipe_surface_map(dst), + dst->cpp, + dst->pitch, + dstx, dsty, width, height, src, src_pitch, srcx, srcy); + + pipe_surface_unmap(dst); +} + + +static void +cell_surface_copy(struct pipe_context *pipe, + unsigned do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + assert( dst->cpp == src->cpp ); + + pipe_copy_rect(pipe_surface_map(dst), + dst->cpp, + dst->pitch, + dstx, dsty, + width, height, + pipe_surface_map(src), + do_flip ? -src->pitch : src->pitch, + srcx, do_flip ? 1 - srcy - height : srcy); + + pipe_surface_unmap(src); + pipe_surface_unmap(dst); +} + + +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; +} + + +#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) + + +/** + * Fill a rectangular sub-region. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +static void +cell_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + unsigned i, j; + void *dst_map = pipe_surface_map(dst); + + assert(dst->pitch > 0); + assert(width <= dst->pitch); + + switch (dst->cpp) { + case 1: + { + ubyte *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + memset(row, value, width); + row += dst->pitch; + } + } + break; + case 2: + { + ushort *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = (ushort) value; + row += dst->pitch; + } + } + break; + case 4: + { + unsigned *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = value; + row += dst->pitch; + } + } + break; + case 8: + { + /* expand the 4-byte clear value to an 8-byte value */ + ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); + ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); + ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); + ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); + ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); + val0 = (val0 << 8) | val0; + val1 = (val1 << 8) | val1; + val2 = (val2 << 8) | val2; + val3 = (val3 << 8) | val3; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + row[j*4+0] = val0; + row[j*4+1] = val1; + row[j*4+2] = val2; + row[j*4+3] = val3; + } + row += dst->pitch * 4; + } + } + break; + default: + assert(0); + break; + } + + pipe_surface_unmap( dst ); +} + + +void +cell_init_surface_functions(struct cell_context *cell) +{ + cell->pipe.surface_copy = cell_surface_copy; + cell->pipe.surface_fill = cell_surface_fill; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h new file mode 100644 index 00000000000..9e58f329443 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef CELL_SURFACE_H +#define CELL_SURFACE_H + + +struct cell_context; + + +extern void +cell_init_surface_functions(struct cell_context *cell); + + +#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c new file mode 100644 index 00000000000..c8ef36002f7 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -0,0 +1,252 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + +/* Simple, maximally packed layout. + */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static void +cell_texture_layout(struct cell_texture * spt) +{ + struct pipe_texture *pt = &spt->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + spt->buffer_size = 0; + + for ( level = 0 ; level <= pt->last_level ; level++ ) { + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + + spt->level_offset[level] = spt->buffer_size; + + spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + width * pt->cpp; + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + + +struct pipe_texture * +cell_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) +{ + struct cell_texture *spt = CALLOC_STRUCT(cell_texture); + if (!spt) + return NULL; + + spt->base = *templat; + + cell_texture_layout(spt); + + spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); + + if (!spt->buffer) { + FREE(spt); + return NULL; + } + + return &spt->base; +} + + +void +cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct cell_texture *spt = cell_texture(*pt); + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); + */ + + pipe_buffer_reference(pipe->winsys, &spt->buffer, NULL); + + FREE(spt); + } + *pt = NULL; +} + + +/** + * Called via pipe->get_tex_surface() + */ +struct pipe_surface * +cell_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct cell_texture *spt = cell_texture(pt); + struct pipe_surface *ps; + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(pipe->winsys, &ps->buffer, spt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = ps->width; + ps->offset = spt->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + (pt->compressed ? ps->height/4 : ps->height) * + ps->width * ps->cpp; + } else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} + + + +static void +tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) +{ + const uint tile_size2 = tile_size * tile_size; + const uint h_t = h / tile_size, w_t = w / tile_size; + + uint it, jt; /* tile counters */ + uint i, j; /* intra-tile counters */ + + for (it = 0; it < h_t; it++) { + for (jt = 0; jt < w_t; jt++) { + /* fill in tile (i, j) */ + uint *tdst = dst + (it * w_t + jt) * tile_size2; + for (i = 0; i < tile_size; i++) { + for (j = 0; j < tile_size; j++) { + const uint srci = it * tile_size + i; + const uint srcj = jt * tile_size + j; + *tdst++ = src[srci * h + srcj]; + } + } + } + } +} + + + +/** + * Convert linear texture image data to tiled format for SPU usage. + */ +static void +cell_tile_texture(struct cell_context *cell, + struct cell_texture *texture) +{ + uint face = 0, level = 0, zslice = 0; + struct pipe_surface *surf; + const uint w = texture->base.width[0], h = texture->base.height[0]; + const uint *src; + + /* temporary restrictions: */ + assert(w >= TILE_SIZE); + assert(h >= TILE_SIZE); + assert(w % TILE_SIZE == 0); + assert(h % TILE_SIZE == 0); + + surf = cell_get_tex_surface(&cell->pipe, &texture->base, face, level, zslice); + ASSERT(surf); + + src = (const uint *) pipe_surface_map(surf); + + if (texture->tiled_data) { + align_free(texture->tiled_data); + } + texture->tiled_data = align_malloc(w * h * 4, 16); + + tile_copy_data(w, h, TILE_SIZE, texture->tiled_data, src); + + pipe_surface_unmap(surf); + + pipe_surface_reference(&surf, NULL); +} + + + +void +cell_update_texture_mapping(struct cell_context *cell) +{ + uint face = 0, level = 0, zslice = 0; + + if (cell->texture[0]) + cell_tile_texture(cell, cell->texture[0]); +#if 0 + if (cell->tex_surf && cell->tex_map) { + pipe_surface_unmap(cell->tex_surf); + cell->tex_map = NULL; + } + + /* XXX free old surface */ + + cell->tex_surf = cell_get_tex_surface(&cell->pipe, + &cell->texture[0]->base, + face, level, zslice); + + cell->tex_map = pipe_surface_map(cell->tex_surf); +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h new file mode 100644 index 00000000000..0264fed88e6 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_TEXTURE_H +#define CELL_TEXTURE_H + + +struct pipe_context; +struct pipe_texture; + + +/** + * Subclass of pipe_texture + */ +struct cell_texture +{ + struct pipe_texture base; + + unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; + unsigned long buffer_size; + + void *tiled_data; /* XXX this may be temporary */ /*ALIGN16*/ +}; + + +/** cast wrapper */ +static INLINE struct cell_texture * +cell_texture(struct pipe_texture *pt) +{ + return (struct cell_texture *) pt; +} + + + +extern struct pipe_texture * +cell_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat); + +extern void +cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); + +extern struct pipe_surface * +cell_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice); + + +extern void +cell_update_texture_mapping(struct cell_context *cell); + + +#endif /* CELL_TEXTURE */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c new file mode 100644 index 00000000000..e9fafe492ee --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -0,0 +1,294 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + + +#include "cell_batch.h" +#include "cell_context.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_vbuf.h" +#include "pipe/draw/draw_vbuf.h" + + +/** Allow vertex data to be inlined after RENDER command */ +#define ALLOW_INLINE_VERTS 1 + + +/** + * Subclass of vbuf_render because we need a cell_context pointer in + * a few places. + */ +struct cell_vbuf_render +{ + struct vbuf_render base; + struct cell_context *cell; + uint prim; /**< PIPE_PRIM_x */ + uint vertex_size; /**< in bytes */ + void *vertex_buffer; /**< just for debug, really */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +/** cast wrapper */ +static struct cell_vbuf_render * +cell_vbuf_render(struct vbuf_render *vbr) +{ + return (struct cell_vbuf_render *) vbr; +} + + + +static const struct vertex_info * +cell_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + return &cvbr->cell->vertex_info; +} + + +static void * +cell_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ + + assert(cvbr->vertex_buf == ~0); + cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); + cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; + cvbr->vertex_size = vertex_size; + return cvbr->vertex_buffer; +} + + +static void +cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned vertices_used) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + + /* + printf("%s vertex_buf = %u count = %u\n", + __FUNCTION__, cvbr->vertex_buf, vertices_used); + */ + + /* Tell SPUs they can release the vert buf */ + if (cvbr->vertex_buf != ~0U) { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) + cell_batch_alloc(cell, sizeof(struct cell_command_release_verts)); + release->opcode = CELL_CMD_RELEASE_VERTS; + release->vertex_buf = cvbr->vertex_buf; + } + + cvbr->vertex_buf = ~0; + cell_flush_int(&cell->pipe, 0x0); + + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + + +static void +cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + cvbr->prim = prim; + /*printf("cell_set_prim %u\n", prim);*/ +} + + +static void +cell_vbuf_draw(struct vbuf_render *vbr, + const ushort *indices, + uint nr_indices) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + float xmin, ymin, xmax, ymax; + uint i; + uint nr_vertices = 0, min_index = ~0; + const void *vertices = cvbr->vertex_buffer; + const uint vertex_size = cvbr->vertex_size; + + for (i = 0; i < nr_indices; i++) { + if (indices[i] > nr_vertices) + nr_vertices = indices[i]; + if (indices[i] < min_index) + min_index = indices[i]; + } + nr_vertices++; + +#if 0 + /*if (min_index > 0)*/ + printf("%s min_index = %u\n", __FUNCTION__, min_index); +#endif + +#if 0 + printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", + nr_indices, nr_vertices); + printf(" "); + for (i = 0; i < nr_indices; i += 3) { + printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); + } + printf("\n"); +#elif 0 + printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", + nr_indices, nr_vertices, + indices[0], indices[1], indices[2]); + printf("ind space = %u, vert space = %u, space = %u\n", + nr_indices * 2, + nr_vertices * 4 * cell->vertex_info.size, + cell_batch_free_space(cell)); +#endif + + /* compute x/y bounding box */ + xmin = ymin = 1e50; + xmax = ymax = -1e50; + for (i = min_index; i < nr_vertices; i++) { + const float *v = (float *) ((ubyte *) vertices + i * vertex_size); + if (v[0] < xmin) + xmin = v[0]; + if (v[0] > xmax) + xmax = v[0]; + if (v[1] < ymin) + ymin = v[1]; + if (v[1] > ymax) + ymax = v[1]; + } +#if 0 + printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax); + fflush(stdout); +#endif + + if (cvbr->prim != PIPE_PRIM_TRIANGLES) + return; /* only render tris for now */ + + /* build/insert batch RENDER command */ + { + const uint index_bytes = ROUNDUP8(nr_indices * 2); + const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size; + const uint batch_size = sizeof(struct cell_command_render) + index_bytes; + + struct cell_command_render *render + = (struct cell_command_render *) + cell_batch_alloc(cell, batch_size); + + render->opcode = CELL_CMD_RENDER; + render->prim_type = cvbr->prim; + + render->num_indexes = nr_indices; + render->min_index = min_index; + + /* append indices after render command */ + memcpy(render + 1, indices, nr_indices * 2); + + /* if there's room, append vertices after the indices, else leave + * vertices in the original/separate buffer. + */ + render->vertex_size = 4 * cell->vertex_info.size; + render->num_verts = nr_vertices; + if (ALLOW_INLINE_VERTS && + min_index == 0 && + vertex_bytes + 16 <= cell_batch_free_space(cell)) { + /* vertex data inlined, after indices, at 16-byte boundary */ + void *dst = cell_batch_alloc_aligned(cell, vertex_bytes, 16); + memcpy(dst, vertices, vertex_bytes); + render->inline_verts = TRUE; + render->vertex_buf = ~0; + } + else { + /* vertex data in separate buffer */ + render->inline_verts = FALSE; + ASSERT(cvbr->vertex_buf >= 0); + render->vertex_buf = cvbr->vertex_buf; + } + + render->xmin = xmin; + render->ymin = ymin; + render->xmax = xmax; + render->ymax = ymax; + } + +#if 0 + /* helpful for debug */ + cell_flush_int(&cell->pipe, PIPE_FLUSH_WAIT); +#endif +} + + +static void +cell_vbuf_destroy(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + cvbr->cell->vbuf_render = NULL; + FREE(cvbr); +} + + +/** + * Initialize the post-transform vertex buffer information for the given + * context. + */ +void +cell_init_vbuf(struct cell_context *cell) +{ + assert(cell->draw); + + cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); + + /* The max number of indexes is what can fix into a batch buffer, + * minus the render and release-verts commands. + */ + cell->vbuf_render->base.max_indices + = (CELL_BUFFER_SIZE + - sizeof(struct cell_command_render) + - sizeof(struct cell_command_release_verts)) + / sizeof(ushort); + cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; + + cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; + cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; + cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; + cell->vbuf_render->base.draw = cell_vbuf_draw; + cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; + cell->vbuf_render->base.destroy = cell_vbuf_destroy; + + cell->vbuf_render->cell = cell; +#if 1 + cell->vbuf_render->vertex_buf = ~0; +#endif + + cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); +} diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h new file mode 100644 index 00000000000..d265cbf7701 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_VBUF_H +#define CELL_VBUF_H + + +struct cell_context; + +extern void +cell_init_vbuf(struct cell_context *cell); + + +#endif /* CELL_VBUF_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c new file mode 100644 index 00000000000..80dd500b345 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -0,0 +1,120 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file cell_vertex_shader.c + * Vertex shader interface routines for Cell. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_spu.h" +#include "cell_batch.h" + +#include "pipe/cell/common.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" + +/** + * Run the vertex shader on all vertices in the vertex queue. + * Called by the draw module when the vertx cache needs to be flushed. + */ +void +cell_vertex_shader_queue_flush(struct draw_context *draw) +{ + struct cell_context *const cell = + (struct cell_context *) draw->driver_private; + struct cell_command_vs *const vs = &cell_global.command[0].vs; + uint64_t *batch; + struct cell_array_info *array_info; + unsigned i, j; + + assert(draw->vs.queue_nr != 0); + + /* XXX: do this on statechange: + */ + draw_update_vertex_fetch(draw); + + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); + + batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; + + array_info = (struct cell_array_info *) &batch[1]; + assert(draw->vertex_fetch.src_ptr[i] != NULL); + array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; + array_info->attr = i; + array_info->pitch = draw->vertex_fetch.pitch[i]; + array_info->format = draw->vertex_element[i].src_format; + } + + batch = cell_batch_alloc(cell, sizeof(batch[0]) + + sizeof(struct pipe_viewport_state)); + batch[0] = CELL_CMD_STATE_VIEWPORT; + (void) memcpy(&batch[1], &draw->viewport, + sizeof(struct pipe_viewport_state)); + + cell_batch_flush(cell); + + vs->opcode = CELL_CMD_VS_EXECUTE; + vs->shader.num_outputs = draw->num_vs_outputs; + vs->shader.declarations = (uintptr_t) draw->machine.Declarations; + vs->shader.num_declarations = draw->machine.NumDeclarations; + vs->shader.instructions = (uintptr_t) draw->machine.Instructions; + vs->shader.num_instructions = draw->machine.NumInstructions; + vs->shader.uniforms = (uintptr_t) draw->user.constants; + vs->shader.immediates = (uintptr_t) draw->machine.Imms; + vs->shader.num_immediates = draw->machine.ImmLimit / 4; + vs->nr_attrs = draw->vertex_fetch.nr_attrs; + + (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); + vs->nr_planes = draw->nr_planes; + + for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { + const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i); + + for (j = 0; j < n; j++) { + vs->elts[j] = draw->vs.queue[i + j].elt; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].dest; + } + + for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { + vs->elts[j] = vs->elts[0]; + vs->vOut[j] = vs->vOut[0]; + } + + vs->num_elts = n; + send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); + + cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT); + } + + draw->vs.queue_nr = 0; +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/ppu/cell_winsys.c new file mode 100644 index 00000000000..ebabce3c8f5 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_winsys.c @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "cell_winsys.h" + + +struct cell_winsys * +cell_get_winsys(uint format) +{ + struct cell_winsys *cws = CALLOC_STRUCT(cell_winsys); + if (cws) + cws->preferredFormat = format; + return cws; +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.h b/src/gallium/drivers/cell/ppu/cell_winsys.h new file mode 100644 index 00000000000..ae2af5696b5 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_winsys.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_WINSYS_H +#define CELL_WINSYS_H + +#include "pipe/p_compiler.h" + + +/** + * Very simple winsys at this time. + * Will probably eventually add SPU control info. + */ +struct cell_winsys +{ + uint preferredFormat; +}; + + +extern struct cell_winsys * +cell_get_winsys(uint format); + + + +#endif diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile new file mode 100644 index 00000000000..f202971d738 --- /dev/null +++ b/src/gallium/drivers/cell/spu/Makefile @@ -0,0 +1,72 @@ +# Gallium3D Cell driver: SPU code + +# This makefile builds the g3d_spu.a file that's linked into the +# PPU code/library. + + +TOP = ../../../../.. +include $(TOP)/configs/linux-cell + + +PROG = g3d + +PROG_SPU = $(PROG)_spu +PROG_SPU_A = $(PROG)_spu.a +PROG_SPU_EMBED_O = $(PROG)_spu-embed.o + + +SOURCES = \ + spu_main.c \ + spu_blend.c \ + spu_render.c \ + spu_texture.c \ + spu_tile.c \ + spu_tri.c \ + spu_exec.c \ + spu_util.c \ + spu_vertex_fetch.c \ + spu_vertex_shader.c + +SPU_OBJECTS = $(SOURCES:.c=.o) \ + +SPU_ASM_OUT = $(SOURCES:.c=.s) \ + +INCLUDE_DIRS = -I$(TOP)/src/mesa + + +.c.o: + $(SPU_CC) $(SPU_CFLAGS) -c $< + +.c.s: + $(SPU_CC) $(SPU_CFLAGS) -S $< + + +# The .a file will be linked into the main/PPU executable +default: $(PROG_SPU_A) + +$(PROG_SPU_A): $(PROG_SPU_EMBED_O) + $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O) + +$(PROG_SPU_EMBED_O): $(PROG_SPU) + $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O) + +$(PROG_SPU): $(SPU_OBJECTS) + $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS) + + + +asmfiles: $(SPU_ASM_OUT) + + +clean: + rm -f *~ *.o *.a *.d *.s $(PROG_SPU) + + + +depend: $(SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + diff --git a/src/gallium/drivers/cell/spu/spu_blend.c b/src/gallium/drivers/cell/spu/spu_blend.c new file mode 100644 index 00000000000..23ec0eeb451 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_blend.c @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "spu_main.h" +#include "spu_blend.h" +#include "spu_colorpack.h" + + +void +blend_quad(uint itx, uint ity, vector float colors[4]) +{ + /* simple SRC_ALPHA, ONE_MINUS_SRC_ALPHA blending */ + vector float fbc00 = spu_unpack_color(spu.ctile.ui[ity][itx]); + vector float fbc01 = spu_unpack_color(spu.ctile.ui[ity][itx+1]); + vector float fbc10 = spu_unpack_color(spu.ctile.ui[ity+1][itx]); + vector float fbc11 = spu_unpack_color(spu.ctile.ui[ity+1][itx+1]); + + vector float alpha00 = spu_splats(spu_extract(colors[0], 3)); + vector float alpha01 = spu_splats(spu_extract(colors[1], 3)); + vector float alpha10 = spu_splats(spu_extract(colors[2], 3)); + vector float alpha11 = spu_splats(spu_extract(colors[3], 3)); + + vector float one_minus_alpha00 = spu_sub(spu_splats(1.0f), alpha00); + vector float one_minus_alpha01 = spu_sub(spu_splats(1.0f), alpha01); + vector float one_minus_alpha10 = spu_sub(spu_splats(1.0f), alpha10); + vector float one_minus_alpha11 = spu_sub(spu_splats(1.0f), alpha11); + + colors[0] = spu_add(spu_mul(colors[0], alpha00), + spu_mul(fbc00, one_minus_alpha00)); + colors[1] = spu_add(spu_mul(colors[1], alpha01), + spu_mul(fbc01, one_minus_alpha01)); + colors[2] = spu_add(spu_mul(colors[2], alpha10), + spu_mul(fbc10, one_minus_alpha10)); + colors[3] = spu_add(spu_mul(colors[3], alpha11), + spu_mul(fbc11, one_minus_alpha11)); +} + diff --git a/src/gallium/drivers/cell/spu/spu_blend.h b/src/gallium/drivers/cell/spu/spu_blend.h new file mode 100644 index 00000000000..2b594b578b4 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_blend.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_BLEND_H +#define SPU_BLEND_H + + +extern void +blend_quad(uint itx, uint ity, vector float colors[4]); + + +#endif /* SPU_BLEND_H */ diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h new file mode 100644 index 00000000000..e9fee8a3a61 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#ifndef SPU_COLORPACK_H +#define SPU_COLORPACK_H + + +#include <spu_intrinsics.h> + + +static INLINE unsigned int +spu_pack_R8G8B8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + + out = spu_shuffle(out, out, ((vector unsigned char) { + 0, 4, 8, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }) ); + + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_A8R8G8B8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_B8G8R8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, shuffle); + return spu_extract(out, 0); +} + + +static INLINE vector float +spu_unpack_color(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 0, 0, 0, 0, + 5, 5, 5, 5, + 10, 10, 10, 10, + 15, 15, 15, 15}) ); + return spu_convtf(color_u4, 32); +} + + +static INLINE vector float +spu_unpack_A8R8G8B8(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 5, 5, 5, 5, + 10, 10, 10, 10, + 15, 15, 15, 15, + 0, 0, 0, 0}) ); + + return spu_convtf(color_u4, 32); +} + + +#endif /* SPU_COLORPACK_H */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c new file mode 100644 index 00000000000..e51008b9b3c --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -0,0 +1,1948 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI interpretor/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers. This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). + * + * + * Authors: + * Michal Krol + * Brian Paul + */ + +#include <libmisc.h> +#include <spu_mfcio.h> +#include <transpose_matrix4x4.h> +#include <simdmath/ceilf4.h> +#include <simdmath/cosf4.h> +#include <simdmath/divf4.h> +#include <simdmath/floorf4.h> +#include <simdmath/log2f4.h> +#include <simdmath/powf4.h> +#include <simdmath/sinf4.h> +#include <simdmath/sqrtf4.h> +#include <simdmath/truncf4.h> + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" +#include "pipe/tgsi/util/tgsi_util.h" +#include "spu_exec.h" +#include "spu_main.h" +#include "spu_vertex_shader.h" + +#define TILE_TOP_LEFT 0 +#define TILE_TOP_RIGHT 1 +#define TILE_BOTTOM_LEFT 2 +#define TILE_BOTTOM_RIGHT 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ + for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + + + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call spu_exec_machine_run() many times. + */ +void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor) +{ + qword zero; + qword not_zero; + uint i; + + mach->Samplers = samplers; + mach->Processor = processor; + mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; + + zero = si_xor(zero, zero); + not_zero = si_xori(zero, 0xff); + + /* Setup constants. */ + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); + + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); +} + + +static INLINE qword +micro_abs(qword src) +{ + return si_rotmi(si_shli(src, 1), -1); +} + +static INLINE qword +micro_ceil(qword src) +{ + return (qword) _ceilf4((vec_float4) src); +} + +static INLINE qword +micro_cos(qword src) +{ + return (qword) _cosf4((vec_float4) src); +} + +static const qword br_shuf = { + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, +}; + +static const qword bl_shuf = { + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, +}; + +static const qword tl_shuf = { + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, +}; + +static qword +micro_ddx(qword src) +{ + qword bottom_right = si_shufb(src, src, br_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(bottom_right, bottom_left); +} + +static qword +micro_ddy(qword src) +{ + qword top_left = si_shufb(src, src, tl_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(top_left, bottom_left); +} + +static INLINE qword +micro_div(qword src0, qword src1) +{ + return (qword) _divf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_flr(qword src) +{ + return (qword) _floorf4((vec_float4) src); +} + +static qword +micro_frc(qword src) +{ + return si_fs(src, (qword) _floorf4((vec_float4) src)); +} + +static INLINE qword +micro_ge(qword src0, qword src1) +{ + return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); +} + +static qword +micro_lg2(qword src) +{ + return (qword) _log2f4((vec_float4) src); +} + +static INLINE qword +micro_lt(qword src0, qword src1) +{ + const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); + + return si_xori(tmp, 0xff); +} + +static INLINE qword +micro_max(qword src0, qword src1) +{ + return si_selb(src1, src0, si_fcgt(src0, src1)); +} + +static INLINE qword +micro_min(qword src0, qword src1) +{ + return si_selb(src0, src1, si_fcgt(src0, src1)); +} + +static qword +micro_neg(qword src) +{ + return si_xor(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_set_sign(qword src) +{ + return si_or(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_pow(qword src0, qword src1) +{ + return (qword) _powf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_rnd(qword src) +{ + const qword half = (qword) spu_splats(0.5f); + + /* May be able to use _roundf4. There may be some difference, though. + */ + return (qword) _floorf4((vec_float4) si_fa(src, half)); +} + +static INLINE qword +micro_ishr(qword src0, qword src1) +{ + return si_rotma(src0, si_sfi(src1, 0)); +} + +static qword +micro_trunc(qword src) +{ + return (qword) _truncf4((vec_float4) src); +} + +static qword +micro_sin(qword src) +{ + return (qword) _sinf4((vec_float4) src); +} + +static INLINE qword +micro_sqrt(qword src) +{ + return (qword) _sqrtf4((vec_float4) src); +} + +static void +fetch_src_file_channel( + const struct spu_exec_machine *mach, + const uint file, + const uint swizzle, + const union spu_exec_channel *index, + union spu_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: { + unsigned char buffer[32] ALIGN16_ATTRIB; + unsigned i; + + for (i = 0; i < 4; i++) { + const float *ptr = mach->Consts[index->i[i]]; + const uint64_t addr = (uint64_t)(uintptr_t) ptr; + const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32; + + mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + (void) memcpy(& chan->f[i], &buffer[(addr & 0x0f) + + (sizeof(float) * swizzle)], sizeof(float)); + } + break; + } + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + assert( index->i[0] < (int) mach->ImmLimit ); + assert( index->i[1] < (int) mach->ImmLimit ); + assert( index->i[2] < (int) mach->ImmLimit ); + assert( index->i[3] < (int) mach->ImmLimit ); + + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + chan->f[2] = mach->Imms[index->i[2]][swizzle]; + chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + assert( 0 ); + } +} + +static void +fetch_source( + const struct spu_exec_machine *mach, + union spu_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index ) +{ + union spu_exec_channel index; + uint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, + CHAN_X); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.q = si_mpyi(index.q, 17); + break; + case TGSI_FILE_CONSTANT: + index.q = si_shli(index.q, 12); + break; + default: + assert( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + chan->q = micro_abs(chan->q); + break; + + case TGSI_UTIL_SIGN_SET: + chan->q = micro_set_sign(chan->q); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + chan->q = micro_neg(chan->q); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } + + if (reg->SrcRegisterExtMod.Complement) { + chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); + } +} + +static void +store_dest( + struct spu_exec_machine *mach, + const union spu_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index ) +{ + union spu_exec_channel *dst; + + switch( reg->DstRegister.File ) { + case TGSI_FILE_NULL: + return; + + case TGSI_FILE_OUTPUT: + dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + assert( 0 ); + return; + } + + switch (inst->Instruction.Saturate) + { + case TGSI_SAT_NONE: + if (mach->ExecMask & 0x1) + dst->i[0] = chan->i[0]; + if (mach->ExecMask & 0x2) + dst->i[1] = chan->i[1]; + if (mach->ExecMask & 0x4) + dst->i[2] = chan->i[2]; + if (mach->ExecMask & 0x8) + dst->i[3] = chan->i[3]; + break; + + case TGSI_SAT_ZERO_ONE: + /* XXX need to obey ExecMask here */ + dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + + default: + assert( 0 ); + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ +static void +exec_kilp(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union spu_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + + +/* + * Fetch a texel using STR texture coordinates. + */ +static void +fetch_texel( struct spu_sampler *sampler, + const union spu_exec_channel *s, + const union spu_exec_channel *t, + const union spu_exec_channel *p, + float lodbias, /* XXX should be float[4] */ + union spu_exec_channel *r, + union spu_exec_channel *g, + union spu_exec_channel *b, + union spu_exec_channel *a ) +{ + qword rgba[4]; + qword out[4]; + + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba); + + _transpose_matrix4x4(out, rgba); + r->q = out[0]; + g->q = out[1]; + b->q = out[2]; + a->q = out[3]; +} + + +static void +exec_tex(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + boolean biasLod) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + union spu_exec_channel r[8]; + uint chan_index; + float lodBias; + + /* printf("Sampler %u unit %u\n", sampler, unit); */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[1], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[1].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[1], 0, CHAN_W); + lodBias = r[2].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } +} + + + +static void +constant_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +static void +linear_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +static void +perspective_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* interpolation_func)( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration(struct spu_exec_machine *mach, + const struct tgsi_full_declaration *decl) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + interpolation_func interp; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + first = decl->u.DeclarationRange.First; + last = decl->u.DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + switch( decl->Interpolation.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + interp = constant_interpolation; + break; + + case TGSI_INTERPOLATE_LINEAR: + interp = linear_interpolation; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = perspective_interpolation; + break; + + default: + assert( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + interp( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + interp( mach, i, j ); + } + } + } + } + } + } +} + +static void +exec_instruction( + struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + int *pc ) +{ + uint chan_index; + union spu_exec_channel r[8]; + + (*pc)++; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_cflts(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + /* TGSI_OPCODE_SWZ */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + + FETCH( &r[2], 0, CHAN_W ); + r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); + r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); + r[1].q = micro_pow(r[1].q, r[2].q); + + /* r0 = (r0 > 0.0) ? r1 : 0.0 + */ + r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, + r[0].q); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sqrt(r[0].q); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + assert (0); + break; + + case TGSI_OPCODE_LOG: + assert (0); + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fm(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fa(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + r[0].q = si_fm(r[0].q, r[1].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_min(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_max(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = micro_ge(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ge(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + FETCH( &r[2], 2, chan_index ); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fs(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + r[1].q = si_fs(r[1].q, r[2].q); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + assert (0); + break; + + case TGSI_OPCODE_CND0: + assert (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + assert (0); + break; + + case TGSI_OPCODE_INDEX: + assert (0); + break; + + case TGSI_OPCODE_NEGATE: + assert (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_frc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + assert (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_flr(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_rnd(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_lg2(r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = micro_pow(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + /* r2 = (r0 * r1) - (r3 * r5) + */ + r[2].q = si_fm(r[3].q, r[5].q); + r[2].q = si_fms(r[0].q, r[1].q, r[2].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + FETCH(&r[5], 0, CHAN_X); + + /* r3 = (r3 * r2) - (r1 * r5) + */ + r[1].q = si_fm(r[1].q, r[5].q); + r[3].q = si_fms(r[3].q, r[2].q, r[1].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + /* r5 = (r5 * r4) - (r0 * r2) + */ + r[0].q = si_fm(r[0].q, r[2].q); + r[5].q = si_fms(r[5].q, r[4].q, r[0].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + assert (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + + r[0].q = micro_abs(r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + assert (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 1, CHAN_W); + + r[0].q = si_fa(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_cos(r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddx(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddy(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_KILP: + exec_kilp (mach, inst); + break; + + case TGSI_OPCODE_KIL: + /* for enabled ExecMask bits, set the killed bit */ + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + break; + + case TGSI_OPCODE_PK2H: + assert (0); + break; + + case TGSI_OPCODE_PK2US: + assert (0); + break; + + case TGSI_OPCODE_PK4B: + assert (0); + break; + + case TGSI_OPCODE_PK4UB: + assert (0); + break; + + case TGSI_OPCODE_RFL: + assert (0); + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SFL: + assert (0); + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fcgt(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SIN: + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sin(r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fcgt(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SNE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_STR: + assert (0); + break; + + case TGSI_OPCODE_TEX: + /* simple texture lookup */ + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE); + break; + + case TGSI_OPCODE_TXB: + /* Texture lookup with lod bias */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE); + break; + + case TGSI_OPCODE_TXD: + /* Texture lookup with explict partial derivatives */ + /* src[0] = texcoord */ + /* src[1] = d[strq]/dx */ + /* src[2] = d[strq]/dy */ + /* src[3] = sampler unit */ + assert (0); + break; + + case TGSI_OPCODE_TXL: + /* Texture lookup with explit LOD */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE); + break; + + case TGSI_OPCODE_UP2H: + assert (0); + break; + + case TGSI_OPCODE_UP2US: + assert (0); + break; + + case TGSI_OPCODE_UP4B: + assert (0); + break; + + case TGSI_OPCODE_UP4UB: + assert (0); + break; + + case TGSI_OPCODE_X2D: + assert (0); + break; + + case TGSI_OPCODE_ARA: + assert (0); + break; + + case TGSI_OPCODE_ARR: + assert (0); + break; + + case TGSI_OPCODE_BRA: + assert (0); + break; + + case TGSI_OPCODE_CAL: + /* skip the call if no execution channels are enabled */ + if (mach->ExecMask) { + /* do the call */ + + /* push the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + + assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop++] = *pc; + *pc = inst->InstructionExtLabel.Label; + } + break; + + case TGSI_OPCODE_RET: + mach->FuncMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); + + if (mach->ExecMask == 0x0) { + /* really return now (otherwise, keep executing */ + + if (mach->CallStackTop == 0) { + /* returning from main() */ + *pc = -1; + return; + } + *pc = mach->CallStack[--mach->CallStackTop]; + + /* pop the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + UPDATE_EXEC_MASK(mach); + } + break; + + case TGSI_OPCODE_SSG: + assert (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + /* r0 = (r0 < 0.0) ? r1 : r2 + */ + r[3].q = si_xor(r[3].q, r[3].q); + r[0].q = micro_lt(r[0].q, r[3].q); + r[0].q = si_selb(r[1].q, r[2].q, r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + r[1].q = micro_cos(r[0].q); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + r[1].q = micro_sin(r[0].q); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_NRM: + assert (0); + break; + + case TGSI_OPCODE_DIV: + assert( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_IF: + /* push CondMask */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + FETCH( &r[0], 0, CHAN_X ); + /* update CondMask */ + if( ! r[0].u[0] ) { + mach->CondMask &= ~0x1; + } + if( ! r[0].u[1] ) { + mach->CondMask &= ~0x2; + } + if( ! r[0].u[2] ) { + mach->CondMask &= ~0x4; + } + if( ! r[0].u[3] ) { + mach->CondMask &= ~0x8; + } + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ELSE */ + break; + + case TGSI_OPCODE_ELSE: + /* invert CondMask wrt previous mask */ + { + uint prevMask; + assert(mach->CondStackTop > 0); + prevMask = mach->CondStack[mach->CondStackTop - 1]; + mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ENDIF */ + } + break; + + case TGSI_OPCODE_ENDIF: + /* pop CondMask */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_END: + /* halt execution */ + *pc = -1; + break; + + case TGSI_OPCODE_REP: + assert (0); + break; + + case TGSI_OPCODE_ENDREP: + assert (0); + break; + + case TGSI_OPCODE_PUSHA: + assert (0); + break; + + case TGSI_OPCODE_POPA: + assert (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ceil(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_csflt(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_xorbi(r[0].q, 0xff); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_trunc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_shl(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ishr(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_and(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_or(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + assert (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_xor(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + assert (0); + break; + + case TGSI_OPCODE_TXF: + assert (0); + break; + + case TGSI_OPCODE_TXQ: + assert (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + case TGSI_OPCODE_LOOP: + /* fall-through (for now) */ + case TGSI_OPCODE_BGNLOOP2: + /* push LoopMask and ContMasks */ + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + break; + + case TGSI_OPCODE_ENDLOOP: + /* fall-through (for now at least) */ + case TGSI_OPCODE_ENDLOOP2: + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + if (mach->LoopMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + *pc = inst->InstructionExtLabel.Label + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + } + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BRK: + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_CONT: + /* turn off cont channels for each enabled exec channel */ + mach->ContMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BGNSUB: + /* no-op */ + break; + + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + + case TGSI_OPCODE_NOISE1: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE2: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE3: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE4: + assert( 0 ); + break; + + case TGSI_OPCODE_NOP: + break; + + default: + assert( 0 ); + } +} + + +/** + * Run TGSI interpreter. + * \return bitmask of "alive" quad components + */ +uint +spu_exec_machine_run( struct spu_exec_machine *mach ) +{ + uint i; + int pc = 0; + + mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ContMask = 0xf; + mach->FuncMask = 0xf; + mach->ExecMask = 0xf; + + mach->CondStackTop = 0; /* temporarily subvert this assertion */ + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + + /* execute declarations (interpolants) */ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + for (i = 0; i < mach->NumDeclarations; i++) { + uint8_t buffer[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB; + struct tgsi_full_declaration decl; + unsigned long decl_addr = (unsigned long) (mach->Declarations+i); + unsigned size = ((sizeof(decl) + (decl_addr & 0x0f) + 0x0f) & ~0x0f); + + mfc_get(buffer, decl_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); + wait_on_mask(1 << TAG_INSTRUCTION_FETCH); + + memcpy(& decl, buffer + (decl_addr & 0x0f), sizeof(decl)); + exec_declaration( mach, &decl ); + } + } + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + uint8_t buffer[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB; + struct tgsi_full_instruction inst; + unsigned long inst_addr = (unsigned long) (mach->Instructions + pc); + unsigned size = ((sizeof(inst) + (inst_addr & 0x0f) + 0x0f) & ~0x0f); + + assert(pc < mach->NumInstructions); + mfc_get(buffer, inst_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); + wait_on_mask(1 << TAG_INSTRUCTION_FETCH); + + memcpy(& inst, buffer + (inst_addr & 0x0f), sizeof(inst)); + exec_instruction( mach, & inst, &pc ); + } + +#if 0 + /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. + */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif + + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h new file mode 100644 index 00000000000..b4c7661ef67 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -0,0 +1,172 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if !defined SPU_EXEC_H +#define SPU_EXEC_H + +#include "pipe/p_compiler.h" +#include "pipe/tgsi/exec/tgsi_exec.h" + +#if defined __cplusplus +extern "C" { +#endif + +/** + * Registers may be treated as float, signed int or unsigned int. + */ +union spu_exec_channel +{ + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; + qword q; +}; + +/** + * A vector[RGBA] of channels[4 pixels] + */ +struct spu_exec_vector +{ + union spu_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct spu_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache; /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct spu_sampler +{ + const struct pipe_sampler_state *state; + struct pipe_texture *texture; + /** Get samples for four fragments in a quad */ + void (*get_samples)(struct spu_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + void *pipe; /*XXX temporary*/ + struct softpipe_tile_cache *cache; +}; + + +/** + * Run-time virtual machine state for executing TGSI shader. + */ +struct spu_exec_machine +{ + /* + * 32 program temporaries + * 4 internal temporaries + * 1 address + */ + struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS + + TGSI_EXEC_NUM_ADDRS + 1] + ALIGN16_ATTRIB; + + struct spu_exec_vector *Addrs; + + struct spu_sampler *Samplers; + + float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + unsigned ImmLimit; + float (*Consts)[4]; + struct spu_exec_vector *Inputs; + struct spu_exec_vector *Outputs; + unsigned Processor; + + /* GEOMETRY processor only. */ + unsigned *Primitives; + + /* FRAGMENT processor only. */ + const struct spu_interp_coef *InterpCoefs; + struct spu_exec_vector QuadPos; + + /* Conditional execution masks */ + uint CondMask; /**< For IF/ELSE/ENDIF */ + uint LoopMask; /**< For BGNLOOP/ENDLOOP */ + uint ContMask; /**< For loop CONT statements */ + uint FuncMask; /**< For function calls */ + uint ExecMask; /**< = CondMask & LoopMask */ + + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; + int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; + + /** Loop continue mask stack (see comments in tgsi_exec.c) */ + uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int ContStackTop; + + /** Function execution mask stack (for executing subroutine code) */ + uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; + int FuncStackTop; + + /** Function call stack for saving/restoring the program counter */ + uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + int CallStackTop; + + struct tgsi_full_instruction *Instructions; + uint NumInstructions; + + struct tgsi_full_declaration *Declarations; + uint NumDeclarations; +}; + + +extern void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor); + +extern uint +spu_exec_machine_run( struct spu_exec_machine *mach ); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* SPU_EXEC_H */ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c new file mode 100644 index 00000000000..e375197fe60 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -0,0 +1,567 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* main() for Cell SPU code */ + + +#include <stdio.h> +#include <libmisc.h> + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_texture.h" +#include "spu_tile.h" +//#include "spu_test.h" +#include "spu_vertex_shader.h" +#include "pipe/cell/common.h" +#include "pipe/p_defines.h" + + +/* +helpful headers: +/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h +/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h +*/ + +boolean Debug = FALSE; + +struct spu_global spu; + +struct spu_vs_context draw; + +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) +{ + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const uint status[4] ALIGN16_ATTRIB + = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; + + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; + + ASSERT(buffer < CELL_NUM_BUFFERS); + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} + + +/** + * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled + * tiles back to the main framebuffer. + */ +static void +really_clear_tiles(uint surfaceIndex) +{ + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + + if (surfaceIndex == 0) { + clear_c_tile(&spu.ctile); + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + } + } + } + else { + clear_z_tile(&spu.ztile); + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); + } + } + +#if 0 + wait_on_mask(1 << TAG_SURFACE_CLEAR); +#endif +} + + +static void +cmd_clear_surface(const struct cell_command_clear_surface *clear) +{ + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + + if (Debug) + printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, + clear->surface, clear->value); + +#define CLEAR_OPT 1 +#if CLEAR_OPT + /* set all tile's status to CLEAR */ + if (clear->surface == 0) { + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); + spu.fb.color_clear_value = clear->value; + } + else { + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); + spu.fb.depth_clear_value = clear->value; + } + return; +#endif + + if (clear->surface == 0) { + spu.fb.color_clear_value = clear->value; + clear_c_tile(&spu.ctile); + } + else { + spu.fb.depth_clear_value = clear->value; + clear_z_tile(&spu.ztile); + } + + /* + printf("SPU: %s num=%d w=%d h=%d\n", + __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); + */ + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (clear->surface == 0) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + else + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + /* XXX we don't want this here, but it fixes bad tile results */ + } + +#if 0 + wait_on_mask(1 << TAG_SURFACE_CLEAR); +#endif + + if (Debug) + printf("SPU %u: CLEAR SURF done\n", spu.init.id); +} + + +static void +cmd_release_verts(const struct cell_command_release_verts *release) +{ + if (Debug) + printf("SPU %u: RELEASE VERTS %u\n", + spu.init.id, release->vertex_buf); + ASSERT(release->vertex_buf != ~0U); + release_buffer(release->vertex_buf); +} + + +static void +cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) +{ + if (Debug) + printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + spu.init.id, + cmd->width, + cmd->height, + cmd->color_start, + cmd->color_format, + cmd->depth_format); + + ASSERT_ALIGN16(cmd->color_start); + ASSERT_ALIGN16(cmd->depth_start); + + spu.fb.color_start = cmd->color_start; + spu.fb.depth_start = cmd->depth_start; + spu.fb.color_format = cmd->color_format; + spu.fb.depth_format = cmd->depth_format; + spu.fb.width = cmd->width; + spu.fb.height = cmd->height; + spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; + spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; + + if (spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM) + spu.fb.zsize = 4; + else if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) + spu.fb.zsize = 2; + else + spu.fb.zsize = 0; + + if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) + spu.color_shuffle = ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); + else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM) + spu.color_shuffle = ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); + else + ASSERT(0); +} + + +static void +cmd_state_blend(const struct pipe_blend_state *state) +{ + if (Debug) + printf("SPU %u: BLEND: enabled %d\n", + spu.init.id, + state->blend_enable); + + memcpy(&spu.blend, state, sizeof(*state)); +} + + +static void +cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state *state) +{ + if (Debug) + printf("SPU %u: DEPTH_STENCIL: ztest %d\n", + spu.init.id, + state->depth.enabled); + + memcpy(&spu.depth_stencil, state, sizeof(*state)); +} + + +static void +cmd_state_sampler(const struct pipe_sampler_state *state) +{ + if (Debug) + printf("SPU %u: SAMPLER\n", + spu.init.id); + + memcpy(&spu.sampler[0], state, sizeof(*state)); + if (spu.sampler[0].min_img_filter == PIPE_TEX_FILTER_LINEAR) + spu.sample_texture = sample_texture_bilinear; + else + spu.sample_texture = sample_texture_nearest; +} + + +static void +cmd_state_texture(const struct cell_command_texture *texture) +{ + if (Debug) + printf("SPU %u: TEXTURE at %p size %u x %u\n", + spu.init.id, texture->start, texture->width, texture->height); + + memcpy(&spu.texture, texture, sizeof(*texture)); + spu.tex_size = (vector float) + { spu.texture.width, spu.texture.height, 0.0, 0.0}; + spu.tex_size_mask = (vector unsigned int) + { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ + if (Debug) { + printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, + vinfo->num_attribs); + } + ASSERT(vinfo->num_attribs >= 1); + ASSERT(vinfo->num_attribs <= 8); + memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); +} + + +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ + const unsigned attr = vs_info->attr; + + ASSERT(attr < PIPE_ATTRIB_MAX); + draw.vertex_fetch.src_ptr[attr] = vs_info->base; + draw.vertex_fetch.pitch[attr] = vs_info->pitch; + draw.vertex_fetch.format[attr] = vs_info->format; + draw.vertex_fetch.dirty = 1; +} + + +static void +cmd_finish(void) +{ + if (Debug) + printf("SPU %u: FINISH\n", spu.init.id); + really_clear_tiles(0); + /* wait for all outstanding DMAs to finish */ + mfc_write_tag_mask(~0); + mfc_read_tag_status_all(); + /* send mbox message to PPU */ + spu_write_out_mbox(CELL_CMD_FINISH); +} + + +/** + * Execute a batch of commands + * The opcode param encodes the location of the buffer and its size. + */ +static void +cmd_batch(uint opcode) +{ + const uint buf = (opcode >> 8) & 0xff; + uint size = (opcode >> 16); + uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; + const unsigned usize = size / sizeof(buffer[0]); + uint pos; + + if (Debug) + printf("SPU %u: BATCH buffer %u, len %u, from %p\n", + spu.init.id, buf, size, spu.init.buffers[buf]); + + ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + size = ROUNDUP16(size); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + mfc_get(buffer, /* dest */ + (unsigned int) spu.init.buffers[buf], /* src */ + size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + /* Tell PPU we're done copying the buffer to local store */ + if (Debug) + printf("SPU %u: release batch buf %u\n", spu.init.id, buf); + release_buffer(buf); + + for (pos = 0; pos < usize; /* no incr */) { + switch (buffer[pos]) { + case CELL_CMD_STATE_FRAMEBUFFER: + { + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 8; + } + break; + case CELL_CMD_CLEAR_SURFACE: + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) &buffer[pos]; + cmd_clear_surface(clr); + pos += sizeof(*clr) / 8; + } + break; + case CELL_CMD_RENDER: + { + struct cell_command_render *render + = (struct cell_command_render *) &buffer[pos]; + uint pos_incr; + cmd_render(render, &pos_incr); + pos += pos_incr; + } + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; + } + break; + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_STATE_BLEND: + cmd_state_blend((struct pipe_blend_state *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); + break; + case CELL_CMD_STATE_DEPTH_STENCIL: + cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8); + break; + case CELL_CMD_STATE_SAMPLER: + cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_sampler_state)) / 8); + break; + case CELL_CMD_STATE_TEXTURE: + cmd_state_texture((struct cell_command_texture *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_command_texture)) / 8); + break; + case CELL_CMD_STATE_VERTEX_INFO: + cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); + break; + case CELL_CMD_STATE_VIEWPORT: + (void) memcpy(& draw.viewport, &buffer[pos+1], + sizeof(struct pipe_viewport_state)); + pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); + break; + case CELL_CMD_STATE_VS_ARRAY_INFO: + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); + break; + default: + printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); + ASSERT(0); + break; + } + } + + if (Debug) + printf("SPU %u: BATCH complete\n", spu.init.id); +} + + +/** + * Temporary/simple main loop for SPEs: Get a command, execute it, repeat. + */ +static void +main_loop(void) +{ + struct cell_command cmd; + int exitFlag = 0; + + if (Debug) + printf("SPU %u: Enter main loop\n", spu.init.id); + + ASSERT((sizeof(struct cell_command) & 0xf) == 0); + ASSERT_ALIGN16(&cmd); + + while (!exitFlag) { + unsigned opcode; + int tag = 0; + + if (Debug) + printf("SPU %u: Wait for cmd...\n", spu.init.id); + + /* read/wait from mailbox */ + opcode = (unsigned int) spu_read_in_mbox(); + + if (Debug) + printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode); + + /* command payload */ + mfc_get(&cmd, /* dest */ + (unsigned int) spu.init.cmd, /* src */ + sizeof(struct cell_command), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); + + /* + * NOTE: most commands should be contained in a batch buffer + */ + + switch (opcode & CELL_CMD_OPCODE_MASK) { + case CELL_CMD_EXIT: + if (Debug) + printf("SPU %u: EXIT\n", spu.init.id); + exitFlag = 1; + break; + case CELL_CMD_VS_EXECUTE: + spu_execute_vertex_shader(&draw, &cmd.vs); + break; + case CELL_CMD_BATCH: + cmd_batch(opcode); + break; + default: + printf("Bad opcode!\n"); + } + + } + + if (Debug) + printf("SPU %u: Exit main loop\n", spu.init.id); +} + + + +static void +one_time_init(void) +{ + memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); + memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); + invalidate_tex_cache(); +} + + + +/* In some versions of the SDK the SPE main takes 'unsigned long' as a + * parameter. In others it takes 'unsigned long long'. Use a define to + * select between the two. + */ +#ifdef SPU_MAIN_PARAM_LONG_LONG +typedef unsigned long long main_param_t; +#else +typedef unsigned long main_param_t; +#endif + +/** + * SPE entrypoint. + */ +int +main(main_param_t speid, main_param_t argp) +{ + int tag = 0; + + (void) speid; + + ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); + ASSERT(sizeof(struct cell_command_render) % 8 == 0); + + one_time_init(); + + if (Debug) + printf("SPU: main() speid=%lu\n", speid); + + mfc_get(&spu.init, /* dest */ + (unsigned int) argp, /* src */ + sizeof(struct cell_init_info), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); + +#if 0 + if (spu.init.id==0) + spu_test_misc(); +#endif + + main_loop(); + + return 0; +} diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h new file mode 100644 index 00000000000..1710a175123 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -0,0 +1,177 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_MAIN_H +#define SPU_MAIN_H + + +#include <spu_mfcio.h> + +#include "pipe/cell/common.h" +#include "pipe/draw/draw_vertex.h" +#include "pipe/p_state.h" + + + +#define MAX_WIDTH 1024 +#define MAX_HEIGHT 1024 + + +typedef union { + ushort us[TILE_SIZE][TILE_SIZE]; + uint ui[TILE_SIZE][TILE_SIZE]; + vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; + vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; +} tile_t; + + +#define TILE_STATUS_CLEAR 1 +#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ +#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ +#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ +#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ + + +struct spu_framebuffer { + void *color_start; /**< addr of color surface in main memory */ + void *depth_start; /**< addr of depth surface in main memory */ + enum pipe_format color_format; + enum pipe_format depth_format; + uint width, height; /**< size in pixels */ + uint width_tiles, height_tiles; /**< width and height in tiles */ + + uint color_clear_value; + uint depth_clear_value; + + uint zsize; /**< 0, 2 or 4 bytes per Z */ +} ALIGN16_ATTRIB; + + +/** + * All SPU global/context state will be in singleton object of this type: + */ +struct spu_global +{ + struct cell_init_info init; + + struct spu_framebuffer fb; + struct pipe_blend_state blend_stencil; + struct pipe_depth_stencil_alpha_state depth_stencil; + struct pipe_blend_state blend; + struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; + struct cell_command_texture texture; + + struct vertex_info vertex_info; + + /* XXX more state to come */ + + + /** current color and Z tiles */ + tile_t ctile ALIGN16_ATTRIB; + tile_t ztile ALIGN16_ATTRIB; + + /** Current tiles' status */ + ubyte cur_ctile_status, cur_ztile_status; + + /** Status of all tiles in framebuffer */ + ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + + + /** for converting RGBA to PIPE_FORMAT_x colors */ + vector unsigned char color_shuffle; + + vector float tex_size; + vector unsigned int tex_size_mask; /**< == int(size - 1) */ + + vector float (*sample_texture)(vector float texcoord); + +} ALIGN16_ATTRIB; + + +extern struct spu_global spu; +extern boolean Debug; + + + + +/* DMA TAGS */ + +#define TAG_SURFACE_CLEAR 10 +#define TAG_VERTEX_BUFFER 11 +#define TAG_READ_TILE_COLOR 12 +#define TAG_READ_TILE_Z 13 +#define TAG_WRITE_TILE_COLOR 14 +#define TAG_WRITE_TILE_Z 15 +#define TAG_INDEX_BUFFER 16 +#define TAG_BATCH_BUFFER 17 +#define TAG_MISC 18 +#define TAG_TEXTURE_TILE 19 +#define TAG_INSTRUCTION_FETCH 20 + + + +static INLINE void +wait_on_mask(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_any(); +} + + +static INLINE void +wait_on_mask_all(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_all(); +} + + + + + +static INLINE void +memset16(ushort *d, ushort value, uint count) +{ + uint i; + for (i = 0; i < count; i++) + d[i] = value; +} + + +static INLINE void +memset32(uint *d, uint value, uint count) +{ + uint i; + for (i = 0; i < count; i++) + d[i] = value; +} + + +#endif /* SPU_MAIN_H */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c new file mode 100644 index 00000000000..932fb500b3f --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -0,0 +1,301 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdio.h> +#include <libmisc.h> +#include <spu_mfcio.h> + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_tri.h" +#include "spu_tile.h" +#include "pipe/cell/common.h" + + + +/** + * Given a rendering command's bounding box (in pixels) compute the + * location of the corresponding screen tile bounding box. + */ +static INLINE void +tile_bounding_box(const struct cell_command_render *render, + uint *txmin, uint *tymin, + uint *box_num_tiles, uint *box_width_tiles) +{ +#if 0 + /* Debug: full-window bounding box */ + uint txmax = spu.fb.width_tiles - 1; + uint tymax = spu.fb.height_tiles - 1; + *txmin = 0; + *tymin = 0; + *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + *box_width_tiles = spu.fb.width_tiles; + (void) render; + (void) txmax; + (void) tymax; +#else + uint txmax, tymax, box_height_tiles; + + *txmin = (uint) render->xmin / TILE_SIZE; + *tymin = (uint) render->ymin / TILE_SIZE; + txmax = (uint) render->xmax / TILE_SIZE; + tymax = (uint) render->ymax / TILE_SIZE; + if (txmax >= spu.fb.width_tiles) + txmax = spu.fb.width_tiles-1; + if (tymax >= spu.fb.height_tiles) + tymax = spu.fb.height_tiles-1; + *box_width_tiles = txmax - *txmin + 1; + box_height_tiles = tymax - *tymin + 1; + *box_num_tiles = *box_width_tiles * box_height_tiles; +#endif +#if 0 + printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, + render->xmin, render->ymin, render->xmax, render->ymax); + printf("SPU %u: tiles: %u, %u .. %u, %u\n", + spu.init.id, *txmin, *tymin, txmax, tymax); + ASSERT(render->xmin <= render->xmax); + ASSERT(render->ymin <= render->ymax); +#endif +} + + +/** Check if the tile at (tx,ty) belongs to this SPU */ +static INLINE boolean +my_tile(uint tx, uint ty) +{ + return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; +} + + +/** + * Start fetching non-clear color/Z tiles from main memory + */ +static INLINE void +get_cz_tiles(uint tx, uint ty) +{ + if (spu.depth_stencil.depth.enabled) { + if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_GETTING; + } + } + + if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_GETTING; + } +} + + +/** + * Start putting dirty color/Z tiles back to main memory + */ +static INLINE void +put_cz_tiles(uint tx, uint ty) +{ + if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ztile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); + } + + if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ctile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); + } +} + + +/** + * Wait for 'put' of color/z tiles to complete. + */ +static INLINE void +wait_put_cz_tiles(void) +{ + wait_on_mask(1 << TAG_WRITE_TILE_COLOR); + if (spu.depth_stencil.depth.enabled) { + wait_on_mask(1 << TAG_WRITE_TILE_Z); + } +} + + +/** + * Render primitives + * \param pos_incr returns value indicating how may words to skip after + * this command in the batch buffer + */ +void +cmd_render(const struct cell_command_render *render, uint *pos_incr) +{ + /* we'll DMA into these buffers */ + ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + const uint vertex_size = render->vertex_size; /* in bytes */ + /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; + uint index_bytes; + const ubyte *vertices; + const ushort *indexes; + uint i, j; + + + if (Debug) { + printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " + "inline_vert=%u\n", + spu.init.id, + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); + + /* + printf(" bound: %g, %g .. %g, %g\n", + render->xmin, render->ymin, render->xmax, render->ymax); + */ + } + + ASSERT(sizeof(*render) % 4 == 0); + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); + ASSERT(render->num_indexes % 3 == 0); + + + /* indexes are right after the render command in the batch buffer */ + indexes = (const ushort *) (render + 1); + index_bytes = ROUNDUP8(render->num_indexes * 2); + *pos_incr = index_bytes / 8 + sizeof(*render) / 8; + + + if (render->inline_verts) { + /* Vertices are after indexes in batch buffer at next 16-byte addr */ + vertices = (const ubyte *) render + (*pos_incr * 8); + vertices = (const ubyte *) align_pointer((void *) vertices, 16); + ASSERT_ALIGN16(vertices); + *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; + } + else { + /* Begin DMA fetch of vertex buffer */ + ubyte *src = spu.init.buffers[render->vertex_buf]; + ubyte *dest = vertex_data; + + /* skip vertex data we won't use */ +#if 01 + src += render->min_index * vertex_size; + dest += render->min_index * vertex_size; + total_vertex_bytes -= render->min_index * vertex_size; +#endif + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT_ALIGN16(dest); + ASSERT_ALIGN16(src); + + mfc_get(dest, /* in vertex_data[] array */ + (unsigned int) src, /* src in main memory */ + total_vertex_bytes, /* size */ + TAG_VERTEX_BUFFER, + 0, /* tid */ + 0 /* rid */); + + vertices = vertex_data; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + } + + + /** + ** find tiles which intersect the prim bounding box + **/ + uint txmin, tymin, box_width_tiles, box_num_tiles; + tile_bounding_box(render, &txmin, &tymin, + &box_num_tiles, &box_width_tiles); + + + /* make sure any pending clears have completed */ + wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + + + /** + ** loop over tiles, rendering tris + **/ + for (i = 0; i < box_num_tiles; i++) { + const uint tx = txmin + i % box_width_tiles; + const uint ty = tymin + i / box_width_tiles; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + + if (!my_tile(tx, ty)) + continue; + + spu.cur_ctile_status = spu.ctile_status[ty][tx]; + spu.cur_ztile_status = spu.ztile_status[ty][tx]; + + get_cz_tiles(tx, ty); + + uint drawn = 0; + + /* loop over tris */ + for (j = 0; j < render->num_indexes; j += 3) { + const float *v0, *v1, *v2; + + v0 = (const float *) (vertices + indexes[j+0] * vertex_size); + v1 = (const float *) (vertices + indexes[j+1] * vertex_size); + v2 = (const float *) (vertices + indexes[j+2] * vertex_size); + + drawn += tri_draw(v0, v1, v2, tx, ty); + } + + //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); + + /* write color/z tiles back to main framebuffer, if dirtied */ + put_cz_tiles(tx, ty); + + wait_put_cz_tiles(); /* XXX seems unnecessary... */ + + spu.ctile_status[ty][tx] = spu.cur_ctile_status; + spu.ztile_status[ty][tx] = spu.cur_ztile_status; + } + + if (Debug) + printf("SPU %u: RENDER done\n", + spu.init.id); +} + + diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h new file mode 100644 index 00000000000..fbcdc5ec316 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_RENDER_H +#define SPU_RENDER_H + +#include "pipe/cell/common.h" + +extern void +cmd_render(const struct cell_command_render *render, uint *pos_incr); + +#endif /* SPU_RENDER_H */ + diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c new file mode 100644 index 00000000000..3962aaa4a9b --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -0,0 +1,217 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_colorpack.h" + + +/** + * Number of texture tiles to cache. + * Note that this will probably be the largest consumer of SPU local store/ + * memory for this driver! + */ +#define CACHE_SIZE 16 + +static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB; + +static vector unsigned int tex_tile_xy[CACHE_SIZE]; + + + +/** + * Mark all tex cache entries as invalid. + */ +void +invalidate_tex_cache(void) +{ + /* XXX memset? */ + uint i; + for (i = 0; i < CACHE_SIZE; i++) { + tex_tile_xy[i] = ((vector unsigned int) { ~0U, ~0U, ~0U, ~0U }); + } +} + + +/** + * Return the cache pos/index which corresponds to tile (tx,ty) + */ +static INLINE uint +cache_pos(vector unsigned int txty) +{ + uint pos = (spu_extract(txty,0) + spu_extract(txty,1) * 4) % CACHE_SIZE; + return pos; +} + + +/** + * Make sure the tile for texel (i,j) is present, return its position/index + * in the cache. + */ +static uint +get_tex_tile(vector unsigned int ij) +{ + /* tile address: tx,ty */ + const vector unsigned int txty = spu_rlmask(ij, -5); /* divide by 32 */ + const uint pos = cache_pos(txty); + + if ((spu_extract(tex_tile_xy[pos], 0) != spu_extract(txty, 0)) || + (spu_extract(tex_tile_xy[pos], 1) != spu_extract(txty, 1))) { + + /* texture cache miss, fetch tile from main memory */ + const uint tiles_per_row = spu.texture.width / TILE_SIZE; + const uint bytes_per_tile = sizeof(tile_t); + const void *src = (const ubyte *) spu.texture.start + + (spu_extract(txty,1) * tiles_per_row + spu_extract(txty,0)) * bytes_per_tile; + + printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n", + spu.init.id, + spu_extract(txty,0), + spu_extract(txty,1), + pos, + spu_extract(tex_tile_xy[pos],0), + spu_extract(tex_tile_xy[pos],1)); + + ASSERT_ALIGN16(tex_tiles[pos].ui); + ASSERT_ALIGN16(src); + + mfc_get(tex_tiles[pos].ui, /* dest */ + (unsigned int) src, + bytes_per_tile, /* size */ + TAG_TEXTURE_TILE, + 0, /* tid */ + 0 /* rid */); + + wait_on_mask(1 << TAG_TEXTURE_TILE); + + tex_tile_xy[pos] = txty; + } + else { +#if 0 + printf("SPU %u: tex cache HIT at %d, %d\n", + spu.init.id, tx, ty); +#endif + } + + return pos; +} + + +/** + * Get texture sample at texcoord. + * XXX this is extremely primitive for now. + */ +vector float +sample_texture_nearest(vector float texcoord) +{ + vector float tc = spu_mul(texcoord, spu.tex_size); + vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ + itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */ + vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */ + uint pos = get_tex_tile(itc); + uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)]; + return spu_unpack_A8R8G8B8(texel); +} + + +vector float +sample_texture_bilinear(vector float texcoord) +{ + static const vector unsigned int offset10 = {1, 0, 0, 0}; + static const vector unsigned int offset01 = {0, 1, 0, 0}; + + vector float tc = spu_mul(texcoord, spu.tex_size); + tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ + + /* integer texcoords S,T: */ + vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */ + vector unsigned int itc01 = spu_add(itc00, offset01); + vector unsigned int itc10 = spu_add(itc00, offset10); + vector unsigned int itc11 = spu_add(itc10, offset01); + + /* mask (GL_REPEAT) */ + itc00 = spu_and(itc00, spu.tex_size_mask); + itc01 = spu_and(itc01, spu.tex_size_mask); + itc10 = spu_and(itc10, spu.tex_size_mask); + itc11 = spu_and(itc11, spu.tex_size_mask); + + /* intra tile addr */ + vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1); + vector unsigned int ij01 = spu_and(itc01, TILE_SIZE-1); + vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1); + vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1); + + /* get tile cache positions */ + uint pos00 = get_tex_tile(itc00); + uint pos01, pos10, pos11; + if ((spu_extract(ij00, 0) < TILE_SIZE-1) && + (spu_extract(ij00, 1) < TILE_SIZE-1)) { + /* all texels are in the same tile */ + pos01 = pos10 = pos11 = pos00; + } + else { + pos01 = get_tex_tile(itc01); + pos10 = get_tex_tile(itc10); + pos11 = get_tex_tile(itc11); + } + + /* get texels from tiles and convert to float[4] */ + vector float texel00 = spu_unpack_A8R8G8B8(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); + vector float texel01 = spu_unpack_A8R8G8B8(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]); + vector float texel10 = spu_unpack_A8R8G8B8(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); + vector float texel11 = spu_unpack_A8R8G8B8(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]); + + /* Compute weighting factors in [0,1] + * Multiply texcoord by 1024, AND with 1023, convert back to float. + */ + vector float tc1024 = spu_mul(tc, spu_splats(1024.0f)); + vector signed int itc1024 = spu_convts(tc1024, 0); + itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); + vector float weight = spu_convtf(itc1024, 10); + + /* smeared frac and 1-frac */ + vector float sfrac = spu_splats(spu_extract(weight, 0)); + vector float tfrac = spu_splats(spu_extract(weight, 1)); + vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac); + vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac); + + /* multiply the samples (colors) by the S/T weights */ + texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1); + texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1); + texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac ); + texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac ); + + /* compute sum of weighted samples */ + vector float texel_sum = spu_add(texel00, texel01); + texel_sum = spu_add(texel_sum, texel10); + texel_sum = spu_add(texel_sum, texel11); + + return texel_sum; +} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h new file mode 100644 index 00000000000..95eb87080f1 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_TEXTURE_H +#define SPU_TEXTURE_H + + +#include "pipe/p_compiler.h" + + +extern void +invalidate_tex_cache(void); + + +extern vector float +sample_texture_nearest(vector float texcoord); + + +extern vector float +sample_texture_bilinear(vector float texcoord); + + +#endif /* SPU_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c new file mode 100644 index 00000000000..12dc2463283 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tile.c @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#include "spu_tile.h" +#include "spu_main.h" + + +void +get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) +{ + const uint offset = ty * spu.fb.width_tiles + tx; + const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); + const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start; + + src += offset * bytesPerTile; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + ASSERT_ALIGN16(tile); + /* + printf("get_tile: dest: %p src: 0x%x size: %d\n", + tile, (unsigned int) src, bytesPerTile); + */ + mfc_get(tile->ui, /* dest in local memory */ + (unsigned int) src, /* src in main memory */ + bytesPerTile, + tag, + 0, /* tid */ + 0 /* rid */); +} + + +void +put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) +{ + const uint offset = ty * spu.fb.width_tiles + tx; + const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); + ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start; + + dst += offset * bytesPerTile; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + ASSERT_ALIGN16(tile); + /* + printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n", + spu.init.id, + tile, (unsigned int) dst, bytesPerTile); + */ + mfc_put((void *) tile->ui, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + bytesPerTile, + tag, + 0, /* tid */ + 0 /* rid */); +} + diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h new file mode 100644 index 00000000000..e53340a55a4 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_TILE_H +#define SPU_TILE_H + + +#include <libmisc.h> +#include <spu_mfcio.h> +#include "spu_main.h" +#include "pipe/cell/common.h" + + + +void +get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); + +void +put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); + + + +static INLINE void +clear_c_tile(tile_t *ctile) +{ + memset32((uint*) ctile->ui, + spu.fb.color_clear_value, + TILE_SIZE * TILE_SIZE); +} + + +static INLINE void +clear_z_tile(tile_t *ztile) +{ + if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { + memset16((ushort*) ztile->us, + spu.fb.depth_clear_value, + TILE_SIZE * TILE_SIZE); + } + else { + ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM); + memset32((uint*) ztile->ui, + spu.fb.depth_clear_value, + TILE_SIZE * TILE_SIZE); + } +} + + +#endif /* SPU_TILE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c new file mode 100644 index 00000000000..be9624cf7d9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -0,0 +1,926 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Triangle rendering within a tile. + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "pipe/p_util.h" +#include "spu_blend.h" +#include "spu_colorpack.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_tri.h" + +#include "spu_ztest.h" + + +/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ +typedef vector unsigned int mask_t; + +typedef union +{ + vector float v; + float f[4]; +} float4; + + +/** + * Simplified types taken from other parts of Gallium + */ +struct vertex_header { + vector float data[1]; +}; + + + +/* XXX fix this */ +#undef CEILF +#define CEILF(X) ((float) (int) ((X) + 0.99999)) + + +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 +#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL 0xf + + +#define DEBUG_VERTS 0 + +/** + * Triangle edge info + */ +struct edge { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + + +struct interp_coef +{ + float4 a0; + float4 dadx; + float4 dady; +}; + + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_stage { + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. + */ + const struct vertex_header *vmax; + const struct vertex_header *vmid; + const struct vertex_header *vmin; + const struct vertex_header *vprovoke; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneoverarea; + + uint tx, ty; + + int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; + +#if 0 + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; +#else + struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; +#endif + +#if 0 + struct quad_header quad; +#endif + + struct { + int left[2]; /**< [0] = row0, [1] = row1 */ + int right[2]; + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; +}; + + + +static struct setup_stage setup; + + + + +#if 0 +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} +#endif + +#if 0 +/** + * Clip setup.quad against the scissor/surface bounds. + */ +static INLINE void +quad_clip(struct setup_stage *setup) +{ + const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + + if (setup.quad.x0 >= maxx || + setup.quad.y0 >= maxy || + setup.quad.x0 + 1 < minx || + setup.quad.y0 + 1 < miny) { + /* totally clipped */ + setup.quad.mask = 0x0; + return; + } + if (setup.quad.x0 < minx) + setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (setup.quad.y0 < miny) + setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (setup.quad.x0 == maxx - 1) + setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (setup.quad.y0 == maxy - 1) + setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} +#endif + +#if 0 +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad(struct setup_stage *setup) +{ + quad_clip(setup); + if (setup.quad.mask) { + struct softpipe_context *sp = setup.softpipe; + sp->quad.first->run(sp->quad.first, &setup.quad); + } +} +#endif + +/** + * Evaluate attribute coefficients (plane equations) to compute + * attribute values for the four fragments in a quad. + * Eg: four colors will be compute. + */ +static INLINE void +eval_coeff(uint slot, float x, float y, vector float result[4]) +{ + switch (spu.vertex_info.interp_mode[slot]) { + case INTERP_CONSTANT: + result[QUAD_TOP_LEFT] = + result[QUAD_TOP_RIGHT] = + result[QUAD_BOTTOM_LEFT] = + result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v; + break; + + case INTERP_LINEAR: + /* fall-through, for now */ + default: + { + register vector float dadx = setup.coef[slot].dadx.v; + register vector float dady = setup.coef[slot].dady.v; + register vector float topLeft + = spu_add(setup.coef[slot].a0.v, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); + + result[QUAD_TOP_LEFT] = topLeft; + result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); + result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); + result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); + } + } +} + + +static INLINE vector float +eval_z(float x, float y) +{ + const uint slot = 0; + const float dzdx = setup.coef[slot].dadx.f[2]; + const float dzdy = setup.coef[slot].dady.f[2]; + const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy; + const vector float topLeftv = spu_splats(topLeft); + const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; + return spu_add(topLeftv, derivs); +} + + +static INLINE mask_t +do_depth_test(int x, int y, mask_t quadmask) +{ + float4 zvals; + mask_t mask; + + zvals.v = eval_z((float) x, (float) y); + + if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { + int ix = (x - setup.cliprect_minx) / 4; + int iy = (y - setup.cliprect_miny) / 2; + mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask); + } + else { + int ix = (x - setup.cliprect_minx) / 2; + int iy = (y - setup.cliprect_miny) / 2; + mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask); + } + + if (spu_extract(spu_orx(mask), 0)) + spu.cur_ztile_status = TILE_STATUS_DIRTY; + + return mask; +} + + +/** + * Emit a quad (pass to next stage). No clipping is done. + * Note: about 1/5 to 1/7 of the time, mask is zero and this function + * should be skipped. But adding the test for that slows things down + * overall. + */ +static INLINE void +emit_quad( int x, int y, mask_t mask ) +{ +#if 0 + struct softpipe_context *sp = setup.softpipe; + setup.quad.x0 = x; + setup.quad.y0 = y; + setup.quad.mask = mask; + sp->quad.first->run(sp->quad.first, &setup.quad); +#else + + if (spu.depth_stencil.depth.enabled) { + mask = do_depth_test(x, y, mask); + } + + /* If any bits in mask are set... */ + if (spu_extract(spu_orx(mask), 0)) { + const int ix = x - setup.cliprect_minx; + const int iy = y - setup.cliprect_miny; + const vector unsigned char shuffle = spu.color_shuffle; + vector float colors[4]; + + spu.cur_ctile_status = TILE_STATUS_DIRTY; + + if (spu.texture.start) { + /* texture mapping */ + vector float texcoords[4]; + eval_coeff(2, (float) x, (float) y, texcoords); + + if (spu_extract(mask, 0)) + colors[0] = spu.sample_texture(texcoords[0]); + if (spu_extract(mask, 1)) + colors[1] = spu.sample_texture(texcoords[1]); + if (spu_extract(mask, 2)) + colors[2] = spu.sample_texture(texcoords[2]); + if (spu_extract(mask, 3)) + colors[3] = spu.sample_texture(texcoords[3]); + } + else { + /* simple shading */ + eval_coeff(1, (float) x, (float) y, colors); + } + +#if 1 + if (spu.blend.blend_enable) + blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors); +#endif + + if (spu_extract(mask, 0)) + spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle); + if (spu_extract(mask, 1)) + spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle); + if (spu_extract(mask, 2)) + spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle); + if (spu_extract(mask, 3)) + spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle); + +#if 0 + /* SIMD_Z with swizzled color buffer (someday) */ + vector unsigned int uicolors = *((vector unsigned int *) &colors); + spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask); +#endif + } + +#endif +} + + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int block( int x ) +{ + return x & ~1; +} + + +/** + * Compute mask which indicates which pixels in the 2x2 quad are actually inside + * the triangle's bounds. + * The mask is a uint4 vector and each element will be 0 or 0xffffffff. + */ +static INLINE mask_t calculate_mask( int x ) +{ + /* This is a little tricky. + * Use & instead of && to avoid branches. + * Use negation to convert true/false to ~0/0 values. + */ + mask_t mask; + mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0); + mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1); + mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2); + mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3); + return mask; +} + + +/** + * Render a horizontal span of quads + */ +static void flush_spans( void ) +{ + int minleft, maxright; + int x; + + switch (setup.span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = MIN2(setup.span.left[0], setup.span.left[1]); + maxright = MAX2(setup.span.right[0], setup.span.right[1]); + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = setup.span.left[0]; + maxright = setup.span.right[0]; + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = setup.span.left[1]; + maxright = setup.span.right[1]; + break; + + default: + return; + } + + + /* OK, we're very likely to need the tile data now. + * clear or finish waiting if needed. + */ + if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + //printf("SPU: %u: waiting for ctile\n", spu.init.id); + wait_on_mask(1 << TAG_READ_TILE_COLOR); + spu.cur_ctile_status = TILE_STATUS_CLEAN; + } + else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { + //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty); + clear_c_tile(&spu.ctile); + spu.cur_ctile_status = TILE_STATUS_DIRTY; + } + ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); + + if (spu.depth_stencil.depth.enabled) { + if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + //printf("SPU: %u: waiting for ztile\n", spu.init.id); + wait_on_mask(1 << TAG_READ_TILE_Z); + spu.cur_ztile_status = TILE_STATUS_CLEAN; + } + else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) { + //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty); + clear_z_tile(&spu.ztile); + spu.cur_ztile_status = TILE_STATUS_DIRTY; + } + ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); + } + + /* XXX this loop could be moved into the above switch cases and + * calculate_mask() could be simplified a bit... + */ + for (x = block(minleft); x <= block(maxright); x += 2) { +#if 1 + emit_quad( x, setup.span.y, calculate_mask( x ) ); +#endif + } + + setup.span.y = 0; + setup.span.y_flags = 0; + setup.span.right[0] = 0; + setup.span.right[1] = 0; +} + +#if DEBUG_VERTS +static void print_vertex(const struct vertex_header *v) +{ + int i; + fprintf(stderr, "Vertex: (%p)\n", v); + for (i = 0; i < setup.quad.nr_attrs; i++) { + fprintf(stderr, " %d: %f %f %f %f\n", i, + v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); + } +} +#endif + + +static boolean setup_sort_vertices(const struct vertex_header *v0, + const struct vertex_header *v1, + const struct vertex_header *v2) +{ + +#if DEBUG_VERTS + fprintf(stderr, "Triangle:\n"); + print_vertex(v0); + print_vertex(v1); + print_vertex(v2); +#endif + + setup.vprovoke = v2; + + /* determine bottom to top order of vertices */ + { + float y0 = spu_extract(v0->data[0], 1); + float y1 = spu_extract(v1->data[0], 1); + float y2 = spu_extract(v2->data[0], 1); + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup.vmin = v0; + setup.vmid = v1; + setup.vmax = v2; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup.vmin = v2; + setup.vmid = v0; + setup.vmax = v1; + } + else { + /* y0<=y2<=y1 */ + setup.vmin = v0; + setup.vmid = v2; + setup.vmax = v1; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup.vmin = v1; + setup.vmid = v0; + setup.vmax = v2; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup.vmin = v2; + setup.vmid = v1; + setup.vmax = v0; + } + else { + /* y1<=y2<=y0 */ + setup.vmin = v1; + setup.vmid = v2; + setup.vmax = v0; + } + } + } + + /* Check if triangle is completely outside the tile bounds */ + if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy) + return FALSE; + if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny) + return FALSE; + if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx) + return FALSE; + if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) + return FALSE; + + setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0); + setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1); + setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0); + setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1); + setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0); + setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1); + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + * + * The area will be the same as prim->det, but the sign may be + * different depending on how the vertices get sorted above. + * + * To determine whether the primitive is front or back facing we + * use the prim->det value because its sign is correct. + */ + { + const float area = (setup.emaj.dx * setup.ebot.dy - + setup.ebot.dx * setup.emaj.dy); + + setup.oneoverarea = 1.0f / area; + /* + _mesa_printf("%s one-over-area %f area %f det %f\n", + __FUNCTION__, setup.oneoverarea, area, prim->det ); + */ + } + +#if 0 + /* We need to know if this is a front or back-facing triangle for: + * - the GLSL gl_FrontFacing fragment attribute (bool) + * - two-sided stencil test + */ + setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW); +#endif + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex->data[slot]. + * The result will be put into setup.coef[slot].a0. + * \param slot which attribute slot + */ +static INLINE void +const_coeff(uint slot) +{ + setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].a0.v = setup.vprovoke->data[slot]; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static INLINE void +tri_linear_coeff(uint slot, uint firstComp, uint lastComp) +{ + uint i; + const float *vmin_d = (float *) &setup.vmin->data[slot]; + const float *vmid_d = (float *) &setup.vmid->data[slot]; + const float *vmax_d = (float *) &setup.vmax->data[slot]; + const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f; + const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f; + + for (i = firstComp; i < lastComp; i++) { + float botda = vmid_d[i] - vmin_d[i]; + float majda = vmax_d[i] - vmin_d[i]; + float a = setup.ebot.dy * majda - botda * setup.emaj.dy; + float b = setup.emaj.dx * botda - majda * setup.ebot.dx; + + ASSERT(slot < PIPE_MAX_SHADER_INPUTS); + + setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; + setup.coef[slot].dady.f[i] = b * setup.oneoverarea; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + setup.coef[slot].a0.f[i] = (vmin_d[i] - + (setup.coef[slot].dadx.f[i] * x + + setup.coef[slot].dady.f[i] * y)); + } + + /* + _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", + slot, "xyzw"[i], + setup.coef[slot].a0[i], + setup.coef[slot].dadx.f[i], + setup.coef[slot].dady.f[i]); + */ +} + + +/** + * As above, but interp setup all four vector components. + */ +static INLINE void +tri_linear_coeff4(uint slot) +{ + const vector float vmin_d = setup.vmin->data[slot]; + const vector float vmid_d = setup.vmid->data[slot]; + const vector float vmax_d = setup.vmax->data[slot]; + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); + setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); + + setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); +} + + + +#if 0 +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void tri_persp_coeff( unsigned slot, + unsigned i ) +{ + /* premultiply by 1/w: + */ + float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3]; + float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3]; + float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3]; + + float botda = mida - mina; + float majda = maxa - mina; + float a = setup.ebot.dy * majda - botda * setup.emaj.dy; + float b = setup.emaj.dx * botda - majda * setup.ebot.dx; + + /* + printf("tri persp %d,%d: %f %f %f\n", slot, i, + setup.vmin->data[slot][i], + setup.vmid->data[slot][i], + setup.vmax->data[slot][i] + ); + */ + + assert(slot < PIPE_MAX_SHADER_INPUTS); + assert(i <= 3); + + setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; + setup.coef[slot].dady.f[i] = b * setup.oneoverarea; + setup.coef[slot].a0.f[i] = (mina - + (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + + setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); +} +#endif + + +/** + * Compute the setup.coef[] array dadx, dady, a0 values. + * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. + */ +static void setup_tri_coefficients(void) +{ +#if 1 + uint i; + + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + switch (spu.vertex_info.interp_mode[i]) { + case INTERP_NONE: + break; + case INTERP_POS: + /*tri_linear_coeff(i, 2, 3);*/ + /* XXX interp W if PERSPECTIVE... */ + tri_linear_coeff4(i); + break; + case INTERP_CONSTANT: + const_coeff(i); + break; + case INTERP_LINEAR: + tri_linear_coeff4(i); + break; + case INTERP_PERSPECTIVE: + tri_linear_coeff4(i); /* temporary */ + break; + default: + ASSERT(0); + } + } +#else + ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS); + ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR || + spu.vertex_info.interp_mode[1] == INTERP_CONSTANT); + tri_linear_coeff(0, 2, 3); /* slot 0, z */ + tri_linear_coeff(1, 0, 4); /* slot 1, color */ +#endif +} + + +static void setup_tri_edges(void) +{ + float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; + float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; + + float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; + float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; + float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; + + setup.emaj.sy = CEILF(vmin_y); + setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); + setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; + setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; + + setup.etop.sy = CEILF(vmid_y); + setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); + setup.etop.dxdy = setup.etop.dx / setup.etop.dy; + setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; + + setup.ebot.sy = CEILF(vmin_y); + setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); + setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; + setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. + */ +static void subtriangle( struct edge *eleft, + struct edge *eright, + unsigned lines ) +{ + const int minx = setup.cliprect_minx; + const int maxx = setup.cliprect_maxx; + const int miny = setup.cliprect_miny; + const int maxy = setup.cliprect_maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + ASSERT((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup.span.y) { + flush_spans(); + setup.span.y = block(_y); + } + + setup.span.left[_y&1] = left; + setup.span.right[_y&1] = right; + setup.span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Draw triangle into tile at (tx, ty) (tile coords) + * The tile data should have already been fetched. + */ +boolean +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) +{ + setup.tx = tx; + setup.ty = ty; + + /* set clipping bounds to tile bounds */ + setup.cliprect_minx = tx * TILE_SIZE; + setup.cliprect_miny = ty * TILE_SIZE; + setup.cliprect_maxx = (tx + 1) * TILE_SIZE; + setup.cliprect_maxy = (ty + 1) * TILE_SIZE; + + if (!setup_sort_vertices((struct vertex_header *) v0, + (struct vertex_header *) v1, + (struct vertex_header *) v2)) { + return FALSE; /* totally clipped */ + } + + setup_tri_coefficients(); + setup_tri_edges(); + + setup.span.y = 0; + setup.span.y_flags = 0; + setup.span.right[0] = 0; + setup.span.right[1] = 0; + /* setup.span.z_mode = tri_z_mode( setup.ctx ); */ + + /* init_constant_attribs( setup ); */ + + if (setup.oneoverarea < 0.0) { + /* emaj on left: + */ + subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); + subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); + } + else { + /* emaj on right: + */ + subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); + subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); + } + + flush_spans(); + + return TRUE; +} diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h new file mode 100644 index 00000000000..aa694dd7c93 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_TRI_H +#define SPU_TRI_H + + +extern boolean +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); + + +#endif /* SPU_TRI_H */ diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c new file mode 100644 index 00000000000..ac373240c1f --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -0,0 +1,165 @@ +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" +//#include "tgsi_build.h" +#include "pipe/tgsi/util/tgsi_util.h" + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->SwizzleX; + case 1: + return reg->SwizzleY; + case 2: + return reg->SwizzleZ; + case 3: + return reg->SwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->ExtSwizzleX; + case 1: + return reg->ExtSwizzleY; + case 2: + return reg->ExtSwizzleZ; + case 3: + return reg->ExtSwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned swizzle; + + /* + * First, calculate the extended swizzle for a given channel. This will give + * us either a channel index into the simple swizzle or a constant 1 or 0. + */ + swizzle = tgsi_util_get_src_register_extswizzle( + ®->SrcRegisterExtSwz, + component ); + + assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + + /* + * Second, calculate the simple swizzle for the unswizzled channel index. + * Leave the constants intact, they are not affected by the simple swizzle. + */ + if( swizzle <= TGSI_SWIZZLE_W ) { + swizzle = tgsi_util_get_src_register_swizzle( + ®->SrcRegister, + component ); + } + + return swizzle; +} + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->NegateX; + case 1: + return reg->NegateY; + case 2: + return reg->NegateZ; + case 3: + return reg->NegateW; + default: + assert( 0 ); + } + return 0; +} + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ) +{ + switch( component ) { + case 0: + reg->NegateX = negate; + break; + case 1: + reg->NegateY = negate; + break; + case 2: + reg->NegateZ = negate; + break; + case 3: + reg->NegateW = negate; + break; + default: + assert( 0 ); + } +} + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned sign_mode; + + if( reg->SrcRegisterExtMod.Absolute ) { + /* Consider only the post-abs negation. */ + + if( reg->SrcRegisterExtMod.Negate ) { + sign_mode = TGSI_UTIL_SIGN_SET; + } + else { + sign_mode = TGSI_UTIL_SIGN_CLEAR; + } + } + else { + /* Accumulate the three negations. */ + + unsigned negate; + + negate = reg->SrcRegister.Negate; + if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { + negate = !negate; + } + if( reg->SrcRegisterExtMod.Negate ) { + negate = !negate; + } + + if( negate ) { + sign_mode = TGSI_UTIL_SIGN_TOGGLE; + } + else { + sign_mode = TGSI_UTIL_SIGN_KEEP; + } + } + + return sign_mode; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c new file mode 100644 index 00000000000..45e3c26c001 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -0,0 +1,673 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <spu_mfcio.h> +#include <transpose_matrix4x4.h> + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_exec.h" +#include "spu_vertex_shader.h" +#include "spu_main.h" + +#define CACHE_NAME attribute +#define CACHED_TYPE qword +#define CACHE_TYPE CACHE_TYPE_RO +#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER +#define CACHE_LOG2NNWAY 2 +#define CACHE_LOG2NSETS 6 +#include <cache-api.h> + +/* Yes folks, this is ugly. + */ +#undef CACHE_NWAY +#undef CACHE_NSETS +#define CACHE_NAME attribute +#define CACHE_NWAY 4 +#define CACHE_NSETS (1U << 6) + + +#define DRAW_DBG 0 + +static const qword fetch_shuffle_data[] = { + /* Shuffle used by CVT_64_FLOAT + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED + */ + { + 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, + 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED + */ + { + 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, + 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, + }, + + /* High value shuffle used by trans4x4. + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 + }, + + /* Low value shuffle used by trans4x4. + */ + { + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F + } +}; + + +static INLINE void +trans4x4(qword row0, qword row1, qword row2, qword row3, qword *out, + const qword *shuffle) +{ + qword t1 = si_shufb(row0, row2, shuffle[3]); + qword t2 = si_shufb(row0, row2, shuffle[4]); + qword t3 = si_shufb(row1, row3, shuffle[3]); + qword t4 = si_shufb(row1, row3, shuffle[4]); + + out[0] = si_shufb(t1, t3, shuffle[3]); + out[1] = si_shufb(t1, t3, shuffle[4]); + out[2] = si_shufb(t2, t4, shuffle[3]); + out[3] = si_shufb(t2, t4, shuffle[4]); +} + + +/** + * Fetch between 1 and 32 bytes from an unaligned address + */ +static INLINE void +fetch_unaligned(qword *dst, unsigned ea, unsigned size) +{ + qword tmp[4]; + const int shift = ea & 0x0f; + const unsigned aligned_start_ea = ea & ~0x0f; + const unsigned aligned_end_ea = (ea + size) & ~0x0f; + const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1; + unsigned i; + + + if (shift == 0) { + /* Data is already aligned. Fetch directly into the destination buffer. + */ + for (i = 0; i < num_entries; i++) { + dst[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); + } + } else { + /* Fetch data from the cache to the local buffer. + */ + for (i = 0; i < num_entries; i++) { + tmp[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); + } + + + /* Fix the alignment of the data and write to the destination buffer. + */ + for (i = 0; i < ((size + 15) / 16); i++) { + dst[i] = si_or((qword) spu_slqwbyte(tmp[i], shift), + (qword) spu_rlmaskqwbyte(tmp[i + 1], shift - 16)); + } + } +} + + +#define CVT_32_FLOAT(q, s) (*(q)) + +static INLINE qword +CVT_64_FLOAT(const qword *qw, const qword *shuffle) +{ + qword a = si_frds(qw[0]); + qword b = si_frds(si_rotqbyi(qw[0], 8)); + qword c = si_frds(qw[1]); + qword d = si_frds(si_rotqbyi(qw[1], 8)); + + qword ab = si_shufb(a, b, shuffle[0]); + qword cd = si_shufb(c, d, si_rotqbyi(shuffle[0], 8)); + + return si_or(ab, cd); +} + + +static INLINE qword +CVT_8_USCALED(const qword *qw, const qword *shuffle) +{ + return si_cuflt(si_shufb(*qw, *qw, shuffle[1]), 0); +} + + +static INLINE qword +CVT_16_USCALED(const qword *qw, const qword *shuffle) +{ + return si_cuflt(si_shufb(*qw, *qw, shuffle[2]), 0); +} + + +static INLINE qword +CVT_32_USCALED(const qword *qw, const qword *shuffle) +{ + (void) shuffle; + return si_cuflt(*qw, 0); +} + +static INLINE qword +CVT_8_SSCALED(const qword *qw, const qword *shuffle) +{ + return si_csflt(si_shufb(*qw, *qw, shuffle[1]), 0); +} + + +static INLINE qword +CVT_16_SSCALED(const qword *qw, const qword *shuffle) +{ + return si_csflt(si_shufb(*qw, *qw, shuffle[2]), 0); +} + + +static INLINE qword +CVT_32_SSCALED(const qword *qw, const qword *shuffle) +{ + (void) shuffle; + return si_csflt(*qw, 0); +} + + +static INLINE qword +CVT_8_UNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 255.0f); + return si_fm(CVT_8_USCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_16_UNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 65535.0f); + return si_fm(CVT_16_USCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_32_UNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 4294967295.0f); + return si_fm(CVT_32_USCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_8_SNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 127.0f); + return si_fm(CVT_8_SSCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_16_SNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 32767.0f); + return si_fm(CVT_16_SSCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_32_SNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 2147483647.0f); + return si_fm(CVT_32_SSCALED(qw, shuffle), scale); +} + +#define SZ_4 si_il(0U) +#define SZ_3 si_fsmbi(0x000f) +#define SZ_2 si_fsmbi(0x00ff) +#define SZ_1 si_fsmbi(0x0fff) + +/** + * Fetch a float[4] vertex attribute from memory, doing format/type + * conversion as needed. + * + * This is probably needed/dupliocated elsewhere, eg format + * conversion, texture sampling etc. + */ +#define FETCH_ATTRIB( NAME, SZ, CVT, N ) \ +static void \ +fetch_##NAME(qword *out, const qword *in, qword defaults, \ + const qword *shuffle) \ +{ \ + qword tmp[4]; \ + \ + tmp[0] = si_selb(CVT(in + (0 * N), shuffle), defaults, SZ); \ + tmp[1] = si_selb(CVT(in + (1 * N), shuffle), defaults, SZ); \ + tmp[2] = si_selb(CVT(in + (2 * N), shuffle), defaults, SZ); \ + tmp[3] = si_selb(CVT(in + (3 * N), shuffle), defaults, SZ); \ + trans4x4(tmp[0], tmp[1], tmp[2], tmp[3], out, shuffle); \ +} + + +FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT, 2 ) +FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT, 2 ) +FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT, 2 ) +FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT, 2 ) + +FETCH_ATTRIB( R32G32B32A32_FLOAT, SZ_4, CVT_32_FLOAT, 1 ) +FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT, 1 ) +FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT, 1 ) +FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT, 1 ) + +FETCH_ATTRIB( R32G32B32A32_USCALED, SZ_4, CVT_32_USCALED, 1 ) +FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED, 1 ) +FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED, 1 ) +FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED, 1 ) + +FETCH_ATTRIB( R32G32B32A32_SSCALED, SZ_4, CVT_32_SSCALED, 1 ) +FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED, 1 ) +FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED, 1 ) +FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED, 1 ) + +FETCH_ATTRIB( R32G32B32A32_UNORM, SZ_4, CVT_32_UNORM, 1 ) +FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM, 1 ) +FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM, 1 ) +FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM, 1 ) + +FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM, 1 ) +FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM, 1 ) +FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM, 1 ) +FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM, 1 ) + +FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED, 1 ) +FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED, 1 ) +FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED, 1 ) +FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED, 1 ) + +FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED, 1 ) +FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED, 1 ) +FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED, 1 ) +FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED, 1 ) + +FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM, 1 ) +FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM, 1 ) +FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM, 1 ) +FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM, 1 ) + +FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM, 1 ) +FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM, 1 ) +FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM, 1 ) +FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM, 1 ) + +FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED, 1 ) +FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED, 1 ) +FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED, 1 ) +FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED, 1 ) + +FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED, 1 ) +FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED, 1 ) +FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED, 1 ) +FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED, 1 ) + +FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM, 1 ) +FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM, 1 ) +FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM, 1 ) +FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM, 1 ) + +FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM, 1 ) +FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM, 1 ) +FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM, 1 ) +FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM, 1 ) + +FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM, 1 ) + + + +static spu_fetch_func get_fetch_func( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return fetch_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return fetch_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return fetch_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return fetch_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return fetch_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return fetch_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return fetch_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return fetch_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return fetch_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return fetch_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return fetch_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return fetch_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return fetch_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return fetch_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return fetch_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return fetch_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return fetch_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return fetch_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return fetch_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return fetch_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return fetch_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return fetch_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return fetch_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return fetch_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return fetch_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return fetch_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return fetch_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return fetch_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return fetch_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return fetch_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return fetch_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return fetch_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return fetch_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return fetch_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return fetch_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return fetch_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return fetch_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return fetch_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return fetch_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return fetch_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return fetch_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return fetch_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return fetch_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return fetch_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return fetch_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return fetch_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return fetch_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return fetch_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return fetch_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return fetch_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return fetch_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return fetch_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return fetch_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return fetch_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return fetch_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return fetch_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return fetch_A8R8G8B8_UNORM; + + case 0: + return NULL; /* not sure why this is needed */ + + default: + assert(0); + return NULL; + } +} + + +static unsigned get_vertex_size( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return 8; + case PIPE_FORMAT_R64G64_FLOAT: + return 2 * 8; + case PIPE_FORMAT_R64G64B64_FLOAT: + return 3 * 8; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return 4 * 8; + + case PIPE_FORMAT_R32_SSCALED: + case PIPE_FORMAT_R32_SNORM: + case PIPE_FORMAT_R32_USCALED: + case PIPE_FORMAT_R32_UNORM: + case PIPE_FORMAT_R32_FLOAT: + return 4; + case PIPE_FORMAT_R32G32_SSCALED: + case PIPE_FORMAT_R32G32_SNORM: + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + return 2 * 4; + case PIPE_FORMAT_R32G32B32_SSCALED: + case PIPE_FORMAT_R32G32B32_SNORM: + case PIPE_FORMAT_R32G32B32_USCALED: + case PIPE_FORMAT_R32G32B32_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + return 3 * 4; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_USCALED: + case PIPE_FORMAT_R32G32B32A32_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return 4 * 4; + + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_USCALED: + return 2; + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_USCALED: + case PIPE_FORMAT_R16G16_UNORM: + return 2 * 2; + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16_UNORM: + return 3 * 2; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + return 4 * 2; + + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8_UNORM: + return 1; + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8G8_UNORM: + return 2 * 1; + case PIPE_FORMAT_R8G8B8_SSCALED: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8B8_USCALED: + case PIPE_FORMAT_R8G8B8_UNORM: + return 3 * 1; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SSCALED: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_USCALED: + case PIPE_FORMAT_R8G8B8A8_UNORM: + return 4 * 1; + + case 0: + return 0; /* not sure why this is needed */ + + default: + assert(0); + return 0; + } +} + + +/** + * Fetch vertex attributes for 'count' vertices. + */ +static void generic_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + unsigned nr_attrs = draw->vertex_fetch.nr_attrs; + unsigned attr; + + assert(count <= 4); + +#if DRAW_DBG + printf("SPU: %s count = %u, nr_attrs = %u\n", + __FUNCTION__, count, nr_attrs); +#endif + + /* loop over vertex attributes (vertex shader inputs) + */ + for (attr = 0; attr < nr_attrs; attr++) { + const qword default_values = (qword)(vec_float4){ 0.0, 0.0, 0.0, 1.0 }; + const unsigned pitch = draw->vertex_fetch.pitch[attr]; + const uint64_t src = draw->vertex_fetch.src_ptr[attr]; + const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; + unsigned i; + unsigned idx; + const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; + const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; + qword in[2 * 4]; + + + /* Fetch four attributes for four vertices. + */ + idx = 0; + for (i = 0; i < count; i++) { + const uint64_t addr = src + (elts[i] * pitch); + +#if DRAW_DBG + printf("SPU: fetching = 0x%llx\n", addr); +#endif + + fetch_unaligned(& in[idx], addr, bytes_per_entry); + idx += quads_per_entry; + } + + /* Be nice and zero out any missing vertices. + */ + (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword)); + + + /* Convert all 4 vertices to vectors of float. + */ + (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, default_values, + fetch_shuffle_data); + } +} + + +void spu_update_vertex_fetch( struct spu_vs_context *draw ) +{ + unsigned i; + + + /* Invalidate the vertex cache. + */ + for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { + CACHELINE_CLEARVALID(i); + } + + + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + draw->vertex_fetch.fetch[i] = + get_fetch_func(draw->vertex_fetch.format[i]); + draw->vertex_fetch.size[i] = + get_vertex_size(draw->vertex_fetch.format[i]); + } + + draw->vertex_fetch.fetch_func = generic_vertex_fetch; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c new file mode 100644 index 00000000000..c1cbbb6d1e9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -0,0 +1,231 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + * Ian Romanick <idr@us.ibm.com> + */ + +#include <spu_mfcio.h> + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_vertex_shader.h" +#include "spu_exec.h" +#include "pipe/draw/draw_private.h" +#include "pipe/draw/draw_context.h" +#include "pipe/cell/common.h" +#include "spu_main.h" + +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<<i); + } + + return mask; +} + + +/** + * Transform vertices with the current vertex program/shader + * Up to four vertices can be shaded at a time. + * \param vbuffer the input vertex data + * \param elts indexes of four input vertices + * \param count number of vertices to shade [1..4] + * \param vOut array of pointers to four output vertices + */ +static void +run_vertex_program(struct spu_vs_context *draw, + unsigned elts[4], unsigned count, + const uint64_t *vOut) +{ + struct spu_exec_machine *machine = &draw->machine; + unsigned int j; + + ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + assert(count <= 4); + + machine->Processor = TGSI_PROCESSOR_VERTEX; + + ASSERT_ALIGN16(draw->constants); + machine->Consts = (float (*)[4]) draw->constants; + + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); + + spu_vertex_fetch( draw, machine, elts, count ); + + /* run shader */ + spu_exec_machine_run( machine ); + + + /* store machine results */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + unsigned char buffer[sizeof(struct vertex_header) + + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + struct vertex_header *const tmpOut = + (struct vertex_header *) buffer; + const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) + + (sizeof(float) * 4 + * draw->num_vs_outputs)); + + mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + + /* Handle attr[0] (position) specially: + * + * XXX: Computing the clipmask should be done in the vertex + * program as a set of DP4 instructions appended to the + * user-provided code. + */ + x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, + draw->nr_planes); + tmpOut->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + tmpOut->data[0][0] = x * scale[0] + trans[0]; + tmpOut->data[0][1] = y * scale[1] + trans[1]; + tmpOut->data[0][2] = z * scale[2] + trans[2]; + tmpOut->data[0][3] = w; + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + + mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + } /* loop over vertices */ +} + + +static void +spu_bind_vertex_shader(struct spu_vs_context *draw, + void *uniforms, + void *planes, + unsigned nr_planes, + unsigned num_outputs + ) +{ + draw->constants = (float (*)[4]) uniforms; + + (void) memcpy(draw->plane, planes, sizeof(float) * 4 * nr_planes); + draw->nr_planes = nr_planes; + draw->num_vs_outputs = num_outputs; + + /* specify the shader to interpret/execute */ + spu_exec_machine_init(&draw->machine, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/, + PIPE_SHADER_VERTEX); +} + + +unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] + ALIGN16_ATTRIB; + +void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs) +{ + unsigned i; + + const uint64_t immediate_addr = vs->shader.immediates; + const unsigned immediate_size = + ROUNDUP16((sizeof(float) * 4 * vs->shader.num_immediates) + + (immediate_addr & 0x0f)); + + mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, + TAG_VERTEX_BUFFER, 0, 0); + + draw->machine.Instructions = (struct tgsi_full_instruction *) + vs->shader.instructions; + draw->machine.NumInstructions = vs->shader.num_instructions; + + draw->machine.Declarations = (struct tgsi_full_declaration *) + vs->shader.declarations; + draw->machine.NumDeclarations = vs->shader.num_declarations; + + draw->vertex_fetch.nr_attrs = vs->nr_attrs; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], + sizeof(float) * 4 * vs->shader.num_immediates); + + spu_bind_vertex_shader(draw, vs->shader.uniforms, + vs->plane, vs->nr_planes, + vs->shader.num_outputs); + + for (i = 0; i < vs->num_elts; i += 4) { + const unsigned batch_size = MIN2(vs->num_elts - i, 4); + + run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); + } +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h new file mode 100644 index 00000000000..b5bf31e67db --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -0,0 +1,63 @@ +#ifndef SPU_VERTEX_SHADER_H +#define SPU_VERTEX_SHADER_H + +#include "pipe/p_format.h" +#include "spu_exec.h" + +struct spu_vs_context; + +typedef void (*spu_fetch_func)(qword *out, const qword *in, qword defaults, + const qword *shuffle_data); +typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count ); + +struct spu_vs_context { + struct pipe_viewport_state viewport; + + struct { + uint64_t src_ptr[PIPE_ATTRIB_MAX]; + unsigned pitch[PIPE_ATTRIB_MAX]; + unsigned size[PIPE_ATTRIB_MAX]; + enum pipe_format format[PIPE_ATTRIB_MAX]; + unsigned nr_attrs; + boolean dirty; + + spu_fetch_func fetch[PIPE_ATTRIB_MAX]; + spu_full_fetch_func fetch_func; + } vertex_fetch; + + /* Clip derived state: + */ + float plane[12][4]; + unsigned nr_planes; + + struct spu_exec_machine machine; + const float (*constants)[4]; + + unsigned num_vs_outputs; +}; + +extern void spu_update_vertex_fetch(struct spu_vs_context *draw); + +static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + if (draw->vertex_fetch.dirty) { + spu_update_vertex_fetch(draw); + draw->vertex_fetch.dirty = 0; + } + + (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); +} + +struct cell_command_vs; + +extern void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs); + +#endif /* SPU_VERTEX_SHADER_H */ diff --git a/src/gallium/drivers/cell/spu/spu_ztest.h b/src/gallium/drivers/cell/spu/spu_ztest.h new file mode 100644 index 00000000000..ce8ad003393 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_ztest.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Zbuffer/depth test code. + */ + + +#ifndef SPU_ZTEST_H +#define SPU_ZTEST_H + + +#ifdef __SPU__ +#include <spu_intrinsics.h> +#endif + + + +/** + * Perform Z testing for a 16-bit/value Z buffer. + * + * \param zvals vector of four fragment zvalues as floats + * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this + * contains the Z values for 2 quads, 8 pixels. + * \param x x coordinate of quad (only lsbit is significant) + * \param inMask indicates which fragments in the quad are alive + * \return new mask indicating which fragments are alive after ztest + */ +static INLINE vector unsigned int +spu_z16_test_less(vector float zvals, vector unsigned short *zbuf, + uint x, vector unsigned int inMask) +{ +#define ZERO 0x80 + vector unsigned int zvals_ui4, zbuf_ui4, mask; + + /* convert floats to uints in [0, 65535] */ + zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */ + zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */ + + /* XXX this conditional could be removed with a bit of work */ + if (x & 1) { + /* convert zbuffer values from ushorts to uints */ + /* gather lower four ushorts */ + zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, + (vector unsigned int) *zbuf, + ((vector unsigned char) { + ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11, + ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15})); + /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf_ui4, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); + /* convert zbuffer values from uints back to ushorts, preserve lower 4 */ + *zbuf = (vector unsigned short) + spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, + ((vector unsigned char) { + 16, 17, 18, 19, 20, 21, 22, 23, + 2, 3, 6, 7, 10, 11, 14, 15})); + } + else { + /* convert zbuffer values from ushorts to uints */ + /* gather upper four ushorts */ + zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, + (vector unsigned int) *zbuf, + ((vector unsigned char) { + ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, + ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7})); + /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf_ui4, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); + /* convert zbuffer values from uints back to ushorts, preserve upper 4 */ + *zbuf = (vector unsigned short) + spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, + ((vector unsigned char) { + 2, 3, 6, 7, 10, 11, 14, 15, + 24, 25, 26, 27, 28, 29, 30, 31})); + } + return mask; +#undef ZERO +} + + +/** + * As above, but Zbuffer values as 32-bit uints + */ +static INLINE vector unsigned int +spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr, + vector unsigned int inMask) +{ + vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr; + + /* convert floats to uints in [0, 0xffffffff] */ + zvals_ui4 = spu_convtu(zvals, 32); + /* mask = (zbuf < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + *zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask); + + return mask; +} + + +#endif /* SPU_ZTEST_H */ diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile new file mode 100644 index 00000000000..72d0895c74d --- /dev/null +++ b/src/gallium/drivers/failover/Makefile @@ -0,0 +1,21 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = failover + +DRIVER_SOURCES = \ + fo_state.c \ + fo_state_emit.c \ + fo_context.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c new file mode 100644 index 00000000000..7ce4a7df17e --- /dev/null +++ b/src/gallium/drivers/failover/fo_context.c @@ -0,0 +1,155 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_context.h" + +#include "fo_context.h" +#include "fo_winsys.h" + + + +static void failover_destroy( struct pipe_context *pipe ) +{ + struct failover_context *failover = failover_context( pipe ); + + free( failover ); +} + + + +static boolean failover_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct failover_context *failover = failover_context( pipe ); + + /* If there has been any statechange since last time, try hardware + * rendering again: + */ + if (failover->dirty) { + failover->mode = FO_HW; + } + + /* Try hardware: + */ + if (failover->mode == FO_HW) { + if (!failover->hw->draw_elements( failover->hw, + indexBuffer, + indexSize, + prim, + start, + count )) { + + failover->hw->flush( failover->hw, ~0 ); + failover->mode = FO_SW; + } + } + + /* Possibly try software: + */ + if (failover->mode == FO_SW) { + + if (failover->dirty) + failover_state_emit( failover ); + + failover->sw->draw_elements( failover->sw, + indexBuffer, + indexSize, + prim, + start, + count ); + + /* Be ready to switch back to hardware rendering without an + * intervening flush. Unlikely to be much performance impact to + * this: + */ + failover->sw->flush( failover->sw, ~0 ); + } + + return TRUE; +} + + +static boolean failover_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return failover_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + +struct pipe_context *failover_create( struct pipe_context *hw, + struct pipe_context *sw ) +{ + struct failover_context *failover = CALLOC_STRUCT(failover_context); + if (failover == NULL) + return NULL; + + failover->hw = hw; + failover->sw = sw; + failover->pipe.winsys = hw->winsys; + failover->pipe.destroy = failover_destroy; + failover->pipe.is_format_supported = hw->is_format_supported; + failover->pipe.get_name = hw->get_name; + failover->pipe.get_vendor = hw->get_vendor; + failover->pipe.get_param = hw->get_param; + failover->pipe.get_paramf = hw->get_paramf; + + failover->pipe.draw_arrays = failover_draw_arrays; + failover->pipe.draw_elements = failover_draw_elements; + failover->pipe.clear = hw->clear; + + /* No software occlusion fallback (or other optional functionality) + * at this point - if the hardware doesn't support it, don't + * advertise it to the application. + */ + failover->pipe.begin_query = hw->begin_query; + failover->pipe.end_query = hw->end_query; + + failover_init_state_functions( failover ); + +#if 0 + failover->pipe.surface_alloc = hw->surface_alloc; +#endif + failover->pipe.get_tex_surface = hw->get_tex_surface; + + failover->pipe.surface_copy = hw->surface_copy; + failover->pipe.surface_fill = hw->surface_fill; + failover->pipe.texture_create = hw->texture_create; + failover->pipe.texture_release = hw->texture_release; + failover->pipe.flush = hw->flush; + + failover->dirty = 0; + + return &failover->pipe; +} + diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h new file mode 100644 index 00000000000..1dc87291c9f --- /dev/null +++ b/src/gallium/drivers/failover/fo_context.h @@ -0,0 +1,114 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef FO_CONTEXT_H +#define FO_CONTEXT_H + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + + + +#define FO_NEW_VIEWPORT 0x1 +#define FO_NEW_RASTERIZER 0x2 +#define FO_NEW_FRAGMENT_SHADER 0x4 +#define FO_NEW_BLEND 0x8 +#define FO_NEW_CLIP 0x10 +#define FO_NEW_SCISSOR 0x20 +#define FO_NEW_STIPPLE 0x40 +#define FO_NEW_FRAMEBUFFER 0x80 +#define FO_NEW_ALPHA_TEST 0x100 +#define FO_NEW_DEPTH_STENCIL 0x200 +#define FO_NEW_SAMPLER 0x400 +#define FO_NEW_TEXTURE 0x800 +#define FO_NEW_VERTEX 0x2000 +#define FO_NEW_VERTEX_SHADER 0x4000 +#define FO_NEW_BLEND_COLOR 0x8000 +#define FO_NEW_CLEAR_COLOR 0x10000 +#define FO_NEW_VERTEX_BUFFER 0x20000 +#define FO_NEW_VERTEX_ELEMENT 0x40000 + + + +#define FO_HW 0 +#define FO_SW 1 + +struct fo_state { + void *sw_state; + void *hw_state; +}; +struct failover_context { + struct pipe_context pipe; /**< base class */ + + + /* The most recent drawing state as set by the driver: + */ + const struct fo_state *blend; + const struct fo_state *sampler[PIPE_MAX_SAMPLERS]; + const struct fo_state *depth_stencil; + const struct fo_state *rasterizer; + const struct fo_state *fragment_shader; + const struct fo_state *vertex_shader; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + + unsigned dirty; + unsigned dirty_sampler; + unsigned dirty_texture; + unsigned dirty_vertex_buffer; + unsigned dirty_vertex_element; + + + unsigned mode; + struct pipe_context *hw; + struct pipe_context *sw; +}; + + + +void failover_init_state_functions( struct failover_context *failover ); +void failover_state_emit( struct failover_context *failover ); + +static INLINE struct failover_context * +failover_context( struct pipe_context *pipe ) +{ + return (struct failover_context *)pipe; +} + + +#endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c new file mode 100644 index 00000000000..0fc5568da11 --- /dev/null +++ b/src/gallium/drivers/failover/fo_state.c @@ -0,0 +1,457 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "fo_context.h" + + +/* This looks like a lot of work at the moment - we're keeping a + * duplicate copy of the state up-to-date. + * + * This can change in two ways: + * - With constant state objects we would only need to save a pointer, + * not the whole object. + * - By adding a callback in the state tracker to re-emit state. The + * state tracker knows the current state already and can re-emit it + * without additional complexity. + * + * This works as a proof-of-concept, but a final version will have + * lower overheads. + */ + + + +static void * +failover_create_blend_state( struct pipe_context *pipe, + const struct pipe_blend_state *blend ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_blend_state(failover->sw, blend); + state->hw_state = failover->hw->create_blend_state(failover->hw, blend); + + return state; +} + +static void +failover_bind_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state *)blend; + failover->blend = state; + failover->dirty |= FO_NEW_BLEND; + failover->sw->bind_blend_state( failover->sw, state->sw_state ); + failover->hw->bind_blend_state( failover->hw, state->hw_state ); +} + +static void +failover_delete_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct fo_state *state = (struct fo_state*)blend; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_blend_state(failover->sw, state->sw_state); + failover->hw->delete_blend_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->blend_color = *blend_color; + failover->dirty |= FO_NEW_BLEND_COLOR; + failover->sw->set_blend_color( failover->sw, blend_color ); + failover->hw->set_blend_color( failover->hw, blend_color ); +} + +static void +failover_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->clip = *clip; + failover->dirty |= FO_NEW_CLIP; + failover->sw->set_clip_state( failover->sw, clip ); + failover->hw->set_clip_state( failover->hw, clip ); +} + + +static void * +failover_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_depth_stencil_alpha_state(failover->sw, templ); + state->hw_state = failover->hw->create_depth_stencil_alpha_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state *)depth_stencil; + failover->depth_stencil = state; + failover->dirty |= FO_NEW_DEPTH_STENCIL; + failover->sw->bind_depth_stencil_alpha_state(failover->sw, state->sw_state); + failover->hw->bind_depth_stencil_alpha_state(failover->hw, state->hw_state); +} + +static void +failover_delete_depth_stencil_state(struct pipe_context *pipe, + void *ds) +{ + struct fo_state *state = (struct fo_state*)ds; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_depth_stencil_alpha_state(failover->sw, state->sw_state); + failover->hw->delete_depth_stencil_alpha_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *framebuffer) +{ + struct failover_context *failover = failover_context(pipe); + + failover->framebuffer = *framebuffer; + failover->dirty |= FO_NEW_FRAMEBUFFER; + failover->sw->set_framebuffer_state( failover->sw, framebuffer ); + failover->hw->set_framebuffer_state( failover->hw, framebuffer ); +} + + +static void * +failover_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_fs_state(failover->sw, templ); + state->hw_state = failover->hw->create_fs_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)fs; + failover->fragment_shader = state; + failover->dirty |= FO_NEW_FRAGMENT_SHADER; + failover->sw->bind_fs_state(failover->sw, state->sw_state); + failover->hw->bind_fs_state(failover->hw, state->hw_state); +} + +static void +failover_delete_fs_state(struct pipe_context *pipe, + void *fs) +{ + struct fo_state *state = (struct fo_state*)fs; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_fs_state(failover->sw, state->sw_state); + failover->hw->delete_fs_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void * +failover_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_vs_state(failover->sw, templ); + state->hw_state = failover->hw->create_vs_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_vs_state(struct pipe_context *pipe, + void *vs) +{ + struct failover_context *failover = failover_context(pipe); + + struct fo_state *state = (struct fo_state*)vs; + failover->vertex_shader = state; + failover->dirty |= FO_NEW_VERTEX_SHADER; + failover->sw->bind_vs_state(failover->sw, state->sw_state); + failover->hw->bind_vs_state(failover->hw, state->hw_state); +} + +static void +failover_delete_vs_state(struct pipe_context *pipe, + void *vs) +{ + struct fo_state *state = (struct fo_state*)vs; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_vs_state(failover->sw, state->sw_state); + failover->hw->delete_vs_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->poly_stipple = *stipple; + failover->dirty |= FO_NEW_STIPPLE; + failover->sw->set_polygon_stipple( failover->sw, stipple ); + failover->hw->set_polygon_stipple( failover->hw, stipple ); +} + + +static void * +failover_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_rasterizer_state(failover->sw, templ); + state->hw_state = failover->hw->create_rasterizer_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + struct failover_context *failover = failover_context(pipe); + + struct fo_state *state = (struct fo_state*)raster; + failover->rasterizer = state; + failover->dirty |= FO_NEW_RASTERIZER; + failover->sw->bind_rasterizer_state(failover->sw, state->sw_state); + failover->hw->bind_rasterizer_state(failover->hw, state->hw_state); +} + +static void +failover_delete_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + struct fo_state *state = (struct fo_state*)raster; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_rasterizer_state(failover->sw, state->sw_state); + failover->hw->delete_rasterizer_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + + +static void +failover_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->scissor = *scissor; + failover->dirty |= FO_NEW_SCISSOR; + failover->sw->set_scissor_state( failover->sw, scissor ); + failover->hw->set_scissor_state( failover->hw, scissor ); +} + + +static void * +failover_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_sampler_state(failover->sw, templ); + state->hw_state = failover->hw->create_sampler_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)sampler; + failover->sampler[unit] = state; + failover->dirty |= FO_NEW_SAMPLER; + failover->dirty_sampler |= (1<<unit); + failover->sw->bind_sampler_state(failover->sw, unit, + state->sw_state); + failover->hw->bind_sampler_state(failover->hw, unit, + state->hw_state); +} + +static void +failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) +{ + struct fo_state *state = (struct fo_state*)sampler; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_sampler_state(failover->sw, state->sw_state); + failover->hw->delete_sampler_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + + +static void +failover_set_sampler_texture(struct pipe_context *pipe, + unsigned unit, + struct pipe_texture *texture) +{ + struct failover_context *failover = failover_context(pipe); + + failover->texture[unit] = texture; + failover->dirty |= FO_NEW_TEXTURE; + failover->dirty_texture |= (1<<unit); + failover->sw->set_sampler_texture( failover->sw, unit, texture ); + failover->hw->set_sampler_texture( failover->hw, unit, texture ); +} + + +static void +failover_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->viewport = *viewport; + failover->dirty |= FO_NEW_VIEWPORT; + failover->sw->set_viewport_state( failover->sw, viewport ); + failover->hw->set_viewport_state( failover->hw, viewport ); +} + + +static void +failover_set_vertex_buffer(struct pipe_context *pipe, + unsigned unit, + const struct pipe_vertex_buffer *vertex_buffer) +{ + struct failover_context *failover = failover_context(pipe); + + failover->vertex_buffer[unit] = *vertex_buffer; + failover->dirty |= FO_NEW_VERTEX_BUFFER; + failover->dirty_vertex_buffer |= (1<<unit); + failover->sw->set_vertex_buffer( failover->sw, unit, vertex_buffer ); + failover->hw->set_vertex_buffer( failover->hw, unit, vertex_buffer ); +} + + +static void +failover_set_vertex_element(struct pipe_context *pipe, + unsigned unit, + const struct pipe_vertex_element *vertex_element) +{ + struct failover_context *failover = failover_context(pipe); + + failover->vertex_element[unit] = *vertex_element; + failover->dirty |= FO_NEW_VERTEX_ELEMENT; + failover->dirty_vertex_element |= (1<<unit); + failover->sw->set_vertex_element( failover->sw, unit, vertex_element ); + failover->hw->set_vertex_element( failover->hw, unit, vertex_element ); +} + +void +failover_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct failover_context *failover = failover_context(pipe); + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + failover->sw->set_constant_buffer(failover->sw, shader, index, buf); + failover->hw->set_constant_buffer(failover->hw, shader, index, buf); +} + + +void +failover_init_state_functions( struct failover_context *failover ) +{ + failover->pipe.create_blend_state = failover_create_blend_state; + failover->pipe.bind_blend_state = failover_bind_blend_state; + failover->pipe.delete_blend_state = failover_delete_blend_state; + failover->pipe.create_sampler_state = failover_create_sampler_state; + failover->pipe.bind_sampler_state = failover_bind_sampler_state; + failover->pipe.delete_sampler_state = failover_delete_sampler_state; + failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; + failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; + failover->pipe.delete_depth_stencil_alpha_state = failover_delete_depth_stencil_state; + failover->pipe.create_rasterizer_state = failover_create_rasterizer_state; + failover->pipe.bind_rasterizer_state = failover_bind_rasterizer_state; + failover->pipe.delete_rasterizer_state = failover_delete_rasterizer_state; + failover->pipe.create_fs_state = failover_create_fs_state; + failover->pipe.bind_fs_state = failover_bind_fs_state; + failover->pipe.delete_fs_state = failover_delete_fs_state; + failover->pipe.create_vs_state = failover_create_vs_state; + failover->pipe.bind_vs_state = failover_bind_vs_state; + failover->pipe.delete_vs_state = failover_delete_vs_state; + + failover->pipe.set_blend_color = failover_set_blend_color; + failover->pipe.set_clip_state = failover_set_clip_state; + failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; + failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; + failover->pipe.set_scissor_state = failover_set_scissor_state; + failover->pipe.set_sampler_texture = failover_set_sampler_texture; + failover->pipe.set_viewport_state = failover_set_viewport_state; + failover->pipe.set_vertex_buffer = failover_set_vertex_buffer; + failover->pipe.set_vertex_element = failover_set_vertex_element; + failover->pipe.set_constant_buffer = failover_set_constant_buffer; +} diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c new file mode 100644 index 00000000000..c663dd49476 --- /dev/null +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "fo_context.h" + +/* This looks like a lot of work at the moment - we're keeping a + * duplicate copy of the state up-to-date. + * + * This can change in two ways: + * - With constant state objects we would only need to save a pointer, + * not the whole object. + * - By adding a callback in the state tracker to re-emit state. The + * state tracker knows the current state already and can re-emit it + * without additional complexity. + * + * This works as a proof-of-concept, but a final version will have + * lower overheads. + */ + + +/* Bring the software pipe uptodate with current state. + * + * With constant state objects we would probably just send all state + * to both rasterizers all the time??? + */ +void +failover_state_emit( struct failover_context *failover ) +{ + unsigned i; + + if (failover->dirty & FO_NEW_BLEND) + failover->sw->bind_blend_state( failover->sw, + failover->blend->sw_state ); + + if (failover->dirty & FO_NEW_BLEND_COLOR) + failover->sw->set_blend_color( failover->sw, &failover->blend_color ); + + if (failover->dirty & FO_NEW_CLIP) + failover->sw->set_clip_state( failover->sw, &failover->clip ); + + if (failover->dirty & FO_NEW_DEPTH_STENCIL) + failover->sw->bind_depth_stencil_alpha_state( failover->sw, + failover->depth_stencil->sw_state ); + + if (failover->dirty & FO_NEW_FRAMEBUFFER) + failover->sw->set_framebuffer_state( failover->sw, &failover->framebuffer ); + + if (failover->dirty & FO_NEW_FRAGMENT_SHADER) + failover->sw->bind_fs_state( failover->sw, + failover->fragment_shader->sw_state ); + + if (failover->dirty & FO_NEW_VERTEX_SHADER) + failover->sw->bind_vs_state( failover->sw, + failover->vertex_shader->sw_state ); + + if (failover->dirty & FO_NEW_STIPPLE) + failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple ); + + if (failover->dirty & FO_NEW_RASTERIZER) + failover->sw->bind_rasterizer_state( failover->sw, + failover->rasterizer->sw_state ); + + if (failover->dirty & FO_NEW_SCISSOR) + failover->sw->set_scissor_state( failover->sw, &failover->scissor ); + + if (failover->dirty & FO_NEW_VIEWPORT) + failover->sw->set_viewport_state( failover->sw, &failover->viewport ); + + if (failover->dirty & FO_NEW_SAMPLER) { + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (failover->dirty_sampler & (1<<i)) { + failover->sw->bind_sampler_state( failover->sw, i, + failover->sampler[i]->sw_state ); + } + } + } + + if (failover->dirty & FO_NEW_TEXTURE) { + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (failover->dirty_texture & (1<<i)) { + failover->sw->set_sampler_texture( failover->sw, i, + failover->texture[i] ); + } + } + } + + if (failover->dirty & FO_NEW_VERTEX_BUFFER) { + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (failover->dirty_vertex_buffer & (1<<i)) { + failover->sw->set_vertex_buffer( failover->sw, i, + &failover->vertex_buffer[i] ); + } + } + } + + if (failover->dirty & FO_NEW_VERTEX_ELEMENT) { + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (failover->dirty_vertex_element & (1<<i)) { + failover->sw->set_vertex_element( failover->sw, i, + &failover->vertex_element[i] ); + } + } + } + + failover->dirty = 0; + failover->dirty_vertex_element = 0; + failover->dirty_vertex_buffer = 0; + failover->dirty_texture = 0; + failover->dirty_sampler = 0; +} diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h new file mode 100644 index 00000000000..a8ce997a1f6 --- /dev/null +++ b/src/gallium/drivers/failover/fo_winsys.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef FO_WINSYS_H +#define FO_WINSYS_H + + +/* This is the interface that failover requires any window system + * hosting it to implement. This is the only include file in failover + * which is public. + */ + + +struct pipe_context; + + +struct pipe_context *failover_create( struct pipe_context *hw, + struct pipe_context *sw ); + + +#endif /* FO_WINSYS_H */ diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile new file mode 100644 index 00000000000..2f91de3afc6 --- /dev/null +++ b/src/gallium/drivers/i915simple/Makefile @@ -0,0 +1,38 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = i915simple + +DRIVER_SOURCES = \ + i915_blit.c \ + i915_clear.c \ + i915_flush.c \ + i915_context.c \ + i915_context.c \ + i915_debug.c \ + i915_debug_fp.c \ + i915_state.c \ + i915_state_immediate.c \ + i915_state_dynamic.c \ + i915_state_derived.c \ + i915_state_emit.c \ + i915_state_sampler.c \ + i915_strings.c \ + i915_prim_emit.c \ + i915_prim_vbuf.c \ + i915_texture.c \ + i915_fpc_emit.c \ + i915_fpc_translate.c \ + i915_surface.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915simple/SConscript new file mode 100644 index 00000000000..f5fb96b995c --- /dev/null +++ b/src/gallium/drivers/i915simple/SConscript @@ -0,0 +1,29 @@ +Import('*') + +env = env.Clone() + +i915simple = env.ConvenienceLibrary( + target = 'i915simple', + source = [ + 'i915_blit.c', + 'i915_clear.c', + 'i915_context.c', + 'i915_debug.c', + 'i915_debug_fp.c', + 'i915_flush.c', + 'i915_fpc_emit.c', + 'i915_fpc_translate.c', + 'i915_prim_emit.c', + 'i915_prim_vbuf.c', + 'i915_state.c', + 'i915_state_derived.c', + 'i915_state_dynamic.c', + 'i915_state_emit.c', + 'i915_state_immediate.c', + 'i915_state_sampler.c', + 'i915_strings.c', + 'i915_surface.c', + 'i915_texture.c', + ]) + +Export('i915simple') diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h new file mode 100644 index 00000000000..fb88cd6db0d --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_BATCH_H +#define I915_BATCH_H + +#include "i915_winsys.h" +#include "i915_debug.h" + +#define BATCH_LOCALS + +#define BEGIN_BATCH( dwords, relocs ) \ + (i915->batch_start = i915->winsys->batch_start( i915->winsys, dwords, relocs )) + +#define OUT_BATCH( dword ) \ + i915->winsys->batch_dword( i915->winsys, dword ) + +#define OUT_RELOC( buf, flags, delta ) \ + i915->winsys->batch_reloc( i915->winsys, buf, flags, delta ) + +#define ADVANCE_BATCH() + +#define FLUSH_BATCH() do { \ + if (0) i915_dump_batchbuffer( i915 ); \ + i915->winsys->batch_flush( i915->winsys ); \ + i915->batch_start = NULL; \ + i915->hardware_dirty = ~0; \ +} while (0) + +#endif diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c new file mode 100644 index 00000000000..db4671ff553 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -0,0 +1,162 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_blit.h" +#include "i915_reg.h" +#include "i915_batch.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void +i915_fill_blit(struct i915_context *i915, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short x, short y, + short w, short h, + unsigned color) +{ + unsigned BR13, CMD; + BATCH_LOCALS; + + dst_pitch *= (short) cpp; + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = dst_pitch | (0xF0 << 16) | (1 << 24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = dst_pitch | (0xF0 << 16) | (1 << 24) | (1 << 25); + CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | + XY_COLOR_BLT_WRITE_RGB); + break; + default: + return; + } + +// DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", +// __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); + + + if (!BEGIN_BATCH(6, 1)) { + FLUSH_BATCH(); + assert(BEGIN_BATCH(6, 1)); + } + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + h) << 16) | (x + w)); + OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); + OUT_BATCH(color); + ADVANCE_BATCH(); +} + + +void +i915_copy_blit( struct i915_context *i915, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h ) +{ + unsigned CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + BATCH_LOCALS; + + + I915_DBG(i915, + "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + + src_pitch *= (short) cpp; + dst_pitch *= (short) cpp; + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = (((int) dst_pitch) & 0xffff) | + (0xCC << 16) | (1 << 24); + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 = + (((int) dst_pitch) & 0xffff) | + (0xCC << 16) | (1 << 24) | (1 << 25); + CMD = + (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + break; + default: + return; + } + + if (dst_y2 < dst_y || + dst_x2 < dst_x) { + return; + } + + /* Hardware can handle negative pitches but loses the ability to do + * proper overlapping blits in that case. We don't really have a + * need for either at this stage. + */ + assert (dst_pitch > 0 && src_pitch > 0); + + + if (!BEGIN_BATCH(8, 2)) { + FLUSH_BATCH(); + assert(BEGIN_BATCH(8, 2)); + } + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((dst_y << 16) | dst_x); + OUT_BATCH((dst_y2 << 16) | dst_x2); + OUT_RELOC(dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); + OUT_BATCH((src_y << 16) | src_x); + OUT_BATCH(((int) src_pitch & 0xffff)); + OUT_RELOC(src_buffer, I915_BUFFER_ACCESS_READ, src_offset); + ADVANCE_BATCH(); +} + + diff --git a/src/gallium/drivers/i915simple/i915_blit.h b/src/gallium/drivers/i915simple/i915_blit.h new file mode 100644 index 00000000000..6e5b44e1247 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_blit.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_BLIT_H +#define I915_BLIT_H + +#include "i915_context.h" + +extern void i915_copy_blit(struct i915_context *i915, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short srcx, short srcy, + short dstx, short dsty, + short w, short h ); + +extern void i915_fill_blit(struct i915_context *i915, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short x, short y, + short w, short h, unsigned color); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_clear.c b/src/gallium/drivers/i915simple/i915_clear.c new file mode 100644 index 00000000000..cde69daacc0 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_clear.c @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "i915_context.h" +#include "i915_state.h" + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + */ +void +i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c new file mode 100644 index 00000000000..497623a7006 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -0,0 +1,320 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_state.h" +#include "i915_batch.h" +#include "i915_texture.h" +#include "i915_reg.h" + +#include "pipe/draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + + +/** + * Query format support for creating a texture, drawing surface, etc. + * \param format the format to test + * \param type one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +i915_is_format_supported( struct pipe_context *pipe, + enum pipe_format format, uint type ) +{ + static const enum pipe_format tex_supported[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_U_L8, + PIPE_FORMAT_U_A8, + PIPE_FORMAT_U_I8, + PIPE_FORMAT_U_A8_L8, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format surface_supported[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + /*PIPE_FORMAT_R16G16B16A16_SNORM,*/ + PIPE_FORMAT_NONE /* list terminator */ + }; + const enum pipe_format *list; + uint i; + + switch (type) { + case PIPE_TEXTURE: + list = tex_supported; + break; + case PIPE_SURFACE: + list = surface_supported; + break; + default: + assert(0); + } + + for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { + if (list[i] == format) + return TRUE; + } + + return FALSE; +} + + +static int +i915_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +i915_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + + +static void i915_destroy( struct pipe_context *pipe ) +{ + struct i915_context *i915 = i915_context( pipe ); + + draw_destroy( i915->draw ); + + FREE( i915 ); +} + + + + +static boolean +i915_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct i915_context *i915 = i915_context( pipe ); + struct draw_context *draw = i915->draw; + unsigned i; + + if (i915->dirty) + i915_update_derived( i915 ); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (i915->vertex_buffer[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + i915->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + draw_set_mapped_constant_buffer(draw, + i915->current.constants[PIPE_SHADER_VERTEX]); + + /* draw! */ + draw_arrays(i915->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (i915->vertex_buffer[i].buffer) { + pipe->winsys->buffer_unmap(pipe->winsys, i915->vertex_buffer[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (indexBuffer) { + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + return TRUE; +} + + +static boolean i915_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return i915_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + +struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, + struct i915_winsys *i915_winsys, + unsigned pci_id ) +{ + struct i915_context *i915; + unsigned is_i945 = 0; + + switch (pci_id) { + case PCI_CHIP_I915_G: + case PCI_CHIP_I915_GM: + break; + + case PCI_CHIP_I945_G: + case PCI_CHIP_I945_GM: + case PCI_CHIP_I945_GME: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q33_G: + case PCI_CHIP_Q35_G: + is_i945 = 1; + break; + + default: + pipe_winsys->printf(pipe_winsys, + "%s: unknown pci id 0x%x, cannot create context\n", + __FUNCTION__, pci_id); + return NULL; + } + + i915 = CALLOC_STRUCT(i915_context); + if (i915 == NULL) + return NULL; + + i915->winsys = i915_winsys; + i915->pipe.winsys = pipe_winsys; + + i915->pipe.destroy = i915_destroy; + i915->pipe.is_format_supported = i915_is_format_supported; + i915->pipe.get_param = i915_get_param; + i915->pipe.get_paramf = i915_get_paramf; + + i915->pipe.clear = i915_clear; + + + i915->pipe.draw_arrays = i915_draw_arrays; + i915->pipe.draw_elements = i915_draw_elements; + + /* + * Create drawing context and plug our rendering stage into it. + */ + i915->draw = draw_create(); + assert(i915->draw); + if (GETENV("I915_VBUF")) { + draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); + } + else { + draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); + } + + i915_init_surface_functions(i915); + i915_init_state_functions(i915); + i915_init_flush_functions(i915); + i915_init_string_functions(i915); + + i915->pci_id = pci_id; + i915->flags.is_i945 = is_i945; + + i915->pipe.texture_create = i915_texture_create; + i915->pipe.texture_release = i915_texture_release; + + i915->dirty = ~0; + i915->hardware_dirty = ~0; + + /* Batch stream debugging is a bit hacked up at the moment: + */ + i915->batch_start = NULL; + + /* + * XXX we could plug GL selection/feedback into the drawing pipeline + * by specifying a different setup/render stage. + */ + + return &i915->pipe; +} + diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h new file mode 100644 index 00000000000..b4ea63c3e74 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -0,0 +1,304 @@ + /************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_CONTEXT_H +#define I915_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/draw/draw_vertex.h" + + +#define I915_TEX_UNITS 8 + +#define I915_DYNAMIC_MODES4 0 +#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ +#define I915_DYNAMIC_DEPTHSCALE_1 2 +#define I915_DYNAMIC_IAB 3 +#define I915_DYNAMIC_BC_0 4 /* just the header */ +#define I915_DYNAMIC_BC_1 5 +#define I915_DYNAMIC_BFO_0 6 +#define I915_DYNAMIC_BFO_1 7 +#define I915_DYNAMIC_STP_0 8 +#define I915_DYNAMIC_STP_1 9 +#define I915_DYNAMIC_SC_ENA_0 10 +#define I915_DYNAMIC_SC_RECT_0 11 +#define I915_DYNAMIC_SC_RECT_1 12 +#define I915_DYNAMIC_SC_RECT_2 13 +#define I915_MAX_DYNAMIC 14 + + +#define I915_IMMEDIATE_S0 0 +#define I915_IMMEDIATE_S1 1 +#define I915_IMMEDIATE_S2 2 +#define I915_IMMEDIATE_S3 3 +#define I915_IMMEDIATE_S4 4 +#define I915_IMMEDIATE_S5 5 +#define I915_IMMEDIATE_S6 6 +#define I915_IMMEDIATE_S7 7 +#define I915_MAX_IMMEDIATE 8 + +/* These must mach the order of LI0_STATE_* bits, as they will be used + * to generate hardware packets: + */ +#define I915_CACHE_STATIC 0 +#define I915_CACHE_DYNAMIC 1 /* handled specially */ +#define I915_CACHE_SAMPLER 2 +#define I915_CACHE_MAP 3 +#define I915_CACHE_PROGRAM 4 +#define I915_CACHE_CONSTANTS 5 +#define I915_MAX_CACHE 6 + +#define I915_MAX_CONSTANT 32 + + + +struct i915_cache_context; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. + */ +struct i915_state +{ + unsigned immediate[I915_MAX_IMMEDIATE]; + unsigned dynamic[I915_MAX_DYNAMIC]; + + float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; + /** number of constants passed in through a constant buffer */ + uint num_user_constants[PIPE_SHADER_TYPES]; + /** user constants, plus extra constants from shader translation */ + uint num_constants[PIPE_SHADER_TYPES]; + + uint *program; + uint program_len; + + /* texture sampler state */ + unsigned sampler[I915_TEX_UNITS][3]; + unsigned sampler_enable_flags; + unsigned sampler_enable_nr; + + /* texture image buffers */ + unsigned texbuffer[I915_TEX_UNITS][2]; + + /** Describes the current hardware vertex layout */ + struct vertex_info vertex_info; + + unsigned id; /* track lost context events */ +}; + +struct i915_blend_state { + unsigned iab; + unsigned modes4; + unsigned LIS5; + unsigned LIS6; +}; + +struct i915_depth_stencil_state { + unsigned stencil_modes4; + unsigned bfo[2]; + unsigned stencil_LIS5; + unsigned depth_LIS6; +}; + +struct i915_rasterizer_state { + int light_twoside : 1; + unsigned st; + enum interp_mode color_interp; + + unsigned LIS4; + unsigned LIS7; + unsigned sc[1]; + + const struct pipe_rasterizer_state *templ; + + union { float f; unsigned u; } ds[2]; +}; + +struct i915_sampler_state { + unsigned state[3]; + const struct pipe_sampler_state *templ; +}; + + +struct i915_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +struct i915_context +{ + struct pipe_context pipe; + struct i915_winsys *winsys; + struct draw_context *draw; + + /* The most recent drawing state as set by the driver: + */ + const struct i915_blend_state *blend; + const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct i915_depth_stencil_state *depth_stencil; + const struct i915_rasterizer_state *rasterizer; + const struct pipe_shader_state *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct i915_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + + unsigned dirty; + + unsigned *batch_start; + + /** Vertex buffer */ + struct pipe_buffer *vbo; + + struct i915_state current; + unsigned hardware_dirty; + + unsigned debug; + unsigned pci_id; + + struct { + unsigned is_i945:1; + } flags; +}; + +/* A flag for each state_tracker state object: + */ +#define I915_NEW_VIEWPORT 0x1 +#define I915_NEW_RASTERIZER 0x2 +#define I915_NEW_FS 0x4 +#define I915_NEW_BLEND 0x8 +#define I915_NEW_CLIP 0x10 +#define I915_NEW_SCISSOR 0x20 +#define I915_NEW_STIPPLE 0x40 +#define I915_NEW_FRAMEBUFFER 0x80 +#define I915_NEW_ALPHA_TEST 0x100 +#define I915_NEW_DEPTH_STENCIL 0x200 +#define I915_NEW_SAMPLER 0x400 +#define I915_NEW_TEXTURE 0x800 +#define I915_NEW_CONSTANTS 0x1000 +#define I915_NEW_VBO 0x2000 + + +/* Driver's internally generated state flags: + */ +#define I915_NEW_VERTEX_FORMAT 0x10000 + + +/* Dirty flags for hardware emit + */ +#define I915_HW_STATIC (1<<I915_CACHE_STATIC) +#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC) +#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER) +#define I915_HW_MAP (1<<I915_CACHE_MAP) +#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) +#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) +#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) +#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) + + +/*********************************************************************** + * i915_prim_emit.c: + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_prim_vbuf.c: + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_state_emit.c: + */ +void i915_emit_hardware_state(struct i915_context *i915 ); + + + +/*********************************************************************** + * i915_clear.c: + */ +void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + +/*********************************************************************** + * i915_surface.c: + */ +void i915_init_surface_functions( struct i915_context *i915 ); + +void i915_init_state_functions( struct i915_context *i915 ); +void i915_init_flush_functions( struct i915_context *i915 ); +void i915_init_string_functions( struct i915_context *i915 ); + + + +/*********************************************************************** + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct i915_context * +i915_context( struct pipe_context *pipe ) +{ + return (struct i915_context *)pipe; +} + + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c new file mode 100644 index 00000000000..94db44e1aaf --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -0,0 +1,901 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +//#include "imports.h" + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_debug.h" +#include "pipe/p_winsys.h" + + +static void +PRINTF( + struct debug_stream *stream, + const char *fmt, + ... ) +{ + va_list args; + char buffer[256]; + + va_start( args, fmt ); + vsprintf( buffer, fmt, args ); + stream->winsys->printf( stream->winsys, buffer ); + va_end( args ); +} + + +static boolean debug( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned i; + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return FALSE; + } + + if (stream->print_addresses) + PRINTF(stream, "%08x: ", stream->offset); + + + PRINTF(stream, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + +static const char *get_prim_name( unsigned val ) +{ + switch (val & PRIM3D_MASK) { + case PRIM3D_TRILIST: return "TRILIST"; break; + case PRIM3D_TRISTRIP: return "TRISTRIP"; break; + case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; break; + case PRIM3D_TRIFAN: return "TRIFAN"; break; + case PRIM3D_POLY: return "POLY"; break; + case PRIM3D_LINELIST: return "LINELIST"; break; + case PRIM3D_LINESTRIP: return "LINESTRIP"; break; + case PRIM3D_RECTLIST: return "RECTLIST"; break; + case PRIM3D_POINTLIST: return "POINTLIST"; break; + case PRIM3D_DIB: return "DIB"; break; + case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; break; + case PRIM3D_ZONE_INIT: return "ZONE_INIT"; break; + default: return "????"; break; + } +} + +static boolean debug_prim( struct debug_stream *stream, const char *name, + boolean dump_floats, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + unsigned i; + + + + PRINTF(stream, "%s %s (%d dwords):\n", name, prim, len); + PRINTF(stream, "\t0x%08x\n", ptr[0]); + for (i = 1; i < len; i++) { + if (dump_floats) + PRINTF(stream, "\t0x%08x // %f\n", ptr[i], *(float *)&ptr[i]); + else + PRINTF(stream, "\t0x%08x\n", ptr[i]); + } + + + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + + + +static boolean debug_program( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return FALSE; + } + + if (stream->print_addresses) + PRINTF(stream, "%08x: ", stream->offset); + + PRINTF(stream, "%s (%d dwords):\n", name, len); + i915_disassemble_program( stream, ptr, len ); + + stream->offset += len * sizeof(unsigned); + return TRUE; +} + + +static boolean debug_chain( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned old_offset = stream->offset + len * sizeof(unsigned); + unsigned i; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + + stream->offset = ptr[1] & ~0x3; + + if (stream->offset < old_offset) + PRINTF(stream, "\n... skipping backwards from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + else + PRINTF(stream, "\n... skipping from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + + + return TRUE; +} + + +static boolean debug_variable_length_prim( struct debug_stream *stream ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + unsigned i, len; + + ushort *idx = (ushort *)(ptr+1); + for (i = 0; idx[i] != 0xffff; i++) + ; + + len = 1+(i+2)/2; + + PRINTF(stream, "3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + return TRUE; +} + + +static void +BITS( + struct debug_stream *stream, + unsigned dw, + unsigned hi, + unsigned lo, + const char *fmt, + ... ) +{ + va_list args; + char buffer[256]; + unsigned himask = ~0UL >> (31 - (hi)); + + PRINTF(stream, "\t\t "); + + va_start( args, fmt ); + vsprintf( buffer, fmt, args ); + stream->winsys->printf( stream->winsys, buffer ); + va_end( args ); + + PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo)); +} + +#define MBZ( dw, hi, lo) do { \ + unsigned x = (dw) >> (lo); \ + unsigned lomask = (1 << (lo)) - 1; \ + unsigned himask; \ + himask = (1UL << (hi)) - 1; \ + assert ((x & himask & ~lomask) == 0); \ +} while (0) + +static void +FLAG( + struct debug_stream *stream, + unsigned dw, + unsigned bit, + const char *fmt, + ... ) +{ + if (((dw) >> (bit)) & 1) { + va_list args; + char buffer[256]; + + PRINTF(stream, "\t\t "); + + va_start( args, fmt ); + vsprintf( buffer, fmt, args ); + stream->winsys->printf( stream->winsys, buffer ); + va_end( args ); + + PRINTF(stream, "\n"); + } +} + +static boolean debug_load_immediate( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned bits = (ptr[0] >> 4) & 0xff; + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords, flags: %x):\n", name, len, bits); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + if (bits & (1<<0)) { + PRINTF(stream, "\t LIS0: 0x%08x\n", ptr[j]); + PRINTF(stream, "\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); + BITS(stream, ptr[j], 0, 0, "vb invalidate disable"); + j++; + } + if (bits & (1<<1)) { + PRINTF(stream, "\t LIS1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 29, 24, "vb dword width"); + BITS(stream, ptr[j], 21, 16, "vb dword pitch"); + BITS(stream, ptr[j], 15, 0, "vb max index"); + j++; + } + if (bits & (1<<2)) { + int i; + PRINTF(stream, "\t LIS2: 0x%08x\n", ptr[j]); + for (i = 0; i < 8; i++) { + unsigned tc = (ptr[j] >> (i * 4)) & 0xf; + if (tc != 0xf) + BITS(stream, tc, 3, 0, "tex coord %d", i); + } + j++; + } + if (bits & (1<<3)) { + PRINTF(stream, "\t LIS3: 0x%08x\n", ptr[j]); + j++; + } + if (bits & (1<<4)) { + PRINTF(stream, "\t LIS4: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 23, "point width"); + BITS(stream, ptr[j], 22, 19, "line width"); + FLAG(stream, ptr[j], 18, "alpha flatshade"); + FLAG(stream, ptr[j], 17, "fog flatshade"); + FLAG(stream, ptr[j], 16, "spec flatshade"); + FLAG(stream, ptr[j], 15, "rgb flatshade"); + BITS(stream, ptr[j], 14, 13, "cull mode"); + FLAG(stream, ptr[j], 12, "vfmt: point width"); + FLAG(stream, ptr[j], 11, "vfmt: specular/fog"); + FLAG(stream, ptr[j], 10, "vfmt: rgba"); + FLAG(stream, ptr[j], 9, "vfmt: depth offset"); + BITS(stream, ptr[j], 8, 6, "vfmt: position (2==xyzw)"); + FLAG(stream, ptr[j], 5, "force dflt diffuse"); + FLAG(stream, ptr[j], 4, "force dflt specular"); + FLAG(stream, ptr[j], 3, "local depth offset enable"); + FLAG(stream, ptr[j], 2, "vfmt: fp32 fog coord"); + FLAG(stream, ptr[j], 1, "sprite point"); + FLAG(stream, ptr[j], 0, "antialiasing"); + j++; + } + if (bits & (1<<5)) { + PRINTF(stream, "\t LIS5: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 28, "rgba write disables"); + FLAG(stream, ptr[j], 27, "force dflt point width"); + FLAG(stream, ptr[j], 26, "last pixel enable"); + FLAG(stream, ptr[j], 25, "global z offset enable"); + FLAG(stream, ptr[j], 24, "fog enable"); + BITS(stream, ptr[j], 23, 16, "stencil ref"); + BITS(stream, ptr[j], 15, 13, "stencil test"); + BITS(stream, ptr[j], 12, 10, "stencil fail op"); + BITS(stream, ptr[j], 9, 7, "stencil pass z fail op"); + BITS(stream, ptr[j], 6, 4, "stencil pass z pass op"); + FLAG(stream, ptr[j], 3, "stencil write enable"); + FLAG(stream, ptr[j], 2, "stencil test enable"); + FLAG(stream, ptr[j], 1, "color dither enable"); + FLAG(stream, ptr[j], 0, "logiop enable"); + j++; + } + if (bits & (1<<6)) { + PRINTF(stream, "\t LIS6: 0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "alpha test enable"); + BITS(stream, ptr[j], 30, 28, "alpha func"); + BITS(stream, ptr[j], 27, 20, "alpha ref"); + FLAG(stream, ptr[j], 19, "depth test enable"); + BITS(stream, ptr[j], 18, 16, "depth func"); + FLAG(stream, ptr[j], 15, "blend enable"); + BITS(stream, ptr[j], 14, 12, "blend func"); + BITS(stream, ptr[j], 11, 8, "blend src factor"); + BITS(stream, ptr[j], 7, 4, "blend dst factor"); + FLAG(stream, ptr[j], 3, "depth write enable"); + FLAG(stream, ptr[j], 2, "color write enable"); + BITS(stream, ptr[j], 1, 0, "provoking vertex"); + j++; + } + + + PRINTF(stream, "\n"); + + assert(j == len); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + + +static boolean debug_load_indirect( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned bits = (ptr[0] >> 8) & 0x3f; + unsigned i, j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + for (i = 0; i < 6; i++) { + if (bits & (1<<i)) { + switch (1<<(8+i)) { + case LI0_STATE_STATIC_INDIRECT: + PRINTF(stream, " STATIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(stream, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_DYNAMIC_INDIRECT: + PRINTF(stream, " DYNAMIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + break; + case LI0_STATE_SAMPLER: + PRINTF(stream, " SAMPLER: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(stream, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_MAP: + PRINTF(stream, " MAP: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(stream, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_PROGRAM: + PRINTF(stream, " PROGRAM: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(stream, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_CONSTANTS: + PRINTF(stream, " CONSTANTS: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(stream, " 0x%08x\n", ptr[j++]); + break; + default: + assert(0); + break; + } + } + } + + if (bits == 0) { + PRINTF(stream, "\t DUMMY: 0x%08x\n", ptr[j++]); + } + + PRINTF(stream, "\n"); + + + assert(j == len); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + +static void BR13( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + FLAG(stream, val, 30, "clipping enable"); + BITS(stream, val, 25, 24, "color depth (3==32bpp)"); + BITS(stream, val, 23, 16, "raster op"); + BITS(stream, val, 15, 0, "dest pitch"); +} + + +static void BR22( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "dest y1"); + BITS(stream, val, 15, 0, "dest x1"); +} + +static void BR23( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "dest y2"); + BITS(stream, val, 15, 0, "dest x2"); +} + +static void BR09( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- dest address\n", val); +} + +static void BR26( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "src y1"); + BITS(stream, val, 15, 0, "src x1"); +} + +static void BR11( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 15, 0, "src pitch"); +} + +static void BR12( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- src address\n", val); +} + +static void BR16( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- color\n", val); +} + +static boolean debug_copy_blit( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR22(stream, ptr[j++]); + BR23(stream, ptr[j++]); + BR09(stream, ptr[j++]); + BR26(stream, ptr[j++]); + BR11(stream, ptr[j++]); + BR12(stream, ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_color_blit( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR22(stream, ptr[j++]); + BR23(stream, ptr[j++]); + BR09(stream, ptr[j++]); + BR16(stream, ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_modes4( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 21, 18, "logicop func"); + FLAG(stream, ptr[j], 17, "stencil test mask modify-enable"); + FLAG(stream, ptr[j], 16, "stencil write mask modify-enable"); + BITS(stream, ptr[j], 15, 8, "stencil test mask"); + BITS(stream, ptr[j], 7, 0, "stencil write mask"); + j++; + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_map_state( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 15, 0, "map mask"); + j++; + } + + while (j < len) { + { + PRINTF(stream, "\t TMn.0: 0x%08x\n", ptr[j]); + PRINTF(stream, "\t map address: 0x%08x\n", (ptr[j] & ~0x3)); + FLAG(stream, ptr[j], 1, "vertical line stride"); + FLAG(stream, ptr[j], 0, "vertical line stride offset"); + j++; + } + + { + PRINTF(stream, "\t TMn.1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 21, "height"); + BITS(stream, ptr[j], 20, 10, "width"); + BITS(stream, ptr[j], 9, 7, "surface format"); + BITS(stream, ptr[j], 6, 3, "texel format"); + FLAG(stream, ptr[j], 2, "use fence regs"); + FLAG(stream, ptr[j], 1, "tiled surface"); + FLAG(stream, ptr[j], 0, "tile walk ymajor"); + j++; + } + { + PRINTF(stream, "\t TMn.2: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 21, "dword pitch"); + BITS(stream, ptr[j], 20, 15, "cube face enables"); + BITS(stream, ptr[j], 14, 9, "max lod"); + FLAG(stream, ptr[j], 8, "mip layout right"); + BITS(stream, ptr[j], 7, 0, "depth"); + j++; + } + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_sampler_state( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 15, 0, "sampler mask"); + j++; + } + + while (j < len) { + { + PRINTF(stream, "\t TSn.0: 0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "reverse gamma"); + FLAG(stream, ptr[j], 30, "planar to packed"); + FLAG(stream, ptr[j], 29, "yuv->rgb"); + BITS(stream, ptr[j], 28, 27, "chromakey index"); + BITS(stream, ptr[j], 26, 22, "base mip level"); + BITS(stream, ptr[j], 21, 20, "mip mode filter"); + BITS(stream, ptr[j], 19, 17, "mag mode filter"); + BITS(stream, ptr[j], 16, 14, "min mode filter"); + BITS(stream, ptr[j], 13, 5, "lod bias (s4.4)"); + FLAG(stream, ptr[j], 4, "shadow enable"); + FLAG(stream, ptr[j], 3, "max-aniso-4"); + BITS(stream, ptr[j], 2, 0, "shadow func"); + j++; + } + + { + PRINTF(stream, "\t TSn.1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 24, "min lod"); + MBZ( ptr[j], 23, 18 ); + FLAG(stream, ptr[j], 17, "kill pixel enable"); + FLAG(stream, ptr[j], 16, "keyed tex filter mode"); + FLAG(stream, ptr[j], 15, "chromakey enable"); + BITS(stream, ptr[j], 14, 12, "tcx wrap mode"); + BITS(stream, ptr[j], 11, 9, "tcy wrap mode"); + BITS(stream, ptr[j], 8, 6, "tcz wrap mode"); + FLAG(stream, ptr[j], 5, "normalized coords"); + BITS(stream, ptr[j], 4, 1, "map (surface) index"); + FLAG(stream, ptr[j], 0, "EAST deinterlacer enable"); + j++; + } + { + PRINTF(stream, "\t TSn.2: 0x%08x (default color)\n", ptr[j]); + j++; + } + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_dest_vars( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "early classic ztest"); + FLAG(stream, ptr[j], 30, "opengl tex default color"); + FLAG(stream, ptr[j], 29, "bypass iz"); + FLAG(stream, ptr[j], 28, "lod preclamp"); + BITS(stream, ptr[j], 27, 26, "dither pattern"); + FLAG(stream, ptr[j], 25, "linear gamma blend"); + FLAG(stream, ptr[j], 24, "debug dither"); + BITS(stream, ptr[j], 23, 20, "dstorg x"); + BITS(stream, ptr[j], 19, 16, "dstorg y"); + MBZ (ptr[j], 15, 15 ); + BITS(stream, ptr[j], 14, 12, "422 write select"); + BITS(stream, ptr[j], 11, 8, "cbuf format"); + BITS(stream, ptr[j], 3, 2, "zbuf format"); + FLAG(stream, ptr[j], 1, "vert line stride"); + FLAG(stream, ptr[j], 1, "vert line stride offset"); + j++; + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_buf_info( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 28, 28, "aux buffer id"); + BITS(stream, ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); + FLAG(stream, ptr[j], 23, "use fence regs"); + FLAG(stream, ptr[j], 22, "tiled surface"); + FLAG(stream, ptr[j], 21, "tile walk ymajor"); + MBZ (ptr[j], 20, 14); + BITS(stream, ptr[j], 13, 2, "dword pitch"); + MBZ (ptr[j], 2, 0); + j++; + } + + PRINTF(stream, "\t0x%08x -- buffer base address\n", ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean i915_debug_packet( struct debug_stream *stream ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned cmd = *ptr; + + switch (((cmd >> 29) & 0x7)) { + case 0x0: + switch ((cmd >> 23) & 0x3f) { + case 0x0: + return debug(stream, "MI_NOOP", 1); + case 0x3: + return debug(stream, "MI_WAIT_FOR_EVENT", 1); + case 0x4: + return debug(stream, "MI_FLUSH", 1); + case 0xA: + debug(stream, "MI_BATCH_BUFFER_END", 1); + return FALSE; + case 0x22: + return debug(stream, "MI_LOAD_REGISTER_IMM", 3); + case 0x31: + return debug_chain(stream, "MI_BATCH_BUFFER_START", 2); + default: + (void)debug(stream, "UNKNOWN 0x0 case!", 1); + assert(0); + break; + } + break; + case 0x1: + (void) debug(stream, "UNKNOWN 0x1 case!", 1); + assert(0); + break; + case 0x2: + switch ((cmd >> 22) & 0xff) { + case 0x50: + return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2); + case 0x53: + return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2); + default: + return debug(stream, "blit command", (cmd & 0xff) + 2); + } + break; + case 0x3: + switch ((cmd >> 24) & 0x1f) { + case 0x6: + return debug(stream, "3DSTATE_ANTI_ALIASING", 1); + case 0x7: + return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1); + case 0x8: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2); + case 0x9: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1); + case 0xb: + return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1); + case 0xc: + return debug(stream, "3DSTATE_MODES5", 1); + case 0xd: + return debug_modes4(stream, "3DSTATE_MODES4", 1); + case 0x15: + return debug(stream, "3DSTATE_FOG_COLOR", 1); + case 0x16: + return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1); + case 0x1c: + /* 3DState16NP */ + switch((cmd >> 19) & 0x1f) { + case 0x10: + return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1); + case 0x11: + return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1); + default: + (void) debug(stream, "UNKNOWN 0x1c case!", 1); + assert(0); + break; + } + break; + case 0x1d: + /* 3DStateMW */ + switch ((cmd >> 16) & 0xff) { + case 0x0: + return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2); + case 0x1: + return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2); + case 0x4: + return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2); + case 0x5: + return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2); + case 0x6: + return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2); + case 0x7: + return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2); + case 0x80: + return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2); + case 0x81: + return debug(stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2); + case 0x83: + return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2); + case 0x85: + return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2); + case 0x88: + return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2); + case 0x89: + return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2); + case 0x8e: + return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2); + case 0x97: + return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2); + case 0x98: + return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2); + case 0x99: + return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2); + case 0x9a: + return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2); + case 0x9c: + return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2); + default: + assert(0); + return 0; + } + break; + case 0x1e: + if (cmd & (1 << 23)) + return debug(stream, "???", (cmd & 0xffff) + 1); + else + return debug(stream, "", 1); + break; + case 0x1f: + if ((cmd & (1 << 23)) == 0) + return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2); + else if (cmd & (1 << 17)) + { + if ((cmd & 0xffff) == 0) + return debug_variable_length_prim(stream); + else + return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1); + } + else + return debug_prim(stream, "3DPRIM (indirect sequential)", 0, 2); + break; + default: + return debug(stream, "", 0); + } + default: + assert(0); + return 0; + } + + assert(0); + return 0; +} + + + +void +i915_dump_batchbuffer( struct i915_context *i915 ) +{ + struct debug_stream stream; + unsigned *start = i915->batch_start; + unsigned *end = i915->winsys->batch_start( i915->winsys, 0, 0 ); + unsigned long bytes = (unsigned long) (end - start) * 4; + boolean done = FALSE; + + stream.offset = 0; + stream.ptr = (char *)start; + stream.print_addresses = 0; + stream.winsys = i915->pipe.winsys; + + if (!start || !end) { + stream.winsys->printf( stream.winsys, "\n\nBATCH: ???\n"); + return; + } + + stream.winsys->printf( stream.winsys, "\n\nBATCH: (%d)\n", bytes / 4); + + while (!done && + stream.offset < bytes) + { + if (!i915_debug_packet( &stream )) + break; + + assert(stream.offset <= bytes && + stream.offset >= 0); + } + + stream.winsys->printf( stream.winsys, "END-BATCH\n\n\n"); +} + + diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h new file mode 100644 index 00000000000..0bcd0942334 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug.h @@ -0,0 +1,117 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef I915_DEBUG_H +#define I915_DEBUG_H + +#include <stdarg.h> + +struct i915_context; + +struct debug_stream +{ + unsigned offset; /* current gtt offset */ + char *ptr; /* pointer to gtt offset zero */ + char *end; /* pointer to gtt offset zero */ + unsigned print_addresses; + struct pipe_winsys *winsys; +}; + + +/* Internal functions + */ +void i915_disassemble_program(struct debug_stream *stream, + const unsigned *program, unsigned sz); + +void i915_print_ureg(const char *msg, unsigned ureg); + + +#define DEBUG_BATCH 0x1 +#define DEBUG_BLIT 0x2 +#define DEBUG_BUFFER 0x4 +#define DEBUG_CONSTANTS 0x8 +#define DEBUG_CONTEXT 0x10 +#define DEBUG_DRAW 0x20 +#define DEBUG_DYNAMIC 0x40 +#define DEBUG_FLUSH 0x80 +#define DEBUG_MAP 0x100 +#define DEBUG_PROGRAM 0x200 +#define DEBUG_REGIONS 0x400 +#define DEBUG_SAMPLER 0x800 +#define DEBUG_STATIC 0x1000 +#define DEBUG_SURFACE 0x2000 +#define DEBUG_WINSYS 0x4000 + +#include "pipe/p_compiler.h" + +#if defined(DEBUG) && defined(FILE_DEBUG_FLAG) + +#include "pipe/p_winsys.h" + +static INLINE void +I915_DBG( + struct i915_context *i915, + const char *fmt, + ... ) +{ + if ((i915)->debug & FILE_DEBUG_FLAG) { + va_list args; + char buffer[256]; + + va_start( args, fmt ); + vsprintf( buffer, fmt, args ); + i915->pipe.winsys->printf( i915->pipe.winsys, buffer ); + va_end( args ); + } +} + +#else + +static INLINE void +I915_DBG( + struct i915_context *i915, + const char *fmt, + ... ) +{ + (void) i915; + (void) fmt; +} + +#endif + + +void i915_dump_batchbuffer( struct i915_context *i915 ); + + + +void i915_debug_init( struct i915_context *i915 ); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c new file mode 100644 index 00000000000..ebfdb3d93c5 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -0,0 +1,366 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + + +static void +PRINTF( + struct debug_stream *stream, + const char *fmt, + ... ) +{ + va_list args; + char buffer[256]; + + va_start( args, fmt ); + vsprintf( buffer, fmt, args ); + stream->winsys->printf( stream->winsys, buffer ); + va_end( args ); +} + + +static const char *opcodes[0x20] = { + "NOP", + "ADD", + "MOV", + "MUL", + "MAD", + "DP2ADD", + "DP3", + "DP4", + "FRC", + "RCP", + "RSQ", + "EXP", + "LOG", + "CMP", + "MIN", + "MAX", + "FLR", + "MOD", + "TRC", + "SGE", + "SLT", + "TEXLD", + "TEXLDP", + "TEXLDB", + "TEXKILL", + "DCL", + "0x1a", + "0x1b", + "0x1c", + "0x1d", + "0x1e", + "0x1f", +}; + + +static const int args[0x20] = { + 0, /* 0 nop */ + 2, /* 1 add */ + 1, /* 2 mov */ + 2, /* 3 m ul */ + 3, /* 4 mad */ + 3, /* 5 dp2add */ + 2, /* 6 dp3 */ + 2, /* 7 dp4 */ + 1, /* 8 frc */ + 1, /* 9 rcp */ + 1, /* a rsq */ + 1, /* b exp */ + 1, /* c log */ + 3, /* d cmp */ + 2, /* e min */ + 2, /* f max */ + 1, /* 10 flr */ + 1, /* 11 mod */ + 1, /* 12 trc */ + 2, /* 13 sge */ + 2, /* 14 slt */ + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; + + +static const char *regname[0x8] = { + "R", + "T", + "CONST", + "S", + "OC", + "OD", + "U", + "UNKNOWN", +}; + +static void +print_reg_type_nr(struct debug_stream *stream, unsigned type, unsigned nr) +{ + switch (type) { + case REG_TYPE_T: + switch (nr) { + case T_DIFFUSE: + PRINTF(stream, "T_DIFFUSE"); + return; + case T_SPECULAR: + PRINTF(stream, "T_SPECULAR"); + return; + case T_FOG_W: + PRINTF(stream, "T_FOG_W"); + return; + default: + PRINTF(stream, "T_TEX%d", nr); + return; + } + case REG_TYPE_OC: + if (nr == 0) { + PRINTF(stream, "oC"); + return; + } + break; + case REG_TYPE_OD: + if (nr == 0) { + PRINTF(stream, "oD"); + return; + } + break; + default: + break; + } + + PRINTF(stream, "%s[%d]", regname[type], nr); +} + +#define REG_SWIZZLE_MASK 0x7777 +#define REG_NEGATE_MASK 0x8888 + +#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \ + (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \ + (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \ + (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + + +static void +print_reg_neg_swizzle(struct debug_stream *stream, unsigned reg) +{ + int i; + + if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && + (reg & REG_NEGATE_MASK) == 0) + return; + + PRINTF(stream, "."); + + for (i = 3; i >= 0; i--) { + if (reg & (1 << ((i * 4) + 3))) + PRINTF(stream, "-"); + + switch ((reg >> (i * 4)) & 0x7) { + case 0: + PRINTF(stream, "x"); + break; + case 1: + PRINTF(stream, "y"); + break; + case 2: + PRINTF(stream, "z"); + break; + case 3: + PRINTF(stream, "w"); + break; + case 4: + PRINTF(stream, "0"); + break; + case 5: + PRINTF(stream, "1"); + break; + default: + PRINTF(stream, "?"); + break; + } + } +} + + +static void +print_src_reg(struct debug_stream *stream, unsigned dword) +{ + unsigned nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; + unsigned type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(stream, type, nr); + print_reg_neg_swizzle(stream, dword); +} + + +static void +print_dest_reg(struct debug_stream *stream, unsigned dword) +{ + unsigned nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; + unsigned type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(stream, type, nr); + if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) + return; + PRINTF(stream, "."); + if (dword & A0_DEST_CHANNEL_X) + PRINTF(stream, "x"); + if (dword & A0_DEST_CHANNEL_Y) + PRINTF(stream, "y"); + if (dword & A0_DEST_CHANNEL_Z) + PRINTF(stream, "z"); + if (dword & A0_DEST_CHANNEL_W) + PRINTF(stream, "w"); +} + + +#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) +#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) +#define GET_SRC2_REG(r) (r) + + +static void +print_arith_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + if (opcode != A0_NOP) { + print_dest_reg(stream, program[0]); + if (program[0] & A0_DEST_SATURATE) + PRINTF(stream, " = SATURATE "); + else + PRINTF(stream, " = "); + } + + PRINTF(stream, "%s ", opcodes[opcode]); + + print_src_reg(stream, GET_SRC0_REG(program[0], program[1])); + if (args[opcode] == 1) { + PRINTF(stream, "\n"); + return; + } + + PRINTF(stream, ", "); + print_src_reg(stream, GET_SRC1_REG(program[1], program[2])); + if (args[opcode] == 2) { + PRINTF(stream, "\n"); + return; + } + + PRINTF(stream, ", "); + print_src_reg(stream, GET_SRC2_REG(program[2])); + PRINTF(stream, "\n"); + return; +} + + +static void +print_tex_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + print_dest_reg(stream, program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(stream, " = "); + + PRINTF(stream, "%s ", opcodes[opcode]); + + PRINTF(stream, "S[%d],", program[0] & T0_SAMPLER_NR_MASK); + + print_reg_type_nr(stream, + (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF(stream, "\n"); +} + +static void +print_texkil_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + PRINTF(stream, "TEXKIL "); + + print_reg_type_nr(stream, + (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF(stream, "\n"); +} + +static void +print_dcl_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + PRINTF(stream, "%s ", opcodes[opcode]); + print_dest_reg(stream, + program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(stream, "\n"); +} + + +void +i915_disassemble_program(struct debug_stream *stream, + const unsigned * program, unsigned sz) +{ + unsigned size = program[0] & 0x1ff; + unsigned i; + + PRINTF(stream, "\t\tBEGIN\n"); + + assert(size + 2 == sz); + + program++; + for (i = 1; i < sz; i += 3, program += 3) { + unsigned opcode = program[0] & (0x1f << 24); + + PRINTF(stream, "\t\t"); + + if ((int) opcode >= A0_NOP && opcode <= A0_SLT) + print_arith_op(stream, opcode >> 24, program); + else if (opcode >= T0_TEXLD && opcode < T0_TEXKILL) + print_tex_op(stream, opcode >> 24, program); + else if (opcode == T0_TEXKILL) + print_texkil_op(stream, opcode >> 24, program); + else if (opcode == D0_DCL) + print_dcl_op(stream, opcode >> 24, program); + else + PRINTF(stream, "Unknown opcode 0x%x\n", opcode); + } + + PRINTF(stream, "\t\tEND\n\n"); +} + + diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c new file mode 100644 index 00000000000..3c2069b8273 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -0,0 +1,81 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_batch.h" + + +/** + * In future we may want a fence-like interface instead of finish. + */ +static void i915_flush( struct pipe_context *pipe, + unsigned flags ) +{ + struct i915_context *i915 = i915_context(pipe); + + /* Do we need to emit an MI_FLUSH command to flush the hardware + * caches? + */ + if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { + unsigned flush = MI_FLUSH; + + if (!(flags & PIPE_FLUSH_RENDER_CACHE)) + flush |= INHIBIT_FLUSH_RENDER_CACHE; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) + flush |= FLUSH_MAP_CACHE; + + if (!BEGIN_BATCH(1, 0)) { + FLUSH_BATCH(); + assert(BEGIN_BATCH(1, 0)); + } + OUT_BATCH( flush ); + ADVANCE_BATCH(); + } + + /* If there are no flags, just flush pending commands to hardware: + */ + FLUSH_BATCH(); + + if (flags & PIPE_FLUSH_WAIT) { + i915->winsys->batch_finish(i915->winsys); + } +} + + + +void i915_init_flush_functions( struct i915_context *i915 ) +{ + i915->pipe.flush = i915_flush; +} diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h new file mode 100644 index 00000000000..8c7b68aefb5 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -0,0 +1,213 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_FPC_H +#define I915_FPC_H + +#include "pipe/p_util.h" + +#include "i915_context.h" +#include "i915_reg.h" + + + +#define I915_PROGRAM_SIZE 192 + + + +/** + * Program translation state + */ +struct i915_fp_compile { + const struct pipe_shader_state *shader; + + struct vertex_info *vertex_info; + + uint declarations[I915_PROGRAM_SIZE]; + uint program[I915_PROGRAM_SIZE]; + + uint input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + uint input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + + uint output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + uint output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + + /** points into the i915->current.constants array: */ + float (*constants)[4]; + uint num_constants; + uint constant_flags[I915_MAX_CONSTANT]; /**< status of each constant */ + + uint *csr; /**< Cursor, points into program. */ + + uint *decl; /**< Cursor, points into declarations. */ + + uint decl_s; /**< flags for which s regs need to be decl'd */ + uint decl_t; /**< flags for which t regs need to be decl'd */ + + uint temp_flag; /**< Tracks temporary regs which are in use */ + uint utemp_flag; /**< Tracks TYPE_U temporary regs which are in use */ + + uint nr_tex_indirect; + uint nr_tex_insn; + uint nr_alu_insn; + uint nr_decl_insn; + + boolean error; /**< Set if i915_program_error() is called */ + uint wpos_tex; + uint NumNativeInstructions; + uint NumNativeAluInstructions; + uint NumNativeTexInstructions; + uint NumNativeTexIndirections; +}; + + +/* Having zero and one in here makes the definition of swizzle a lot + * easier. + */ +#define UREG_TYPE_SHIFT 29 +#define UREG_NR_SHIFT 24 +#define UREG_CHANNEL_X_NEGATE_SHIFT 23 +#define UREG_CHANNEL_X_SHIFT 20 +#define UREG_CHANNEL_Y_NEGATE_SHIFT 19 +#define UREG_CHANNEL_Y_SHIFT 16 +#define UREG_CHANNEL_Z_NEGATE_SHIFT 15 +#define UREG_CHANNEL_Z_SHIFT 12 +#define UREG_CHANNEL_W_NEGATE_SHIFT 11 +#define UREG_CHANNEL_W_SHIFT 8 +#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5 +#define UREG_CHANNEL_ZERO_SHIFT 4 +#define UREG_CHANNEL_ONE_NEGATE_MBZ 1 +#define UREG_CHANNEL_ONE_SHIFT 0 + +#define UREG_BAD 0xffffffff /* not a valid ureg */ + +#define X SRC_X +#define Y SRC_Y +#define Z SRC_Z +#define W SRC_W +#define ZERO SRC_ZERO +#define ONE SRC_ONE + +/* Construct a ureg: + */ +#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \ + ((nr) << UREG_NR_SHIFT) | \ + (X << UREG_CHANNEL_X_SHIFT) | \ + (Y << UREG_CHANNEL_Y_SHIFT) | \ + (Z << UREG_CHANNEL_Z_SHIFT) | \ + (W << UREG_CHANNEL_W_SHIFT) | \ + (ZERO << UREG_CHANNEL_ZERO_SHIFT) | \ + (ONE << UREG_CHANNEL_ONE_SHIFT)) + +#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & (0xf<<20)) +#define CHANNEL_SRC( src, channel ) (src>>(channel*4)) + +#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)®_TYPE_MASK) +#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)®_NR_MASK) + + + +#define UREG_XYZW_CHANNEL_MASK 0x00ffff00 + +/* One neat thing about the UREG representation: + */ +static INLINE int +swizzle(int reg, uint x, uint y, uint z, uint w) +{ + assert(x <= SRC_ONE); + assert(y <= SRC_ONE); + assert(z <= SRC_ONE); + assert(w <= SRC_ONE); + return ((reg & ~UREG_XYZW_CHANNEL_MASK) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3)); +} + + + +/*********************************************************************** + * Public interface for the compiler + */ +extern void i915_translate_fragment_program( struct i915_context *i915 ); + + + +extern uint i915_get_temp(struct i915_fp_compile *p); +extern uint i915_get_utemp(struct i915_fp_compile *p); +extern void i915_release_utemps(struct i915_fp_compile *p); + + +extern uint i915_emit_texld(struct i915_fp_compile *p, + uint dest, + uint destmask, + uint sampler, uint coord, uint op); + +extern uint i915_emit_arith(struct i915_fp_compile *p, + uint op, + uint dest, + uint mask, + uint saturate, + uint src0, uint src1, uint src2); + +extern uint i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags); + + +extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0); + +extern uint i915_emit_const2f(struct i915_fp_compile *p, + float c0, float c1); + +extern uint i915_emit_const4fv(struct i915_fp_compile *p, + const float * c); + +extern uint i915_emit_const4f(struct i915_fp_compile *p, + float c0, float c1, + float c2, float c3); + + +/*====================================================================== + * i915_fpc_debug.c + */ +extern void i915_disassemble_program(const uint * program, uint sz); + + +/*====================================================================== + * i915_fpc_translate.c + */ + +extern void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...); + +extern void +i915_translate_fragment_program(struct i915_context *i915); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c new file mode 100644 index 00000000000..74924ff0a1d --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -0,0 +1,375 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + + +#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) +#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT) +#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT) +#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT) +#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT) + +/* These are special, and don't have swizzle/negate bits. + */ +#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT) +#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \ + (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT)) + + +/* Macros for translating UREG's into the various register fields used + * by the I915 programmable unit. + */ +#define UREG_A0_DEST_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT) +#define UREG_A0_SRC0_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT) +#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) +#define UREG_A1_SRC1_SHIFT_LEFT (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT) +#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) +#define UREG_A2_SRC2_SHIFT_LEFT (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT) + +#define UREG_MASK 0xffffff00 +#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \ + (REG_NR_MASK << UREG_NR_SHIFT)) + + +#define I915_CONSTFLAG_PARAM 0x1f + +uint +i915_get_temp(struct i915_fp_compile *p) +{ + int bit = ffs(~p->temp_flag); + if (!bit) { + i915_program_error(p, "i915_get_temp: out of temporaries\n"); + return 0; + } + + p->temp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_R, (bit - 1)); +} + + +uint +i915_get_utemp(struct i915_fp_compile * p) +{ + int bit = ffs(~p->utemp_flag); + if (!bit) { + i915_program_error(p, "i915_get_utemp: out of temporaries\n"); + return 0; + } + + p->utemp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_U, (bit - 1)); +} + +void +i915_release_utemps(struct i915_fp_compile *p) +{ + p->utemp_flag = ~0x7; +} + + +uint +i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags) +{ + uint reg = UREG(type, nr); + + if (type == REG_TYPE_T) { + if (p->decl_t & (1 << nr)) + return reg; + + p->decl_t |= (1 << nr); + } + else if (type == REG_TYPE_S) { + if (p->decl_s & (1 << nr)) + return reg; + + p->decl_s |= (1 << nr); + } + else + return reg; + + *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); + *(p->decl++) = D1_MBZ; + *(p->decl++) = D2_MBZ; + + p->nr_decl_insn++; + return reg; +} + +uint +i915_emit_arith(struct i915_fp_compile * p, + uint op, + uint dest, + uint mask, + uint saturate, uint src0, uint src1, uint src2) +{ + uint c[3]; + uint nr_const = 0; + + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); + assert(dest); + + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) + c[nr_const++] = 0; + if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) + c[nr_const++] = 1; + if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) + c[nr_const++] = 2; + + /* Recursively call this function to MOV additional const values + * into temporary registers. Use utemp registers for this - + * currently shouldn't be possible to run out, but keep an eye on + * this. + */ + if (nr_const > 1) { + uint s[3], first, i, old_utemp_flag; + + s[0] = src0; + s[1] = src1; + s[2] = src2; + old_utemp_flag = p->utemp_flag; + + first = GET_UREG_NR(s[c[0]]); + for (i = 1; i < nr_const; i++) { + if (GET_UREG_NR(s[c[i]]) != first) { + uint tmp = i915_get_utemp(p); + + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + s[c[i]], 0, 0); + s[c[i]] = tmp; + } + } + + src0 = s[0]; + src1 = s[1]; + src2 = s[2]; + p->utemp_flag = old_utemp_flag; /* restore */ + } + + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); + *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); + *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + + p->nr_alu_insn++; + return dest; +} + +uint i915_emit_texld( struct i915_fp_compile *p, + uint dest, + uint destmask, + uint sampler, + uint coord, + uint op ) +{ + uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + if (coord != k) { + /* No real way to work around this in the general case - need to + * allocate and declare a new temporary register (a utemp won't + * do). Will fallback for now. + */ + i915_program_error(p, "Can't (yet) swizzle TEX arguments"); + assert(0); + return 0; + } + + /* Don't worry about saturate as we only support + */ + if (destmask != A0_DEST_CHANNEL_ALL) { + uint tmp = i915_get_utemp(p); + i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op ); + i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); + return dest; + } + else { + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + + if (GET_UREG_TYPE(coord) != REG_TYPE_T) { + p->nr_tex_indirect++; + } + + *(p->csr++) = (op | + T0_DEST( dest ) | + T0_SAMPLER( sampler )); + + *(p->csr++) = T1_ADDRESS_REG( coord ); + *(p->csr++) = T2_MBZ; + + p->nr_tex_insn++; + return dest; + } +} + + +uint +i915_emit_const1f(struct i915_fp_compile * p, float c0) +{ + unsigned reg, idx; + + if (c0 == 0.0) + return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); + if (c0 == 1.0) + return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + continue; + for (idx = 0; idx < 4; idx++) { + if (!(p->constant_flags[reg] & (1 << idx)) || + p->constants[reg][idx] == c0) { + p->constants[reg][idx] = c0; + p->constant_flags[reg] |= 1 << idx; + if (reg + 1 > p->num_constants) + p->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); + } + } + } + + i915_program_error(p, "i915_emit_const1f: out of constants\n"); + return 0; +} + +uint +i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) +{ + unsigned reg, idx; + + if (c0 == 0.0) + return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); + if (c0 == 1.0) + return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); + + if (c1 == 0.0) + return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); + if (c1 == 1.0) + return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (p->constant_flags[reg] == 0xf || + p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + continue; + for (idx = 0; idx < 3; idx++) { + if (!(p->constant_flags[reg] & (3 << idx))) { + p->constants[reg][idx + 0] = c0; + p->constants[reg][idx + 1] = c1; + p->constant_flags[reg] |= 3 << idx; + if (reg + 1 > p->num_constants) + p->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); + } + } + } + + i915_program_error(p, "i915_emit_const2f: out of constants\n"); + return 0; +} + + + +uint +i915_emit_const4f(struct i915_fp_compile * p, + float c0, float c1, float c2, float c3) +{ + unsigned reg; + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (p->constant_flags[reg] == 0xf && + p->constants[reg][0] == c0 && + p->constants[reg][1] == c1 && + p->constants[reg][2] == c2 && + p->constants[reg][3] == c3) { + return UREG(REG_TYPE_CONST, reg); + } + else if (p->constant_flags[reg] == 0) { + + p->constants[reg][0] = c0; + p->constants[reg][1] = c1; + p->constants[reg][2] = c2; + p->constants[reg][3] = c3; + p->constant_flags[reg] = 0xf; + if (reg + 1 > p->num_constants) + p->num_constants = reg + 1; + return UREG(REG_TYPE_CONST, reg); + } + } + + i915_program_error(p, "i915_emit_const4f: out of constants\n"); + return 0; +} + + +uint +i915_emit_const4fv(struct i915_fp_compile * p, const float * c) +{ + return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); +} + + +#if 00000/*UNUSED*/ +/* Reserve a slot in the constant file for a Mesa state parameter. + * These will later need to be tracked on statechanges, but that is + * done elsewhere. + */ +uint +i915_emit_param4fv(struct i915_fp_compile * p, const float * values) +{ + struct i915_fragment_program *fp = p->fp; + int i; + + for (i = 0; i < fp->nr_params; i++) { + if (fp->param[i].values == values) + return UREG(REG_TYPE_CONST, fp->param[i].reg); + } + + if (p->constants->nr_constants == I915_MAX_CONSTANT || + fp->nr_params == I915_MAX_CONSTANT) { + i915_program_error(p, "i915_emit_param4fv: out of constants\n"); + return 0; + } + + { + int reg = p->constants->nr_constants++; + int i = fp->nr_params++; + + assert (p->constant_flags[reg] == 0); + p->constant_flags[reg] = I915_CONSTFLAG_PARAM; + + fp->param[i].values = values; + fp->param[i].reg = reg; + + return UREG(REG_TYPE_CONST, reg); + } +} +#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c new file mode 100644 index 00000000000..868f0c7e046 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -0,0 +1,1135 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdarg.h> + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + +#include "pipe/draw/draw_vertex.h" + + +/** + * Simple pass-through fragment shader to use when we don't have + * a real shader (or it fails to compile for some reason). + */ +static unsigned passthrough[] = +{ + _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), + + /* declare input color: + */ + (D0_DCL | + (REG_TYPE_T << D0_TYPE_SHIFT) | + (T_DIFFUSE << D0_NR_SHIFT) | + D0_CHANNEL_ALL), + 0, + 0, + + /* move to output color: + */ + (A0_MOV | + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | + A0_DEST_CHANNEL_ALL | + (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | + (T_DIFFUSE << A0_SRC0_NR_SHIFT)), + 0x01230000, /* .xyzw */ + 0 +}; + + +/* 1, -1/3!, 1/5!, -1/7! */ +static const float sin_constants[4] = { 1.0, + -1.0f / (3 * 2 * 1), + 1.0f / (5 * 4 * 3 * 2 * 1), + -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; + +/* 1, -1/2!, 1/4!, -1/6! */ +static const float cos_constants[4] = { 1.0, + -1.0f / (2 * 1), + 1.0f / (4 * 3 * 2 * 1), + -1.0f / (6 * 5 * 4 * 3 * 2 * 1) +}; + + + +/** + * component-wise negation of ureg + */ +static INLINE int +negate(int reg, int x, int y, int z, int w) +{ + /* Another neat thing about the UREG representation */ + return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | + ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | + ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | + ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); +} + + +static void +i915_use_passthrough_shader(struct i915_context *i915) +{ + debug_printf("**** Using i915 pass-through fragment shader\n"); + + i915->current.program = (uint *) MALLOC(sizeof(passthrough)); + if (i915->current.program) { + memcpy(i915->current.program, passthrough, sizeof(passthrough)); + i915->current.program_len = Elements(passthrough); + } + + i915->current.num_constants[PIPE_SHADER_FRAGMENT] = 0; + i915->current.num_user_constants[PIPE_SHADER_FRAGMENT] = 0; +} + + +void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...) +{ + va_list args; + char buffer[1024]; + + debug_printf("i915_program_error: "); + va_start( args, msg ); + vsprintf( buffer, msg, args ); + va_end( args ); + debug_printf(buffer); + debug_printf("\n"); + + p->error = 1; +} + + + +/** + * Construct a ureg for the given source register. Will emit + * constants, apply swizzling and negation as needed. + */ +static uint +src_vector(struct i915_fp_compile *p, + const struct tgsi_full_src_register *source) +{ + uint index = source->SrcRegister.Index; + uint src, sem_name, sem_ind; + + switch (source->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { + i915_program_error(p, "Exceeded max temporary reg"); + return 0; + } + src = UREG(REG_TYPE_R, index); + break; + case TGSI_FILE_INPUT: + /* XXX: Packing COL1, FOGC into a single attribute works for + * texenv programs, but will fail for real fragment programs + * that use these attributes and expect them to be a full 4 + * components wide. Could use a texcoord to pass these + * attributes if necessary, but that won't work in the general + * case. + * + * We also use a texture coordinate to pass wpos when possible. + */ + + /* use vertex format info to map a slot number to a VF attrib */ + assert(index < p->vertex_info->num_attribs); + + sem_name = p->input_semantic_name[index]; + sem_ind = p->input_semantic_index[index]; + + switch (sem_name) { + case TGSI_SEMANTIC_POSITION: + debug_printf("SKIP SEM POS\n"); + /* + assert(p->wpos_tex != -1); + src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); + */ + break; + case TGSI_SEMANTIC_COLOR: + if (sem_ind == 0) { + src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); + } + else { + /* secondary color */ + assert(sem_ind == 1); + src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); + src = swizzle(src, X, Y, Z, ONE); + } + break; + case TGSI_SEMANTIC_FOG: + src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); + src = swizzle(src, W, W, W, W); + break; + case TGSI_SEMANTIC_GENERIC: + /* usually a texcoord */ + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); + break; + default: + i915_program_error(p, "Bad source->Index"); + return 0; + } + break; + + case TGSI_FILE_IMMEDIATE: + /* XXX unfinished - need to append immediates onto const buffer */ + /* fall-through */ + case TGSI_FILE_CONSTANT: + src = UREG(REG_TYPE_CONST, index); + break; + + default: + i915_program_error(p, "Bad source->File"); + return 0; + } + + if (source->SrcRegister.Extended) { + src = swizzle(src, + source->SrcRegisterExtSwz.ExtSwizzleX, + source->SrcRegisterExtSwz.ExtSwizzleY, + source->SrcRegisterExtSwz.ExtSwizzleZ, + source->SrcRegisterExtSwz.ExtSwizzleW); + } + else { + src = swizzle(src, + source->SrcRegister.SwizzleX, + source->SrcRegister.SwizzleY, + source->SrcRegister.SwizzleZ, + source->SrcRegister.SwizzleW); + } + + + /* There's both negate-all-components and per-component negation. + * Try to handle both here. + */ + { + int nx = source->SrcRegisterExtSwz.NegateX; + int ny = source->SrcRegisterExtSwz.NegateY; + int nz = source->SrcRegisterExtSwz.NegateZ; + int nw = source->SrcRegisterExtSwz.NegateW; + if (source->SrcRegister.Negate) { + nx = !nx; + ny = !ny; + nz = !nz; + nw = !nw; + } + src = negate(src, nx, ny, nz, nw); + } + + /* no abs() or post-abs negation */ +#if 0 + /* XXX assertions disabled to allow arbfplight.c to run */ + /* XXX enable these assertions, or fix things */ + assert(!source->SrcRegisterExtMod.Absolute); + assert(!source->SrcRegisterExtMod.Negate); +#endif + return src; +} + + +/** + * Construct a ureg for a destination register. + */ +static uint +get_result_vector(struct i915_fp_compile *p, + const struct tgsi_full_dst_register *dest) +{ + switch (dest->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + uint sem_name = p->output_semantic_name[dest->DstRegister.Index]; + switch (sem_name) { + case TGSI_SEMANTIC_POSITION: + return UREG(REG_TYPE_OD, 0); + case TGSI_SEMANTIC_COLOR: + return UREG(REG_TYPE_OC, 0); + default: + i915_program_error(p, "Bad inst->DstReg.Index/semantics"); + return 0; + } + } + case TGSI_FILE_TEMPORARY: + return UREG(REG_TYPE_R, dest->DstRegister.Index); + default: + i915_program_error(p, "Bad inst->DstReg.File"); + return 0; + } +} + + +/** + * Compute flags for saturation and writemask. + */ +static uint +get_result_flags(const struct tgsi_full_instruction *inst) +{ + const uint writeMask + = inst->FullDstRegisters[0].DstRegister.WriteMask; + uint flags = 0x0; + + if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) + flags |= A0_DEST_SATURATE; + + if (writeMask & TGSI_WRITEMASK_X) + flags |= A0_DEST_CHANNEL_X; + if (writeMask & TGSI_WRITEMASK_Y) + flags |= A0_DEST_CHANNEL_Y; + if (writeMask & TGSI_WRITEMASK_Z) + flags |= A0_DEST_CHANNEL_Z; + if (writeMask & TGSI_WRITEMASK_W) + flags |= A0_DEST_CHANNEL_W; + + return flags; +} + + +/** + * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token + */ +static uint +translate_tex_src_target(struct i915_fp_compile *p, uint tex) +{ + switch (tex) { + case TGSI_TEXTURE_1D: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_2D: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_RECT: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_3D: + return D0_SAMPLE_TYPE_VOLUME; + case TGSI_TEXTURE_CUBE: + return D0_SAMPLE_TYPE_CUBE; + default: + i915_program_error(p, "TexSrc type"); + return 0; + } +} + + +/** + * Generate texel lookup instruction. + */ +static void +emit_tex(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode) +{ + uint texture = inst->InstructionExtTexture.Texture; + uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint tex = translate_tex_src_target( p, texture ); + uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); + uint coord = src_vector( p, &inst->FullSrcRegisters[0]); + + i915_emit_texld( p, + get_result_vector( p, &inst->FullDstRegisters[0] ), + get_result_flags( inst ), + sampler, + coord, + opcode); +} + + +/** + * Generate a simple arithmetic instruction + * \param opcode the i915 opcode + * \param numArgs the number of input/src arguments + */ +static void +emit_simple_arith(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + uint arg1, arg2, arg3; + + assert(numArgs <= 3); + + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); + arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] ); + + i915_emit_arith( p, + opcode, + get_result_vector( p, &inst->FullDstRegisters[0]), + get_result_flags( inst ), 0, + arg1, + arg2, + arg3 ); +} + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +/* + * Translate TGSI instruction to i915 instruction. + * + * Possible concerns: + * + * SIN, COS -- could use another taylor step? + * LIT -- results seem a little different to sw mesa + * LOG -- different to mesa on negative numbers, but this is conformant. + */ +static void +i915_translate_instruction(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst) +{ + uint writemask; + uint src0, src1, src2, flags; + uint tmp = 0; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + i915_emit_arith(p, + A0_MAX, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + src0, negate(src0, 1, 1, 1, 1), 0); + break; + + case TGSI_OPCODE_ADD: + emit_simple_arith(p, inst, A0_ADD, 2); + break; + + case TGSI_OPCODE_CMP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src2 = src_vector(p, &inst->FullSrcRegisters[2]); + i915_emit_arith(p, A0_CMP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), + 0, src0, src2, src1); /* NOTE: order of src2, src1 */ + break; + + case TGSI_OPCODE_COS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); + + i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + /* By choosing different taylor constants, could get rid of this mul: + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 + * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * result = DP4 t0, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(tmp, X, X, ONE, ONE), + swizzle(tmp, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, Y, X, ONE), + swizzle(tmp, X, X, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + break; + + case TGSI_OPCODE_DP3: + emit_simple_arith(p, inst, A0_DP3, 2); + break; + + case TGSI_OPCODE_DP4: + emit_simple_arith(p, inst, A0_DP4, 2); + break; + + case TGSI_OPCODE_DPH: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, Y, Z, ONE), src1, 0); + break; + + case TGSI_OPCODE_DST: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + /* result[0] = 1 * 1; + * result[1] = a[1] * b[1]; + * result[2] = a[2] * 1; + * result[3] = 1 * b[3]; + */ + i915_emit_arith(p, + A0_MUL, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, ONE, Y, Z, ONE), + swizzle(src1, ONE, Y, ONE, W), 0); + break; + + case TGSI_OPCODE_END: + /* no-op */ + break; + + case TGSI_OPCODE_EX2: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_FLR: + emit_simple_arith(p, inst, A0_FLR, 1); + break; + + case TGSI_OPCODE_FRC: + emit_simple_arith(p, inst, A0_FRC, 1); + break; + + case TGSI_OPCODE_KIL: + /* unconditional kill */ + assert(0); /* not tested yet */ +#if 0 + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ + 0, src0, T0_TEXKILL); +#endif + break; + + case TGSI_OPCODE_KILP: + /* kill if src[0].x < 0 || src[0].y < 0 ... */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ + 0, src0, T0_TEXKILL); + break; + + case TGSI_OPCODE_LG2: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_LOG, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_LIT: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* tmp = max( a.xyzw, a.00zw ) + * XXX: Clamp tmp.w to -128..128 + * tmp.y = log(tmp.y) + * tmp.y = tmp.w * tmp.y + * tmp.y = exp(tmp.y) + * result = cmp (a.11-x1, a.1x01, a.1xy1 ) + */ + i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, + src0, swizzle(src0, ZERO, ZERO, Z, W), 0); + + i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, Y, ZERO, ZERO), + swizzle(tmp, ZERO, W, ZERO, ZERO), 0); + + i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_CMP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), + swizzle(tmp, ONE, X, ZERO, ONE), + swizzle(tmp, ONE, X, Y, ONE)); + + break; + + case TGSI_OPCODE_LRP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src2 = src_vector(p, &inst->FullSrcRegisters[2]); + flags = get_result_flags(inst); + tmp = i915_get_utemp(p); + + /* b*a + c*(1-a) + * + * b*a + c - ca + * + * tmp = b*a + c, + * result = (-c)*a + tmp + */ + i915_emit_arith(p, A0_MAD, tmp, + flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); + + i915_emit_arith(p, A0_MAD, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); + break; + + case TGSI_OPCODE_MAD: + emit_simple_arith(p, inst, A0_MAD, 3); + break; + + case TGSI_OPCODE_MAX: + emit_simple_arith(p, inst, A0_MAX, 2); + break; + + case TGSI_OPCODE_MIN: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + i915_emit_arith(p, + A0_MAX, + tmp, flags & A0_DEST_CHANNEL_ALL, 0, + negate(src0, 1, 1, 1, 1), + negate(src1, 1, 1, 1, 1), 0); + + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); + break; + + case TGSI_OPCODE_MOV: + /* aka TGSI_OPCODE_SWZ */ + emit_simple_arith(p, inst, A0_MOV, 1); + break; + + case TGSI_OPCODE_MUL: + emit_simple_arith(p, inst, A0_MUL, 2); + break; + + case TGSI_OPCODE_POW: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + /* XXX: masking on intermediate values, here and elsewhere. + */ + i915_emit_arith(p, + A0_LOG, + tmp, A0_DEST_CHANNEL_X, 0, + swizzle(src0, X, X, X, X), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); + + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, swizzle(tmp, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_RET: + /* XXX: no-op? */ + break; + + case TGSI_OPCODE_RCP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_RCP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_RSQ: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_RSQ, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_SCS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * scs.x = DP4 t1, sin_constants + * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * scs.y = DP4 t1, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(src0, X, X, ONE, ONE), + swizzle(src0, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp, X, X, ONE, ONE), 0); + + writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + if (writemask & TGSI_WRITEMASK_Y) { + uint tmp1; + + if (writemask & TGSI_WRITEMASK_X) + tmp1 = i915_get_utemp(p); + else + tmp1 = tmp; + + i915_emit_arith(p, + A0_MUL, + tmp1, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, X, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + A0_DEST_CHANNEL_Y, 0, + swizzle(tmp1, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + } + + if (writemask & TGSI_WRITEMASK_X) { + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + A0_DEST_CHANNEL_X, 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + } + break; + + case TGSI_OPCODE_SGE: + emit_simple_arith(p, inst, A0_SGE, 2); + break; + + case TGSI_OPCODE_SIN: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); + + i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + /* By choosing different taylor constants, could get rid of this mul: + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * result = DP4 t1.wzyx, sin_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(tmp, X, X, ONE, ONE), + swizzle(tmp, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp, X, X, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, X, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(tmp, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + break; + + case TGSI_OPCODE_SLT: + emit_simple_arith(p, inst, A0_SLT, 2); + break; + + case TGSI_OPCODE_SUB: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + i915_emit_arith(p, + A0_ADD, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + src0, negate(src1, 1, 1, 1, 1), 0); + break; + + case TGSI_OPCODE_TEX: + if (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide + == TGSI_EXTSWIZZLE_W) { + emit_tex(p, inst, T0_TEXLDP); + } + else { + emit_tex(p, inst, T0_TEXLD); + } + break; + + case TGSI_OPCODE_TXB: + emit_tex(p, inst, T0_TEXLDB); + break; + + case TGSI_OPCODE_XPD: + /* Cross product: + * result.x = src0.y * src1.z - src0.z * src1.y; + * result.y = src0.z * src1.x - src0.x * src1.z; + * result.z = src0.x * src1.y - src0.y * src1.x; + * result.w = undef; + */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(src0, Z, X, Y, ONE), + swizzle(src1, Y, Z, X, ONE), 0); + + i915_emit_arith(p, + A0_MAD, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, Y, Z, X, ONE), + swizzle(src1, Z, X, Y, ONE), + negate(tmp, 1, 1, 1, 0)); + break; + + default: + i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); + return; + } + + i915_release_utemps(p); +} + + +/** + * Translate TGSI fragment shader into i915 hardware instructions. + * \param p the translation state + * \param tokens the TGSI token array + */ +static void +i915_translate_instructions(struct i915_fp_compile *p, + const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + + tgsi_parse_init( &parse, tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_INPUT) { + /* save input register info for use in src_vector() */ + uint ind, sem, semi; + ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + sem = parse.FullToken.FullDeclaration.Semantic.SemanticName; + semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + /*debug_printf("FS Input DECL [%u] sem %u\n", ind, sem);*/ + p->input_semantic_name[ind] = sem; + p->input_semantic_index[ind] = semi; + } + else if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_OUTPUT) { + /* save output register info for use in get_result_vector() */ + uint ind, sem, semi; + ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + sem = parse.FullToken.FullDeclaration.Semantic.SemanticName; + semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + /*debug_printf("FS Output DECL [%u] sem %u\n", ind, sem);*/ + p->output_semantic_name[ind] = sem; + p->output_semantic_index[ind] = semi; + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* XXX append the immediate to the const buffer... */ + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + i915_translate_instruction(p, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + + } /* while */ + + tgsi_parse_free (&parse); +} + + +static struct i915_fp_compile * +i915_init_compile(struct i915_context *i915, + const struct pipe_shader_state *fs) +{ + struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); + + p->shader = i915->fs; + + p->vertex_info = &i915->current.vertex_info; + + /* new constants found during translation get appended after the + * user-provided constants. + */ + p->constants = i915->current.constants[PIPE_SHADER_FRAGMENT]; + p->num_constants = i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]; + + p->nr_tex_indirect = 1; /* correct? */ + p->nr_tex_insn = 0; + p->nr_alu_insn = 0; + p->nr_decl_insn = 0; + + memset(p->constant_flags, 0, sizeof(p->constant_flags)); + + p->csr = p->program; + p->decl = p->declarations; + p->decl_s = 0; + p->decl_t = 0; + p->temp_flag = 0xffff000; + p->utemp_flag = ~0x7; + + p->wpos_tex = -1; + + /* initialize the first program word */ + *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; + + return p; +} + + +/* Copy compile results to the fragment program struct and destroy the + * compilation context. + */ +static void +i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) +{ + unsigned long program_size = (unsigned long) (p->csr - p->program); + unsigned long decl_size = (unsigned long) (p->decl - p->declarations); + + if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) + i915_program_error(p, "Exceeded max nr indirect texture lookups"); + + if (p->nr_tex_insn > I915_MAX_TEX_INSN) + i915_program_error(p, "Exceeded max TEX instructions"); + + if (p->nr_alu_insn > I915_MAX_ALU_INSN) + i915_program_error(p, "Exceeded max ALU instructions"); + + if (p->nr_decl_insn > I915_MAX_DECL_INSN) + i915_program_error(p, "Exceeded max DECL instructions"); + + /* free old program, if present */ + if (i915->current.program) { + FREE(i915->current.program); + i915->current.program_len = 0; + } + + if (p->error) { + p->NumNativeInstructions = 0; + p->NumNativeAluInstructions = 0; + p->NumNativeTexInstructions = 0; + p->NumNativeTexIndirections = 0; + + i915_use_passthrough_shader(i915); + } + else { + p->NumNativeInstructions + = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; + p->NumNativeAluInstructions = p->nr_alu_insn; + p->NumNativeTexInstructions = p->nr_tex_insn; + p->NumNativeTexIndirections = p->nr_tex_indirect; + + /* patch in the program length */ + p->declarations[0] |= program_size + decl_size - 2; + + /* Copy compilation results to fragment program struct: + */ + i915->current.program + = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); + if (i915->current.program) { + i915->current.program_len = program_size + decl_size; + + memcpy(i915->current.program, + p->declarations, + decl_size * sizeof(uint)); + + memcpy(i915->current.program + decl_size, + p->program, + program_size * sizeof(uint)); + } + + /* update number of constants */ + i915->current.num_constants[PIPE_SHADER_FRAGMENT] = p->num_constants; + assert(i915->current.num_constants[PIPE_SHADER_FRAGMENT] + >= i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]); + } + + /* Release the compilation struct: + */ + FREE(p); +} + + +/** + * Find an unused texture coordinate slot to use for fragment WPOS. + * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found). + */ +static void +i915_find_wpos_space(struct i915_fp_compile *p) +{ +#if 0 + const uint inputs + = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/ + uint i; + + p->wpos_tex = -1; + + if (inputs & (1 << TGSI_ATTRIB_POS)) { + for (i = 0; i < I915_TEX_UNITS; i++) { + if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) { + p->wpos_tex = i; + return; + } + } + + i915_program_error(p, "No free texcoord for wpos value"); + } +#else + if (p->shader->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + /* frag shader using the fragment position input */ +#if 0 + assert(0); +#endif + } +#endif +} + + + + +/** + * Rather than trying to intercept and jiggle depth writes during + * emit, just move the value into its correct position at the end of + * the program: + */ +static void +i915_fixup_depth_write(struct i915_fp_compile *p) +{ + /* XXX assuming pos/depth is always in output[0] */ + if (p->shader->output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + const uint depth = UREG(REG_TYPE_OD, 0); + + i915_emit_arith(p, + A0_MOV, /* opcode */ + depth, /* dest reg */ + A0_DEST_CHANNEL_W, /* write mask */ + 0, /* saturate? */ + swizzle(depth, X, Y, Z, Z), /* src0 */ + 0, 0 /* src1, src2 */); + } +} + + +void +i915_translate_fragment_program( struct i915_context *i915 ) +{ + struct i915_fp_compile *p = i915_init_compile(i915, i915->fs); + const struct tgsi_token *tokens = i915->fs->tokens; + + i915_find_wpos_space(p); + + i915_translate_instructions(p, tokens); + i915_fixup_depth_write(p); + + i915_fini_compile(i915, p); +} diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c new file mode 100644 index 00000000000..c4a706c37d8 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -0,0 +1,215 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/draw/draw_private.h" +#include "pipe/p_util.h" + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_batch.h" + + + +/** + * Primitive emit to hardware. No support for vertex buffers or any + * nice fast paths. + */ +struct setup_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct i915_context *i915; +}; + + + +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} + + +/** + * Extract the needed fields from vertex_header and emit i915 dwords. + * Recall that the vertices are constructed by the 'draw' module and + * have a couple of slots at the beginning (1-dword header, 4-dword + * clip pos) that we ignore here. + */ +static INLINE void +emit_hw_vertex( struct i915_context *i915, + const struct vertex_header *vertex) +{ + const struct vertex_info *vinfo = &i915->current.vertex_info; + uint i; + uint count = 0; /* for debug/sanity */ + + for (i = 0; i < vinfo->num_attribs; i++) { + switch (vinfo->emit[i]) { + case EMIT_OMIT: + /* no-op */ + break; + case EMIT_1F: + OUT_BATCH( fui(vertex->data[i][0]) ); + count++; + break; + case EMIT_2F: + OUT_BATCH( fui(vertex->data[i][0]) ); + OUT_BATCH( fui(vertex->data[i][1]) ); + count += 2; + break; + case EMIT_3F: + OUT_BATCH( fui(vertex->data[i][0]) ); + OUT_BATCH( fui(vertex->data[i][1]) ); + OUT_BATCH( fui(vertex->data[i][2]) ); + count += 3; + break; + case EMIT_4F: + OUT_BATCH( fui(vertex->data[i][0]) ); + OUT_BATCH( fui(vertex->data[i][1]) ); + OUT_BATCH( fui(vertex->data[i][2]) ); + OUT_BATCH( fui(vertex->data[i][3]) ); + count += 4; + break; + case EMIT_4UB: + OUT_BATCH( pack_ub4(float_to_ubyte( vertex->data[i][2] ), + float_to_ubyte( vertex->data[i][1] ), + float_to_ubyte( vertex->data[i][0] ), + float_to_ubyte( vertex->data[i][3] )) ); + count += 1; + break; + default: + assert(0); + } + } + assert(count == vinfo->size); +} + + + +static INLINE void +emit_prim( struct draw_stage *stage, + struct prim_header *prim, + unsigned hwprim, + unsigned nr ) +{ + struct i915_context *i915 = setup_stage(stage)->i915; + unsigned vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + unsigned i; + + assert(vertex_size >= 12); /* never smaller than 12 bytes */ + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + FLUSH_BATCH(); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + assert(0); + return; + } + } + + /* Emit each triangle as a single primitive. I told you this was + * simple. + */ + OUT_BATCH(_3DPRIMITIVE | + hwprim | + ((4 + vertex_size * nr)/4 - 2)); + + for (i = 0; i < nr; i++) + emit_hw_vertex(i915, prim->v[i]); +} + + +static void +setup_tri( struct draw_stage *stage, struct prim_header *prim ) +{ + emit_prim( stage, prim, PRIM3D_TRILIST, 3 ); +} + + +static void +setup_line(struct draw_stage *stage, struct prim_header *prim) +{ + emit_prim( stage, prim, PRIM3D_LINELIST, 2 ); +} + + +static void +setup_point(struct draw_stage *stage, struct prim_header *prim) +{ + emit_prim( stage, prim, PRIM3D_POINTLIST, 1 ); +} + + +static void setup_flush( struct draw_stage *stage, unsigned flags ) +{ +} + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage. This gets plugged into + * the 'draw' module's pipeline. + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ) +{ + struct setup_stage *setup = CALLOC_STRUCT(setup_stage); + + setup->i915 = i915; + setup->stage.draw = i915->draw; + setup->stage.point = setup_point; + setup->stage.line = setup_line; + setup->stage.tri = setup_tri; + setup->stage.flush = setup_flush; + setup->stage.reset_stipple_counter = reset_stipple_counter; + setup->stage.destroy = render_destroy; + + return &setup->stage; +} diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c new file mode 100644 index 00000000000..e069773fd4e --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -0,0 +1,254 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. + * + * XXX: work in progress + * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/draw/draw_vbuf.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_winsys.h" +#include "i915_batch.h" +#include "i915_state.h" + + +/** + * Primitive renderer for i915. + */ +struct i915_vbuf_render { + struct vbuf_render base; + + struct i915_context *i915; + + /** Vertex size in bytes */ + unsigned vertex_size; + + /** Hardware primitive */ + unsigned hwprim; +}; + + +/** + * Basically a cast wrapper. + */ +static INLINE struct i915_vbuf_render * +i915_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct i915_vbuf_render *)render; +} + + +static const struct vertex_info * +i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + return &i915->current.vertex_info; +} + + +static void * +i915_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct pipe_winsys *winsys = i915->pipe.winsys; + size_t size = (size_t)vertex_size * (size_t)nr_vertices; + + /* FIXME: handle failure */ + assert(!i915->vbo); + i915->vbo = winsys->buffer_create(winsys, 64, I915_BUFFER_USAGE_LIT_VERTEX, + size); + + i915->dirty |= I915_NEW_VBO; + + return winsys->buffer_map(winsys, + i915->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); +} + + +static void +i915_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + + switch(prim) { + case PIPE_PRIM_POINTS: + i915_render->hwprim = PRIM3D_POINTLIST; + break; + case PIPE_PRIM_LINES: + i915_render->hwprim = PRIM3D_LINELIST; + break; + case PIPE_PRIM_TRIANGLES: + i915_render->hwprim = PRIM3D_TRILIST; + break; + default: + assert(0); + } +} + + +static void +i915_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + + assert(nr_indices); + + assert((i915->dirty & ~I915_NEW_VBO) == 0); + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + FLUSH_BATCH(); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + assert(0); + return; + } + } + + OUT_BATCH( _3DPRIMITIVE | + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices ); + for (i = 0; i + 1 < nr_indices; i += 2) { + OUT_BATCH( indices[i] | + (indices[i + 1] << 16) ); + } + if (i < nr_indices) { + OUT_BATCH( indices[i] ); + } +} + + +static void +i915_vbuf_render_release_vertices( struct vbuf_render *render, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct pipe_winsys *winsys = i915->pipe.winsys; + + assert(i915->vbo); + winsys->buffer_unmap(winsys, i915->vbo); + pipe_buffer_reference(winsys, &i915->vbo, NULL); +} + + +static void +i915_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + FREE(i915_render); +} + + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +i915_vbuf_render_create( struct i915_context *i915 ) +{ + struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); + + i915_render->i915 = i915; + + i915_render->base.max_vertex_buffer_bytes = 128*1024; + + /* NOTE: it must be such that state and vertices indices fit in a single + * batch buffer. + */ + i915_render->base.max_indices = 16*1024; + + i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info; + i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; + i915_render->base.set_primitive = i915_vbuf_render_set_primitive; + i915_render->base.draw = i915_vbuf_render_draw; + i915_render->base.release_vertices = i915_vbuf_render_release_vertices; + i915_render->base.destroy = i915_vbuf_render_destroy; + + return &i915_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = i915_vbuf_render_create(i915); + if(!render) + return NULL; + + stage = draw_vbuf_stage( i915->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + + return stage; +} diff --git a/src/gallium/drivers/i915simple/i915_reg.h b/src/gallium/drivers/i915simple/i915_reg.h new file mode 100644 index 00000000000..04620fec681 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_reg.h @@ -0,0 +1,978 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_REG_H +#define I915_REG_H + + +#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) + +#define CMD_3D (0x3<<29) + +#define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_CLEAR_RECT (0xa<<18) +#define PRIM3D_ZONE_INIT (0xd<<18) +#define PRIM3D_MASK (0x1f<<18) + +/* p137 */ +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) + +/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ +#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24)) +#define BFO_ENABLE_STENCIL_REF (1<<23) +#define BFO_STENCIL_REF_SHIFT 15 +#define BFO_STENCIL_REF_MASK (0xff<<15) +#define BFO_ENABLE_STENCIL_FUNCS (1<<14) +#define BFO_STENCIL_TEST_SHIFT 11 +#define BFO_STENCIL_TEST_MASK (0x7<<11) +#define BFO_STENCIL_FAIL_SHIFT 8 +#define BFO_STENCIL_FAIL_MASK (0x7<<8) +#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5 +#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5) +#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2 +#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2) +#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1) +#define BFO_STENCIL_TWO_SIDE (1<<0) + + +/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ +#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24)) +#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17) +#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) +#define BFM_STENCIL_TEST_MASK_SHIFT 8 +#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_WRITE_MASK_SHIFT 0 +#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) + + + +/* 3DSTATE_BIN_CONTROL p141 */ + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + + +/* 3DSTATE_CHROMA_KEY */ + +/* 3DSTATE_CLEAR_PARAMETERS, p150 */ +#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT (1 << 16) +#define CLEARPARAM_ZONE_INIT (0 << 16) +#define CLEARPARAM_WRITE_COLOR (1 << 2) +#define CLEARPARAM_WRITE_DEPTH (1 << 1) +#define CLEARPARAM_WRITE_STENCIL (1 << 0) + +/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + + + +/* 3DSTATE_COORD_SET_BINDINGS, p154 */ +#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24)) +#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3)) + +/* p156 */ +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +/* p157 */ +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +/* p158 */ +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) + + +/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ +#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16)) +/* scale in dword 1 */ + + +/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2) + +/* p161 */ +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define TEX_DEFAULT_COLOR_OGL (0<<30) +#define TEX_DEFAULT_COLOR_D3D (1<<30) +#define ZR_EARLY_DEPTH (1<<29) +#define LOD_PRECLAMP_OGL (1<<28) +#define LOD_PRECLAMP_D3D (0<<28) +#define DITHER_FULL_ALWAYS (0<<26) +#define DITHER_FULL_ON_FB_BLEND (1<<26) +#define DITHER_CLAMPED_ALWAYS (2<<26) +#define LINEAR_GAMMA_BLEND_32BPP (1<<25) +#define DEBUG_DISABLE_ENH_DITHER (1<<24) +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLOR_BUF_8BIT 0 +#define COLOR_BUF_RGB555 (1<<8) +#define COLOR_BUF_RGB565 (2<<8) +#define COLOR_BUF_ARGB8888 (3<<8) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 (0<<1) +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +/* p166 */ +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + + +/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ + +/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ + + +/* _3DSTATE_FOG_COLOR, p173 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p174 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31) +#define FMC1_FOGFUNC_VERTEX (0<<28) +#define FMC1_FOGFUNC_PIXEL_EXP (1<<28) +#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28) +#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28) +#define FMC1_FOGFUNC_MASK (3<<28) +#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27) +#define FMC1_FOGINDEX_Z (0<<25) +#define FMC1_FOGINDEX_W (1<<25) +#define FMC1_C1_C2_MODIFY_ENABLE (1<<24) +#define FMC1_DENSITY_MODIFY_ENABLE (1<<23) +#define FMC1_C1_ONE (1<<13) +#define FMC1_C1_MASK (0xffff<<4) +/* Dword 2 */ +#define FMC2_C2_ONE (1<<16) +/* Dword 3 */ +#define FMC3_D_ONE (1<<16) + + + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ +#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define IAB_MODIFY_ENABLE (1<<23) +#define IAB_ENABLE (1<<22) +#define IAB_MODIFY_FUNC (1<<21) +#define IAB_FUNC_SHIFT 16 +#define IAB_MODIFY_SRC_FACTOR (1<<11) +#define IAB_SRC_FACTOR_SHIFT 6 +#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6) +#define IAB_MODIFY_DST_FACTOR (1<<5) +#define IAB_DST_FACTOR_SHIFT 0 +#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0) + + +#define BLENDFUNC_ADD 0x0 +#define BLENDFUNC_SUBTRACT 0x1 +#define BLENDFUNC_REVERSE_SUBTRACT 0x2 +#define BLENDFUNC_MIN 0x3 +#define BLENDFUNC_MAX 0x4 +#define BLENDFUNC_MASK 0x7 + +/* 3DSTATE_LOAD_INDIRECT, p180 */ + +#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16)) +#define LI0_STATE_STATIC_INDIRECT (0x01<<8) +#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8) +#define LI0_STATE_SAMPLER (0x04<<8) +#define LI0_STATE_MAP (0x08<<8) +#define LI0_STATE_PROGRAM (0x10<<8) +#define LI0_STATE_CONSTANTS (0x20<<8) + +#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SIS0_FORCE_LOAD (1<<1) +#define SIS0_BUFFER_VALID (1<<0) +#define SIS1_BUFFER_LENGTH(x) ((x)&0xff) + +#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define DIS0_BUFFER_RESET (1<<1) +#define DIS0_BUFFER_VALID (1<<0) + +#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SSB0_FORCE_LOAD (1<<1) +#define SSB0_BUFFER_VALID (1<<0) +#define SSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define MSB0_FORCE_LOAD (1<<1) +#define MSB0_BUFFER_VALID (1<<0) +#define MSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSP0_FORCE_LOAD (1<<1) +#define PSP0_BUFFER_VALID (1<<0) +#define PSP1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSC0_FORCE_LOAD (1<<1) +#define PSC0_BUFFER_VALID (1<<0) +#define PSC1_BUFFER_LENGTH(x) ((x)&0xff) + + + + + +/* _3DSTATE_RASTERIZATION_RULES */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_TEXKILL_3D_4D (1<<10) +#define TEXKILL_3D (0<<9) +#define TEXKILL_4D (1<<9) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) + +/* _3DSTATE_SCISSOR_ENABLE, p256 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + + + +#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT) +#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT) +#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT) +#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT) + + + + +/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */ + +/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */ +#define _3DSTATE_MAP_PALETTE_LOAD_32 (CMD_3D|(0x1d<<24)|(0x8f<<16)) +/* subsequent dwords up to length (max 16) are ARGB8888 color values */ + +/* _3DSTATE_MODES_4, p218 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK (0xf<<18) +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) +#define ENABLE_STENCIL_TEST_MASK (1<<17) +#define STENCIL_TEST_MASK(x) (((x)&0xff)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK (1<<16) +#define STENCIL_WRITE_MASK(x) ((x)&0xff) + +/* _3DSTATE_MODES_5, p220 */ +#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24)) +#define PIPELINE_FLUSH_RENDER_CACHE (1<<18) +#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16) + + +/* p221 */ +#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16)) +#define PS1_REG(n) (1<<(n)) +#define PS2_CONST_X(n) (n) +#define PS3_CONST_Y(n) (n) +#define PS4_CONST_Z(n) (n) +#define PS5_CONST_W(n) (n) + +/* p222 */ + + +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_NR_MASK 0xf + + +/* REG_TYPE_T: + */ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + + + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. */ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + + + +/* p207 */ +#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16)) + +#define MS1_MAPMASK_SHIFT 0 +#define MS1_MAPMASK_MASK (0x8fff<<0) + +#define MS2_UNTRUSTED_SURFACE (1<<31) +#define MS2_ADDRESS_MASK 0xfffffffc +#define MS2_VERTICAL_LINE_STRIDE (1<<1) +#define MS2_VERTICAL_OFFSET (1<<1) + +#define MS3_HEIGHT_SHIFT 21 +#define MS3_WIDTH_SHIFT 10 +#define MS3_PALETTE_SELECT (1<<9) +#define MS3_MAPSURF_FORMAT_SHIFT 7 +#define MS3_MAPSURF_FORMAT_MASK (0x7<<7) +#define MAPSURF_8BIT (1<<7) +#define MAPSURF_16BIT (2<<7) +#define MAPSURF_32BIT (3<<7) +#define MAPSURF_422 (5<<7) +#define MAPSURF_COMPRESSED (6<<7) +#define MAPSURF_4BIT_INDEXED (7<<7) +#define MS3_MT_FORMAT_MASK (0x7 << 3) +#define MS3_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_8BIT_MONO8 (5<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define MT_16BIT_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_I16 (7<<3) +#define MT_16BIT_L16 (8<<3) +#define MT_16BIT_A16 (9<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_QWVU8888 (4<<3) +#define MT_32BIT_AXVU8888 (5<<3) +#define MT_32BIT_LXVU8888 (6<<3) +#define MT_32BIT_XLVU8888 (7<<3) +#define MT_32BIT_ARGB2101010 (8<<3) +#define MT_32BIT_ABGR2101010 (9<<3) +#define MT_32BIT_AWVU2101010 (0xA<<3) +#define MT_32BIT_GR1616 (0xB<<3) +#define MT_32BIT_VU1616 (0xC<<3) +#define MT_32BIT_xI824 (0xD<<3) +#define MT_32BIT_xA824 (0xE<<3) +#define MT_32BIT_xL824 (0xF<<3) +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define MT_COMPRESS_DXT1_RGB (4<<3) +#define MS3_USE_FENCE_REGS (1<<2) +#define MS3_TILED_SURFACE (1<<1) +#define MS3_TILE_WALK (1<<0) + +#define MS4_PITCH_SHIFT 21 +#define MS4_CUBE_FACE_ENA_NEGX (1<<20) +#define MS4_CUBE_FACE_ENA_POSX (1<<19) +#define MS4_CUBE_FACE_ENA_NEGY (1<<18) +#define MS4_CUBE_FACE_ENA_POSY (1<<17) +#define MS4_CUBE_FACE_ENA_NEGZ (1<<16) +#define MS4_CUBE_FACE_ENA_POSZ (1<<15) +#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15) +#define MS4_MAX_LOD_SHIFT 9 +#define MS4_MAX_LOD_MASK (0x3f<<9) +#define MS4_MIP_LAYOUT_LEGACY (0<<8) +#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8) +#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) +#define MS4_VOLUME_DEPTH_SHIFT 0 +#define MS4_VOLUME_DEPTH_MASK (0xff<<0) + +/* p244 */ +#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16)) + +#define SS1_MAPMASK_SHIFT 0 +#define SS1_MAPMASK_MASK (0x8fff<<0) + +#define SS2_REVERSE_GAMMA_ENABLE (1<<31) +#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30) +#define SS2_COLORSPACE_CONVERSION (1<<29) +#define SS2_CHROMAKEY_SHIFT 27 +#define SS2_BASE_MIP_LEVEL_SHIFT 22 +#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22) +#define SS2_MIP_FILTER_SHIFT 20 +#define SS2_MIP_FILTER_MASK (0x3<<20) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define SS2_MAG_FILTER_SHIFT 17 +#define SS2_MAG_FILTER_MASK (0x7<<17) +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 +#define FILTER_4X4_1 3 +#define FILTER_4X4_2 4 +#define FILTER_4X4_FLAT 5 +#define FILTER_6X5_MONO 6 /* XXX - check */ +#define SS2_MIN_FILTER_SHIFT 14 +#define SS2_MIN_FILTER_MASK (0x7<<14) +#define SS2_LOD_BIAS_SHIFT 5 +#define SS2_LOD_BIAS_ONE (0x10<<5) +#define SS2_LOD_BIAS_MASK (0x1ff<<5) +/* Shadow requires: + * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format + * FILTER_4X4_x MIN and MAG filters + */ +#define SS2_SHADOW_ENABLE (1<<4) +#define SS2_MAX_ANISO_MASK (1<<3) +#define SS2_MAX_ANISO_2 (0<<3) +#define SS2_MAX_ANISO_4 (1<<3) +#define SS2_SHADOW_FUNC_SHIFT 0 +#define SS2_SHADOW_FUNC_MASK (0x7<<0) +/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ + +#define SS3_MIN_LOD_SHIFT 24 +#define SS3_MIN_LOD_ONE (0x10<<24) +#define SS3_MIN_LOD_MASK (0xff<<24) +#define SS3_KILL_PIXEL_ENABLE (1<<17) +#define SS3_TCX_ADDR_MODE_SHIFT 12 +#define SS3_TCX_ADDR_MODE_MASK (0x7<<12) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP_EDGE 2 +#define TEXCOORDMODE_CUBE 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORDMODE_MIRROR_ONCE 5 +#define SS3_TCY_ADDR_MODE_SHIFT 9 +#define SS3_TCY_ADDR_MODE_MASK (0x7<<9) +#define SS3_TCZ_ADDR_MODE_SHIFT 6 +#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6) +#define SS3_NORMALIZED_COORDS (1<<5) +#define SS3_TEXTUREMAP_INDEX_SHIFT 1 +#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1) +#define SS3_DEINTERLACER_ENABLE (1<<0) + +#define SS4_BORDER_COLOR_MASK (~0) + +/* 3DSTATE_SPAN_STIPPLE, p258 + */ +#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define _3DSTATE_DEFAULT_Z ((0x3<<29)|(0x1d<<24)|(0x98<<16)) +#define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16)) +#define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16)) + + +#define MI_FLUSH ((0<<29)|(4<<23)) +#define FLUSH_MAP_CACHE (1<<0) +#define INHIBIT_FLUSH_RENDER_CACHE (1<<2) + + +#define CMD_3D (0x3<<29) + + +#define _3DPRIMITIVE ((0x3<<29)|(0x1f<<24)) +#define PRIM_INDIRECT (1<<23) +#define PRIM_INLINE (0<<23) +#define PRIM_INDIRECT_SEQUENTIAL (0<<17) +#define PRIM_INDIRECT_ELTS (1<<17) + +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define I915PACKCOLOR4444(r,g,b,a) \ + ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) + +#define I915PACKCOLOR1555(r,g,b,a) \ + ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ + ((a) ? 0x8000 : 0)) + +#define I915PACKCOLOR565(r,g,b) \ + ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) + +#define I915PACKCOLOR8888(r,g,b,a) \ + ((a<<24) | (r<<16) | (g<<8) | b) + + + + +#define BR00_BITBLT_CLIENT 0x40000000 +#define BR00_OP_COLOR_BLT 0x10000000 +#define BR00_OP_SRC_COPY_BLT 0x10C00000 +#define BR13_SOLID_PATTERN 0x80000000 + +#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) +#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) +#define XY_COLOR_BLT_WRITE_RGB (1<<20) + +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) + +#define MI_WAIT_FOR_EVENT ((0x3<<23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + +#define MI_BATCH_BUFFER (0x30<<23) +#define MI_BATCH_BUFFER_START (0x31<<23) +#define MI_BATCH_BUFFER_END (0xa<<23) + + + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 0x1 +#define STENCILOP_REPLACE 0x2 +#define STENCILOP_INCRSAT 0x3 +#define STENCILOP_DECRSAT 0x4 +#define STENCILOP_INCR 0x5 +#define STENCILOP_DECR 0x6 +#define STENCILOP_INVERT 0x7 + +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_Q33_G 0x29D2 + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c new file mode 100644 index 00000000000..abd5571b885 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -0,0 +1,694 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/draw/draw_context.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_state_inlines.h" + + +/* The i915 (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static unsigned +translate_wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */ + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return TEXCOORDMODE_CLAMP_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return TEXCOORDMODE_CLAMP_BORDER; +// case PIPE_TEX_WRAP_MIRRORED_REPEAT: +// return TEXCOORDMODE_MIRROR; + default: + return TEXCOORDMODE_WRAP; + } +} + +static unsigned translate_img_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return FILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return FILTER_LINEAR; + default: + assert(0); + return FILTER_NEAREST; + } +} + +static unsigned translate_mip_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: + return MIPFILTER_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: + return MIPFILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return MIPFILTER_LINEAR; + default: + assert(0); + return MIPFILTER_NONE; + } +} + + +/* None of this state is actually used for anything yet. + */ +static void * +i915_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + struct i915_blend_state *cso_data = CALLOC_STRUCT( i915_blend_state ); + + { + unsigned eqRGB = blend->rgb_func; + unsigned srcRGB = blend->rgb_src_factor; + unsigned dstRGB = blend->rgb_dst_factor; + + unsigned eqA = blend->alpha_func; + unsigned srcA = blend->alpha_src_factor; + unsigned dstA = blend->alpha_dst_factor; + + /* Special handling for MIN/MAX filter modes handled at + * state_tracker level. + */ + + if (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB) { + + cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_ENABLE | + IAB_MODIFY_FUNC | + IAB_MODIFY_SRC_FACTOR | + IAB_MODIFY_DST_FACTOR | + SRC_ABLND_FACT(i915_translate_blend_factor(srcA)) | + DST_ABLND_FACT(i915_translate_blend_factor(dstA)) | + (i915_translate_blend_func(eqA) << IAB_FUNC_SHIFT)); + } + else { + cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + 0); + } + } + + cso_data->modes4 |= (_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | + LOGIC_OP_FUNC(i915_translate_logic_op(blend->logicop_func))); + + if (blend->logicop_enable) + cso_data->LIS5 |= S5_LOGICOP_ENABLE; + + if (blend->dither) + cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE; + + if ((blend->colormask & PIPE_MASK_R) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_RED; + + if ((blend->colormask & PIPE_MASK_G) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_GREEN; + + if ((blend->colormask & PIPE_MASK_B) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_BLUE; + + if ((blend->colormask & PIPE_MASK_A) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_ALPHA; + + if (blend->blend_enable) { + unsigned funcRGB = blend->rgb_func; + unsigned srcRGB = blend->rgb_src_factor; + unsigned dstRGB = blend->rgb_dst_factor; + + cso_data->LIS6 |= (S6_CBUF_BLEND_ENABLE | + SRC_BLND_FACT(i915_translate_blend_factor(srcRGB)) | + DST_BLND_FACT(i915_translate_blend_factor(dstRGB)) | + (i915_translate_blend_func(funcRGB) << S6_CBUF_BLEND_FUNC_SHIFT)); + } + + return cso_data; +} + +static void i915_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->blend = (struct i915_blend_state*)blend; + + i915->dirty |= I915_NEW_BLEND; +} + + +static void i915_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + +static void i915_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->blend_color = *blend_color; + + i915->dirty |= I915_NEW_BLEND; +} + +static void * +i915_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + struct i915_sampler_state *cso = CALLOC_STRUCT( i915_sampler_state ); + const unsigned ws = sampler->wrap_s; + const unsigned wt = sampler->wrap_t; + const unsigned wr = sampler->wrap_r; + unsigned minFilt, magFilt; + unsigned mipFilt; + + cso->templ = sampler; + + mipFilt = translate_mip_filter(sampler->min_mip_filter); + if (sampler->max_anisotropy > 1.0) { + minFilt = FILTER_ANISOTROPIC; + magFilt = FILTER_ANISOTROPIC; + if (sampler->max_anisotropy > 2.0) { + cso->state[0] |= SS2_MAX_ANISO_4; + } + } + else { + minFilt = translate_img_filter( sampler->min_img_filter ); + magFilt = translate_img_filter( sampler->mag_img_filter ); + } + + { + int b = (int) (sampler->lod_bias * 16.0); + b = CLAMP(b, -256, 255); + cso->state[0] |= ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); + } + + /* Shadow: + */ + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + { + cso->state[0] |= (SS2_SHADOW_ENABLE | + i915_translate_compare_func(sampler->compare_func)); + + minFilt = FILTER_4X4_FLAT; + magFilt = FILTER_4X4_FLAT; + } + + cso->state[0] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | + (mipFilt << SS2_MIP_FILTER_SHIFT) | + (magFilt << SS2_MAG_FILTER_SHIFT)); + + cso->state[1] |= + ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); + + if (sampler->normalized_coords) + cso->state[1] |= SS3_NORMALIZED_COORDS; + + if (0) /* XXX not tested yet */ + { + int minlod = (int) (16.0 * sampler->min_lod); + minlod = CLAMP(minlod, 0, 16 * 11); + cso->state[1] |= (minlod << SS3_MIN_LOD_SHIFT); + } + + { + ubyte r = float_to_ubyte(sampler->border_color[0]); + ubyte g = float_to_ubyte(sampler->border_color[1]); + ubyte b = float_to_ubyte(sampler->border_color[2]); + ubyte a = float_to_ubyte(sampler->border_color[3]); + cso->state[2] = I915PACKCOLOR8888(r, g, b, a); + } + return cso; +} + +static void i915_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct i915_context *i915 = i915_context(pipe); + + assert(unit < PIPE_MAX_SAMPLERS); + i915->sampler[unit] = (const struct i915_sampler_state*)sampler; + + i915->dirty |= I915_NEW_SAMPLER; +} + +static void i915_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE(sampler); +} + + +/** XXX move someday? Or consolidate all these simple state setters + * into one file. + */ + +static void * +i915_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + struct i915_depth_stencil_state *cso = CALLOC_STRUCT( i915_depth_stencil_state ); + + { + int testmask = depth_stencil->stencil[0].value_mask & 0xff; + int writemask = depth_stencil->stencil[0].write_mask & 0xff; + + cso->stencil_modes4 |= (_3DSTATE_MODES_4_CMD | + ENABLE_STENCIL_TEST_MASK | + STENCIL_TEST_MASK(testmask) | + ENABLE_STENCIL_WRITE_MASK | + STENCIL_WRITE_MASK(writemask)); + } + + if (depth_stencil->stencil[0].enabled) { + int test = i915_translate_compare_func(depth_stencil->stencil[0].func); + int fop = i915_translate_stencil_op(depth_stencil->stencil[0].fail_op); + int dfop = i915_translate_stencil_op(depth_stencil->stencil[0].zfail_op); + int dpop = i915_translate_stencil_op(depth_stencil->stencil[0].zpass_op); + int ref = depth_stencil->stencil[0].ref_value & 0xff; + + cso->stencil_LIS5 |= (S5_STENCIL_TEST_ENABLE | + S5_STENCIL_WRITE_ENABLE | + (ref << S5_STENCIL_REF_SHIFT) | + (test << S5_STENCIL_TEST_FUNC_SHIFT) | + (fop << S5_STENCIL_FAIL_SHIFT) | + (dfop << S5_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << S5_STENCIL_PASS_Z_PASS_SHIFT)); + } + + if (depth_stencil->stencil[1].enabled) { + int test = i915_translate_compare_func(depth_stencil->stencil[1].func); + int fop = i915_translate_stencil_op(depth_stencil->stencil[1].fail_op); + int dfop = i915_translate_stencil_op(depth_stencil->stencil[1].zfail_op); + int dpop = i915_translate_stencil_op(depth_stencil->stencil[1].zpass_op); + int ref = depth_stencil->stencil[1].ref_value & 0xff; + int tmask = depth_stencil->stencil[1].value_mask & 0xff; + int wmask = depth_stencil->stencil[1].write_mask & 0xff; + + cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_FUNCS | + BFO_ENABLE_STENCIL_TWO_SIDE | + BFO_ENABLE_STENCIL_REF | + BFO_STENCIL_TWO_SIDE | + (ref << BFO_STENCIL_REF_SHIFT) | + (test << BFO_STENCIL_TEST_SHIFT) | + (fop << BFO_STENCIL_FAIL_SHIFT) | + (dfop << BFO_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << BFO_STENCIL_PASS_Z_PASS_SHIFT)); + + cso->bfo[1] = (_3DSTATE_BACKFACE_STENCIL_MASKS | + BFM_ENABLE_STENCIL_TEST_MASK | + BFM_ENABLE_STENCIL_WRITE_MASK | + (tmask << BFM_STENCIL_TEST_MASK_SHIFT) | + (wmask << BFM_STENCIL_WRITE_MASK_SHIFT)); + } + else { + /* This actually disables two-side stencil: The bit set is a + * modify-enable bit to indicate we are changing the two-side + * setting. Then there is a symbolic zero to show that we are + * setting the flag to zero/off. + */ + cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_TWO_SIDE | + 0); + cso->bfo[1] = 0; + } + + if (depth_stencil->depth.enabled) { + int func = i915_translate_compare_func(depth_stencil->depth.func); + + cso->depth_LIS6 |= (S6_DEPTH_TEST_ENABLE | + (func << S6_DEPTH_TEST_FUNC_SHIFT)); + + if (depth_stencil->depth.writemask) + cso->depth_LIS6 |= S6_DEPTH_WRITE_ENABLE; + } + + if (depth_stencil->alpha.enabled) { + int test = i915_translate_compare_func(depth_stencil->alpha.func); + ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref); + + cso->depth_LIS6 |= (S6_ALPHA_TEST_ENABLE | + (test << S6_ALPHA_TEST_FUNC_SHIFT) | + (((unsigned) refByte) << S6_ALPHA_REF_SHIFT)); + } + + return cso; +} + +static void i915_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->depth_stencil = (const struct i915_depth_stencil_state *)depth_stencil; + + i915->dirty |= I915_NEW_DEPTH_STENCIL; +} + +static void i915_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + FREE(depth_stencil); +} + + +static void i915_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct i915_context *i915 = i915_context(pipe); + + memcpy( &i915->scissor, scissor, sizeof(*scissor) ); + i915->dirty |= I915_NEW_SCISSOR; +} + + +static void i915_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + +static void * i915_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + return 0; +} + +static void i915_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->fs = (struct pipe_shader_state *)fs; + + i915->dirty |= I915_NEW_FS; +} + +static void i915_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + /*do nothing*/ +} + +static void * +i915_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + return draw_create_vertex_shader(i915->draw, templ); +} + +static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); +} + +static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + draw_delete_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); +} + +static void i915_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* Make a copy of shader constants. + * During fragment program translation we may add additional + * constants to the array. + * + * We want to consider the situation where some user constants + * (ex: a material color) may change frequently but the shader program + * stays the same. In that case we should only be updating the first + * N constants, leaving any extras from shader translation alone. + */ + { + void *mapped; + if (buf->size && + (mapped = ws->buffer_map(ws, buf->buffer, + PIPE_BUFFER_USAGE_CPU_READ))) { + memcpy(i915->current.constants[shader], mapped, buf->size); + ws->buffer_unmap(ws, buf->buffer); + i915->current.num_user_constants[shader] + = buf->size / (4 * sizeof(float)); + } + else { + i915->current.num_user_constants[shader] = 0; + } + } + + i915->dirty |= I915_NEW_CONSTANTS; +} + + +static void i915_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, + struct pipe_texture *texture) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->texture[sampler] = (struct i915_texture*)texture; /* ptr, not struct */ + + i915->dirty |= I915_NEW_TEXTURE; +} + + + +static void i915_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->framebuffer = *fb; /* struct copy */ + + i915->dirty |= I915_NEW_FRAMEBUFFER; +} + + + +static void i915_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct i915_context *i915 = i915_context(pipe); + + draw_set_clip_state(i915->draw, clip); + + i915->dirty |= I915_NEW_CLIP; +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void i915_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->viewport = *viewport; /* struct copy */ + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(i915->draw, &i915->viewport); + + i915->dirty |= I915_NEW_VIEWPORT; +} + + +static void * +i915_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state ); + + cso->templ = rasterizer; + cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + cso->light_twoside = rasterizer->light_twoside; + cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE; + cso->ds[1].f = rasterizer->offset_scale; + if (rasterizer->poly_stipple_enable) { + cso->st |= ST1_ENABLE; + } + + if (rasterizer->scissor) + cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT; + else + cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT; + + switch (rasterizer->cull_mode) { + case PIPE_WINDING_NONE: + cso->LIS4 |= S4_CULLMODE_NONE; + break; + case PIPE_WINDING_CW: + cso->LIS4 |= S4_CULLMODE_CW; + break; + case PIPE_WINDING_CCW: + cso->LIS4 |= S4_CULLMODE_CCW; + break; + case PIPE_WINDING_BOTH: + cso->LIS4 |= S4_CULLMODE_BOTH; + break; + } + + { + int line_width = CLAMP((int)(rasterizer->line_width * 2), 1, 0xf); + + cso->LIS4 |= line_width << S4_LINE_WIDTH_SHIFT; + + if (rasterizer->line_smooth) + cso->LIS4 |= S4_LINE_ANTIALIAS_ENABLE; + } + + { + int point_size = CLAMP((int) rasterizer->point_size, 1, 0xff); + + cso->LIS4 |= point_size << S4_POINT_WIDTH_SHIFT; + } + + if (rasterizer->flatshade) { + cso->LIS4 |= (S4_FLATSHADE_ALPHA | + S4_FLATSHADE_COLOR | + S4_FLATSHADE_SPECULAR); + } + + cso->LIS7 = fui( rasterizer->offset_units ); + + + return cso; +} + +static void i915_bind_rasterizer_state( struct pipe_context *pipe, + void *setup ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->rasterizer = (struct i915_rasterizer_state *)setup; + + /* pass-through to draw module */ + draw_set_rasterizer_state(i915->draw, i915->rasterizer->templ); + + i915->dirty |= I915_NEW_RASTERIZER; +} + +static void i915_delete_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + FREE(setup); +} + +static void i915_set_vertex_buffer( struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer ) +{ + struct i915_context *i915 = i915_context(pipe); + i915->vertex_buffer[index] = *buffer; + /* pass-through to draw module */ + draw_set_vertex_buffer(i915->draw, index, buffer); +} + +static void i915_set_vertex_element( struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *element) +{ + struct i915_context *i915 = i915_context(pipe); + /* pass-through to draw module */ + draw_set_vertex_element(i915->draw, index, element); +} + + + +void +i915_init_state_functions( struct i915_context *i915 ) +{ + i915->pipe.create_blend_state = i915_create_blend_state; + i915->pipe.bind_blend_state = i915_bind_blend_state; + i915->pipe.delete_blend_state = i915_delete_blend_state; + + i915->pipe.create_sampler_state = i915_create_sampler_state; + i915->pipe.bind_sampler_state = i915_bind_sampler_state; + i915->pipe.delete_sampler_state = i915_delete_sampler_state; + + i915->pipe.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; + i915->pipe.bind_depth_stencil_alpha_state = i915_bind_depth_stencil_state; + i915->pipe.delete_depth_stencil_alpha_state = i915_delete_depth_stencil_state; + + i915->pipe.create_rasterizer_state = i915_create_rasterizer_state; + i915->pipe.bind_rasterizer_state = i915_bind_rasterizer_state; + i915->pipe.delete_rasterizer_state = i915_delete_rasterizer_state; + i915->pipe.create_fs_state = i915_create_fs_state; + i915->pipe.bind_fs_state = i915_bind_fs_state; + i915->pipe.delete_fs_state = i915_delete_fs_state; + i915->pipe.create_vs_state = i915_create_vs_state; + i915->pipe.bind_vs_state = i915_bind_vs_state; + i915->pipe.delete_vs_state = i915_delete_vs_state; + + i915->pipe.set_blend_color = i915_set_blend_color; + i915->pipe.set_clip_state = i915_set_clip_state; + i915->pipe.set_constant_buffer = i915_set_constant_buffer; + i915->pipe.set_framebuffer_state = i915_set_framebuffer_state; + + i915->pipe.set_polygon_stipple = i915_set_polygon_stipple; + i915->pipe.set_scissor_state = i915_set_scissor_state; + i915->pipe.set_sampler_texture = i915_set_sampler_texture; + i915->pipe.set_viewport_state = i915_set_viewport_state; + i915->pipe.set_vertex_buffer = i915_set_vertex_buffer; + i915->pipe.set_vertex_element = i915_set_vertex_element; +} diff --git a/src/gallium/drivers/i915simple/i915_state.h b/src/gallium/drivers/i915simple/i915_state.h new file mode 100644 index 00000000000..86c6b0027d5 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef I915_STATE_H +#define I915_STATE_H + +struct i915_context; + + +struct i915_tracked_state { + unsigned dirty; + void (*update)( struct i915_context * ); +}; + +void i915_update_immediate( struct i915_context *i915 ); +void i915_update_dynamic( struct i915_context *i915 ); +void i915_update_derived( struct i915_context *i915 ); +void i915_update_samplers( struct i915_context *i915 ); +void i915_update_textures(struct i915_context *i915); + +void i915_emit_hardware_state( struct i915_context *i915 ); + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c new file mode 100644 index 00000000000..653983e4a99 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -0,0 +1,177 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_vertex.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "i915_fpc.h" +#include "pipe/p_shader_tokens.h" + + +/** + * Determine which post-transform / pre-rasterization vertex attributes + * we need. + * Derived from: fs, setup states. + */ +static void calculate_vertex_layout( struct i915_context *i915 ) +{ + const struct pipe_shader_state *fs = i915->fs; + const enum interp_mode colorInterp = i915->rasterizer->color_interp; + struct vertex_info vinfo; + uint front0 = 0, back0 = 0, front1 = 0, back1 = 0; + boolean needW = 0; + uint i; + boolean texCoords[8]; + uint src = 0; + + memset(texCoords, 0, sizeof(texCoords)); + memset(&vinfo, 0, sizeof(vinfo)); + + /* pos */ + draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src++); + /* Note: we'll set the S4_VFMT_XYZ[W] bits below */ + + for (i = 0; i < fs->num_inputs; i++) { + switch (fs->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + if (fs->input_semantic_index[i] == 0) { + front0 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++); + vinfo.hwfmt[0] |= S4_VFMT_COLOR; + } + else { + assert(fs->input_semantic_index[i] == 1); + front1 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++); + vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; + } + break; + case TGSI_SEMANTIC_GENERIC: + /* usually a texcoord */ + { + const uint unit = fs->input_semantic_index[i]; + uint hwtc; + texCoords[unit] = TRUE; + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + hwtc = TEXCOORDFMT_4D; + needW = TRUE; + vinfo.hwfmt[1] |= hwtc << (unit * 4); + } + break; + case TGSI_SEMANTIC_FOG: + debug_printf("i915 fogcoord not implemented yet\n"); + draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src++); + break; + default: + assert(0); + } + + } + + /* finish up texcoord fields */ + for (i = 0; i < 8; i++) { + if (!texCoords[i]) { + const uint hwtc = TEXCOORDFMT_NOT_PRESENT; + vinfo.hwfmt[1] |= hwtc << (i* 4); + } + } + + /* go back and fill in the vertex position info now that we have needW */ + if (needW) { + vinfo.hwfmt[0] |= S4_VFMT_XYZW; + vinfo.emit[0] = EMIT_4F; + } + else { + vinfo.hwfmt[0] |= S4_VFMT_XYZ; + vinfo.emit[0] = EMIT_3F; + } + + /* Additional attributes required for setup: Just twosided + * lighting. Edgeflag is dealt with specially by setting bits in + * the vertex header. + */ + if (i915->rasterizer->light_twoside) { + if (front0) { + back0 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++); + } + if (back0) { + back1 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++); + } + } + + draw_compute_vertex_size(&vinfo); + + if (memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) { + /* Need to set this flag so that the LIS2/4 registers get set. + * It also means the i915_update_immediate() function must be called + * after this one, in i915_update_derived(). + */ + i915->dirty |= I915_NEW_VERTEX_FORMAT; + + memcpy(&i915->current.vertex_info, &vinfo, sizeof(vinfo)); + } +} + + + + +/* Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. + */ +void i915_update_derived( struct i915_context *i915 ) +{ + if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS)) + calculate_vertex_layout( i915 ); + + if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE)) + i915_update_samplers(i915); + + if (i915->dirty & I915_NEW_TEXTURE) + i915_update_textures(i915); + + if (i915->dirty) + i915_update_immediate( i915 ); + + if (i915->dirty) + i915_update_dynamic( i915 ); + + if (i915->dirty & I915_NEW_FS) { + i915_translate_fragment_program(i915); + i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ + } + + /* HW emit currently references framebuffer state directly: + */ + if (i915->dirty & I915_NEW_FRAMEBUFFER) + i915->hardware_dirty |= I915_HW_STATIC; + + i915->dirty = 0; +} diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c new file mode 100644 index 00000000000..8cfbdddd19b --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_dynamic.c @@ -0,0 +1,308 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_batch.h" +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "pipe/p_util.h" + +#define FILE_DEBUG_FLAG DEBUG_STATE + +/* State that we have chosen to store in the DYNAMIC segment of the + * i915 indirect state mechanism. + * + * Can't cache these in the way we do the static state, as there is no + * start/size in the command packet, instead an 'end' value that gets + * incremented. + * + * Additionally, there seems to be a requirement to re-issue the full + * (active) state every time a 4kb boundary is crossed. + */ + +static INLINE void set_dynamic_indirect( struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords ) +{ + unsigned i; + + for (i = 0; i < dwords; i++) + i915->current.dynamic[offset + i] = src[i]; + + i915->hardware_dirty |= I915_HW_DYNAMIC; +} + + +/*********************************************************************** + * Modes4: stencil masks and logicop + */ +static void upload_MODES4( struct i915_context *i915 ) +{ + unsigned modes4 = 0; + + /* I915_NEW_STENCIL */ + modes4 |= i915->depth_stencil->stencil_modes4; + /* I915_NEW_BLEND */ + modes4 |= i915->blend->modes4; + + /* Always, so that we know when state is in-active: + */ + set_dynamic_indirect( i915, + I915_DYNAMIC_MODES4, + &modes4, + 1 ); +} + +const struct i915_tracked_state i915_upload_MODES4 = { + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, + upload_MODES4 +}; + + + + +/*********************************************************************** + */ + +static void upload_BFO( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_BFO_0, + &(i915->depth_stencil->bfo[0]), + 2 ); +} + +const struct i915_tracked_state i915_upload_BFO = { + I915_NEW_DEPTH_STENCIL, + upload_BFO +}; + + +/*********************************************************************** + */ + + +static void upload_BLENDCOLOR( struct i915_context *i915 ) +{ + unsigned bc[2]; + + memset( bc, 0, sizeof(bc) ); + + /* I915_NEW_BLEND {_COLOR} + */ + { + const float *color = i915->blend_color.color; + + bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD; + bc[1] = pack_ui32_float4( color[0], + color[1], + color[2], + color[3] ); + } + + set_dynamic_indirect( i915, + I915_DYNAMIC_BC_0, + bc, + 2 ); +} + +const struct i915_tracked_state i915_upload_BLENDCOLOR = { + I915_NEW_BLEND, + upload_BLENDCOLOR +}; + +/*********************************************************************** + */ + + +static void upload_IAB( struct i915_context *i915 ) +{ + unsigned iab = i915->blend->iab; + + + set_dynamic_indirect( i915, + I915_DYNAMIC_IAB, + &iab, + 1 ); +} + +const struct i915_tracked_state i915_upload_IAB = { + I915_NEW_BLEND, + upload_IAB +}; + + +/*********************************************************************** + */ + + + +static void upload_DEPTHSCALE( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_DEPTHSCALE_0, + &(i915->rasterizer->ds[0].u), + 2 ); +} + +const struct i915_tracked_state i915_upload_DEPTHSCALE = { + I915_NEW_RASTERIZER, + upload_DEPTHSCALE +}; + + + +/*********************************************************************** + * Polygon stipple + * + * The i915 supports a 4x4 stipple natively, GL wants 32x32. + * Fortunately stipple is usually a repeating pattern. + * + * XXX: does stipple pattern need to be adjusted according to + * the window position? + * + * XXX: possibly need workaround for conform paths test. + */ + +static void upload_STIPPLE( struct i915_context *i915 ) +{ + unsigned st[2]; + + st[0] = _3DSTATE_STIPPLE; + st[1] = 0; + + /* I915_NEW_RASTERIZER + */ + st[1] |= i915->rasterizer->st; + + + /* I915_NEW_STIPPLE + */ + { + const ubyte *mask = (const ubyte *)i915->poly_stipple.stipple; + ubyte p[4]; + + p[0] = mask[12] & 0xf; + p[1] = mask[8] & 0xf; + p[2] = mask[4] & 0xf; + p[3] = mask[0] & 0xf; + + /* Not sure what to do about fallbacks, so for now just dont: + */ + st[1] |= ((p[0] << 0) | + (p[1] << 4) | + (p[2] << 8) | + (p[3] << 12)); + } + + + set_dynamic_indirect( i915, + I915_DYNAMIC_STP_0, + &st[0], + 2 ); +} + + +const struct i915_tracked_state i915_upload_STIPPLE = { + I915_NEW_RASTERIZER | I915_NEW_STIPPLE, + upload_STIPPLE +}; + + + +/*********************************************************************** + * Scissor. + */ +static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_SC_ENA_0, + &(i915->rasterizer->sc[0]), + 1 ); +} + +const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { + I915_NEW_RASTERIZER, + upload_SCISSOR_ENABLE +}; + + + +static void upload_SCISSOR_RECT( struct i915_context *i915 ) +{ + unsigned x1 = i915->scissor.minx; + unsigned y1 = i915->scissor.miny; + unsigned x2 = i915->scissor.maxx; + unsigned y2 = i915->scissor.maxy; + unsigned sc[3]; + + sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; + sc[1] = (y1 << 16) | (x1 & 0xffff); + sc[2] = (y2 << 16) | (x2 & 0xffff); + + set_dynamic_indirect( i915, + I915_DYNAMIC_SC_RECT_0, + &sc[0], + 3 ); +} + + +const struct i915_tracked_state i915_upload_SCISSOR_RECT = { + I915_NEW_SCISSOR, + upload_SCISSOR_RECT +}; + + + + + + +static const struct i915_tracked_state *atoms[] = { + &i915_upload_MODES4, + &i915_upload_BFO, + &i915_upload_BLENDCOLOR, + &i915_upload_IAB, + &i915_upload_DEPTHSCALE, + &i915_upload_STIPPLE, + &i915_upload_SCISSOR_ENABLE, + &i915_upload_SCISSOR_RECT +}; + +/* These will be dynamic indirect state commands, but for now just end + * up on the batch buffer with everything else. + */ +void i915_update_dynamic( struct i915_context *i915 ) +{ + int i; + + for (i = 0; i < Elements(atoms); i++) + if (i915->dirty & atoms[i]->dirty) + atoms[i]->update( i915 ); +} + diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c new file mode 100644 index 00000000000..3339287f498 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -0,0 +1,374 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_batch.h" +#include "i915_reg.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +static unsigned translate_format( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_R5G6B5_UNORM: + return COLOR_BUF_RGB565; + default: + assert(0); + return 0; + } +} + +static unsigned translate_depth_format( enum pipe_format zformat ) +{ + switch (zformat) { + case PIPE_FORMAT_S8Z24_UNORM: + return DEPTH_FRMT_24_FIXED_8_OTHER; + case PIPE_FORMAT_Z16_UNORM: + return DEPTH_FRMT_16_FIXED; + default: + assert(0); + return 0; + } +} + + +/** + * Examine framebuffer state to determine width, height. + */ +static boolean +framebuffer_size(const struct pipe_framebuffer_state *fb, + uint *width, uint *height) +{ + if (fb->cbufs[0]) { + *width = fb->cbufs[0]->width; + *height = fb->cbufs[0]->height; + return TRUE; + } + else if (fb->zsbuf) { + *width = fb->zsbuf->width; + *height = fb->zsbuf->height; + return TRUE; + } + else { + *width = *height = 0; + return FALSE; + } +} + + +/* Push the state into the sarea and/or texture memory. + */ +void +i915_emit_hardware_state(struct i915_context *i915 ) +{ + /* XXX: there must be an easier way */ + const unsigned dwords = ( 14 + + 7 + + I915_MAX_DYNAMIC + + 8 + + 2 + I915_TEX_UNITS*3 + + 2 + I915_TEX_UNITS*3 + + 2 + I915_MAX_CONSTANT*4 + + i915->current.program_len + + 6 + ) * 3/2; /* plus 50% margin */ + const unsigned relocs = ( I915_TEX_UNITS + + 3 + ) * 3/2; /* plus 50% margin */ + +#if 0 + debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); +#endif + + if(!BEGIN_BATCH(dwords, relocs)) { + FLUSH_BATCH(); + assert(BEGIN_BATCH(dwords, relocs)); + } + + /* 14 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_INVARIENT) + { + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7)); + + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D); + + /* Need to initialize this to zero. + */ + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + + /* disable indirect state for now + */ + OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); + OUT_BATCH(0); + } + + /* 7 dwords, 1 relocs */ + if (i915->hardware_dirty & I915_HW_IMMEDIATE) + { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | + I1_LOAD_S(1) | + I1_LOAD_S(2) | + I1_LOAD_S(4) | + I1_LOAD_S(5) | + I1_LOAD_S(6) | + (5)); + + if(i915->vbo) + OUT_RELOC(i915->vbo, + I915_BUFFER_ACCESS_READ, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + /* FIXME: we should not do this */ + OUT_BATCH(0); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); + } + + /* I915_MAX_DYNAMIC dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_DYNAMIC) + { + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + OUT_BATCH(i915->current.dynamic[i]); + } + } + + /* 8 dwords, 2 relocs */ + if (i915->hardware_dirty & I915_HW_STATIC) + { + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + + if (cbuf_surface) { + unsigned pitch = (cbuf_surface->pitch * cbuf_surface->cpp); + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + + OUT_BATCH(BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(pitch) | /* pitch in bytes */ + BUF_3D_USE_FENCE); + + OUT_RELOC(cbuf_surface->buffer, + I915_BUFFER_ACCESS_WRITE, + 0); + } + + /* What happens if no zbuf?? + */ + if (depth_surface) { + unsigned zpitch = (depth_surface->pitch * depth_surface->cpp); + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + + OUT_BATCH(BUF_3D_ID_DEPTH | + BUF_3D_PITCH(zpitch) | /* pitch in bytes */ + BUF_3D_USE_FENCE); + + OUT_RELOC(depth_surface->buffer, + I915_BUFFER_ACCESS_WRITE, + 0); + } + + { + unsigned cformat, zformat = 0; + + if (cbuf_surface) + cformat = cbuf_surface->format; + else + cformat = PIPE_FORMAT_A8R8G8B8_UNORM; /* arbitrary */ + cformat = translate_format(cformat); + + if (depth_surface) + zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + cformat | + zformat ); + } + } + +#if 01 + /* texture images */ + /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ + if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) + { + /* XXX: we were refering to sampler state + * (current.sampler_enable_nr) below, but only checking + * I915_HW_MAP above. Should probably calculate the enabled + * flags separately - but there will be further rework of + * state so perhaps not necessary yet. + */ + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct pipe_buffer *buf = + i915->texture[unit]->buffer; + uint offset = 0; + assert(buf); + + count++; + + OUT_RELOC(buf, + I915_BUFFER_ACCESS_READ, + offset); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ + } + } + assert(count == nr); + } + } +#endif + +#if 01 + /* samplers */ + /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_SAMPLER) + { + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<<i)) { + OUT_BATCH( i915->current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); + } + } + } + } +#endif + + /* constants */ + /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_PROGRAM) + { + const uint nr = i915->current.num_constants[PIPE_SHADER_FRAGMENT]; + assert(nr <= I915_MAX_CONSTANT); + if (nr > 0) { + const uint *c + = (const uint *) i915->current.constants[PIPE_SHADER_FRAGMENT]; + uint i; + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + for (i = 0; i < nr; i++) { + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + } + } + } + + /* Fragment program */ + /* i915->current.program_len dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_PROGRAM) + { + uint i; + /* we should always have, at least, a pass-through program */ + assert(i915->current.program_len > 0); + for (i = 0; i < i915->current.program_len; i++) { + OUT_BATCH(i915->current.program[i]); + } + } + + /* drawing surface size */ + /* 6 dwords, 0 relocs */ + { + uint w, h; + boolean k = framebuffer_size(&i915->framebuffer, &w, &h); + assert(k); + + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16)); + OUT_BATCH(0); + OUT_BATCH(0); + } + + + i915->hardware_dirty = 0; +} diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c new file mode 100644 index 00000000000..07031fc6c5b --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -0,0 +1,221 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "p_util.h" + + +/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. + * Would like to opportunistically recombine all these fragments into + * a single packet containing only what has changed, but for now emit + * as multiple packets. + */ + + + + +/*********************************************************************** + * S0,S1: Vertex buffer state. + */ +static void upload_S0S1(struct i915_context *i915) +{ + unsigned LIS0, LIS1; + + /* INTEL_NEW_VBO */ + /* TODO: re-use vertex buffers here? */ + LIS0 = 0; + + /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! + */ + { + unsigned vertex_size = i915->current.vertex_info.size; + + LIS1 = ((vertex_size << 24) | + (vertex_size << 16)); + } + + /* INTEL_NEW_VBO */ + /* TODO: use a vertex generation number to track vbo changes */ + if (1 || + i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || + i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) + { + i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; + i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S0S1 = { + I915_NEW_VBO | I915_NEW_VERTEX_FORMAT, + upload_S0S1 +}; + + + + +/*********************************************************************** + * S4: Vertex format, rasterization state + */ +static void upload_S2S4(struct i915_context *i915) +{ + unsigned LIS2, LIS4; + + /* I915_NEW_VERTEX_FORMAT */ + { + LIS2 = i915->current.vertex_info.hwfmt[1]; + LIS4 = i915->current.vertex_info.hwfmt[0]; + /* + debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); + */ + assert(LIS4); /* should never be zero? */ + } + + LIS4 |= i915->rasterizer->LIS4; + + if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || + LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { + + i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; + i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + + +const struct i915_tracked_state i915_upload_S2S4 = { + I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT, + upload_S2S4 +}; + + + +/*********************************************************************** + * + */ +static void upload_S5( struct i915_context *i915 ) +{ + unsigned LIS5 = 0; + + LIS5 |= i915->depth_stencil->stencil_LIS5; + + LIS5 |= i915->blend->LIS5; + +#if 0 + /* I915_NEW_RASTERIZER */ + if (i915->state.Polygon->OffsetFill) { + LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; + } +#endif + + + if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { + i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S5 = { + (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER), + upload_S5 +}; + + +/*********************************************************************** + */ +static void upload_S6( struct i915_context *i915 ) +{ + unsigned LIS6 = (S6_COLOR_WRITE_ENABLE | + (2 << S6_TRISTRIP_PV_SHIFT)); + + /* I915_NEW_BLEND + */ + LIS6 |= i915->blend->LIS6; + + /* I915_NEW_DEPTH + */ + LIS6 |= i915->depth_stencil->depth_LIS6; + + if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { + i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S6 = { + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, + upload_S6 +}; + + +/*********************************************************************** + */ +static void upload_S7( struct i915_context *i915 ) +{ + unsigned LIS7; + + /* I915_NEW_RASTERIZER + */ + LIS7 = i915->rasterizer->LIS7; + + if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { + i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S7 = { + I915_NEW_RASTERIZER, + upload_S7 +}; + + +static const struct i915_tracked_state *atoms[] = { + &i915_upload_S0S1, + &i915_upload_S2S4, + &i915_upload_S5, + &i915_upload_S6, + &i915_upload_S7 +}; + +/* + */ +void i915_update_immediate( struct i915_context *i915 ) +{ + int i; + + for (i = 0; i < Elements(atoms); i++) + if (i915->dirty & atoms[i]->dirty) + atoms[i]->update( i915 ); +} diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915simple/i915_state_inlines.h new file mode 100644 index 00000000000..0934ac79a41 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_inlines.h @@ -0,0 +1,230 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_STATE_INLINES_H +#define I915_STATE_INLINES_H + +#include "p_compiler.h" +#include "p_defines.h" +#include "i915_reg.h" + + +static INLINE unsigned +i915_translate_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_NEVER; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LESS; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_ALWAYS; + default: + return COMPAREFUNC_ALWAYS; + } +} + +static INLINE unsigned +i915_translate_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return STENCILOP_INVERT; + default: + return STENCILOP_ZERO; + } +} + +static INLINE unsigned +i915_translate_blend_factor(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return BLENDFACT_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BLENDFACT_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BLENDFACT_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BLENDFACT_SRC_COLR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BLENDFACT_INV_SRC_COLR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BLENDFACT_DST_COLR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BLENDFACT_INV_DST_COLR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BLENDFACT_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BLENDFACT_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BLENDFACT_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BLENDFACT_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BLENDFACT_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BLENDFACT_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BLENDFACT_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BLENDFACT_INV_CONST_ALPHA; + default: + return BLENDFACT_ZERO; + } +} + +static INLINE unsigned +i915_translate_blend_func(unsigned mode) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BLENDFUNC_ADD; + case PIPE_BLEND_MIN: + return BLENDFUNC_MIN; + case PIPE_BLEND_MAX: + return BLENDFUNC_MAX; + case PIPE_BLEND_SUBTRACT: + return BLENDFUNC_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLENDFUNC_REVERSE_SUBTRACT; + default: + return 0; + } +} + + +static INLINE unsigned +i915_translate_logic_op(unsigned opcode) +{ + switch (opcode) { + case PIPE_LOGICOP_CLEAR: + return LOGICOP_CLEAR; + case PIPE_LOGICOP_AND: + return LOGICOP_AND; + case PIPE_LOGICOP_AND_REVERSE: + return LOGICOP_AND_RVRSE; + case PIPE_LOGICOP_COPY: + return LOGICOP_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return LOGICOP_COPY_INV; + case PIPE_LOGICOP_AND_INVERTED: + return LOGICOP_AND_INV; + case PIPE_LOGICOP_NOOP: + return LOGICOP_NOOP; + case PIPE_LOGICOP_XOR: + return LOGICOP_XOR; + case PIPE_LOGICOP_OR: + return LOGICOP_OR; + case PIPE_LOGICOP_OR_INVERTED: + return LOGICOP_OR_INV; + case PIPE_LOGICOP_NOR: + return LOGICOP_NOR; + case PIPE_LOGICOP_EQUIV: + return LOGICOP_EQUIV; + case PIPE_LOGICOP_INVERT: + return LOGICOP_INV; + case PIPE_LOGICOP_OR_REVERSE: + return LOGICOP_OR_RVRSE; + case PIPE_LOGICOP_NAND: + return LOGICOP_NAND; + case PIPE_LOGICOP_SET: + return LOGICOP_SET; + default: + return LOGICOP_SET; + } +} + + + +static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr ) +{ + boolean ok; + + switch (hw_prim) { + case PRIM3D_POINTLIST: + ok = (nr >= 1); + assert(ok); + break; + case PRIM3D_LINELIST: + ok = (nr >= 2) && (nr % 2) == 0; + assert(ok); + break; + case PRIM3D_LINESTRIP: + ok = (nr >= 2); + assert(ok); + break; + case PRIM3D_TRILIST: + ok = (nr >= 3) && (nr % 3) == 0; + assert(ok); + break; + case PRIM3D_TRISTRIP: + ok = (nr >= 3); + assert(ok); + break; + case PRIM3D_TRIFAN: + ok = (nr >= 3); + assert(ok); + break; + case PRIM3D_POLY: + ok = (nr >= 3); + assert(ok); + break; + default: + assert(0); + ok = 0; + break; + } + + return ok; +} + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c new file mode 100644 index 00000000000..9c1a5bbbd65 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -0,0 +1,231 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" + + +/** + * Compute i915 texture sampling state. + * + * Recalculate all state from scratch. Perhaps not the most + * efficient, but this has gotten complex enough that we need + * something which is understandable and reliable. + * \param state returns the 3 words of compute state + */ +static void update_sampler(struct i915_context *i915, + uint unit, + const struct i915_sampler_state *sampler, + const struct i915_texture *tex, + unsigned state[3] ) +{ + const struct pipe_texture *pt = &tex->base; + + /* Need to do this after updating the maps, which call the + * intel_finalize_mipmap_tree and hence can update firstLevel: + */ + state[0] = sampler->state[0]; + state[1] = sampler->state[1]; + state[2] = sampler->state[2]; + + if (pt->format == PIPE_FORMAT_YCBCR || + pt->format == PIPE_FORMAT_YCBCR_REV) + state[0] |= SS2_COLORSPACE_CONVERSION; + + /* 3D textures don't seem to respect the border color. + * Fallback if there's ever a danger that they might refer to + * it. + * + * Effectively this means fallback on 3D clamp or + * clamp_to_border. + * + * XXX: Check if this is true on i945. + * XXX: Check if this bug got fixed in release silicon. + */ +#if 0 + { + const unsigned ws = sampler->templ->wrap_s; + const unsigned wt = sampler->templ->wrap_t; + const unsigned wr = sampler->templ->wrap_r; + if (pt->target == PIPE_TEXTURE_3D && + (sampler->templ->min_img_filter != PIPE_TEX_FILTER_NEAREST || + sampler->templ->mag_img_filter != PIPE_TEX_FILTER_NEAREST) && + (ws == PIPE_TEX_WRAP_CLAMP || + wt == PIPE_TEX_WRAP_CLAMP || + wr == PIPE_TEX_WRAP_CLAMP || + ws == PIPE_TEX_WRAP_CLAMP_TO_BORDER || + wt == PIPE_TEX_WRAP_CLAMP_TO_BORDER || + wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) { + if (i915->strict_conformance) { + assert(0); + /* sampler->fallback = true; */ + /* TODO */ + } + } + } +#endif + + state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); +} + + +void i915_update_samplers( struct i915_context *i915 ) +{ + uint unit; + + i915->current.sampler_enable_nr = 0; + i915->current.sampler_enable_flags = 0x0; + + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + /* determine unit enable/disable by looking for a bound texture */ + /* could also examine the fragment program? */ + if (i915->texture[unit]) { + update_sampler( i915, + unit, + i915->sampler[unit], /* sampler state */ + i915->texture[unit], /* texture */ + i915->current.sampler[unit] /* the result */ + ); + + i915->current.sampler_enable_nr++; + i915->current.sampler_enable_flags |= (1 << unit); + } + } + + i915->hardware_dirty |= I915_HW_SAMPLER; +} + + +static uint +translate_texture_format(enum pipe_format pipeFormat) +{ + switch (pipeFormat) { + case PIPE_FORMAT_U_L8: + return MAPSURF_8BIT | MT_8BIT_L8; + case PIPE_FORMAT_U_I8: + return MAPSURF_8BIT | MT_8BIT_I8; + case PIPE_FORMAT_U_A8: + return MAPSURF_8BIT | MT_8BIT_A8; + case PIPE_FORMAT_U_A8_L8: + return MAPSURF_16BIT | MT_16BIT_AY88; + case PIPE_FORMAT_R5G6B5_UNORM: + return MAPSURF_16BIT | MT_16BIT_RGB565; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return MAPSURF_16BIT | MT_16BIT_ARGB1555; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return MAPSURF_16BIT | MT_16BIT_ARGB4444; + case PIPE_FORMAT_A8R8G8B8_UNORM: + return MAPSURF_32BIT | MT_32BIT_ARGB8888; + case PIPE_FORMAT_YCBCR_REV: + return (MAPSURF_422 | MT_422_YCRCB_NORMAL); + case PIPE_FORMAT_YCBCR: + return (MAPSURF_422 | MT_422_YCRCB_SWAPY); +#if 0 + case PIPE_FORMAT_RGB_FXT1: + case PIPE_FORMAT_RGBA_FXT1: + return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1); +#endif + case PIPE_FORMAT_Z16_UNORM: + return (MAPSURF_16BIT | MT_16BIT_L16); +#if 0 + case PIPE_FORMAT_RGBA_DXT1: + case PIPE_FORMAT_RGB_DXT1: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); + case PIPE_FORMAT_RGBA_DXT3: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); + case PIPE_FORMAT_RGBA_DXT5: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); +#endif + case PIPE_FORMAT_S8Z24_UNORM: + return (MAPSURF_32BIT | MT_32BIT_xL824); + default: + debug_printf("i915: translate_texture_format() bad image format %x\n", + pipeFormat); + assert(0); + return 0; + } +} + + +static void +i915_update_texture(struct i915_context *i915, uint unit, + uint state[6]) +{ + const struct i915_texture *tex = i915->texture[unit]; + const struct pipe_texture *pt = &tex->base; + uint format, pitch; + const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + const uint num_levels = pt->last_level; + + assert(tex); + assert(width); + assert(height); + assert(depth); + + format = translate_texture_format(pt->format); + pitch = tex->pitch * pt->cpp; + + assert(format); + assert(pitch); + + /* MS3 state */ + state[0] = + (((height - 1) << MS3_HEIGHT_SHIFT) + | ((width - 1) << MS3_WIDTH_SHIFT) + | format + | MS3_USE_FENCE_REGS); + + /* MS4 state */ + state[1] = + ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) + | MS4_CUBE_FACE_ENA_MASK + | ((num_levels * 4) << MS4_MAX_LOD_SHIFT) + | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); +} + + +void +i915_update_textures(struct i915_context *i915) +{ + uint unit; + + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + /* determine unit enable/disable by looking for a bound texture */ + /* could also examine the fragment program? */ + if (i915->texture[unit]) { + i915_update_texture(i915, unit, i915->current.texbuffer[unit]); + } + } + + i915->hardware_dirty |= I915_HW_MAP; +} diff --git a/src/gallium/drivers/i915simple/i915_strings.c b/src/gallium/drivers/i915simple/i915_strings.c new file mode 100644 index 00000000000..c713bf72086 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_strings.c @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_reg.h" + + +static const char *i915_get_vendor( struct pipe_context *pipe ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char *i915_get_name( struct pipe_context *pipe ) +{ + static char buffer[128]; + const char *chipset; + + switch (i915_context(pipe)->pci_id) { + case PCI_CHIP_I915_G: + chipset = "915G"; + break; + case PCI_CHIP_I915_GM: + chipset = "915GM"; + break; + case PCI_CHIP_I945_G: + chipset = "945G"; + break; + case PCI_CHIP_I945_GM: + chipset = "945GM"; + break; + case PCI_CHIP_I945_GME: + chipset = "945GME"; + break; + case PCI_CHIP_G33_G: + chipset = "G33"; + break; + case PCI_CHIP_Q35_G: + chipset = "Q35"; + break; + case PCI_CHIP_Q33_G: + chipset = "Q33"; + break; + default: + chipset = "unknown"; + break; + } + + sprintf(buffer, "pipe/i915 (chipset: %s)", chipset); + return buffer; +} + + +void +i915_init_string_functions(struct i915_context *i915) +{ + i915->pipe.get_name = i915_get_name; + i915->pipe.get_vendor = i915_get_vendor; +} diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c new file mode 100644 index 00000000000..de0cc5fe06a --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -0,0 +1,191 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_blit.h" +#include "i915_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/util/p_tile.h" + + +/* + * XXX note: same as code in sp_surface.c + */ +static struct pipe_surface * +i915_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct i915_texture *tex = (struct i915_texture *)pt; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + offset = tex->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset += tex->image_offset[level][face] * pt->cpp; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset += tex->image_offset[level][zslice] * pt->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = tex->pitch; + ps->offset = offset; + } + return ps; +} + + + +/* Assumes all values are within bounds -- no checking at this level - + * do it higher up if required. + */ +static void +i915_surface_copy(struct pipe_context *pipe, + unsigned do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + assert( dst != src ); + assert( dst->cpp == src->cpp ); + + if (0) { + pipe_copy_rect(pipe_surface_map(dst), + dst->cpp, + dst->pitch, + dstx, dsty, + width, height, + pipe_surface_map(src), + do_flip ? -(int) src->pitch : src->pitch, + srcx, do_flip ? 1 - srcy - height : srcy); + + pipe_surface_unmap(src); + pipe_surface_unmap(dst); + } + else { + i915_copy_blit( i915_context(pipe), + do_flip, + dst->cpp, + (short) src->pitch, src->buffer, src->offset, + (short) dst->pitch, dst->buffer, dst->offset, + (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); + } +} + +/* Fill a rectangular sub-region. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; +} + + +static void +i915_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + if (0) { + unsigned i, j; + void *dst_map = pipe_surface_map(dst); + + switch (dst->cpp) { + case 1: { + ubyte *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + memset(row, value, width); + row += dst->pitch; + } + } + break; + case 2: { + ushort *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = (ushort) value; + row += dst->pitch; + } + } + break; + case 4: { + unsigned *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = value; + row += dst->pitch; + } + } + break; + default: + assert(0); + break; + } + + pipe_surface_unmap( dst ); + } + else { + i915_fill_blit( i915_context(pipe), + dst->cpp, + (short) dst->pitch, + dst->buffer, dst->offset, + (short) dstx, (short) dsty, + (short) width, (short) height, + value ); + } +} + + +void +i915_init_surface_functions(struct i915_context *i915) +{ + i915->pipe.get_tex_surface = i915_get_tex_surface; + + i915->pipe.surface_copy = i915_surface_copy; + i915->pipe.surface_fill = i915_surface_fill; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c new file mode 100644 index 00000000000..6d37ae3d747 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -0,0 +1,536 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + */ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "i915_context.h" +#include "i915_texture.h" +#include "i915_debug.h" + + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + + +static void +i915_miptree_set_level_info(struct i915_texture *tex, + unsigned level, + unsigned nr_images, + unsigned x, unsigned y, unsigned w, unsigned h, unsigned d) +{ + struct pipe_texture *pt = &tex->base; + + assert(level < PIPE_MAX_TEXTURE_LEVELS); + + pt->width[level] = w; + pt->height[level] = h; + pt->depth[level] = d; + + tex->level_offset[level] = (x + y * tex->pitch) * pt->cpp; + tex->nr_images[level] = nr_images; + + /* + DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + */ + + /* Not sure when this would happen, but anyway: + */ + if (tex->image_offset[level]) { + FREE(tex->image_offset[level]); + tex->image_offset[level] = NULL; + } + + assert(nr_images); + assert(!tex->image_offset[level]); + + tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); + tex->image_offset[level][0] = 0; +} + + +static void +i915_miptree_set_image_offset(struct i915_texture *tex, + unsigned level, unsigned img, unsigned x, unsigned y) +{ + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + + assert(img < tex->nr_images[level]); + + tex->image_offset[level][img] = (x + y * tex->pitch); + + /* + DBG("%s level %d img %d pos %d,%d image_offset %x\n", + __FUNCTION__, level, img, x, y, tex->image_offset[level][img]); + */ +} + + +static void +i945_miptree_layout_2d( struct i915_texture *tex ) +{ + struct pipe_texture *pt = &tex->base; + int align_h = 2, align_w = 4; + unsigned level; + unsigned x = 0; + unsigned y = 0; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + + tex->pitch = pt->width[0]; + + /* May need to adjust pitch to accomodate the placement of + * the 2nd mipmap level. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap level out past the width of its parent. + */ + if (pt->last_level > 0) { + unsigned mip1_width = align_int(minify(pt->width[0]), align_w) + + minify(minify(pt->width[0])); + + if (mip1_width > pt->width[0]) + tex->pitch = mip1_width; + } + + /* Pitch must be a whole number of dwords, even though we + * express it in texels. + */ + tex->pitch = align_int(tex->pitch * pt->cpp, 4) / pt->cpp; + tex->total_height = 0; + + for (level = 0; level <= pt->last_level; level++) { + unsigned img_height; + + i915_miptree_set_level_info(tex, level, 1, x, y, width, height, 1); + + if (pt->compressed) + img_height = MAX2(1, height/4); + else + img_height = align_int(height, align_h); + + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_height = MAX2(tex->total_height, y + img_height); + + /* Layout_below: step right after second mipmap level. + */ + if (level == 1) { + x += align_int(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } +} + + +static const int initial_offsets[6][2] = { + {0, 0}, + {0, 2}, + {1, 0}, + {1, 2}, + {1, 1}, + {1, 3} +}; + +static const int step_offsets[6][2] = { + {0, 2}, + {0, 2}, + {-1, 2}, + {-1, 2}, + {-1, 1}, + {-1, 1} +}; + + +static boolean +i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: { + const unsigned dim = pt->width[0]; + unsigned face; + unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; + + assert(lvlWidth == lvlHeight); /* cubemap images are square */ + + /* double pitch for cube layouts */ + tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; + tex->total_height = dim * 4; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, + 0, 0, + /*OLD: tex->pitch, tex->total_height,*/ + lvlWidth, lvlHeight, + 1); + lvlWidth /= 2; + lvlHeight /= 2; + } + + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * dim; + unsigned y = initial_offsets[face][1] * dim; + unsigned d = dim; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + d >>= 1; + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + } + } + break; + } + case PIPE_TEXTURE_3D:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned stack_height = 0; + + /* Calculate the size of a single slice. + */ + tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; + + /* XXX: hardware expects/requires 9 levels at minimum. + */ + for (level = 0; level <= MAX2(8, pt->last_level); + level++) { + i915_miptree_set_level_info(tex, level, depth, 0, tex->total_height, + width, height, depth); + + + stack_height += MAX2(2, height); + + width = minify(width); + height = minify(height); + depth = minify(depth); + } + + /* Fixup depth image_offsets: + */ + depth = pt->depth[0]; + for (level = 0; level <= pt->last_level; level++) { + unsigned i; + for (i = 0; i < depth; i++) + i915_miptree_set_image_offset(tex, level, i, + 0, i * stack_height); + + depth = minify(depth); + } + + + /* Multiply slice size by texture depth for total size. It's + * remarkable how wasteful of memory the i915 texture layouts + * are. They are largely fixed in the i945. + */ + tex->total_height = stack_height * pt->depth[0]; + break; + } + + default:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned img_height; + + tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; + tex->total_height = 0; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 1, + 0, tex->total_height, + width, height, 1); + + if (pt->compressed) + img_height = MAX2(1, height / 4); + else + img_height = (MAX2(2, height) + 1) & ~1; + + tex->total_height += img_height; + + width = minify(width); + height = minify(height); + } + break; + } + } + /* + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_height, pt->cpp, tex->pitch * tex->total_height * pt->cpp); + */ + + return TRUE; +} + + +static boolean +i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE:{ + const unsigned dim = pt->width[0]; + unsigned face; + unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; + + assert(lvlWidth == lvlHeight); /* cubemap images are square */ + + /* Depending on the size of the largest images, pitch can be + * determined either by the old-style packing of cubemap faces, + * or the final row of 4x4, 2x2 and 1x1 faces below this. + */ + if (dim > 32) + tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; + else + tex->pitch = 14 * 8; + + tex->total_height = dim * 4 + 4; + + /* Set all the levels to effectively occupy the whole rectangular region. + */ + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, + 0, 0, + lvlWidth, lvlHeight, 1); + lvlWidth /= 2; + lvlHeight /= 2; + } + + + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * dim; + unsigned y = initial_offsets[face][1] * dim; + unsigned d = dim; + + if (dim == 4 && face >= 4) { + y = tex->total_height - 4; + x = (face - 4) * 8; + } + else if (dim < 4 && (face > 0)) { + y = tex->total_height - 4; + x = face * 8; + } + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + + d >>= 1; + + switch (d) { + case 4: + switch (face) { + case PIPE_TEX_FACE_POS_X: + case PIPE_TEX_FACE_NEG_X: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + case PIPE_TEX_FACE_POS_Y: + case PIPE_TEX_FACE_NEG_Y: + y += 12; + x -= 8; + break; + case PIPE_TEX_FACE_POS_Z: + case PIPE_TEX_FACE_NEG_Z: + y = tex->total_height - 4; + x = (face - 4) * 8; + break; + } + + case 2: + y = tex->total_height - 4; + x = 16 + face * 8; + break; + + case 1: + x += 48; + break; + + default: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + } + } + } + break; + } + case PIPE_TEXTURE_3D:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned pack_x_pitch, pack_x_nr; + unsigned pack_y_pitch; + unsigned level; + + tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; + tex->total_height = 0; + + pack_y_pitch = MAX2(pt->height[0], 2); + pack_x_pitch = tex->pitch; + pack_x_nr = 1; + + for (level = 0; level <= pt->last_level; level++) { + unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6; + int x = 0; + int y = 0; + unsigned q, j; + + i915_miptree_set_level_info(tex, level, nr_images, + 0, tex->total_height, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + i915_miptree_set_image_offset(tex, level, q, x, y); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + tex->total_height += y; + + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= tex->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + } + + width = minify(width); + height = minify(height); + depth = minify(depth); + } + break; + } + + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: +// case PIPE_TEXTURE_RECTANGLE: + i945_miptree_layout_2d(tex); + break; + default: + assert(0); + return FALSE; + } + + /* + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_height, pt->cpp, tex->pitch * tex->total_height * pt->cpp); + */ + + return TRUE; +} + + +struct pipe_texture * +i915_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat) +{ + struct i915_texture *tex = CALLOC_STRUCT(i915_texture); + + if (tex) { + struct i915_context *i915 = i915_context(pipe); + + tex->base = *templat; + + if (i915->flags.is_i945 ? i945_miptree_layout(pipe, tex) : + i915_miptree_layout(pipe, tex)) + tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); + + if (!tex->buffer) { + FREE(tex); + return NULL; + } + } + + return &tex->base; +} + + +void +i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct i915_texture *tex = (struct i915_texture *)*pt; + uint i; + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); + */ + + pipe_buffer_reference(pipe->winsys, &tex->buffer, NULL); + + for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) + if (tex->image_offset[i]) + FREE(tex->image_offset[i]); + + FREE(tex); + } + *pt = NULL; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h new file mode 100644 index 00000000000..330d111dc78 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_texture.h @@ -0,0 +1,17 @@ + +#ifndef I915_TEXTURE_H +#define I915_TEXTURE_H + +struct pipe_context; +struct pipe_texture; + + +struct pipe_texture * +i915_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat); + +extern void +i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); + + +#endif /* I915_TEXTURE_H */ diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h new file mode 100644 index 00000000000..fe49710852b --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -0,0 +1,115 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * This is the interface that i915simple requires any window system + * hosting it to implement. This is the only include file in i915simple + * which is public. + * + */ + +#ifndef I915_WINSYS_H +#define I915_WINSYS_H + + +#include "pipe/p_defines.h" + + +/* Pipe drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. + */ + +struct pipe_buffer; +struct pipe_winsys; + + +/** + * Additional winsys interface for i915simple. + * + * It is an over-simple batchbuffer mechanism. Will want to improve the + * performance of this, perhaps based on the cmdstream stuff. It + * would be pretty impossible to implement swz on top of this + * interface. + * + * Will also need additions/changes to implement static/dynamic + * indirect state. + */ +struct i915_winsys { + + /** + * Reserve space on batch buffer. + * + * Returns a null pointer if there is insufficient space in the batch buffer + * to hold the requested number of dwords and relocations. + * + * The number of dwords should also include the number of relocations. + */ + unsigned *(*batch_start)( struct i915_winsys *sws, + unsigned dwords, + unsigned relocs ); + + void (*batch_dword)( struct i915_winsys *sws, + unsigned dword ); + + /** + * Emit a relocation to a buffer. + * + * Used not only when the buffer addresses are not pinned, but also to + * ensure refered buffers will not be destroyed until the current batch + * buffer execution is finished. + * + * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and + * I915_BUFFER_ACCESS_READ macros. + */ + void (*batch_reloc)( struct i915_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta ); + + void (*batch_flush)( struct i915_winsys *sws ); + void (*batch_finish)( struct i915_winsys *sws ); +}; + +#define I915_BUFFER_ACCESS_WRITE 0x1 +#define I915_BUFFER_ACCESS_READ 0x2 + +#define I915_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) + + +struct pipe_context *i915_create( struct pipe_winsys *, + struct i915_winsys *, + unsigned pci_id ); + + +#endif diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile new file mode 100644 index 00000000000..48c00ab50b8 --- /dev/null +++ b/src/gallium/drivers/i965simple/Makefile @@ -0,0 +1,66 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = i965simple + +DRIVER_SOURCES = \ + brw_blit.c \ + brw_flush.c \ + brw_strings.c \ + brw_surface.c \ + brw_cc.c \ + brw_clip.c \ + brw_clip_line.c \ + brw_clip_point.c \ + brw_clip_state.c \ + brw_clip_tri.c \ + brw_clip_util.c \ + brw_context.c \ + brw_curbe.c \ + brw_draw.c \ + brw_draw_upload.c \ + brw_eu.c \ + brw_eu_debug.c \ + brw_eu_emit.c \ + brw_eu_util.c \ + brw_gs.c \ + brw_gs_emit.c \ + brw_gs_state.c \ + brw_misc_state.c \ + brw_sf.c \ + brw_sf_emit.c \ + brw_sf_state.c \ + brw_shader_info.c \ + brw_state.c \ + brw_state_batch.c \ + brw_state_cache.c \ + brw_state_pool.c \ + brw_state_upload.c \ + brw_tex_layout.c \ + brw_urb.c \ + brw_util.c \ + brw_vs.c \ + brw_vs_emit.c \ + brw_vs_state.c \ + brw_wm.c \ + brw_wm_iz.c \ + brw_wm_decl.c \ + brw_wm_glsl.c \ + brw_wm_sampler_state.c \ + brw_wm_state.c \ + brw_wm_surface_state.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(COMMON_BM_SOURCES) \ + $(MINIGLX_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +DRIVER_DEFINES = -I. + +include ../Makefile.template + +symlinks: diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript new file mode 100644 index 00000000000..74621de84c9 --- /dev/null +++ b/src/gallium/drivers/i965simple/SConscript @@ -0,0 +1,55 @@ +Import('*') + +env = env.Clone() + +i965simple = env.ConvenienceLibrary( + target = 'i965simple', + source = [ + 'brw_blit.c', + 'brw_cc.c', + 'brw_clip.c', + 'brw_clip_line.c', + 'brw_clip_point.c', + 'brw_clip_state.c', + 'brw_clip_tri.c', + 'brw_clip_util.c', + 'brw_context.c', + 'brw_curbe.c', + 'brw_draw.c', + 'brw_draw_upload.c', + 'brw_eu.c', + 'brw_eu_debug.c', + 'brw_eu_emit.c', + 'brw_eu_util.c', + 'brw_flush.c', + 'brw_gs.c', + 'brw_gs_emit.c', + 'brw_gs_state.c', + 'brw_misc_state.c', + 'brw_sf.c', + 'brw_sf_emit.c', + 'brw_sf_state.c', + 'brw_shader_info.c', + 'brw_state.c', + 'brw_state_batch.c', + 'brw_state_cache.c', + 'brw_state_pool.c', + 'brw_state_upload.c', + 'brw_strings.c', + 'brw_surface.c', + 'brw_tex_layout.c', + 'brw_urb.c', + 'brw_util.c', + 'brw_vs.c', + 'brw_vs_emit.c', + 'brw_vs_state.c', + 'brw_wm.c', + 'brw_wm_decl.c', + 'brw_wm_glsl.c', + 'brw_wm_iz.c', + 'brw_wm_sampler_state.c', + 'brw_wm_state.c', + 'brw_wm_surface_state.c', + ]) + +Export('i965simple') diff --git a/src/gallium/drivers/i965simple/brw_batch.h b/src/gallium/drivers/i965simple/brw_batch.h new file mode 100644 index 00000000000..5f5932a4883 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_batch.h @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_BATCH_H +#define BRW_BATCH_H + +#include "brw_winsys.h" + +#define BATCH_LOCALS + +#define INTEL_BATCH_NO_CLIPRECTS 0x1 +#define INTEL_BATCH_CLIPRECTS 0x2 + +#define BEGIN_BATCH( dwords, relocs ) \ + brw->winsys->batch_start(brw->winsys, dwords, relocs) + +#define OUT_BATCH( dword ) \ + brw->winsys->batch_dword(brw->winsys, dword) + +#define OUT_RELOC( buf, flags, delta ) \ + brw->winsys->batch_reloc(brw->winsys, buf, flags, delta) + +#define ADVANCE_BATCH() \ + brw->winsys->batch_end( brw->winsys ) + +/* XXX: this is bogus - need proper handling for out-of-memory in batchbuffer. + */ +#define FLUSH_BATCH(fence) do { \ + brw->winsys->batch_flush(brw->winsys, fence); \ + brw->hardware_dirty = ~0; \ +} while (0) + +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->winsys, (s), sizeof(*(s))) + +#endif diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c new file mode 100644 index 00000000000..8494f70493c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_blit.c @@ -0,0 +1,218 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdio.h> +#include <errno.h> + +#include "brw_batch.h" +#include "brw_blit.h" +#include "brw_context.h" +#include "brw_reg.h" + +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void brw_fill_blit(struct brw_context *brw, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short x, short y, + short w, short h, + unsigned color) +{ + unsigned BR13, CMD; + BATCH_LOCALS; + + dst_pitch *= cpp; + + switch(cpp) { + case 1: + case 2: + case 3: + BR13 = (0xF0 << 16) | (1<<24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = (0xF0 << 16) | (1<<24) | (1<<25); + CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return; + } + + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (y << 16) | x ); + OUT_BATCH( ((y+h) << 16) | (x+w) ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset ); + OUT_BATCH( color ); + ADVANCE_BATCH(); +} + +static unsigned translate_raster_op(unsigned logicop) +{ + switch(logicop) { + case PIPE_LOGICOP_CLEAR: return 0x00; + case PIPE_LOGICOP_AND: return 0x88; + case PIPE_LOGICOP_AND_REVERSE: return 0x44; + case PIPE_LOGICOP_COPY: return 0xCC; + case PIPE_LOGICOP_AND_INVERTED: return 0x22; + case PIPE_LOGICOP_NOOP: return 0xAA; + case PIPE_LOGICOP_XOR: return 0x66; + case PIPE_LOGICOP_OR: return 0xEE; + case PIPE_LOGICOP_NOR: return 0x11; + case PIPE_LOGICOP_EQUIV: return 0x99; + case PIPE_LOGICOP_INVERT: return 0x55; + case PIPE_LOGICOP_OR_REVERSE: return 0xDD; + case PIPE_LOGICOP_COPY_INVERTED: return 0x33; + case PIPE_LOGICOP_OR_INVERTED: return 0xBB; + case PIPE_LOGICOP_NAND: return 0x77; + case PIPE_LOGICOP_SET: return 0xFF; + default: return 0; + } +} + + +/* Copy BitBlt + */ +void brw_copy_blit(struct brw_context *brw, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + boolean src_tiled, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h, + unsigned logic_op) +{ + unsigned CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + BATCH_LOCALS; + + + DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_x, src_y, + dst_buffer, dst_pitch, dst_x, dst_y, + w,h,logic_op); + + assert( logic_op - PIPE_LOGICOP_CLEAR >= 0 ); + assert( logic_op - PIPE_LOGICOP_CLEAR < 0x10 ); + + src_pitch *= cpp; + dst_pitch *= cpp; + + switch(cpp) { + case 1: + case 2: + case 3: + BR13 = (translate_raster_op(logic_op) << 16) | (1<<24); + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) | + (1<<25); + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return; + } + + if (src_tiled) { + CMD |= XY_SRC_TILED; + src_pitch /= 4; + } + + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + if (dst_y2 < dst_y || + dst_x2 < dst_x) { + return; + } + + dst_pitch &= 0xffff; + src_pitch &= 0xffff; + + /* Initial y values don't seem to work with negative pitches. If + * we adjust the offsets manually (below), it seems to work fine. + * + * On the other hand, if we always adjust, the hardware doesn't + * know which blit directions to use, so overlapping copypixels get + * the wrong result. + */ + if (dst_pitch > 0 && src_pitch > 0) { + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (dst_y << 16) | dst_x ); + OUT_BATCH( (dst_y2 << 16) | dst_x2 ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, + dst_offset ); + OUT_BATCH( (src_y << 16) | src_x ); + OUT_BATCH( src_pitch ); + OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, + src_offset ); + ADVANCE_BATCH(); + } + else { + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( (dst_pitch & 0xffff) | BR13 ); + OUT_BATCH( (0 << 16) | dst_x ); + OUT_BATCH( (h << 16) | dst_x2 ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, + dst_offset + dst_y * dst_pitch ); + OUT_BATCH( (src_pitch & 0xffff) ); + OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, + src_offset + src_y * src_pitch ); + ADVANCE_BATCH(); + } +} + + + diff --git a/src/gallium/drivers/i965simple/brw_blit.h b/src/gallium/drivers/i965simple/brw_blit.h new file mode 100644 index 00000000000..111c5d91d39 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_blit.h @@ -0,0 +1,33 @@ +#ifndef BRW_BLIT_H +#define BRW_BLIT_H + +#include "pipe/p_compiler.h" + +struct pipe_buffer; +struct brw_context; + +void brw_fill_blit(struct brw_context *intel, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short x, short y, + short w, short h, + unsigned color); +void brw_copy_blit(struct brw_context *intel, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + boolean src_tiled, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h, + unsigned logic_op); +#endif diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c new file mode 100644 index 00000000000..337e4f95f69 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_cc.c @@ -0,0 +1,269 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_util.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" + + +static int brw_translate_compare_func(int func) +{ + switch(func) { + case PIPE_FUNC_NEVER: + return BRW_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: + return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_LEQUAL: + return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: + return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_GEQUAL: + return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_ALWAYS: + return BRW_COMPAREFUNCTION_ALWAYS; + } + + debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); + return BRW_COMPAREFUNCTION_ALWAYS; +} + +static int brw_translate_stencil_op(int op) +{ + switch(op) { + case PIPE_STENCIL_OP_KEEP: + return BRW_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return BRW_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return BRW_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return BRW_STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return BRW_STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return BRW_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return BRW_STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return BRW_STENCILOP_INVERT; + default: + return BRW_STENCILOP_ZERO; + } +} + + +static int brw_translate_logic_op(int opcode) +{ + switch(opcode) { + case PIPE_LOGICOP_CLEAR: + return BRW_LOGICOPFUNCTION_CLEAR; + case PIPE_LOGICOP_AND: + return BRW_LOGICOPFUNCTION_AND; + case PIPE_LOGICOP_AND_REVERSE: + return BRW_LOGICOPFUNCTION_AND_REVERSE; + case PIPE_LOGICOP_COPY: + return BRW_LOGICOPFUNCTION_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return BRW_LOGICOPFUNCTION_COPY_INVERTED; + case PIPE_LOGICOP_AND_INVERTED: + return BRW_LOGICOPFUNCTION_AND_INVERTED; + case PIPE_LOGICOP_NOOP: + return BRW_LOGICOPFUNCTION_NOOP; + case PIPE_LOGICOP_XOR: + return BRW_LOGICOPFUNCTION_XOR; + case PIPE_LOGICOP_OR: + return BRW_LOGICOPFUNCTION_OR; + case PIPE_LOGICOP_OR_INVERTED: + return BRW_LOGICOPFUNCTION_OR_INVERTED; + case PIPE_LOGICOP_NOR: + return BRW_LOGICOPFUNCTION_NOR; + case PIPE_LOGICOP_EQUIV: + return BRW_LOGICOPFUNCTION_EQUIV; + case PIPE_LOGICOP_INVERT: + return BRW_LOGICOPFUNCTION_INVERT; + case PIPE_LOGICOP_OR_REVERSE: + return BRW_LOGICOPFUNCTION_OR_REVERSE; + case PIPE_LOGICOP_NAND: + return BRW_LOGICOPFUNCTION_NAND; + case PIPE_LOGICOP_SET: + return BRW_LOGICOPFUNCTION_SET; + default: + return BRW_LOGICOPFUNCTION_SET; + } +} + + +static void upload_cc_vp( struct brw_context *brw ) +{ + struct brw_cc_viewport ccv; + + memset(&ccv, 0, sizeof(ccv)); + + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + + brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); +} + +const struct brw_tracked_state brw_cc_vp = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_cc_vp +}; + + +static void upload_cc_unit( struct brw_context *brw ) +{ + struct brw_cc_unit_state cc; + + memset(&cc, 0, sizeof(cc)); + + /* BRW_NEW_DEPTH_STENCIL */ + if (brw->attribs.DepthStencil->stencil[0].enabled) { + cc.cc0.stencil_enable = brw->attribs.DepthStencil->stencil[0].enabled; + cc.cc0.stencil_func = brw_translate_compare_func(brw->attribs.DepthStencil->stencil[0].func); + cc.cc0.stencil_fail_op = brw_translate_stencil_op(brw->attribs.DepthStencil->stencil[0].fail_op); + cc.cc0.stencil_pass_depth_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[0].zfail_op); + cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[0].zpass_op); + cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil[0].ref_value; + cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].write_mask; + cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].value_mask; + + if (brw->attribs.DepthStencil->stencil[1].enabled) { + cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil[1].enabled; + cc.cc0.bf_stencil_func = brw_translate_compare_func( + brw->attribs.DepthStencil->stencil[1].func); + cc.cc0.bf_stencil_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].fail_op); + cc.cc0.bf_stencil_pass_depth_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].zfail_op); + cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].zpass_op); + cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil[1].ref_value; + cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].write_mask; + cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].value_mask; + } + + /* Not really sure about this: + */ + if (brw->attribs.DepthStencil->stencil[0].write_mask || + brw->attribs.DepthStencil->stencil[1].write_mask) + cc.cc0.stencil_write_enable = 1; + } + + /* BRW_NEW_BLEND */ + if (brw->attribs.Blend->logicop_enable) { + cc.cc2.logicop_enable = 1; + cc.cc5.logicop_func = brw_translate_logic_op( brw->attribs.Blend->logicop_func ); + } + else if (brw->attribs.Blend->blend_enable) { + int eqRGB = brw->attribs.Blend->rgb_func; + int eqA = brw->attribs.Blend->alpha_func; + int srcRGB = brw->attribs.Blend->rgb_src_factor; + int dstRGB = brw->attribs.Blend->rgb_dst_factor; + int srcA = brw->attribs.Blend->alpha_src_factor; + int dstA = brw->attribs.Blend->alpha_dst_factor; + + if (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX) { + srcRGB = dstRGB = PIPE_BLENDFACTOR_ONE; + } + + if (eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX) { + srcA = dstA = PIPE_BLENDFACTOR_ONE; + } + + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation( eqRGB ); + + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA ); + + cc.cc3.blend_enable = 1; + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + /* BRW_NEW_ALPHATEST + */ + if (brw->attribs.DepthStencil->alpha.enabled) { + cc.cc3.alpha_test = 1; + cc.cc3.alpha_test_func = + brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); + + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], + brw->attribs.DepthStencil->alpha.ref); + + cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + } + + if (brw->attribs.Blend->dither) { + cc.cc5.dither_enable = 1; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; + } + + if (brw->attribs.DepthStencil->depth.enabled) { + cc.cc2.depth_test = brw->attribs.DepthStencil->depth.enabled; + cc.cc2.depth_test_function = brw_translate_compare_func(brw->attribs.DepthStencil->depth.func); + cc.cc2.depth_write_enable = brw->attribs.DepthStencil->depth.writemask; + } + + /* CACHE_NEW_CC_VP */ + cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5; + + if (BRW_DEBUG & DEBUG_STATS) + cc.cc5.statistics_enable = 1; + + brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc ); +} + +const struct brw_tracked_state brw_cc_unit = { + .dirty = { + .brw = BRW_NEW_DEPTH_STENCIL | BRW_NEW_BLEND | BRW_NEW_ALPHA_TEST, + .cache = CACHE_NEW_CC_VP + }, + .update = upload_cc_unit +}; + diff --git a/src/gallium/drivers/i965simple/brw_clip.c b/src/gallium/drivers/i965simple/brw_clip.c new file mode 100644 index 00000000000..268124cc53f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip.c @@ -0,0 +1,206 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_clip.h" + +#define FRONT_UNFILLED_BIT 0x1 +#define BACK_UNFILLED_BIT 0x2 + + +static void compile_clip_prog( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + struct brw_clip_compile c; + const unsigned *program; + unsigned program_size; + unsigned delta; + unsigned i; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.func.single_program_flow = 1; + + c.key = *key; + + + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.header_position_offset = ATTR_SIZE; + + for (i = 0, delta = REG_SIZE; i < PIPE_MAX_SHADER_OUTPUTS; i++) + if (c.key.attrs & (1<<i)) { + c.offset[i] = delta; + delta += ATTR_SIZE; + } + + c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; + + c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Would ideally have the option of producing a program which could + * do all three: + */ + switch (key->primitive) { + case PIPE_PRIM_TRIANGLES: +#if 0 + if (key->do_unfilled) + brw_emit_unfilled_clip( &c ); + else +#endif + brw_emit_tri_clip( &c ); + break; + case PIPE_PRIM_LINES: + brw_emit_line_clip( &c ); + break; + case PIPE_PRIM_POINTS: + brw_emit_point_clip( &c ); + break; + default: + assert(0); + return; + } + + + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->clip.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_CLIP_PROG], + key, sizeof(*key), + &brw->clip.prog_data, + &brw->clip.prog_gs_offset); +} + + + + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_clip_prog(struct brw_context *brw) +{ + struct brw_clip_prog_key key; + + memset(&key, 0, sizeof(key)); + + /* Populate the key: + */ + /* BRW_NEW_REDUCED_PRIMITIVE */ + key.primitive = brw->reduced_primitive; + /* CACHE_NEW_VS_PROG */ + key.attrs = brw->vs.prog_data->outputs_written; + /* BRW_NEW_RASTER */ + key.do_flat_shading = (brw->attribs.Raster->flatshade); + /* BRW_NEW_CLIP */ + key.nr_userclip = brw->attribs.Clip.nr; /* XXX */ + +#if 0 + key.clip_mode = BRW_CLIPMODE_NORMAL; + + if (key.primitive == PIPE_PRIM_TRIANGLES) { + if (brw->attribs.Raster->cull_mode == PIPE_WINDING_BOTH) + key.clip_mode = BRW_CLIPMODE_REJECT_ALL; + else { + if (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || + brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL) + key.do_unfilled = 1; + + /* Most cases the fixed function units will handle. Cases where + * one or more polygon faces are unfilled will require help: + */ + if (key.do_unfilled) { + key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + + if (brw->attribs.Raster->offset_cw || + brw->attribs.Raster->offset_ccw) { + key.offset_units = brw->attribs.Raster->offset_units; + key.offset_factor = brw->attribs.Raster->offset_scale; + } + key.fill_ccw = brw->attribs.Raster->fill_ccw; + key.fill_cw = brw->attribs.Raster->fill_cw; + key.offset_ccw = brw->attribs.Raster->offset_ccw; + key.offset_cw = brw->attribs.Raster->offset_cw; + if (brw->attribs.Raster->light_twoside && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; + } + } + } +#else + key.clip_mode = BRW_CLIPMODE_ACCEPT_ALL; +#endif + + if (!search_cache(brw, &key)) + compile_clip_prog( brw, &key ); +} + +const struct brw_tracked_state brw_clip_prog = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_CLIP | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_clip_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_clip.h b/src/gallium/drivers/i965simple/brw_clip.h new file mode 100644 index 00000000000..a89d08b7910 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip.h @@ -0,0 +1,170 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef BRW_CLIP_H +#define BRW_CLIP_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_VERTS (3+6+6) + +/* Note that if unfilled primitives are being emitted, we have to fix + * up polygon offset and flatshading at this point: + */ +struct brw_clip_prog_key { + unsigned attrs:32; + unsigned primitive:4; + unsigned nr_userclip:3; + unsigned do_flat_shading:1; + unsigned do_unfilled:1; + unsigned fill_cw:2; /* includes cull information */ + unsigned fill_ccw:2; /* includes cull information */ + unsigned offset_cw:1; + unsigned offset_ccw:1; + unsigned pad0:17; + + unsigned copy_bfc_cw:1; + unsigned copy_bfc_ccw:1; + unsigned clip_mode:3; + unsigned pad1:27; + + float offset_factor; + float offset_units; +}; + + +#define CLIP_LINE 0 +#define CLIP_POINT 1 +#define CLIP_FILL 2 +#define CLIP_CULL 3 + + +#define PRIM_MASK (0x1f) + +struct brw_clip_compile { + struct brw_compile func; + struct brw_clip_prog_key key; + struct brw_clip_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_VERTS]; + + struct brw_reg t; + struct brw_reg t0, t1; + struct brw_reg dp0, dp1; + + struct brw_reg dpPrev; + struct brw_reg dp; + struct brw_reg loopcount; + struct brw_reg nr_verts; + struct brw_reg planemask; + + struct brw_reg inlist; + struct brw_reg outlist; + struct brw_reg freelist; + + struct brw_reg dir; + struct brw_reg tmp0, tmp1; + struct brw_reg offset; + + struct brw_reg fixed_planes; + struct brw_reg plane_equation; + } reg; + + /* 3 different ways of expressing vertex size: + */ + unsigned nr_attrs; + unsigned nr_regs; + unsigned nr_bytes; + + unsigned first_tmp; + unsigned last_tmp; + + boolean need_direction; + + unsigned last_mrf; + + unsigned header_position_offset; + unsigned offset[PIPE_ATTRIB_MAX]; +}; + +#define ATTR_SIZE (4*4) + +/* Points are only culled, so no need for a clip routine, however it + * works out easier to have a dummy one. + */ +void brw_emit_unfilled_clip( struct brw_clip_compile *c ); +void brw_emit_tri_clip( struct brw_clip_compile *c ); +void brw_emit_line_clip( struct brw_clip_compile *c ); +void brw_emit_point_clip( struct brw_clip_compile *c ); + +/* brw_clip_tri.c, for use by the unfilled clip routine: + */ +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); +void brw_clip_tri( struct brw_clip_compile *c ); +void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + unsigned nr_verts ); + + +/* Utils: + */ + +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + boolean force_edgeflag ); + +void brw_clip_init_planes( struct brw_clip_compile *c ); + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + boolean allocate, + boolean eot, + unsigned header); + +void brw_clip_kill_thread(struct brw_clip_compile *c); + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); + +void brw_clip_copy_colors( struct brw_clip_compile *c, + unsigned to, unsigned from ); + +void brw_clip_init_clipmask( struct brw_clip_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_clip_line.c b/src/gallium/drivers/i965simple/brw_clip_line.c new file mode 100644 index 00000000000..75d9e5fcda2 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_line.c @@ -0,0 +1,245 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < 4; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.t0 = brw_vec1_grf(i, 1); + c->reg.t1 = brw_vec1_grf(i, 2); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp1 = brw_vec1_grf(i, 4); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + +/* Line clipping, more or less following the following algorithm: + * + * for (p=0;p<MAX_PLANES;p++) { + * if (clipmask & (1 << p)) { + * float dp0 = DOTPROD( vtx0, plane[p] ); + * float dp1 = DOTPROD( vtx1, plane[p] ); + * + * if (IS_NEGATIVE(dp1)) { + * float t = dp1 / (dp1 - dp0); + * if (t > t1) t1 = t; + * } else { + * float t = dp0 / (dp0 - dp1); + * if (t > t0) t0 = t; + * } + * + * if (t0 + t1 >= 1.0) + * return; + * } + * } + * + * interp( ctx, newvtx0, vtx0, vtx1, t0 ); + * interp( ctx, newvtx1, vtx1, vtx0, t1 ); + * + */ +static void clip_and_emit_line( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_indirect vtx0 = brw_indirect(0, 0); + struct brw_indirect vtx1 = brw_indirect(1, 0); + struct brw_indirect newvtx0 = brw_indirect(2, 0); + struct brw_indirect newvtx1 = brw_indirect(3, 0); + struct brw_indirect plane_ptr = brw_indirect(4, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *is_negative; + struct brw_instruction *is_neg2; + struct brw_instruction *not_culled; + struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); + brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + + /* Note: init t0, t1 together: + */ + brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); + + brw_clip_init_planes(c); + brw_clip_init_clipmask(c); + + /* -ve rhw workaround */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + +#if 0 + /* dp = DP4(vtx->position, plane) + */ + brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + + /* if (IS_NEGATIVE(dp1)) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); +#else + #warning "disabled" +#endif + is_negative = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); + brw_MOV(p, c->reg.t1, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + is_negative = brw_ELSE(p, is_negative); + { + /* Coming back in. We know that both cannot be negative + * because the line would have been culled in that case. + */ + + /* If both are positive, do nothing */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_neg2); + } + brw_ENDIF(p, is_negative); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* while (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); + + brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); + not_culled = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE); + brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE); + + brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, not_culled); + brw_clip_kill_thread(c); +} + + + +void brw_emit_line_clip( struct brw_clip_compile *c ) +{ + brw_clip_line_alloc_regs(c); + + if (c->key.do_flat_shading) + brw_clip_copy_colors(c, 0, 1); + + clip_and_emit_line(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_point.c b/src/gallium/drivers/i965simple/brw_clip_point.c new file mode 100644 index 00000000000..6fce7210d1b --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_point.c @@ -0,0 +1,47 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + +/* Point clipping, nothing to do? + */ +void brw_emit_point_clip( struct brw_clip_compile *c ) +{ + /* Send an empty message to kill the thread: + */ + brw_clip_tri_alloc_regs(c, 0); + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c new file mode 100644 index 00000000000..ea5c05a2796 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_state.c @@ -0,0 +1,92 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "pipe/p_util.h" + + +static void upload_clip_unit( struct brw_context *brw ) +{ + struct brw_clip_unit_state clip; + + memset(&clip, 0, sizeof(clip)); + + /* CACHE_NEW_CLIP_PROG */ + clip.thread0.grf_reg_count = + align(brw->clip.prog_data->total_grf, 16) / 16 - 1; + clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; + clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; + clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; + clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; + + /* BRW_NEW_CURBE_OFFSETS */ + clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + + /* BRW_NEW_URB_FENCE */ + clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; + clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + clip.thread4.max_threads = 1; /* 2 threads */ + + if (BRW_DEBUG & DEBUG_STATS) + clip.thread4.stats_enable = 1; + + /* CONSTANT */ + clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + clip.thread1.single_program_flow = 1; + clip.thread3.dispatch_grf_start_reg = 1; + clip.thread3.urb_entry_read_offset = 0; + clip.clip5.userclip_enable_flags = 0x7f; + clip.clip5.userclip_must_clip = 1; + clip.clip5.guard_band_enable = 0; + clip.clip5.viewport_z_clip_enable = 1; + clip.clip5.viewport_xy_clip_enable = 1; + clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; + clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip6.clipper_viewport_state_ptr = 0; + clip.viewport_xmin = -1; + clip.viewport_xmax = 1; + clip.viewport_ymin = -1; + clip.viewport_ymax = 1; + + brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); +} + + +const struct brw_tracked_state brw_clip_unit = { + .dirty = { + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_CLIP_PROG + }, + .update = upload_clip_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_clip_tri.c b/src/gallium/drivers/i965simple/brw_clip_tri.c new file mode 100644 index 00000000000..c5da7b825e0 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_tri.c @@ -0,0 +1,566 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmps( struct brw_clip_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + unsigned nr_verts ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + if (c->nr_attrs & 1) { + for (j = 0; j < 3; j++) { + unsigned delta = c->nr_attrs*16 + 32; + brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); + } + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD); + c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp = brw_vec1_grf(i, 4); + i++; + + c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + if (c->key.do_unfilled) { + c->reg.dir = brw_vec4_grf(i, 0); + c->reg.offset = brw_vec4_grf(i, 4); + i++; + c->reg.tmp0 = brw_vec4_grf(i, 0); + c->reg.tmp1 = brw_vec4_grf(i, 4); + i++; + } + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + struct brw_instruction *is_rev; + + /* Initial list of indices for incoming vertexes: + */ + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); + + /* XXX: Is there an easier way to do this? Need to reverse every + * second tristrip element: Can ignore sometimes? + */ + is_rev = brw_IF(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(-1)); + } + is_rev = brw_ELSE(p, is_rev); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(1)); + } + brw_ENDIF(p, is_rev); + + brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3)); +} + + + +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + is_poly = brw_ELSE(p, is_poly); + { + brw_clip_copy_colors(c, 0, 2); + brw_clip_copy_colors(c, 1, 2); + } + brw_ENDIF(p, is_poly); +} + + + +/* Use mesa's clipping algorithms, translated to GEN4 assembly. + */ +void brw_clip_tri( struct brw_clip_compile *c ) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_indirect vtx = brw_indirect(0, 0); + struct brw_indirect vtxPrev = brw_indirect(1, 0); + struct brw_indirect vtxOut = brw_indirect(2, 0); + struct brw_indirect plane_ptr = brw_indirect(3, 0); + struct brw_indirect inlist_ptr = brw_indirect(4, 0); + struct brw_indirect outlist_ptr = brw_indirect(5, 0); + struct brw_indirect freelist_ptr = brw_indirect(6, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *vertex_loop; + struct brw_instruction *next_test; + struct brw_instruction *prev_test; + + brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + + brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + /* vtxOut = freelist_ptr++ + */ + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); + brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); + + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); + + vertex_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* vtx = *input_ptr; + */ + brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); + + /* IS_NEGATIVE(prev) */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + prev_test = brw_IF(p, BRW_EXECUTE_1); + { + /* IS_POSITIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + + /* Coming back in. + */ + brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); + + /* If (vtxOut == 0) vtxOut = vtxPrev + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, FALSE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + + } + prev_test = brw_ELSE(p, prev_test); + { + /* *outlist_ptr++ = vtxPrev; + * nr_verts++; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + + /* IS_NEGATIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + /* Going out of bounds. Avoid division by zero as we + * know dp != dpPrev from DIFFERENT_SIGNS, above. + */ + brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); + + /* If (vtxOut == 0) vtxOut = vtx + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, TRUE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + } + brw_ENDIF(p, prev_test); + + /* vtxPrev = vtx; + * inlist_ptr++; + */ + brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); + brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); + + /* while (--loopcount != 0) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, vertex_loop); + + /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] + * inlist = outlist + * inlist_ptr = &inlist[0] + * outlist_ptr = &outlist[0] + */ + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); + brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); + brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* nr_verts >= 3 + */ + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + c->reg.nr_verts, + brw_imm_ud(3)); + + /* && (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); +#else + #warning "disabled" +#endif +} + + + +void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop, *if_insn; + + /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, + c->reg.loopcount, + c->reg.nr_verts, + brw_imm_d(-2)); + + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect vptr = brw_indirect(1, 0); + + brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + + brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END)); + } + brw_ENDIF(p, if_insn); +} + +static void do_clip_tri( struct brw_clip_compile *c ) +{ + brw_clip_init_planes(c); + + brw_clip_tri(c); +} + + +static void maybe_do_clip_tri( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + do_clip_tri(c); + } + brw_ENDIF(p, do_clip); +} + +static void brw_clip_test( struct brw_clip_compile *c ) +{ +#if 0 + struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + + struct brw_reg v0 = get_tmp(c); + struct brw_reg v1 = get_tmp(c); + struct brw_reg v2 = get_tmp(c); + + struct brw_indirect vt0 = brw_indirect(0, 0); + struct brw_indirect vt1 = brw_indirect(1, 0); + struct brw_indirect vt2 = brw_indirect(2, 0); + + struct brw_compile *p = &c->func; + + brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); + brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + + /* test nearz, xmin, ymin plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* test farz, xmax, ymax plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + release_tmps(c); +#else + #warning "disabled" +#endif +} + + +void brw_emit_tri_clip( struct brw_clip_compile *c ) +{ + struct brw_instruction *neg_rhw; + struct brw_compile *p = &c->func; + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_clipmask(c); + + /* if -ve rhw workaround bit is set, + do cliptest */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + neg_rhw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_test(c); + } + brw_ENDIF(p, neg_rhw); + + /* Can't push into do_clip_tri because with polygon (or quad) + * flatshading, need to apply the flatshade here because we don't + * respect the PV when converting to trifan for emit: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) + do_clip_tri(c); + else + maybe_do_clip_tri(c); + + brw_clip_tri_emit_polygon(c); + + /* Send an empty message to kill the thread: + */ + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_unfilled.c b/src/gallium/drivers/i965simple/brw_clip_unfilled.c new file mode 100644 index 00000000000..b774a76dd63 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_unfilled.c @@ -0,0 +1,477 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +/* This is performed against the original triangles, so no indirection + * required: +BZZZT! + */ +static void compute_tri_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg e = c->reg.tmp0; + struct brw_reg f = c->reg.tmp1; + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + + + /* Calculate the vectors of two edges of the triangle: + */ + brw_ADD(p, e, v0, negate(v2)); + brw_ADD(p, f, v1, negate(v2)); + + /* Take their crossproduct: + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); + brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); +} + + +static void cull_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + unsigned conditional; + + assert (!(c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL)); + + if (c->key.fill_ccw == CLIP_CULL) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, ccw); +} + + + +static void copy_bfc( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + unsigned conditional; + + /* Do we have any colors to copy? + */ + if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) && + !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])) + return; + + /* In some wierd degnerate cases we can end up testing the + * direction twice, once for culling and once for bfc copying. Oh + * well, that's what you get for setting wierd GL state. + */ + if (c->key.copy_bfc_ccw) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + unsigned i; + + for (i = 0; i < 3; i++) { + if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1])); + } + } + brw_ENDIF(p, ccw); +} + + + + +/* + float iz = 1.0 / dir.z; + float ac = dir.x * iz; + float bc = dir.y * iz; + offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; + offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; + offset *= MRD; +*/ +static void compute_offset( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg off = c->reg.offset; + struct brw_reg dir = c->reg.dir; + + brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); + brw_MUL(p, vec2(off), dir, get_element(off, 2)); + + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + brw_abs(get_element(off, 0)), + brw_abs(get_element(off, 1))); + + brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); + brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); +} + + +static void merge_edgeflags( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + /* Get away with using reg.vertex because we know that this is not + * a _3DPRIM_TRISTRIP_REVERSE: + */ + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); + brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); + brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_poly); +} + + + +static void apply_one_offset( struct brw_clip_compile *c, + struct brw_indirect vert ) +{ + struct brw_compile *p = &c->func; + struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); + struct brw_reg z = get_element(pos, 2); + + brw_ADD(p, z, z, vec1(c->reg.offset)); +} + + + +/*********************************************************************** + * Output clipped polygon as an unfilled primitive: + */ +static void emit_lines(struct brw_clip_compile *c, + boolean do_offset) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_edge; + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v1 = brw_indirect(1, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + struct brw_indirect v1ptr = brw_indirect(3, 0); + + /* Need a seperate loop for offset: + */ + if (do_offset) { + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + apply_one_offset(c, v0); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + } + + /* v1ptr = &inlist[nr_verts] + * *v1ptr = v0 + */ + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw edge if edgeflag != 0 */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_edge = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, draw_edge); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + +static void emit_points(struct brw_clip_compile *c, + boolean do_offset ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_point; + + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw if edgeflag != 0 + */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_point = brw_IF(p, BRW_EXECUTE_1); + { + if (do_offset) + apply_one_offset(c, v0); + + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END); + } + brw_ENDIF(p, draw_point); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + + + + + +static void emit_primitives( struct brw_clip_compile *c, + unsigned mode, + boolean do_offset ) +{ + switch (mode) { + case CLIP_FILL: + brw_clip_tri_emit_polygon(c); + break; + + case CLIP_LINE: + emit_lines(c, do_offset); + break; + + case CLIP_POINT: + emit_points(c, do_offset); + break; + + case CLIP_CULL: + assert(0); + break; + } +} + + + +static void emit_unfilled_primitives( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + + /* Direction culling has already been done. + */ + if (c->key.fill_ccw != c->key.fill_cw && + c->key.fill_ccw != CLIP_CULL && + c->key.fill_cw != CLIP_CULL) + { + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } + ccw = brw_ELSE(p, ccw); + { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + brw_ENDIF(p, ccw); + } + else if (c->key.fill_cw != CLIP_CULL) { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + else if (c->key.fill_ccw != CLIP_CULL) { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } +} + + + + +static void check_nr_verts( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, if_insn); +} + + +void brw_emit_unfilled_clip( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + + c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || + (c->key.fill_ccw != c->key.fill_cw) || + c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL || + c->key.copy_bfc_cw || + c->key.copy_bfc_ccw); + + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + + assert(c->offset[VERT_RESULT_EDGE]); + + if (c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL) { + brw_clip_kill_thread(c); + return; + } + + merge_edgeflags(c); + + /* Need to use the inlist indirection here: + */ + if (c->need_direction) + compute_tri_direction(c); + + if (c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL) + cull_direction(c); + + if (c->key.offset_ccw || + c->key.offset_cw) + compute_offset(c); + + if (c->key.copy_bfc_ccw || + c->key.copy_bfc_cw) + copy_bfc(c); + + /* Need to do this whether we clip or not: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + brw_clip_init_clipmask(c); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_init_planes(c); + brw_clip_tri(c); + check_nr_verts(c); + } + brw_ENDIF(p, do_clip); + + emit_unfilled_primitives(c); + brw_clip_kill_thread(c); +} + + + diff --git a/src/gallium/drivers/i965simple/brw_clip_util.c b/src/gallium/drivers/i965simple/brw_clip_util.c new file mode 100644 index 00000000000..6d58ceafff3 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_util.c @@ -0,0 +1,351 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + + + +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(unsigned x, unsigned y, unsigned z, unsigned w) +{ + return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + + if (!c->key.nr_userclip) { + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); + } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ + struct brw_compile *p = &c->func; + + /* calc rhw + */ + brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + + /* value.xyz *= value.rhw + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, brw_writemask(pos, TGSI_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); + brw_set_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile *c, + struct brw_indirect vert_addr ) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + + /* Fixup position. Extract from the original vertex and re-project + * to screen space: + */ + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); + brw_clip_project_position(c, tmp); + brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); + + release_tmp(c, tmp); +#else + #warning "disabled" +#endif +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + boolean force_edgeflag) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + unsigned i; + + /* Just copy the vertex header: + */ + brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + + /* Iterate over each attribute (could be done in pairs?) + */ + for (i = 0; i < c->nr_attrs; i++) { + unsigned delta = i*16 + 32; + + if (delta == c->offset[VERT_RESULT_EDGE]) { + if (force_edgeflag) + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); + else + brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); + } + else { + /* Interpolate: + * + * New = attr0 + t*attr1 - t*attr0 + */ + brw_MUL(p, + vec4(brw_null_reg()), + deref_4f(v1_ptr, delta), + t0); + + brw_MAC(p, + tmp, + negate(deref_4f(v0_ptr, delta)), + t0); + + brw_ADD(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta), + tmp); + } + } + + if (i & 1) { + unsigned delta = i*16 + 32; + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); + } + + release_tmp(c, tmp); + + /* Recreate the projected (NDC) coordinate in the new vertex + * header: + */ + brw_clip_project_vertex(c, dest_ptr ); +#else + #warning "disabled" +#endif +} + + + + +#define MAX_MRF 16 + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + boolean allocate, + boolean eot, + unsigned header) +{ + struct brw_compile *p = &c->func; + unsigned start = c->last_mrf; + + assert(!(allocate && eot)); + + /* Cycle through mrf regs - probably futile as we have to wait for + * the allocation response anyway. Also, the order this function + * is invoked doesn't correspond to the order the instructions will + * be executed, so it won't have any effect in many cases. + */ +#if 0 + if (start + c->nr_regs + 1 >= MAX_MRF) + start = 0; + + c->last_mrf = start + c->nr_regs + 1; +#endif + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + + /* Send each vertex as a seperate write to the urb. This + * is different to the concept in brw_sf_emit.c, where + * subsequent writes are used to build up a single urb + * entry. Each of these writes instantiates a seperate + * urb entry - (I think... what about 'allocate'?) + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + start, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response_length */ + eot, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + + /* Send an empty message to kill the thread and release any + * allocated urb entry: + */ + brw_urb_WRITE(p, + retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + 0, /* allocate */ + 0, /* used */ + 0, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, + BRW_URB_SWIZZLE_NONE); +} + + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ + return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ + if (c->key.nr_userclip) { + return brw_imm_uw(16); + } + else { + return brw_imm_uw(4); + } +} + + +/* If flatshading, distribute color from provoking vertex prior to + * clipping. + */ +void brw_clip_copy_colors( struct brw_clip_compile *c, + unsigned to, unsigned from ) +{ +#if 0 + struct brw_compile *p = &c->func; + + if (c->offset[VERT_RESULT_COL0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0])); + + if (c->offset[VERT_RESULT_COL1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1])); + + if (c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1])); +#else + #warning "disabled" +#endif +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + + /* Shift so that lowest outcode bit is rightmost: + */ + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); + + if (c->key.nr_userclip) { + struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + + /* Rearrange userclip outcodes so that they come directly after + * the fixed plane bits. + */ + brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); + brw_SHR(p, tmp, tmp, brw_imm_ud(8)); + brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + + release_tmp(c, tmp); + } +} + diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c new file mode 100644 index 00000000000..5e58701e91c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -0,0 +1,245 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_draw.h" +#include "brw_vs.h" +#include "brw_tex_layout.h" +#include "brw_winsys.h" + +#include "pipe/p_winsys.h" +#include "pipe/p_context.h" +#include "pipe/p_util.h" + +/*************************************** + * Mesa's Driver Functions + ***************************************/ + +#ifndef BRW_DEBUG +int BRW_DEBUG = (0); +#endif + +static void brw_destroy(struct pipe_context *pipe) +{ + struct brw_context *brw = brw_context(pipe); + + FREE(brw); +} + +static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + int x, y, w, h; + /* FIXME: corny... */ + + x = 0; + y = 0; + w = ps->width; + h = ps->height; + + pipe->surface_fill(pipe, ps, x, y, w, h, clearValue); +} + + +static int +brw_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +brw_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + +static boolean +brw_is_format_supported( struct pipe_context *pipe, + enum pipe_format format, uint type ) +{ +#if 0 + /* XXX: This is broken -- rewrite if still needed. */ + static const unsigned tex_supported[] = { + PIPE_FORMAT_U_R8_G8_B8_A8, + PIPE_FORMAT_U_A8_R8_G8_B8, + PIPE_FORMAT_U_R5_G6_B5, + PIPE_FORMAT_U_L8, + PIPE_FORMAT_U_A8, + PIPE_FORMAT_U_I8, + PIPE_FORMAT_U_L8_A8, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8_Z24, + }; + + + /* Actually a lot more than this - add later: + */ + static const unsigned render_supported[] = { + PIPE_FORMAT_U_A8_R8_G8_B8, + PIPE_FORMAT_U_R5_G6_B5, + }; + + /* + */ + static const unsigned z_stencil_supported[] = { + PIPE_FORMAT_U_Z16, + PIPE_FORMAT_U_Z32, + PIPE_FORMAT_S8_Z24, + }; + + switch (type) { + case PIPE_RENDER_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + case PIPE_TEX_FORMAT: + *numFormats = Elements(tex_supported); + return render_supported; + + case PIPE_Z_STENCIL_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + default: + *numFormats = 0; + return NULL; + } +#else + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + return TRUE; + default: + return FALSE; + }; + return FALSE; +#endif +} + + + + +struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys, + struct brw_winsys *brw_winsys, + unsigned pci_id) +{ + struct brw_context *brw; + + pipe_winsys->printf(pipe_winsys, + "%s: creating brw_context with pci id 0x%x\n", + __FUNCTION__, pci_id); + + brw = CALLOC_STRUCT(brw_context); + if (brw == NULL) + return NULL; + + brw->winsys = brw_winsys; + brw->pipe.winsys = pipe_winsys; + + brw->pipe.destroy = brw_destroy; + brw->pipe.is_format_supported = brw_is_format_supported; + brw->pipe.get_param = brw_get_param; + brw->pipe.get_paramf = brw_get_paramf; + brw->pipe.clear = brw_clear; + brw->pipe.texture_create = brw_texture_create; + brw->pipe.texture_release = brw_texture_release; + + brw_init_surface_functions(brw); + brw_init_state_functions(brw); + brw_init_flush_functions(brw); + brw_init_string_functions(brw); + brw_init_draw_functions( brw ); + + + brw_init_state( brw ); + + brw->pci_id = pci_id; + brw->dirty = ~0; + brw->hardware_dirty = ~0; + + memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); + + return &brw->pipe; +} + diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h new file mode 100644 index 00000000000..65664d853dd --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -0,0 +1,690 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "brw_structs.h" +#include "brw_winsys.h" + + +/* Glossary: + * + * URB - uniform resource buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawining a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contigous + * MRF registers. All program output is via these messages. URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitary + * computation and emit whatever primtives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by latter units, including Quads and Lineloops. + * + * CS - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes descisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_MAX_CURBE (32*16) + +struct brw_context; +struct brw_winsys; + + +/* Raised when we receive new state across the pipe interface: + */ +#define BRW_NEW_VIEWPORT 0x1 +#define BRW_NEW_RASTERIZER 0x2 +#define BRW_NEW_FS 0x4 +#define BRW_NEW_BLEND 0x8 +#define BRW_NEW_CLIP 0x10 +#define BRW_NEW_SCISSOR 0x20 +#define BRW_NEW_STIPPLE 0x40 +#define BRW_NEW_FRAMEBUFFER 0x80 +#define BRW_NEW_ALPHA_TEST 0x100 +#define BRW_NEW_DEPTH_STENCIL 0x200 +#define BRW_NEW_SAMPLER 0x400 +#define BRW_NEW_TEXTURE 0x800 +#define BRW_NEW_CONSTANTS 0x1000 +#define BRW_NEW_VBO 0x2000 +#define BRW_NEW_VS 0x4000 + +/* Raised for other internal events: + */ +#define BRW_NEW_URB_FENCE 0x10000 +#define BRW_NEW_PSP 0x20000 +#define BRW_NEW_CURBE_OFFSETS 0x40000 +#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 +#define BRW_NEW_PRIMITIVE 0x100000 +#define BRW_NEW_SCENE 0x200000 +#define BRW_NEW_SF_LINKAGE 0x400000 + +extern int BRW_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_PRIMS 0x8 +#define DEBUG_VERTS 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_DRI 0x80 +#define DEBUG_DMA 0x100 +#define DEBUG_SANITY 0x200 +#define DEBUG_SYNC 0x400 +#define DEBUG_SLEEP 0x800 +#define DEBUG_PIXEL 0x1000 +#define DEBUG_STATS 0x2000 +#define DEBUG_TILE 0x4000 +#define DEBUG_SINGLE_THREAD 0x8000 +#define DEBUG_WM 0x10000 +#define DEBUG_URB 0x20000 +#define DEBUG_VS 0x40000 +#define DEBUG_BATCH 0x80000 +#define DEBUG_BUFMGR 0x100000 +#define DEBUG_BLIT 0x200000 +#define DEBUG_REGION 0x400000 +#define DEBUG_MIPTREE 0x800000 + +#define DBG(...) do { \ + if (BRW_DEBUG & FILE_DEBUG_FLAG) \ + brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ +} while(0) + +#define PRINT(...) do { \ + brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ +} while(0) + +struct brw_state_flags { + unsigned cache; + unsigned brw; +}; + + +struct brw_shader_info { + int nr_regs[8]; /* TGSI_FILE_* */ +}; + + + +struct brw_vertex_program { + struct pipe_shader_state program; + struct brw_shader_info info; + int id; +}; + + + +struct brw_fragment_program { + struct pipe_shader_state program; + struct brw_shader_info info; + + boolean UsesDepth; + boolean UsesKill; + boolean ComputesDepth; + int id; +}; + + + + +struct pipe_setup_linkage { + struct { + unsigned vp_output:5; + unsigned interp_mode:4; + unsigned bf_vp_output:5; + } fp_input[PIPE_MAX_SHADER_INPUTS]; + + unsigned fp_input_count:5; + unsigned max_vp_output:5; +}; + + + +struct brw_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + +struct brw_wm_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + + unsigned first_curbe_grf; + unsigned total_grf; + unsigned total_scratch; + + /* Internally generated constants for the CURBE. These are loaded + * ahead of the data from the constant buffer. + */ + const float internal_const[8]; + unsigned nr_internal_consts; + unsigned max_const; + + boolean error; +}; + +struct brw_sf_prog_data { + unsigned urb_read_length; + unsigned total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. + */ + unsigned urb_entry_size; +}; + +struct brw_clip_prog_data { + unsigned curb_read_length; /* user planes? */ + unsigned clip_mode; + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_gs_prog_data { + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_vs_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + unsigned total_grf; + unsigned outputs_written; + + unsigned inputs_read; + + unsigned max_const; + + float imm_buf[PIPE_MAX_CONSTANT][4]; + unsigned num_imm; + unsigned num_consts; + + /* Used for calculating urb partitions: + */ + unsigned urb_entry_size; +}; + + +#define BRW_MAX_TEX_UNIT 8 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 + +/* Create a fixed sized struct for caching binding tables: + */ +struct brw_surface_binding_table { + unsigned surf_ss_offset[BRW_WM_MAX_SURF]; +}; + + +struct brw_cache; + +struct brw_mem_pool { + struct pipe_buffer *buffer; + + unsigned size; + unsigned offset; /* offset of first free byte */ + + struct brw_context *brw; +}; + +struct brw_cache_item { + unsigned hash; + unsigned key_size; /* for variable-sized keys */ + const void *key; + + unsigned offset; /* offset within pool's buffer */ + unsigned data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + unsigned id; + + const char *name; + + struct brw_context *brw; + struct brw_mem_pool *pool; + + struct brw_cache_item **items; + unsigned size, n_items; + + unsigned key_size; /* for fixed-size keys */ + unsigned aux_size; + + unsigned last_addr; /* offset of active item */ +}; + + + + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime. Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. + */ +struct brw_tracked_state { + struct brw_state_flags dirty; + void (*update)( struct brw_context *brw ); +}; + + +/* Flags for brw->state.cache. + */ +#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) +#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) +#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) +#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) +#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) +#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) +#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) +#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) +#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) +#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) +#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) +#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) +#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) +#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) +#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) +#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) +#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) +#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) + + + + +enum brw_mempool_id { + BRW_GS_POOL, + BRW_SS_POOL, + BRW_MAX_POOL +}; + + +struct brw_cached_batch_item { + struct header *header; + unsigned sz; + struct brw_cached_batch_item *next; +}; + + + +/* Protect against a future where PIPE_ATTRIB_MAX > 32. Wouldn't life + * be easier if C allowed arrays of packed elements? + */ +#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32) + + + + +struct brw_vertex_info { + unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */ + unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */ +}; + + + + + +struct brw_context +{ + struct pipe_context pipe; + struct brw_winsys *winsys; + + unsigned primitive; + unsigned reduced_primitive; + + boolean emit_state_always; + + struct { + struct brw_state_flags dirty; + } state; + + + struct { + const struct pipe_blend_state *Blend; + const struct pipe_depth_stencil_alpha_state *DepthStencil; + const struct pipe_poly_stipple *PolygonStipple; + const struct pipe_rasterizer_state *Raster; + const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; + const struct brw_vertex_program *VertexProgram; + const struct brw_fragment_program *FragmentProgram; + + struct pipe_clip_state Clip; + struct pipe_blend_color BlendColor; + struct pipe_scissor_state Scissor; + struct pipe_viewport_state Viewport; + struct pipe_framebuffer_state FrameBuffer; + + const struct pipe_constant_buffer *Constants[2]; + const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; + } attribs; + + struct brw_mem_pool pool[BRW_MAX_POOL]; + struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cached_batch_item *cached_batch_items; + + struct { + + /* Arrays with buffer objects to copy non-bufferobj arrays into + * for upload: + */ + struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; + + struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + + /* Summary of size and varying of active arrays, so we can check + * for changes to this state: + */ + struct brw_vertex_info info; + } vb; + + + unsigned hardware_dirty; + unsigned dirty; + unsigned pci_id; + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + unsigned vsize; /* vertex size plus header in urb registers */ + unsigned csize; /* constant buffer size in urb registers */ + unsigned sfsize; /* setup data size in urb registers */ + + boolean constrained; + + unsigned nr_vs_entries; + unsigned nr_gs_entries; + unsigned nr_clip_entries; + unsigned nr_sf_entries; + unsigned nr_cs_entries; + +/* unsigned vs_size; */ +/* unsigned gs_size; */ +/* unsigned clip_size; */ +/* unsigned sf_size; */ +/* unsigned cs_size; */ + + unsigned vs_start; + unsigned gs_start; + unsigned clip_start; + unsigned sf_start; + unsigned cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + unsigned wm_start; + unsigned wm_size; + unsigned clip_start; + unsigned clip_size; + unsigned vs_start; + unsigned vs_size; + unsigned total_size; + + unsigned gs_offset; + + float *last_buf; + unsigned last_bufsz; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + boolean prog_active; + unsigned prog_gs_offset; + unsigned state_gs_offset; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + struct pipe_setup_linkage linkage; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + +// struct brw_wm_compiler *compile_data; + + + /** + * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER + * cache + */ + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + unsigned render_surf; + unsigned nr_surfaces; + + unsigned max_threads; + struct pipe_buffer *scratch_buffer; + unsigned scratch_buffer_size; + + unsigned sampler_count; + unsigned sampler_gs_offset; + + struct brw_surface_binding_table bind; + unsigned bind_ss_offset; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } wm; + + + struct { + unsigned vp_gs_offset; + unsigned state_gs_offset; + } cc; + + + /* Used to give every program string a unique id + */ + unsigned program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) + + +/*====================================================================== + * brw_vtbl.c + */ +void brw_do_flush( struct brw_context *brw, + unsigned flags ); + + +/*====================================================================== + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + + +/*====================================================================== + * brw_tex.c + */ +void brwUpdateTextureState( struct brw_context *brw ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +void brw_upload_constant_buffer_state(struct brw_context *brw); + +void brw_init_surface_functions(struct brw_context *brw); +void brw_init_state_functions(struct brw_context *brw); +void brw_init_flush_functions(struct brw_context *brw); +void brw_init_string_functions(struct brw_context *brw); + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static inline struct brw_context * +brw_context( struct pipe_context *ctx ) +{ + return (struct brw_context *)ctx; +} + +#endif + diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c new file mode 100644 index 00000000000..52bbd525c16 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -0,0 +1,368 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_batch.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "pipe/p_state.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#define FILE_DEBUG_FLAG DEBUG_FALLBACKS + +/* Partition the CURBE between the various users of constant values: + */ +static void calculate_curbe_offsets( struct brw_context *brw ) +{ + /* CACHE_NEW_WM_PROG */ + unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16); + + /* BRW_NEW_VERTEX_PROGRAM */ + unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16); + unsigned nr_clip_regs = 0; + unsigned total_regs; + +#if 0 + /* BRW_NEW_CLIP ? */ + if (brw->attribs.Transform->ClipPlanesEnabled) { + unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); + nr_clip_regs = align(nr_planes * 4, 16); + } +#endif + + + total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; + + /* This can happen - what to do? Probably rather than falling + * back, the best thing to do is emit programs which code the + * constants as immediate values. Could do this either as a static + * cap on WM and VS, or adaptively. + * + * Unfortunately, this is currently dependent on the results of the + * program generation process (in the case of wm), so this would + * introduce the need to re-generate programs in the event of a + * curbe allocation failure. + */ + /* Max size is 32 - just large enough to + * hold the 128 parameters allowed by + * the fragment and vertex program + * api's. It's not clear what happens + * when both VP and FP want to use 128 + * parameters, though. + */ + assert(total_regs <= 32); + + /* Lazy resize: + */ + if (nr_fp_regs > brw->curbe.wm_size || + nr_vp_regs > brw->curbe.vs_size || + nr_clip_regs != brw->curbe.clip_size || + (total_regs < brw->curbe.total_size / 4 && + brw->curbe.total_size > 16)) { + + unsigned reg = 0; + + /* Calculate a new layout: + */ + reg = 0; + brw->curbe.wm_start = reg; + brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; + brw->curbe.clip_start = reg; + brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; + brw->curbe.vs_start = reg; + brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; + brw->curbe.total_size = reg; + +#if 0 + if (0) + DBG("curbe wm %d+%d clip %d+%d vs %d+%d\n", + brw->curbe.wm_start, + brw->curbe.wm_size, + brw->curbe.clip_start, + brw->curbe.clip_size, + brw->curbe.vs_start, + brw->curbe.vs_size ); +#endif + + brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; + } +} + + +const struct brw_tracked_state brw_curbe_offsets = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_VS), + .cache = CACHE_NEW_WM_PROG + }, + .update = calculate_curbe_offsets +}; + + + +/* Define the number of curbes within CS's urb allocation. Multiple + * urb entries -> multiple curbes. These will be used by + * fixed-function hardware in a double-buffering scheme to avoid a + * pipeline stall each time the contents of the curbe is changed. + */ +void brw_upload_constant_buffer_state(struct brw_context *brw) +{ + struct brw_constant_buffer_state cbs; + memset(&cbs, 0, sizeof(cbs)); + + /* It appears that this is the state packet for the CS unit, ie. the + * urb entries detailed here are housed in the CS range from the + * URB_FENCE command. + */ + cbs.header.opcode = CMD_CONST_BUFFER_STATE; + cbs.header.length = sizeof(cbs)/4 - 2; + + /* BRW_NEW_URB_FENCE */ + cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cbs.bits0.urb_entry_size = brw->urb.csize - 1; + + assert(brw->urb.nr_cs_entries); + BRW_CACHED_BATCH_STRUCT(brw, &cbs); +} + + +static float fixed_plane[6][4] = { + { 0, 0, -1, 1 }, + { 0, 0, 1, 1 }, + { 0, -1, 0, 1 }, + { 0, 1, 0, 1 }, + {-1, 0, 0, 1 }, + { 1, 0, 0, 1 } +}; + +/* Upload a new set of constants. Too much variability to go into the + * cache mechanism, but maybe would benefit from a comparison against + * the current uploaded set of constants. + */ +static void upload_constant_buffer(struct brw_context *brw) +{ + struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; + unsigned sz = brw->curbe.total_size; + unsigned bufsz = sz * sizeof(float); + float *buf; + unsigned i; + + + if (sz == 0) { + struct brw_constant_buffer cb; + cb.header.opcode = CMD_CONST_BUFFER; + cb.header.length = sizeof(cb)/4 - 2; + cb.header.valid = 0; + cb.bits0.buffer_length = 0; + cb.bits0.buffer_address = 0; + BRW_BATCH_STRUCT(brw, &cb); + + if (brw->curbe.last_buf) { + free(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + brw->curbe.last_bufsz = 0; + } + + return; + } + + buf = (float *)malloc(bufsz); + + memset(buf, 0, bufsz); + + if (brw->curbe.wm_size) { + unsigned offset = brw->curbe.wm_start * 16; + + /* First the constant buffer constants: + */ + + /* Then any internally generated constants: + */ + for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) + buf[offset + i] = brw->wm.prog_data->internal_const[i]; + + assert(brw->wm.prog_data->max_const == + brw->wm.prog_data->nr_internal_consts); + } + + + /* The clipplanes are actually delivered to both CLIP and VS units. + * VS uses them to calculate the outcode bitmasks. + */ + if (brw->curbe.clip_size) { + unsigned offset = brw->curbe.clip_start * 16; + unsigned j; + + /* If any planes are going this way, send them all this way: + */ + for (i = 0; i < 6; i++) { + buf[offset + i * 4 + 0] = fixed_plane[i][0]; + buf[offset + i * 4 + 1] = fixed_plane[i][1]; + buf[offset + i * 4 + 2] = fixed_plane[i][2]; + buf[offset + i * 4 + 3] = fixed_plane[i][3]; + } + + /* Clip planes: BRW_NEW_CLIP: + */ + for (j = 0; j < brw->attribs.Clip.nr; j++) { + buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; + buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; + buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; + buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; + i++; + } + } + + + if (brw->curbe.vs_size) { + unsigned offset = brw->curbe.vs_start * 16; + /*unsigned nr = vp->max_const;*/ + const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0]; + struct pipe_winsys *ws = brw->pipe.winsys; + /* FIXME: buffer size is num_consts + num_immediates */ + if (brw->vs.prog_data->num_consts) { + /* map the vertex constant buffer and copy to curbe: */ + void *data = ws->buffer_map(ws, cbuffer->buffer, 0); + /* FIXME: this is wrong. the cbuffer->size currently + * represents size of consts + immediates. so if we'll + * have both we'll copy over the end of the buffer + * with the subsequent memcpy */ + memcpy(&buf[offset], data, cbuffer->size); + ws->buffer_unmap(ws, cbuffer->buffer); + offset += cbuffer->size; + } + /*immediates*/ + if (brw->vs.prog_data->num_imm) { + memcpy(&buf[offset], brw->vs.prog_data->imm_buf, + brw->vs.prog_data->num_imm * 4 * sizeof(float)); + } + } + + if (1) { + for (i = 0; i < sz; i+=4) + debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + + debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + brw->curbe.last_buf, buf, + bufsz, brw->curbe.last_bufsz, + brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); + } + + if (brw->curbe.last_buf && + bufsz == brw->curbe.last_bufsz && + memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { + free(buf); +/* return; */ + } + else { + if (brw->curbe.last_buf) + free(brw->curbe.last_buf); + brw->curbe.last_buf = buf; + brw->curbe.last_bufsz = bufsz; + + + if (!brw_pool_alloc(pool, + bufsz, + 1 << 6, + &brw->curbe.gs_offset)) { + debug_printf("out of GS memory for curbe\n"); + assert(0); + return; + } + + + /* Copy data to the buffer: + */ + brw->winsys->buffer_subdata_typed(brw->winsys, + pool->buffer, + brw->curbe.gs_offset, + bufsz, + buf, + BRW_CONSTANT_BUFFER ); + } + + /* TODO: only emit the constant_buffer packet when necessary, ie: + - contents have changed + - offset has changed + - hw requirements due to other packets emitted. + */ + { + struct brw_constant_buffer cb; + + memset(&cb, 0, sizeof(cb)); + + cb.header.opcode = CMD_CONST_BUFFER; + cb.header.length = sizeof(cb)/4 - 2; + cb.header.valid = 1; + cb.bits0.buffer_length = sz - 1; + cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; + + /* Because this provokes an action (ie copy the constants into the + * URB), it shouldn't be shortcircuited if identical to the + * previous time - because eg. the urb destination may have + * changed, or the urb contents different to last time. + * + * Note that the data referred to is actually copied internally, + * not just used in place according to passed pointer. + * + * It appears that the CS unit takes care of using each available + * URB entry (Const URB Entry == CURBE) in turn, and issuing + * flushes as necessary when doublebuffering of CURBEs isn't + * possible. + */ + BRW_BATCH_STRUCT(brw, &cb); + } +} + +/* This tracked state is unique in that the state it monitors varies + * dynamically depending on the parameters tracked by the fragment and + * vertex programs. This is the template used as a starting point, + * each context will maintain a copy of this internally and update as + * required. + */ +const struct brw_tracked_state brw_constant_buffer = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_CONSTANTS | + BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ + BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ + BRW_NEW_CURBE_OFFSETS), + .cache = (CACHE_NEW_WM_PROG) + }, + .update = upload_constant_buffer +}; + diff --git a/src/gallium/drivers/i965simple/brw_defines.h b/src/gallium/drivers/i965simple/brw_defines.h new file mode 100644 index 00000000000..9379a397f63 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_defines.h @@ -0,0 +1,852 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* + */ +#define MI_NOOP 0x00 +#define MI_USER_INTERRUPT 0x02 +#define MI_WAIT_FOR_EVENT 0x03 +#define MI_FLUSH 0x04 +#define MI_REPORT_HEAD 0x07 +#define MI_ARB_ON_OFF 0x08 +#define MI_BATCH_BUFFER_END 0x0A +#define MI_OVERLAY_FLIP 0x11 +#define MI_LOAD_SCAN_LINES_INCL 0x12 +#define MI_LOAD_SCAN_LINES_EXCL 0x13 +#define MI_DISPLAY_BUFFER_INFO 0x14 +#define MI_SET_CONTEXT 0x18 +#define MI_STORE_DATA_IMM 0x20 +#define MI_STORE_DATA_INDEX 0x21 +#define MI_LOAD_REGISTER_IMM 0x22 +#define MI_STORE_REGISTER_MEM 0x24 +#define MI_BATCH_BUFFER_START 0x31 + +#define MI_SYNCHRONOUS_FLIP 0x0 +#define MI_ASYNCHRONOUS_FLIP 0x1 + +#define MI_BUFFER_SECURE 0x0 +#define MI_BUFFER_NONSECURE 0x1 + +#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 +#define MI_ARBITRATE_BETWEEN_INSTS 0x1 +#define MI_NO_ARBITRATION 0x3 + +#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 +#define MI_CONDITION_CODE_WAIT_0 0x1 +#define MI_CONDITION_CODE_WAIT_1 0x2 +#define MI_CONDITION_CODE_WAIT_2 0x3 +#define MI_CONDITION_CODE_WAIT_3 0x4 +#define MI_CONDITION_CODE_WAIT_4 0x5 + +#define MI_DISPLAY_PIPE_A 0x0 +#define MI_DISPLAY_PIPE_B 0x1 + +#define MI_DISPLAY_PLANE_A 0x0 +#define MI_DISPLAY_PLANE_B 0x1 +#define MI_DISPLAY_PLANE_C 0x2 + +#define MI_STANDARD_FLIP 0x0 +#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 +#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 +#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 + +#define MI_PHYSICAL_ADDRESS 0x0 +#define MI_VIRTUAL_ADDRESS 0x1 + +#define MI_BUFFER_MEMORY_MAIN 0x0 +#define MI_BUFFER_MEMORY_GTT 0x2 +#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 + +#define MI_FLIP_CONTINUE 0x0 +#define MI_FLIP_ON 0x1 +#define MI_FLIP_OFF 0x2 + +#define MI_UNTRUSTED_REGISTER_SPACE 0x0 +#define MI_TRUSTED_REGISTER_SPACE 0x1 + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_SPRITE_POINT_ENABLE 16 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 0x6 +#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 +#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define BRW_VFCOMPONENT_NOSTORE 0 +#define BRW_VFCOMPONENT_STORE_SRC 1 +#define BRW_VFCOMPONENT_STORE_0 2 +#define BRW_VFCOMPONENT_STORE_1_FLT 3 +#define BRW_VFCOMPONENT_STORE_1_INT 4 +#define BRW_VFCOMPONENT_STORE_VID 5 +#define BRW_VFCOMPONENT_STORE_IID 6 +#define BRW_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +#define BRW_COMPRESSION_NONE 0 +#define BRW_COMPRESSION_2NDHALF 1 +#define BRW_COMPRESSION_COMPRESSED 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_O 8 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_OPCODE_MOV 1 +#define BRW_OPCODE_SEL 2 +#define BRW_OPCODE_NOT 4 +#define BRW_OPCODE_AND 5 +#define BRW_OPCODE_OR 6 +#define BRW_OPCODE_XOR 7 +#define BRW_OPCODE_SHR 8 +#define BRW_OPCODE_SHL 9 +#define BRW_OPCODE_RSR 10 +#define BRW_OPCODE_RSL 11 +#define BRW_OPCODE_ASR 12 +#define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_JMPI 32 +#define BRW_OPCODE_IF 34 +#define BRW_OPCODE_IFF 35 +#define BRW_OPCODE_ELSE 36 +#define BRW_OPCODE_ENDIF 37 +#define BRW_OPCODE_DO 38 +#define BRW_OPCODE_WHILE 39 +#define BRW_OPCODE_BREAK 40 +#define BRW_OPCODE_CONTINUE 41 +#define BRW_OPCODE_HALT 42 +#define BRW_OPCODE_MSAVE 44 +#define BRW_OPCODE_MRESTORE 45 +#define BRW_OPCODE_PUSH 46 +#define BRW_OPCODE_POP 47 +#define BRW_OPCODE_WAIT 48 +#define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_ADD 64 +#define BRW_OPCODE_MUL 65 +#define BRW_OPCODE_AVG 66 +#define BRW_OPCODE_FRC 67 +#define BRW_OPCODE_RNDU 68 +#define BRW_OPCODE_RNDD 69 +#define BRW_OPCODE_RNDE 70 +#define BRW_OPCODE_RNDZ 71 +#define BRW_OPCODE_MAC 72 +#define BRW_OPCODE_MACH 73 +#define BRW_OPCODE_LZD 74 +#define BRW_OPCODE_SAD2 80 +#define BRW_OPCODE_SADA2 81 +#define BRW_OPCODE_DP4 84 +#define BRW_OPCODE_DPH 85 +#define BRW_OPCODE_DP3 86 +#define BRW_OPCODE_DP2 87 +#define BRW_OPCODE_DPA2 88 +#define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_NOP 126 + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_PIPELINE_SELECT 0x6104 + +#define CMD_PIPELINED_STATE_POINTERS 0x7800 +#define CMD_BINDING_TABLE_PTRS 0x7801 +#define CMD_VERTEX_BUFFER 0x7808 +#define CMD_VERTEX_ELEMENT 0x7809 +#define CMD_INDEX_BUFFER 0x780a +#define CMD_VF_STATISTICS 0x780b + +#define CMD_DRAW_RECT 0x7900 +#define CMD_BLEND_CONSTANT_COLOR 0x7901 +#define CMD_CHROMA_KEY 0x7904 +#define CMD_DEPTH_BUFFER 0x7905 +#define CMD_POLY_STIPPLE_OFFSET 0x7906 +#define CMD_POLY_STIPPLE_PATTERN 0x7907 +#define CMD_LINE_STIPPLE_PATTERN 0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_3D_PRIM 0x7b00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END 0x1 +#define R02_PRIM_START 0x2 + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_draw.c b/src/gallium/drivers/i965simple/brw_draw.c new file mode 100644 index 00000000000..7598e3dc8af --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw.c @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdlib.h> + +#include "brw_batch.h" +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" + +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = { + _3DPRIM_POINTLIST, + _3DPRIM_LINELIST, + _3DPRIM_LINELOOP, + _3DPRIM_LINESTRIP, + _3DPRIM_TRILIST, + _3DPRIM_TRISTRIP, + _3DPRIM_TRIFAN, + _3DPRIM_QUADLIST, + _3DPRIM_QUADSTRIP, + _3DPRIM_POLYGON +}; + + +static const int reduced_prim[PIPE_PRIM_POLYGON+1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + +/* When the primitive changes, set a state bit and re-validate. Not + * the nicest and would rather deal with this by having all the + * programs be immune to the active primitive (ie. cope with all + * possibilities). That may not be realistic however. + */ +static void brw_set_prim(struct brw_context *brw, int prim) +{ + PRINT("PRIM: %d\n", prim); + + /* Slight optimization to avoid the GS program when not needed: + */ + if (prim == PIPE_PRIM_QUAD_STRIP && + brw->attribs.Raster->flatshade && + brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_FILL && + brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_FILL) + prim = PIPE_PRIM_TRIANGLE_STRIP; + + if (prim != brw->primitive) { + brw->primitive = prim; + brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; + + if (reduced_prim[prim] != brw->reduced_primitive) { + brw->reduced_primitive = reduced_prim[prim]; + brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; + } + + brw_validate_state(brw); + } + +} + + +static unsigned trim(int prim, unsigned length) +{ + if (prim == PIPE_PRIM_QUAD_STRIP) + return length > 3 ? (length - length % 2) : 0; + else if (prim == PIPE_PRIM_QUADS) + return length - length % 4; + else + return length; +} + + + +static boolean brw_emit_prim( struct brw_context *brw, + boolean indexed, + unsigned start, + unsigned count ) + +{ + struct brw_3d_primitive prim_packet; + + if (BRW_DEBUG & DEBUG_PRIMS) + PRINT("PRIM: %d %d %d\n", brw->primitive, start, count); + + prim_packet.header.opcode = CMD_3D_PRIM; + prim_packet.header.length = sizeof(prim_packet)/4 - 2; + prim_packet.header.pad = 0; + prim_packet.header.topology = hw_prim[brw->primitive]; + prim_packet.header.indexed = indexed; + + prim_packet.verts_per_instance = trim(brw->primitive, count); + prim_packet.start_vert_location = start; + prim_packet.instance_count = 1; + prim_packet.start_instance_location = 0; + prim_packet.base_vert_location = 0; + + if (prim_packet.verts_per_instance == 0) + return TRUE; + + return brw_batchbuffer_data( brw->winsys, + &prim_packet, + sizeof(prim_packet) ); +} + + +/* May fail if out of video memory for texture or vbo upload, or on + * fallback conditions. + */ +static boolean brw_try_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned mode, + unsigned start, + unsigned count ) +{ + struct brw_context *brw = brw_context(pipe); + + /* Set the first primitive ahead of validate_state: + */ + brw_set_prim(brw, mode); + + /* Upload index, vertex data: + */ + if (index_buffer && + !brw_upload_indices( brw, index_buffer, index_size, start, count )) + return FALSE; + + if (!brw_upload_vertex_buffers(brw)) + return FALSE; + + if (!brw_upload_vertex_elements( brw )) + return FALSE; + + /* XXX: Need to separate validate and upload of state. + */ + if (brw->state.dirty.brw) + brw_validate_state( brw ); + + if (!brw_emit_prim(brw, index_buffer != NULL, + start, count)) + return FALSE; + + return TRUE; +} + + + +static boolean brw_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count ) +{ + if (!brw_try_draw_elements( pipe, + indexBuffer, + indexSize, + mode, start, count )) + { + /* flush ? */ + + if (!brw_try_draw_elements( pipe, + indexBuffer, + indexSize, + mode, start, + count )) { + assert(0); + return FALSE; + } + } + + return TRUE; +} + + + +static boolean brw_draw_arrays( struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count ) +{ + if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { + /* flush ? */ + + if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { + assert(0); + return FALSE; + } + } + + return TRUE; +} + + + +void brw_init_draw_functions( struct brw_context *brw ) +{ + brw->pipe.draw_arrays = brw_draw_arrays; + brw->pipe.draw_elements = brw_draw_elements; +} + + diff --git a/src/gallium/drivers/i965simple/brw_draw.h b/src/gallium/drivers/i965simple/brw_draw.h new file mode 100644 index 00000000000..62fe0d5d0ee --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw.h @@ -0,0 +1,55 @@ + /************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_DRAW_H +#define BRW_DRAW_H + +#include "pipe/p_context.h" + +struct brw_context; + + + +void brw_init_draw_functions( struct brw_context *brw ); + + +boolean brw_upload_vertices( struct brw_context *brw, + unsigned min_index, + unsigned max_index ); + +boolean brw_upload_indices(struct brw_context *brw, + const struct pipe_buffer *index_buffer, + int ib_size, int start, int count); + +boolean brw_upload_vertex_buffers( struct brw_context *brw ); +boolean brw_upload_vertex_elements( struct brw_context *brw ); + +unsigned brw_translate_surface_format( unsigned id ); + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c new file mode 100644 index 00000000000..aa85d93866a --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -0,0 +1,299 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdlib.h> + +#include "brw_batch.h" +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" + +struct brw_array_state { + union header_union header; + + struct { + union { + struct { + unsigned pitch:11; + unsigned pad:15; + unsigned access_type:1; + unsigned vb_index:5; + } bits; + unsigned dword; + } vb0; + + struct pipe_buffer *buffer; + unsigned offset; + + unsigned max_index; + unsigned instance_data_step_rate; + + } vb[BRW_VBP_MAX]; +}; + + + +unsigned brw_translate_surface_format( unsigned id ) +{ + switch (id) { + case PIPE_FORMAT_R64_FLOAT: + return BRW_SURFACEFORMAT_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return BRW_SURFACEFORMAT_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return BRW_SURFACEFORMAT_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return BRW_SURFACEFORMAT_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return BRW_SURFACEFORMAT_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return BRW_SURFACEFORMAT_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return BRW_SURFACEFORMAT_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return BRW_SURFACEFORMAT_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return BRW_SURFACEFORMAT_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return BRW_SURFACEFORMAT_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return BRW_SURFACEFORMAT_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return BRW_SURFACEFORMAT_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return BRW_SURFACEFORMAT_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return BRW_SURFACEFORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return BRW_SURFACEFORMAT_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return BRW_SURFACEFORMAT_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return BRW_SURFACEFORMAT_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return BRW_SURFACEFORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return BRW_SURFACEFORMAT_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return BRW_SURFACEFORMAT_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return BRW_SURFACEFORMAT_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return BRW_SURFACEFORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return BRW_SURFACEFORMAT_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return BRW_SURFACEFORMAT_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return BRW_SURFACEFORMAT_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return BRW_SURFACEFORMAT_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return BRW_SURFACEFORMAT_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + + default: + assert(0); + return 0; + } +} + +static unsigned get_index_type(int type) +{ + switch (type) { + case 1: return BRW_INDEX_BYTE; + case 2: return BRW_INDEX_WORD; + case 4: return BRW_INDEX_DWORD; + default: assert(0); return 0; + } +} + + +boolean brw_upload_vertex_buffers( struct brw_context *brw ) +{ + struct brw_array_state vbp; + unsigned nr_enabled = 0; + unsigned i; + + memset(&vbp, 0, sizeof(vbp)); + + /* This is a hardware limit: + */ + + for (i = 0; i < BRW_VEP_MAX; i++) + { + if (brw->vb.vbo_array[i] == NULL) { + nr_enabled = i; + break; + } + + vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->pitch; + vbp.vb[i].vb0.bits.pad = 0; + vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; + vbp.vb[i].vb0.bits.vb_index = i; + vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset; + vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer; + vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index; + } + + + vbp.header.bits.length = (1 + nr_enabled * 4) - 2; + vbp.header.bits.opcode = CMD_VERTEX_BUFFER; + + BEGIN_BATCH(vbp.header.bits.length+2, 0); + OUT_BATCH( vbp.header.dword ); + + for (i = 0; i < nr_enabled; i++) { + OUT_BATCH( vbp.vb[i].vb0.dword ); + OUT_RELOC( vbp.vb[i].buffer, PIPE_BUFFER_USAGE_GPU_READ, + vbp.vb[i].offset); + OUT_BATCH( vbp.vb[i].max_index ); + OUT_BATCH( vbp.vb[i].instance_data_step_rate ); + } + ADVANCE_BATCH(); + return TRUE; +} + + + +boolean brw_upload_vertex_elements( struct brw_context *brw ) +{ + struct brw_vertex_element_packet vep; + + unsigned i; + unsigned nr_enabled = brw->attribs.VertexProgram->program.num_inputs; + + memset(&vep, 0, sizeof(vep)); + + for (i = 0; i < nr_enabled; i++) + vep.ve[i] = brw->vb.inputs[i]; + + + vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; + vep.header.opcode = CMD_VERTEX_ELEMENT; + brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); + + return TRUE; +} + +boolean brw_upload_indices( struct brw_context *brw, + const struct pipe_buffer *index_buffer, + int ib_size, int start, int count) +{ + /* Emit the indexbuffer packet: + */ + { + struct brw_indexbuffer ib; + + memset(&ib, 0, sizeof(ib)); + + ib.header.bits.opcode = CMD_INDEX_BUFFER; + ib.header.bits.length = sizeof(ib)/4 - 2; + ib.header.bits.index_format = get_index_type(ib_size); + ib.header.bits.cut_index_enable = 0; + + + BEGIN_BATCH(4, 0); + OUT_BATCH( ib.header.dword ); + OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start); + OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start + count); + OUT_BATCH( 0 ); + ADVANCE_BATCH(); + } + return TRUE; +} diff --git a/src/gallium/drivers/i965simple/brw_eu.c b/src/gallium/drivers/i965simple/brw_eu.c new file mode 100644 index 00000000000..e2002d1821f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu.c @@ -0,0 +1,130 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? + */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_push_insn_state(p); + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + brw_pop_insn_state(p); + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) +{ + p->current->header.predicate_control = pc; +} + +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) +{ + p->current->header.destreg__conditonalmod = conditional; +} + +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ) +{ + p->current->header.access_mode = access_mode; +} + +void brw_set_compression_control( struct brw_compile *p, boolean compression_control ) +{ + p->current->header.compression_control = compression_control; +} + +void brw_set_mask_control( struct brw_compile *p, unsigned value ) +{ + p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, unsigned value ) +{ + p->current->header.saturate = value; +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; +} + + +/*********************************************************************** + */ +void brw_init_compile( struct brw_compile *p ) +{ + p->nr_insn = 0; + p->current = p->stack; + memset(p->current, 0, sizeof(p->current[0])); + + /* Some defaults? + */ + brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ + brw_set_saturate(p, 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value(p, 0xff); +} + + +const unsigned *brw_get_program( struct brw_compile *p, + unsigned *sz ) +{ + unsigned i; + + for (i = 0; i < 8; i++) + brw_NOP(p); + + *sz = p->nr_insn * sizeof(struct brw_instruction); + return (const unsigned *)p->store; +} + diff --git a/src/gallium/drivers/i965simple/brw_eu.h b/src/gallium/drivers/i965simple/brw_eu.h new file mode 100644 index 00000000000..23151ae9ed6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu.h @@ -0,0 +1,888 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include "brw_structs.h" +#include "brw_defines.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + + +#define REG_SIZE (8*4) + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg +{ + unsigned type:4; + unsigned file:2; + unsigned nr:8; + unsigned subnr:5; /* :1 in align16 */ + unsigned negate:1; /* source only */ + unsigned abs:1; /* source only */ + unsigned vstride:4; /* source only */ + unsigned width:3; /* src only, align1 only */ + unsigned hstride:2; /* src only, align1 only */ + unsigned address_mode:1; /* relative addressing, hopefully! */ + unsigned pad0:1; + + union { + struct { + unsigned swizzle:8; /* src only, align16 only */ + unsigned writemask:4; /* dest only, align16 only */ + int indirect_offset:10; /* relative addressing offset */ + unsigned pad1:10; /* two dwords total */ + } bits; + + float f; + int d; + unsigned ud; + } dw1; +}; + + +struct brw_indirect { + unsigned addr_subnr:4; + int addr_offset:10; + unsigned pad:18; +}; + + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 1200 + +struct brw_compile { + struct brw_instruction store[BRW_EU_MAX_INSN]; + unsigned nr_insn; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + unsigned flag_value; + boolean single_program_flow; +}; + + + +static __inline int type_sz( unsigned type ) +{ + switch( type ) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +static __inline struct brw_reg brw_reg( unsigned file, + unsigned nr, + unsigned subnr, + unsigned type, + unsigned vstride, + unsigned width, + unsigned hstride, + unsigned swizzle, + unsigned writemask) +{ + + struct brw_reg reg; + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? + */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +static __inline struct brw_reg brw_vec16_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + +static __inline struct brw_reg brw_vec8_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec4_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec2_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + TGSI_WRITEMASK_XY); +} + +static __inline struct brw_reg brw_vec1_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + TGSI_WRITEMASK_X); +} + + +static __inline struct brw_reg retype( struct brw_reg reg, + unsigned type ) +{ + reg.type = type; + return reg; +} + +static __inline struct brw_reg suboffset( struct brw_reg reg, + unsigned delta ) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + + +static __inline struct brw_reg offset( struct brw_reg reg, + unsigned delta ) +{ + reg.nr += delta; + return reg; +} + + +static __inline struct brw_reg byte_offset( struct brw_reg reg, + unsigned bytes ) +{ + unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +static __inline struct brw_reg brw_uw16_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw8_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw1_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_imm_reg( unsigned type ) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +static __inline struct brw_reg brw_imm_f( float f ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +static __inline struct brw_reg brw_imm_d( int d ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +static __inline struct brw_reg brw_imm_ud( unsigned ud ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +static __inline struct brw_reg brw_imm_uw( ushort uw ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw; + return imm; +} + +static __inline struct brw_reg brw_imm_w( short w ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w; + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/* Vector of eight signed half-byte values: + */ +static __inline struct brw_reg brw_imm_v( unsigned v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/* Vector of four 8-bit float values: + */ +static __inline struct brw_reg brw_imm_vf( unsigned v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static __inline struct brw_reg brw_imm_vf4( unsigned v0, + unsigned v1, + unsigned v2, + unsigned v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + + +static __inline struct brw_reg brw_address( struct brw_reg reg ) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + + +static __inline struct brw_reg brw_vec1_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec8_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec4_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static __inline struct brw_reg brw_vec2_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_uw8_grf( unsigned nr, + unsigned subnr ) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_null_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static __inline struct brw_reg brw_address_reg( unsigned subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static __inline struct brw_reg brw_ip_reg( void ) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + TGSI_WRITEMASK_XYZW); /* NOTE! */ +} + +static __inline struct brw_reg brw_acc_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + + +static __inline struct brw_reg brw_flag_reg( void ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + + +static __inline struct brw_reg brw_mask_reg( unsigned subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static __inline struct brw_reg brw_message_reg( unsigned nr ) +{ + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + + + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static __inline unsigned cvt( unsigned val ) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static __inline struct brw_reg stride( struct brw_reg reg, + unsigned vstride, + unsigned width, + unsigned hstride ) +{ + + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + +static __inline struct brw_reg vec16( struct brw_reg reg ) +{ + return stride(reg, 16,16,1); +} + +static __inline struct brw_reg vec8( struct brw_reg reg ) +{ + return stride(reg, 8,8,1); +} + +static __inline struct brw_reg vec4( struct brw_reg reg ) +{ + return stride(reg, 4,4,1); +} + +static __inline struct brw_reg vec2( struct brw_reg reg ) +{ + return stride(reg, 2,2,1); +} + +static __inline struct brw_reg vec1( struct brw_reg reg ) +{ + return stride(reg, 0,1,0); +} + +static __inline struct brw_reg get_element( struct brw_reg reg, unsigned elt ) +{ + return vec1(suboffset(reg, elt)); +} + +static __inline struct brw_reg get_element_ud( struct brw_reg reg, unsigned elt ) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + + +static __inline struct brw_reg brw_swizzle( struct brw_reg reg, + unsigned x, + unsigned y, + unsigned z, + unsigned w) +{ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + + +static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, + unsigned x ) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static __inline struct brw_reg brw_writemask( struct brw_reg reg, + unsigned mask ) +{ + reg.dw1.bits.writemask &= mask; + return reg; +} + +static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, + unsigned mask ) +{ + reg.dw1.bits.writemask = mask; + return reg; +} + +static __inline struct brw_reg negate( struct brw_reg reg ) +{ + reg.negate ^= 1; + return reg; +} + +static __inline struct brw_reg brw_abs( struct brw_reg reg ) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** + */ +static __inline struct brw_reg brw_vec4_indirect( unsigned subnr, + int offset ) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static __inline struct brw_reg brw_vec1_indirect( unsigned subnr, + int offset ) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static __inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) +{ + return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static __inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, int offset ) +{ + ptr.addr_offset += offset; + return ptr; +} + +static __inline struct brw_indirect brw_indirect( unsigned addr_subnr, int offset ) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +static __inline struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +void brw_pop_insn_state( struct brw_compile *p ); +void brw_push_insn_state( struct brw_compile *p ); +void brw_set_mask_control( struct brw_compile *p, unsigned value ); +void brw_set_saturate( struct brw_compile *p, unsigned value ); +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ); +void brw_set_compression_control( struct brw_compile *p, boolean control ); +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ); +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ); + +void brw_init_compile( struct brw_compile *p ); +const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz ); + + +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ); + +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ); + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + + + +/* Helpers for SEND instruction: + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean eot, + boolean writes_complete, + unsigned offset, + unsigned swizzle); + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + boolean eot); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + boolean eot); + +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision ); + +void brw_math( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision ); + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + unsigned scratch_offset ); + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + unsigned msg_reg_nr, + unsigned scratch_offset ); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn); + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); +/* Forward jumps: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + + + +void brw_NOP(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg( struct brw_reg reg ); + + +/*********************************************************************** + * brw_eu_util.c: + */ + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + unsigned count); + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + unsigned count); + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count); + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count); + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ); +#endif diff --git a/src/gallium/drivers/i965simple/brw_eu_debug.c b/src/gallium/drivers/i965simple/brw_eu_debug.c new file mode 100644 index 00000000000..4a94ddefa6a --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_debug.c @@ -0,0 +1,90 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_debug.h" + +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + debug_printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + debug_printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else { + debug_printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, + 1<<hwreg.width, + hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0, + type[hwreg.type]); + } +} + + + diff --git a/src/gallium/drivers/i965simple/brw_eu_emit.c b/src/gallium/drivers/i965simple/brw_eu_emit.c new file mode 100644 index 00000000000..400a80b6fba --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_emit.c @@ -0,0 +1,1080 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size( struct brw_instruction *insn, + struct brw_reg reg ) +{ + if (reg.width == BRW_WIDTH_8 && + insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; /* note - definitions are compatible */ +} + + +static void brw_set_dest( struct brw_instruction *insn, + struct brw_reg dest ) +{ + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + } + else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + } + } + else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + } + } + + /* NEW: Set the execution size based on dest.width and + * insn->compression_control: + */ + guess_execution_size(insn, dest); +} + +static void brw_set_src0( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } + else + { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } + else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } + else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; + } + else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } + else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions. + */ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } + else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + //assert (reg.file == BRW_GENERAL_REGISTER_FILE); + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } + else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } + else { + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + + + +static void brw_set_math_message( struct brw_instruction *insn, + unsigned msg_length, + unsigned response_length, + unsigned function, + unsigned integer_type, + boolean low_precision, + boolean saturate, + unsigned dataType ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; +} + +static void brw_set_urb_message( struct brw_instruction *insn, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean end_of_thread, + boolean complete, + unsigned offset, + unsigned swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_write_message( struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned msg_length, + unsigned pixel_scoreboard_clear, + unsigned response_length, + unsigned end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_read_message( struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned target_cache, + unsigned msg_length, + unsigned response_length, + unsigned end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.dp_read.binding_table_index = binding_table_index; + insn->bits3.dp_read.msg_control = msg_control; + insn->bits3.dp_read.msg_type = msg_type; + insn->bits3.dp_read.target_cache = target_cache; + insn->bits3.dp_read.response_length = response_length; + insn->bits3.dp_read.msg_length = msg_length; + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits3.dp_read.end_of_thread = end_of_thread; +} + +static void brw_set_sampler_message( struct brw_instruction *insn, + unsigned binding_table_index, + unsigned sampler, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + boolean eot) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.response_length = response_length; + insn->bits3.sampler.msg_length = msg_length; + insn->bits3.sampler.end_of_thread = eot; + insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; +} + + + +static struct brw_instruction *next_insn( struct brw_compile *p, + unsigned opcode ) +{ + struct brw_instruction *insn; + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + memcpy(insn, p->current, sizeof(*insn)); + + /* Reset this one-shot flag: + */ + + if (p->current->header.destreg__conditonalmod) { + p->current->header.destreg__conditonalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + + +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + return insn; +} + +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + return insn; +} + + +/*********************************************************************** + * Convenience routines. + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + + + + +void brw_NOP(struct brw_compile *p) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_ud(0x0)); +} + + + + + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack). Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. If the stack is now empty, normal execution resumes. + * + * No attempt is made to deal with stack overflow (14 elements?). + */ +struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = next_insn(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = next_insn(p, BRW_OPCODE_IF); + } + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); + } else { + insn = next_insn(p, BRW_OPCODE_ELSE); + } + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = if_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + + /* Patch the if instruction to point at this instruction. + */ + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); + + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = insn - if_insn; + if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.pad0 = 0; + } + + return insn; +} + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *patch_insn) +{ + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; + + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); + + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. + */ + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + +/* DO/WHILE loop: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) +{ + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_null_reg()); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.mask_control = BRW_MASK_DISABLE; + + return insn; + } +} + + + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) + insn = next_insn(p, BRW_OPCODE_ADD); + else + insn = next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = do_insn - insn; + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + +/* insn->header.mask_control = BRW_MASK_ENABLE; */ + + insn->header.mask_control = BRW_MASK_DISABLE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; +} + + +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn) +{ + struct brw_instruction *landing = &p->store[p->nr_insn]; + + assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); + assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + + jmp_insn->bits3.ud = (landing - jmp_insn) - 1; +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register. It might be simpler + * just to use the flag reg for most WM tasks? + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); + + insn->header.destreg__conditonalmod = conditional; + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + +/* guess_execution_size(insn, src0); */ + + + /* Make it so that future instructions will use the computed flag + * value until brw_set_predicate_control_flag_value() is called + * again. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == 0) { + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + p->flag_value = 0xff; + } +} + + + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/* Invert 8 values + */ +void brw_math( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision ) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); +} + +/* Use 2 send instructions to invert 16 elements + */ +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision ) +{ + struct brw_instruction *insn; + unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditonalmod = msg_reg_nr+1; + + brw_set_dest(insn, offset(dest,1)); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + + + + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + unsigned msg_reg_nr, + unsigned scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + unsigned msg_length = 3; + struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + + brw_set_dp_write_message(insn, + 255, /* bti */ + BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_length, + 0, /* pixel scoreboard */ + 0, /* response_length */ + 0); /* eot */ + } + +} + + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + unsigned scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); /* UW? */ + brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + + brw_set_dp_read_message(insn, + 255, /* bti */ + 3, /* msg_control */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 1, /* target cache */ + 1, /* msg_length */ + 2, /* response_length */ + 0); /* eot */ + } +} + + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + boolean eot) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_dp_write_message(insn, + binding_table_index, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_length, + 1, /* pixel scoreboard */ + response_length, + eot); +} + + + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + boolean eot) +{ + boolean need_stall = 0; + + if(writemask == 0) { +/* debug_printf("%s: zero writemask??\n", __FUNCTION__); */ + return; + } + + /* Hardware doesn't do destination dependency checking on send + * instructions properly. Add a workaround which generates the + * dependency by other means. In practice it seems like this bug + * only crops up for texture samples, and only where registers are + * written by the send and then written again later without being + * read in between. Luckily for us, we already track that + * information and use it to modify the writemask for the + * instruction, so that is a guide for whether a workaround is + * needed. + */ + if (writemask != TGSI_WRITEMASK_XYZW) { + unsigned dst_offset = 0; + unsigned i, newmask = 0, len = 0; + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) + break; + dst_offset += 2; + } + for (; i < 4; i++) { + if (!(writemask & (1<<i))) + break; + newmask |= 1<<i; + len++; + } + + if (newmask != writemask) { + need_stall = 1; +/* debug_printf("need stall %x %x\n", newmask , writemask); */ + } + else { + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + newmask = ~newmask & TGSI_WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, m1, brw_vec8_grf(0,0)); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); + + brw_pop_insn_state(p); + + src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + dest = offset(dest, dst_offset); + response_length = len * 2; + } + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_sampler_message(insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + eot); + } + + if (need_stall) + { + struct brw_reg reg = vec8(offset(dest, response_length-1)); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, FALSE); + brw_MOV(p, reg, reg); + brw_pop_insn_state(p); + } + +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean eot, + boolean writes_complete, + unsigned offset, + unsigned swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_urb_message(insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} + diff --git a/src/gallium/drivers/i965simple/brw_eu_util.c b/src/gallium/drivers/i965simple/brw_eu_util.c new file mode 100644 index 00000000000..3a65b141f07 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_util.c @@ -0,0 +1,126 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math( p, + dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR ); +} + + + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count) +{ + unsigned i; + + dst = vec4(dst); + src = vec4(src); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); + } +} + + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count) +{ + unsigned i; + + dst = vec8(dst); + src = vec8(src); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + } +} + + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + unsigned count) +{ + unsigned i; + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); + brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); + } +} + + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + unsigned count) +{ + unsigned i; + + dst = vec4(dst); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); + brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); + } +} + + + + diff --git a/src/gallium/drivers/i965simple/brw_flush.c b/src/gallium/drivers/i965simple/brw_flush.c new file mode 100644 index 00000000000..5216c680cf6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_flush.c @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_batch.h" + + +/** + * In future we may want a fence-like interface instead of finish. + */ +static void brw_flush( struct pipe_context *pipe, + unsigned flags ) +{ + struct brw_context *brw = brw_context(pipe); + struct pipe_fence_handle *fence; + + /* Do we need to emit an MI_FLUSH command to flush the hardware + * caches? + */ + if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { + struct brw_mi_flush flush; + + memset(&flush, 0, sizeof(flush)); + flush.opcode = CMD_MI_FLUSH; + + if (!(flags & PIPE_FLUSH_RENDER_CACHE)) + flush.flags |= BRW_INHIBIT_FLUSH_RENDER_CACHE; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) + flush.flags |= BRW_FLUSH_READ_CACHE; + + BRW_BATCH_STRUCT(brw, &flush); + } + + /* If there are no flags, just flush pending commands to hardware: + */ + FLUSH_BATCH( &fence ); + + if (flags & PIPE_FLUSH_WAIT) { +// brw->winsys->wait_fence(brw->winsys, fence); + } +} + + + +void brw_init_flush_functions( struct brw_context *brw ) +{ + brw->pipe.flush = brw_flush; +} diff --git a/src/gallium/drivers/i965simple/brw_gs.c b/src/gallium/drivers/i965simple/brw_gs.c new file mode 100644 index 00000000000..de60868ccca --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs.c @@ -0,0 +1,196 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_gs.h" + + + +static void compile_gs_prog( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + struct brw_gs_compile c; + const unsigned *program; + unsigned program_size; + + memset(&c, 0, sizeof(c)); + + c.key = *key; + + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; + + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.func.single_program_flow = 1; + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Note that primitives which don't require a GS program have + * already been weeded out by this stage: + */ + switch (key->primitive) { + case PIPE_PRIM_QUADS: + brw_gs_quads( &c ); + break; + case PIPE_PRIM_QUAD_STRIP: + brw_gs_quad_strip( &c ); + break; + case PIPE_PRIM_LINE_LOOP: + brw_gs_lines( &c ); + break; + case PIPE_PRIM_LINES: + if (key->hint_gs_always) + brw_gs_lines( &c ); + else { + return; + } + break; + case PIPE_PRIM_TRIANGLES: + if (key->hint_gs_always) + brw_gs_tris( &c ); + else { + return; + } + break; + case PIPE_PRIM_POINTS: + if (key->hint_gs_always) + brw_gs_points( &c ); + else { + return; + } + break; + default: + return; + } + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->gs.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_GS_PROG], + key, sizeof(*key), + &brw->gs.prog_data, + &brw->gs.prog_gs_offset); +} + + +static const int gs_prim[PIPE_PRIM_POLYGON+1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINE_LOOP, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_QUADS, + PIPE_PRIM_QUAD_STRIP, + PIPE_PRIM_TRIANGLES +}; + +static void populate_key( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_VS_PROG */ + key->attrs = brw->vs.prog_data->outputs_written; + + /* BRW_NEW_PRIMITIVE */ + key->primitive = gs_prim[brw->primitive]; + + key->hint_gs_always = 0; /* debug code? */ + + key->need_gs_prog = (key->hint_gs_always || + brw->primitive == PIPE_PRIM_QUADS || + brw->primitive == PIPE_PRIM_QUAD_STRIP || + brw->primitive == PIPE_PRIM_LINE_LOOP); +} + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_gs_prog( struct brw_context *brw ) +{ + struct brw_gs_prog_key key; + + /* Populate the key: + */ + populate_key(brw, &key); + + if (brw->gs.prog_active != key.need_gs_prog) { + brw->state.dirty.cache |= CACHE_NEW_GS_PROG; + brw->gs.prog_active = key.need_gs_prog; + } + + if (brw->gs.prog_active) { + if (!search_cache(brw, &key)) + compile_gs_prog( brw, &key ); + } +} + + +const struct brw_tracked_state brw_gs_prog = { + .dirty = { + .brw = BRW_NEW_PRIMITIVE, + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_gs_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_gs.h b/src/gallium/drivers/i965simple/brw_gs.h new file mode 100644 index 00000000000..f09141c6aa1 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs.h @@ -0,0 +1,75 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_GS_H +#define BRW_GS_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_GS_VERTS (4) + +struct brw_gs_prog_key { + unsigned attrs:32; + unsigned primitive:4; + unsigned hint_gs_always:1; + unsigned need_gs_prog:1; + unsigned pad:26; +}; + +struct brw_gs_compile { + struct brw_compile func; + struct brw_gs_prog_key key; + struct brw_gs_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_GS_VERTS]; + } reg; + + /* 3 different ways of expressing vertex size: + */ + unsigned nr_attrs; + unsigned nr_regs; + unsigned nr_bytes; +}; + +#define ATTR_SIZE (4*4) + +void brw_gs_quads( struct brw_gs_compile *c ); +void brw_gs_quad_strip( struct brw_gs_compile *c ); +void brw_gs_tris( struct brw_gs_compile *c ); +void brw_gs_lines( struct brw_gs_compile *c ); +void brw_gs_points( struct brw_gs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_gs_emit.c b/src/gallium/drivers/i965simple/brw_gs_emit.c new file mode 100644 index 00000000000..c3cc90b10f8 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs_emit.c @@ -0,0 +1,148 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_gs.h" + +static void brw_gs_alloc_regs( struct brw_gs_compile *c, + unsigned nr_verts ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->prog_data.urb_read_length = c->nr_regs; + c->prog_data.total_grf = i; +} + + +static void brw_gs_emit_vue(struct brw_gs_compile *c, + struct brw_reg vert, + boolean last, + unsigned header) +{ + struct brw_compile *p = &c->func; + boolean allocate = !last; + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); + + /* Send each vertex as a seperate write to the urb. This is + * different to the concept in brw_sf_emit.c, where subsequent + * writes are used to build up a single urb entry. Each of these + * writes instantiates a seperate urb entry, and a new one must be + * allocated each time. + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response length */ + allocate ? 0 : 1, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_gs_quads( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + /* Use polygons for correct edgeflag behaviour. Note that vertex 3 + * is the PV for quads, but vertex 0 for polygons: + */ + brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_quad_strip( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_tris( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 3); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); +} + +void brw_gs_lines( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 2); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); +} + +void brw_gs_points( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); +} + + + + + + + + diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c new file mode 100644 index 00000000000..3932e9e9394 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs_state.c @@ -0,0 +1,89 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "pipe/p_util.h" + + + +static void upload_gs_unit( struct brw_context *brw ) +{ + struct brw_gs_unit_state gs; + + memset(&gs, 0, sizeof(gs)); + + /* CACHE_NEW_GS_PROG */ + if (brw->gs.prog_active) { + gs.thread0.grf_reg_count = + align(brw->gs.prog_data->total_grf, 16) / 16 - 1; + gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; + gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; + } + else { + gs.thread0.grf_reg_count = 0; + gs.thread0.kernel_start_pointer = 0; + gs.thread3.urb_entry_read_length = 1; + } + + /* BRW_NEW_URB_FENCE */ + gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; + gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + + gs.thread4.max_threads = 0; /* Hardware requirement */ + + if (BRW_DEBUG & DEBUG_STATS) + gs.thread4.stats_enable = 1; + + /* CONSTANT */ + gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + gs.thread1.single_program_flow = 1; + gs.thread3.dispatch_grf_start_reg = 1; + gs.thread3.const_urb_entry_read_offset = 0; + gs.thread3.const_urb_entry_read_length = 0; + gs.thread3.urb_entry_read_offset = 0; + + + brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); +} + + +const struct brw_tracked_state brw_gs_unit = { + .dirty = { + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_GS_PROG + }, + .update = upload_gs_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c new file mode 100644 index 00000000000..925049ecc19 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_misc_state.c @@ -0,0 +1,486 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_batch.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + + + + + +/*********************************************************************** + * Blend color + */ + +static void upload_blend_constant_color(struct brw_context *brw) +{ + struct brw_blend_constant_color bcc; + + memset(&bcc, 0, sizeof(bcc)); + bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc.header.length = sizeof(bcc)/4-2; + bcc.blend_constant_color[0] = brw->attribs.BlendColor.color[0]; + bcc.blend_constant_color[1] = brw->attribs.BlendColor.color[1]; + bcc.blend_constant_color[2] = brw->attribs.BlendColor.color[2]; + bcc.blend_constant_color[3] = brw->attribs.BlendColor.color[3]; + + BRW_CACHED_BATCH_STRUCT(brw, &bcc); +} + + +const struct brw_tracked_state brw_blend_constant_color = { + .dirty = { + .brw = BRW_NEW_BLEND, + .cache = 0 + }, + .update = upload_blend_constant_color +}; + + +/*********************************************************************** + * Drawing rectangle + */ +static void upload_drawing_rect(struct brw_context *brw) +{ + struct brw_drawrect bdr; + + memset(&bdr, 0, sizeof(bdr)); + bdr.header.opcode = CMD_DRAW_RECT; + bdr.header.length = sizeof(bdr)/4 - 2; + bdr.xmin = 0; + bdr.ymin = 0; + bdr.xmax = brw->attribs.FrameBuffer.cbufs[0]->width; + bdr.ymax = brw->attribs.FrameBuffer.cbufs[0]->height; + bdr.xorg = 0; + bdr.yorg = 0; + + /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted + * uncached in brw_draw.c: + */ + BRW_BATCH_STRUCT(brw, &bdr); +} + +const struct brw_tracked_state brw_drawing_rect = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_drawing_rect +}; + +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is the BRW_SS_POOL cache buffer. + */ +static void upload_binding_table_pointers(struct brw_context *brw) +{ + struct brw_binding_table_pointers btp; + memset(&btp, 0, sizeof(btp)); + + btp.header.opcode = CMD_BINDING_TABLE_PTRS; + btp.header.length = sizeof(btp)/4 - 2; + btp.vs = 0; + btp.gs = 0; + btp.clp = 0; + btp.sf = 0; + btp.wm = brw->wm.bind_ss_offset; + + BRW_CACHED_BATCH_STRUCT(brw, &btp); +} + +const struct brw_tracked_state brw_binding_table_pointers = { + .dirty = { + .brw = 0, + .cache = CACHE_NEW_SURF_BIND + }, + .update = upload_binding_table_pointers, +}; + + +/** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer. + */ +static void upload_pipelined_state_pointers(struct brw_context *brw ) +{ + struct brw_pipelined_state_pointers psp; + memset(&psp, 0, sizeof(psp)); + + psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; + psp.header.length = sizeof(psp)/4 - 2; + + psp.vs.offset = brw->vs.state_gs_offset >> 5; + psp.sf.offset = brw->sf.state_gs_offset >> 5; + psp.wm.offset = brw->wm.state_gs_offset >> 5; + psp.cc.offset = brw->cc.state_gs_offset >> 5; + + /* GS gets turned on and off regularly. Need to re-emit URB fence + * after this occurs. + */ + if (brw->gs.prog_active) { + psp.gs.offset = brw->gs.state_gs_offset >> 5; + psp.gs.enable = 1; + } + + if (0) { + psp.clp.offset = brw->clip.state_gs_offset >> 5; + psp.clp.enable = 1; + } + + + if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) + brw->state.dirty.brw |= BRW_NEW_PSP; +} + +const struct brw_tracked_state brw_pipelined_state_pointers = { + .dirty = { + .brw = 0, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .update = upload_pipelined_state_pointers +}; + +static void upload_psp_urb_cbs(struct brw_context *brw ) +{ + upload_pipelined_state_pointers(brw); + brw_upload_urb_fence(brw); + brw_upload_constant_buffer_state(brw); +} + + +const struct brw_tracked_state brw_psp_urb_cbs = { + .dirty = { + .brw = BRW_NEW_URB_FENCE, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .update = upload_psp_urb_cbs +}; + +/** + * Upload the depthbuffer offset and format. + * + * We have to do this per state validation as we need to emit the relocation + * in the batch buffer. + */ +static void upload_depthbuffer(struct brw_context *brw) +{ + struct pipe_surface *depth_surface = brw->attribs.FrameBuffer.zsbuf; + + BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2)); + if (depth_surface == NULL) { + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | + (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + unsigned int format; + + switch (depth_surface->cpp) { + case 2: + format = BRW_DEPTHFORMAT_D16_UNORM; + break; + case 4: + if (depth_surface->format == PIPE_FORMAT_Z32_FLOAT) + format = BRW_DEPTHFORMAT_D32_FLOAT; + else + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + break; + default: + assert(0); + return; + } + + OUT_BATCH(((depth_surface->pitch * depth_surface->cpp) - 1) | + (format << 18) | + (BRW_TILEWALK_YMAJOR << 26) | +// (depth_surface->region->tiled << 27) | + (BRW_SURFACE_2D << 29)); + OUT_RELOC(depth_surface->buffer, + PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); + OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | + ((depth_surface->pitch - 1) << 6) | + ((depth_surface->height - 1) << 19)); + OUT_BATCH(0); + } + ADVANCE_BATCH(); +} + +const struct brw_tracked_state brw_depthbuffer = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_depthbuffer, +}; + + + + +/*********************************************************************** + * Polygon stipple packet + */ + +static void upload_polygon_stipple(struct brw_context *brw) +{ + struct brw_polygon_stipple bps; + unsigned i; + + memset(&bps, 0, sizeof(bps)); + bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; + bps.header.length = sizeof(bps)/4-2; + + /* XXX: state tracker should send *all* state down initially! + */ + if (brw->attribs.PolygonStipple) + for (i = 0; i < 32; i++) + bps.stipple[i] = brw->attribs.PolygonStipple->stipple[31 - i]; /* invert */ + + BRW_CACHED_BATCH_STRUCT(brw, &bps); +} + +const struct brw_tracked_state brw_polygon_stipple = { + .dirty = { + .brw = BRW_NEW_STIPPLE, + .cache = 0 + }, + .update = upload_polygon_stipple +}; + + +/*********************************************************************** + * Line stipple packet + */ + +static void upload_line_stipple(struct brw_context *brw) +{ + struct brw_line_stipple bls; + float tmp; + int tmpi; + + memset(&bls, 0, sizeof(bls)); + bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls.header.length = sizeof(bls)/4 - 2; + + bls.bits0.pattern = brw->attribs.Raster->line_stipple_pattern; + bls.bits1.repeat_count = brw->attribs.Raster->line_stipple_factor; + + tmp = 1.0 / (float) brw->attribs.Raster->line_stipple_factor; + tmpi = tmp * (1<<13); + + + bls.bits1.inverse_repeat_count = tmpi; + + BRW_CACHED_BATCH_STRUCT(brw, &bls); +} + +const struct brw_tracked_state brw_line_stipple = { + .dirty = { + .brw = BRW_NEW_STIPPLE, + .cache = 0 + }, + .update = upload_line_stipple +}; + + +/*********************************************************************** + * Misc constant state packets + */ + +static void upload_pipe_control(struct brw_context *brw) +{ + struct brw_pipe_control pc; + + return; + + memset(&pc, 0, sizeof(pc)); + + pc.header.opcode = CMD_PIPE_CONTROL; + pc.header.length = sizeof(pc)/4 - 2; + pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; + + pc.header.instruction_state_cache_flush_enable = 1; + + pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; + + BRW_BATCH_STRUCT(brw, &pc); +} + +const struct brw_tracked_state brw_pipe_control = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_pipe_control +}; + + +/*********************************************************************** + * Misc invarient state packets + */ + +static void upload_invarient_state( struct brw_context *brw ) +{ + { + struct brw_mi_flush flush; + + memset(&flush, 0, sizeof(flush)); + flush.opcode = CMD_MI_FLUSH; + flush.flags = BRW_FLUSH_STATE_CACHE | BRW_FLUSH_READ_CACHE; + BRW_BATCH_STRUCT(brw, &flush); + } + + { + /* 0x61040000 Pipeline Select */ + /* PipelineSelect : 0 */ + struct brw_pipeline_select ps; + + memset(&ps, 0, sizeof(ps)); + ps.header.opcode = CMD_PIPELINE_SELECT; + ps.header.pipeline_select = 0; + BRW_BATCH_STRUCT(brw, &ps); + } + + { + struct brw_global_depth_offset_clamp gdo; + memset(&gdo, 0, sizeof(gdo)); + + /* Disable depth offset clamping. + */ + gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; + gdo.header.length = sizeof(gdo)/4 - 2; + gdo.depth_offset_clamp = 0.0; + + BRW_BATCH_STRUCT(brw, &gdo); + } + + + /* 0x61020000 State Instruction Pointer */ + { + struct brw_system_instruction_pointer sip; + memset(&sip, 0, sizeof(sip)); + + sip.header.opcode = CMD_STATE_INSN_POINTER; + sip.header.length = 0; + sip.bits0.pad = 0; + sip.bits0.system_instruction_pointer = 0; + BRW_BATCH_STRUCT(brw, &sip); + } + + + { + struct brw_vf_statistics vfs; + memset(&vfs, 0, sizeof(vfs)); + + vfs.opcode = CMD_VF_STATISTICS; + if (BRW_DEBUG & DEBUG_STATS) + vfs.statistics_enable = 1; + + BRW_BATCH_STRUCT(brw, &vfs); + } + + + { + struct brw_polygon_stipple_offset bpso; + + memset(&bpso, 0, sizeof(bpso)); + bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; + bpso.header.length = sizeof(bpso)/4-2; + bpso.bits0.x_offset = 0; + bpso.bits0.y_offset = 0; + + BRW_BATCH_STRUCT(brw, &bpso); + } +} + +const struct brw_tracked_state brw_invarient_state = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_invarient_state +}; + +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools. This comes at the expense of memory, and more expensive cache + * misses. + */ +static void upload_state_base_address( struct brw_context *brw ) +{ + /* Output the structure (brw_state_base_address) directly to the + * batchbuffer, so we can emit relocations inline. + */ + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_RELOC(brw->pool[BRW_GS_POOL].buffer, + PIPE_BUFFER_USAGE_GPU_READ, + 1); /* General state base address */ + OUT_RELOC(brw->pool[BRW_SS_POOL].buffer, + PIPE_BUFFER_USAGE_GPU_READ, + 1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); +} + + +const struct brw_tracked_state brw_state_base_address = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_state_base_address +}; diff --git a/src/gallium/drivers/i965simple/brw_reg.h b/src/gallium/drivers/i965simple/brw_reg.h new file mode 100644 index 00000000000..9e885c3b3b7 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_reg.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#define CMD_MI (0x0 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) + +#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) + +/* Stalls command execution waiting for the given events to have occurred. */ +#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + +/* Primitive dispatch on 830-945 */ +#define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) +#define PRIM_INDIRECT (1<<23) +#define PRIM_INLINE (0<<23) +#define PRIM_INDIRECT_SEQUENTIAL (0<<17) +#define PRIM_INDIRECT_ELTS (1<<17) + +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) + +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) + +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) + +/* BR13 */ +#define BR13_565 (0x1 << 24) +#define BR13_8888 (0x3 << 24) + +#define FENCE_LINEAR 0 +#define FENCE_XMAJOR 1 +#define FENCE_YMAJOR 2 diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c new file mode 100644 index 00000000000..7c83b81c858 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -0,0 +1,351 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" +#include "brw_state.h" +#include "tgsi/util/tgsi_parse.h" + + +static void compile_sf_prog( struct brw_context *brw, + struct brw_sf_prog_key *key ) +{ + struct brw_sf_compile c; + const unsigned *program; + unsigned program_size; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.key = *key; + + + c.nr_attrs = c.key.vp_output_count; + c.nr_attr_regs = (c.nr_attrs+1)/2; + + c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */ + c.nr_setup_regs = (c.nr_setup_attrs+1)/2; + + c.prog_data.urb_read_length = c.nr_attr_regs; + c.prog_data.urb_entry_size = c.nr_setup_regs * 2; + + + /* Which primitive? Or all three? + */ + switch (key->primitive) { + case SF_TRIANGLES: + c.nr_verts = 3; + brw_emit_tri_setup( &c ); + break; + case SF_LINES: + c.nr_verts = 2; + brw_emit_line_setup( &c ); + break; + case SF_POINTS: + c.nr_verts = 1; + brw_emit_point_setup( &c ); + break; + + case SF_UNFILLED_TRIS: + default: + assert(0); + return; + } + + + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->sf.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_sf_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_SF_PROG], + key, sizeof(*key), + &brw->sf.prog_data, + &brw->sf.prog_gs_offset); +} + + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_sf_prog( struct brw_context *brw ) +{ + const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; + struct brw_sf_prog_key key; + struct tgsi_parse_context parse; + int i, done = 0; + + + memset(&key, 0, sizeof(key)); + + /* Populate the key, noting state dependencies: + */ + /* CACHE_NEW_VS_PROG */ + key.vp_output_count = brw->vs.prog_data->outputs_written; + + /* BRW_NEW_FS */ + key.fp_input_count = brw->attribs.FragmentProgram->info.nr_regs[TGSI_FILE_INPUT]; + + + /* BRW_NEW_REDUCED_PRIMITIVE */ + switch (brw->reduced_primitive) { + case PIPE_PRIM_TRIANGLES: +// if (key.attrs & (1<<VERT_RESULT_EDGE)) +// key.primitive = SF_UNFILLED_TRIS; +// else + key.primitive = SF_TRIANGLES; + break; + case PIPE_PRIM_LINES: + key.primitive = SF_LINES; + break; + case PIPE_PRIM_POINTS: + key.primitive = SF_POINTS; + break; + } + + + + /* Scan fp inputs to figure out what interpolation modes are + * required for each incoming vp output. There is an assumption + * that the state tracker makes sure there is a 1:1 linkage between + * these sets of attributes (XXX: position??) + */ + tgsi_parse_init( &parse, fs->program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + int interp_mode = parse.FullToken.FullDeclaration.Interpolation.Interpolate; + //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName; + //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + + debug_printf("fs input %d..%d interp mode %d\n", first, last, interp_mode); + + switch (interp_mode) { + case TGSI_INTERPOLATE_CONSTANT: + for (i = first; i <= last; i++) + key.const_mask |= (1 << i); + break; + case TGSI_INTERPOLATE_LINEAR: + for (i = first; i <= last; i++) + key.linear_mask |= (1 << i); + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + for (i = first; i <= last; i++) + key.persp_mask |= (1 << i); + break; + default: + break; + } + + /* Also need stuff for flat shading, twosided color. + */ + + } + break; + default: + done = 1; + break; + } + } + + /* Hack: Adjust for position. Optimize away when not required (ie + * for perspective interpolation). + */ + key.persp_mask <<= 1; + key.linear_mask <<= 1; + key.linear_mask |= 1; + key.const_mask <<= 1; + + debug_printf("key.persp_mask: %x\n", key.persp_mask); + debug_printf("key.linear_mask: %x\n", key.linear_mask); + debug_printf("key.const_mask: %x\n", key.const_mask); + + +// key.do_point_sprite = brw->attribs.Point->PointSprite; +// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; + +// key.do_flat_shading = (brw->attribs.Raster->flatshade); +// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); + +// if (key.do_twoside_color) +// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); + + + if (!search_cache(brw, &key)) + compile_sf_prog( brw, &key ); +} + + +const struct brw_tracked_state brw_sf_prog = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_REDUCED_PRIMITIVE | + BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, + }, + .update = upload_sf_prog +}; + + + +#if 0 +/* Build a struct like the one we'd like the state tracker to pass to + * us. + */ +static void update_sf_linkage( struct brw_context *brw ) +{ + const struct brw_vertex_program *vs = brw->attribs.VertexProgram; + const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; + struct pipe_setup_linkage state; + struct tgsi_parse_context parse; + + int i, j; + int nr_vp_outputs = 0; + int done = 0; + + struct { + unsigned semantic:8; + unsigned semantic_index:16; + } fp_semantic[32], vp_semantic[32]; + + memset(&state, 0, sizeof(state)); + + state.fp_input_count = 0; + + + + + + + assert(state.fp_input_count == fs->program.num_inputs); + + + /* Then scan vp outputs + */ + done = 0; + tgsi_parse_init( &parse, vs->program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + + for (i = first; i < last; i++) { + vp_semantic[i].semantic = + parse.FullToken.FullDeclaration.Semantic.SemanticName; + vp_semantic[i].semantic_index = + parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + } + + assert(last > nr_vp_outputs); + nr_vp_outputs = last; + } + break; + default: + done = 1; + break; + } + } + + + /* Now match based on semantic information. + */ + for (i = 0; i< state.fp_input_count; i++) { + for (j = 0; j < nr_vp_outputs; j++) { + if (fp_semantic[i].semantic == vp_semantic[j].semantic && + fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { + state.fp_input[i].vp_output = j; + } + } + if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) { + for (j = 0; j < nr_vp_outputs; j++) { + if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic && + fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { + state.fp_input[i].bf_vp_output = j; + } + } + } + } + + if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) { + brw->sf.linkage = state; + brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE; + } +} + + +const struct brw_tracked_state brw_sf_linkage = { + .dirty = { + .brw = (BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, + }, + .update = update_sf_linkage +}; + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_sf.h b/src/gallium/drivers/i965simple/brw_sf.h new file mode 100644 index 00000000000..b7ada475604 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf.h @@ -0,0 +1,122 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_SF_H +#define BRW_SF_H + +#include "brw_context.h" +#include "brw_eu.h" + + +#define SF_POINTS 0 +#define SF_LINES 1 +#define SF_TRIANGLES 2 +#define SF_UNFILLED_TRIS 3 + + + +struct brw_sf_prog_key { + unsigned vp_output_count:5; + unsigned fp_input_count:5; + + unsigned primitive:2; + unsigned do_twoside_color:1; + unsigned do_flat_shading:1; + unsigned frontface_ccw:1; + unsigned do_point_sprite:1; + + /* Interpolation masks; + */ + unsigned linear_mask; + unsigned persp_mask; + unsigned const_mask; + + +// int SpriteOrigin; +}; + +struct brw_sf_point_tex { + boolean CoordReplace; +}; + +struct brw_sf_compile { + struct brw_compile func; + struct brw_sf_prog_key key; + struct brw_sf_prog_data prog_data; + + struct brw_reg pv; + struct brw_reg det; + struct brw_reg dx0; + struct brw_reg dx2; + struct brw_reg dy0; + struct brw_reg dy2; + + /* z and 1/w passed in seperately: + */ + struct brw_reg z[3]; + struct brw_reg inv_w[3]; + + /* The vertices: + */ + struct brw_reg vert[3]; + + /* Temporaries, allocated after last vertex reg. + */ + struct brw_reg inv_det; + struct brw_reg a1_sub_a0; + struct brw_reg a2_sub_a0; + struct brw_reg tmp; + + struct brw_reg m1Cx; + struct brw_reg m2Cy; + struct brw_reg m3C0; + + unsigned nr_verts; + unsigned nr_attrs; + unsigned nr_attr_regs; + unsigned nr_setup_attrs; + unsigned nr_setup_regs; +#if 0 + ubyte attr_to_idx[VERT_RESULT_MAX]; + ubyte idx_to_attr[VERT_RESULT_MAX]; + struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; +#endif +}; + + +void brw_emit_tri_setup( struct brw_sf_compile *c ); +void brw_emit_line_setup( struct brw_sf_compile *c ); +void brw_emit_point_setup( struct brw_sf_compile *c ); +void brw_emit_point_sprite_setup( struct brw_sf_compile *c ); +void brw_emit_anyprim_setup( struct brw_sf_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_sf_emit.c b/src/gallium/drivers/i965simple/brw_sf_emit.c new file mode 100644 index 00000000000..78d6fa5e9e5 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf_emit.c @@ -0,0 +1,382 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" + + + +/*********************************************************************** + * Triangle setup. + */ + + +static void alloc_regs( struct brw_sf_compile *c ) +{ + unsigned reg, i; + + /* Values computed by fixed function unit: + */ + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); + c->det = brw_vec1_grf(1, 2); + c->dx0 = brw_vec1_grf(1, 3); + c->dx2 = brw_vec1_grf(1, 4); + c->dy0 = brw_vec1_grf(1, 5); + c->dy2 = brw_vec1_grf(1, 6); + + /* z and 1/w passed in seperately: + */ + c->z[0] = brw_vec1_grf(2, 0); + c->inv_w[0] = brw_vec1_grf(2, 1); + c->z[1] = brw_vec1_grf(2, 2); + c->inv_w[1] = brw_vec1_grf(2, 3); + c->z[2] = brw_vec1_grf(2, 4); + c->inv_w[2] = brw_vec1_grf(2, 5); + + /* The vertices: + */ + reg = 3; + for (i = 0; i < c->nr_verts; i++) { + c->vert[i] = brw_vec8_grf(reg, 0); + reg += c->nr_attr_regs; + } + + /* Temporaries, allocated after last vertex reg. + */ + c->inv_det = brw_vec1_grf(reg, 0); reg++; + c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->tmp = brw_vec8_grf(reg, 0); reg++; + + /* Note grf allocation: + */ + c->prog_data.total_grf = reg; + + + /* Outputs of this program - interpolation coefficients for + * rasterization: + */ + c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); + c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); + c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); +} + + +static void copy_z_inv_w( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + brw_push_insn_state(p); + + /* Copy both scalars with a single MOV: + */ + for (i = 0; i < c->nr_verts; i++) + brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); + + brw_pop_insn_state(p); +} + + +static void invert_det( struct brw_sf_compile *c) +{ + brw_math(&c->func, + c->inv_det, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->det, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + +} + +#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ + FRAG_BIT_COL0 | \ + FRAG_BIT_COL1) + +static boolean calculate_masks( struct brw_sf_compile *c, + unsigned reg, + ushort *pc, + ushort *pc_persp, + ushort *pc_linear) +{ + boolean is_last_attr = (reg == c->nr_setup_regs - 1); + unsigned persp_mask = c->key.persp_mask; + unsigned linear_mask = c->key.linear_mask; + + debug_printf("persp_mask: %x\n", persp_mask); + debug_printf("linear_mask: %x\n", linear_mask); + + *pc_persp = 0; + *pc_linear = 0; + *pc = 0xf; + + if (persp_mask & (1 << (reg*2))) + *pc_persp = 0xf; + + if (linear_mask & (1 << (reg*2))) + *pc_linear = 0xf; + + /* Maybe only processs one attribute on the final round: + */ + if (reg*2+1 < c->nr_setup_attrs) { + *pc |= 0xf0; + + if (persp_mask & (1 << (reg*2+1))) + *pc_persp |= 0xf0; + + if (linear_mask & (1 << (reg*2+1))) + *pc_linear |= 0xf0; + } + + debug_printf("pc: %x\n", *pc); + debug_printf("pc_persp: %x\n", *pc_persp); + debug_printf("pc_linear: %x\n", *pc_linear); + + + return is_last_attr; +} + + + +void brw_emit_tri_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + debug_printf("%s START ==============\n", __FUNCTION__); + + c->nr_verts = 3; + alloc_regs(c); + invert_det(c); + copy_z_inv_w(c); + + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + struct brw_reg a2 = offset(c->vert[2], i); + ushort pc = 0, pc_persp = 0, pc_linear = 0; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + brw_MUL(p, a2, a2, c->inv_w[2]); + } + + + /* Calculate coefficients for interpolated values: + */ + if (pc_linear) + { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); + + /* calculate dA/dx + */ + brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); + brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + /* calculate dA/dy + */ + brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); + brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in + * the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + } + } + + debug_printf("%s DONE ==============\n", __FUNCTION__); + +} + + + +void brw_emit_line_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + + c->nr_verts = 2; + alloc_regs(c); + invert_det(c); + copy_z_inv_w(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + ushort pc, pc_persp, pc_linear; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + } + + /* Calculate coefficients for position, color: + */ + if (pc_linear) { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + + +/* Points setup - several simplifications as all attributes are + * constant across the face of the point (point sprites excluded!) + */ +void brw_emit_point_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + c->nr_verts = 1; + alloc_regs(c); + copy_z_inv_w(c); + + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ + + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_reg a0 = offset(c->vert[0], i); + ushort pc, pc_persp, pc_linear; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + /* This seems odd as the values are all constant, but the + * fragment shader will be expecting it: + */ + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + + + /* The delta values are always zero, just send the starting + * coordinate. Again, this is to fit in with the interpolation + * code in the fragment shader. + */ + { + brw_set_predicate_control_flag_value(p, pc); + + brw_MOV(p, c->m3C0, a0); /* constant value */ + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c new file mode 100644 index 00000000000..9acd3ea61b2 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf_state.c @@ -0,0 +1,180 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "pipe/p_util.h" + +static void upload_sf_vp(struct brw_context *brw) +{ + struct brw_sf_viewport sfv; + + memset(&sfv, 0, sizeof(sfv)); + + + /* BRW_NEW_VIEWPORT */ + { + const float *scale = brw->attribs.Viewport.scale; + const float *trans = brw->attribs.Viewport.translate; + + sfv.viewport.m00 = scale[0]; + sfv.viewport.m11 = scale[1]; + sfv.viewport.m22 = scale[2]; + sfv.viewport.m30 = trans[0]; + sfv.viewport.m31 = trans[1]; + sfv.viewport.m32 = trans[2]; + } + + /* _NEW_SCISSOR */ + sfv.scissor.xmin = brw->attribs.Scissor.minx; + sfv.scissor.xmax = brw->attribs.Scissor.maxx - 1; + sfv.scissor.ymin = brw->attribs.Scissor.miny; + sfv.scissor.ymax = brw->attribs.Scissor.maxy - 1; + + brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); +} + +const struct brw_tracked_state brw_sf_vp = { + .dirty = { + .brw = (BRW_NEW_SCISSOR | + BRW_NEW_VIEWPORT), + .cache = 0 + }, + .update = upload_sf_vp +}; + +static void upload_sf_unit( struct brw_context *brw ) +{ + struct brw_sf_unit_state sf; + memset(&sf, 0, sizeof(sf)); + + /* CACHE_NEW_SF_PROG */ + sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1; + sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; + sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; + + sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + sf.thread3.dispatch_grf_start_reg = 3; + sf.thread3.urb_entry_read_offset = 1; + + /* BRW_NEW_URB_FENCE */ + sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries; + sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; + sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1; + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + sf.thread4.max_threads = 0; + + if (BRW_DEBUG & DEBUG_STATS) + sf.thread4.stats_enable = 1; + + /* CACHE_NEW_SF_VP */ + sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; + sf.sf5.viewport_transform = 1; + + /* BRW_NEW_RASTER */ + if (brw->attribs.Raster->scissor) + sf.sf6.scissor = 1; + +#if 0 + if (brw->attribs.Polygon->FrontFace == GL_CCW) + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + else + sf.sf5.front_winding = BRW_FRONTWINDING_CW; + + + if (brw->attribs.Polygon->CullFlag) { + switch (brw->attribs.Polygon->CullFaceMode) { + case GL_FRONT: + sf.sf6.cull_mode = BRW_CULLMODE_FRONT; + break; + case GL_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BACK; + break; + case GL_FRONT_AND_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BOTH; + break; + default: + assert(0); + break; + } + } + else + sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#else + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#endif + + sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1); + + sf.sf6.line_endcap_aa_region_width = 1; + if (brw->attribs.Raster->line_smooth) + sf.sf6.aa_enable = 1; + else if (sf.sf6.line_width <= 0x2) + sf.sf6.line_width = 0; + + sf.sf6.point_rast_rule = 1; /* opengl conventions */ + + sf.sf7.sprite_point = brw->attribs.Raster->point_sprite; + sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3); + sf.sf7.use_point_size_state = !brw->attribs.Raster->point_size_per_vertex; + + /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: + */ + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + sf.sf7.line_last_pixel_enable = 0; + + /* Set bias for OpenGL rasterization rules: + */ + sf.sf6.dest_org_vbias = 0x8; + sf.sf6.dest_org_hbias = 0x8; + + brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); +} + + +const struct brw_tracked_state brw_sf_unit = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_URB_FENCE), + .cache = (CACHE_NEW_SF_VP | + CACHE_NEW_SF_PROG) + }, + .update = upload_sf_unit +}; + + diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c new file mode 100644 index 00000000000..431b45466a6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -0,0 +1,49 @@ + +#include "brw_context.h" +#include "brw_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + + + + +void brw_shader_info(const struct tgsi_token *tokens, + struct brw_shader_info *info ) +{ + struct tgsi_parse_context parse; + int done = 0; + + tgsi_parse_init( &parse, tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned last = decl->u.DeclarationRange.Last; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + // Broken by crazy wpos init: + //assert( info->nr_regs[decl->Declaration.File] <= last); + + info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File], + last+1); + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + +} diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c new file mode 100644 index 00000000000..95dfce88e4a --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -0,0 +1,424 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Zack Rusin <zack@tungstengraphics.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_dump.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_draw.h" + + +#define DUP( TYPE, VAL ) \ +do { \ + struct TYPE *x = malloc(sizeof(*x)); \ + memcpy(x, VAL, sizeof(*x) ); \ + return x; \ +} while (0) + +/************************************************************************ + * Blend + */ +static void * +brw_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + DUP( pipe_blend_state, blend ); +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Blend = (struct pipe_blend_state*)blend; + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + + +static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + free(blend); +} + +static void brw_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.BlendColor = *blend_color; + + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + +/************************************************************************ + * Sampler + */ + +static void * +brw_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + DUP( pipe_sampler_state, sampler ); +} + +static void brw_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Samplers[unit] = sampler; + brw->state.dirty.brw |= BRW_NEW_SAMPLER; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + free(sampler); +} + + +/************************************************************************ + * Depth stencil + */ + +static void * +brw_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + DUP( pipe_depth_stencil_alpha_state, depth_stencil ); +} + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + free(depth_stencil); +} + +/************************************************************************ + * Scissor + */ +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); + brw->state.dirty.brw |= BRW_NEW_SCISSOR; +} + + +/************************************************************************ + * Stipple + */ + +static void brw_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + +/************************************************************************ + * Fragment shader + */ + +static void * brw_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); + + /* XXX: Do I have to duplicate the tokens as well?? + */ + brw_fp->program = *shader; + brw_fp->id = brw_context(pipe)->program_id++; + + brw_shader_info(shader->tokens, + &brw_fp->info); + + tgsi_dump(shader->tokens, 0); + + + return (void *)brw_fp; +} + +static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; + brw->state.dirty.brw |= BRW_NEW_FS; +} + +static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + FREE(shader); +} + + +/************************************************************************ + * Vertex shader and other TNL state + */ + +static void *brw_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); + + /* XXX: Do I have to duplicate the tokens as well?? + */ + brw_vp->program = *shader; + brw_vp->id = brw_context(pipe)->program_id++; + brw_shader_info(shader->tokens, + &brw_vp->info); + + tgsi_dump(shader->tokens, 0); + + return (void *)brw_vp; +} + +static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; + brw->state.dirty.brw |= BRW_NEW_VS; + + debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); +} + +static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + FREE(shader); +} + + +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Clip = *clip; +} + + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Viewport = *viewport; /* struct copy */ + brw->state.dirty.brw |= BRW_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + //draw_set_viewport_state(brw->draw, viewport); +} + + +static void brw_set_vertex_buffer( struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer ) +{ + struct brw_context *brw = brw_context(pipe); + brw->vb.vbo_array[index] = buffer; +} + +static void brw_set_vertex_element(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *element) +{ + /* flush ? */ + struct brw_context *brw = brw_context(pipe); + + assert(index < PIPE_ATTRIB_MAX); + struct brw_vertex_element_state el; + memset(&el, 0, sizeof(el)); + + el.ve0.src_offset = element->src_offset; + el.ve0.src_format = brw_translate_surface_format(element->src_format); + el.ve0.valid = 1; + el.ve0.vertex_buffer_index = element->vertex_buffer_index; + + el.ve1.dst_offset = index * 4; + + el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + + switch (element->nr_components) { + case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; + case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; + case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + brw->vb.inputs[index] = el; +} + + + +/************************************************************************ + * Constant buffers + */ + +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + + assert(buf == 0 || index == 0); + + brw->attribs.Constants[shader] = buf; + brw->state.dirty.brw |= BRW_NEW_CONSTANTS; +} + + +/************************************************************************ + * Texture surfaces + */ + + +static void brw_set_sampler_texture(struct pipe_context *pipe, + unsigned unit, + struct pipe_texture *texture) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Texture[unit] = (struct brw_texture*)texture; /* ptr, not struct */ + + brw->state.dirty.brw |= BRW_NEW_TEXTURE; +} + + +/************************************************************************ + * Render targets, etc + */ + +static void brw_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FrameBuffer = *fb; /* struct copy */ + + brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; +} + + + +/************************************************************************ + * Rasterizer state + */ + +static void * +brw_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + DUP(pipe_rasterizer_state, rasterizer); +} + +static void brw_bind_rasterizer_state( struct pipe_context *pipe, + void *setup ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; + + /* Also pass-through to draw module: + */ + //draw_set_rasterizer_state(brw->draw, setup); + + brw->state.dirty.brw |= BRW_NEW_RASTERIZER; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + free(setup); +} + + + +void +brw_init_state_functions( struct brw_context *brw ) +{ + brw->pipe.create_blend_state = brw_create_blend_state; + brw->pipe.bind_blend_state = brw_bind_blend_state; + brw->pipe.delete_blend_state = brw_delete_blend_state; + + brw->pipe.create_sampler_state = brw_create_sampler_state; + brw->pipe.bind_sampler_state = brw_bind_sampler_state; + brw->pipe.delete_sampler_state = brw_delete_sampler_state; + + brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; + + brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; + brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; + brw->pipe.create_fs_state = brw_create_fs_state; + brw->pipe.bind_fs_state = brw_bind_fs_state; + brw->pipe.delete_fs_state = brw_delete_fs_state; + brw->pipe.create_vs_state = brw_create_vs_state; + brw->pipe.bind_vs_state = brw_bind_vs_state; + brw->pipe.delete_vs_state = brw_delete_vs_state; + + brw->pipe.set_blend_color = brw_set_blend_color; + brw->pipe.set_clip_state = brw_set_clip_state; + brw->pipe.set_constant_buffer = brw_set_constant_buffer; + brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; + +// brw->pipe.set_feedback_state = brw_set_feedback_state; +// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; + + brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; + brw->pipe.set_scissor_state = brw_set_scissor_state; + brw->pipe.set_sampler_texture = brw_set_sampler_texture; + brw->pipe.set_viewport_state = brw_set_viewport_state; + brw->pipe.set_vertex_buffer = brw_set_vertex_buffer; + brw->pipe.set_vertex_element = brw_set_vertex_element; +} diff --git a/src/gallium/drivers/i965simple/brw_state.h b/src/gallium/drivers/i965simple/brw_state.h new file mode 100644 index 00000000000..258e9a556e2 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state.h @@ -0,0 +1,158 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_STATE_H +#define BRW_STATE_H + +#include "brw_context.h" +#include "brw_winsys.h" + + +const struct brw_tracked_state brw_blend_constant_color; +const struct brw_tracked_state brw_cc_unit; +const struct brw_tracked_state brw_cc_vp; +const struct brw_tracked_state brw_clip_prog; +const struct brw_tracked_state brw_clip_unit; +const struct brw_tracked_state brw_constant_buffer_state; +const struct brw_tracked_state brw_constant_buffer; +const struct brw_tracked_state brw_curbe_offsets; +const struct brw_tracked_state brw_invarient_state; +const struct brw_tracked_state brw_gs_prog; +const struct brw_tracked_state brw_gs_unit; +const struct brw_tracked_state brw_drawing_rect; +const struct brw_tracked_state brw_line_stipple; +const struct brw_tracked_state brw_pipelined_state_pointers; +const struct brw_tracked_state brw_binding_table_pointers; +const struct brw_tracked_state brw_depthbuffer; +const struct brw_tracked_state brw_polygon_stipple_offset; +const struct brw_tracked_state brw_polygon_stipple; +const struct brw_tracked_state brw_program_parameters; +const struct brw_tracked_state brw_recalculate_urb_fence; +const struct brw_tracked_state brw_sf_prog; +const struct brw_tracked_state brw_sf_unit; +const struct brw_tracked_state brw_sf_vp; +const struct brw_tracked_state brw_state_base_address; +const struct brw_tracked_state brw_urb_fence; +const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_prog; +const struct brw_tracked_state brw_vs_unit; +const struct brw_tracked_state brw_wm_prog; +const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_surfaces; +const struct brw_tracked_state brw_wm_unit; + +const struct brw_tracked_state brw_psp_urb_cbs; + +const struct brw_tracked_state brw_active_vertprog; +const struct brw_tracked_state brw_tnl_vertprog; +const struct brw_tracked_state brw_pipe_control; + +const struct brw_tracked_state brw_clear_surface_cache; +const struct brw_tracked_state brw_clear_batch_cache; + +/*********************************************************************** + * brw_state_cache.c + */ +unsigned brw_cache_data(struct brw_cache *cache, + const void *data ); + +unsigned brw_cache_data_sz(struct brw_cache *cache, + const void *data, + unsigned data_sz); + +unsigned brw_upload_cache( struct brw_cache *cache, + const void *key, + unsigned key_sz, + const void *data, + unsigned data_sz, + const void *aux, + void *aux_return ); + +boolean brw_search_cache( struct brw_cache *cache, + const void *key, + unsigned key_size, + void *aux_return, + unsigned *offset_return); + +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); + +static inline struct pipe_buffer *brw_cache_buffer(struct brw_context *brw, + enum brw_cache_id id) +{ + return brw->cache[id].pool->buffer; +} + +/*********************************************************************** + * brw_state_batch.c + */ +#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) + +boolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + unsigned sz ); + +void brw_destroy_batch_cache( struct brw_context *brw ); + + +/*********************************************************************** + * brw_state_pool.c + */ +void brw_init_pools( struct brw_context *brw ); +void brw_destroy_pools( struct brw_context *brw ); + +boolean brw_pool_alloc( struct brw_mem_pool *pool, + unsigned size, + unsigned alignment, + unsigned *offset_return); + +void brw_pool_fence( struct brw_context *brw, + struct brw_mem_pool *pool, + unsigned fence ); + + +void brw_pool_check_wrap( struct brw_context *brw, + struct brw_mem_pool *pool ); + +void brw_clear_all_caches( struct brw_context *brw ); +void brw_invalidate_pools( struct brw_context *brw ); +void brw_clear_batch_cache_flush( struct brw_context *brw ); + + +/* brw_shader_info.c + */ + +void brw_shader_info(const struct tgsi_token *tokens, + struct brw_shader_info *info ); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c new file mode 100644 index 00000000000..35db76b5941 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_batch.c @@ -0,0 +1,113 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_state.h" +#include "brw_winsys.h" + +#include "pipe/p_util.h" + +/* A facility similar to the data caching code above, which aims to + * prevent identical commands being issued repeatedly. + */ +boolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + unsigned sz ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + struct header *newheader = (struct header *)data; + + if (brw->emit_state_always) { + brw_batchbuffer_data(brw->winsys, data, sz); + return TRUE; + } + + while (item) { + if (item->header->opcode == newheader->opcode) { + if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) + return FALSE; + if (item->sz != sz) { + FREE(item->header); + item->header = MALLOC(sz); + item->sz = sz; + } + goto emit; + } + item = item->next; + } + + assert(!item); + item = CALLOC_STRUCT(brw_cached_batch_item); + item->header = MALLOC(sz); + item->sz = sz; + item->next = brw->cached_batch_items; + brw->cached_batch_items = item; + +emit: + memcpy(item->header, newheader, sz); + brw_batchbuffer_data(brw->winsys, data, sz); + return TRUE; +} + +static void clear_batch_cache( struct brw_context *brw ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + + while (item) { + struct brw_cached_batch_item *next = item->next; + free((void *)item->header); + free(item); + item = next; + } + + brw->cached_batch_items = NULL; + + + brw_clear_all_caches(brw); + + brw_invalidate_pools(brw); +} + +void brw_clear_batch_cache_flush( struct brw_context *brw ) +{ + clear_batch_cache(brw); + +/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ + + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + + + +void brw_destroy_batch_cache( struct brw_context *brw ) +{ + clear_batch_cache(brw); +} diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c new file mode 100644 index 00000000000..b3a5124461d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_cache.c @@ -0,0 +1,443 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_state.h" + +#include "brw_wm.h" +#include "brw_vs.h" +#include "brw_clip.h" +#include "brw_sf.h" +#include "brw_gs.h" + +#include "pipe/p_util.h" + + + +/*********************************************************************** + * Check cache for uploaded version of struct, else upload new one. + * Fail when memory is exhausted. + * + * XXX: FIXME: Currently search is so slow it would be quicker to + * regenerate the data every time... + */ + +static unsigned hash_key( const void *key, unsigned key_size ) +{ + unsigned *ikey = (unsigned *)key; + unsigned hash = 0, i; + + assert(key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < key_size/4; i++) + hash ^= ikey[i]; + + return hash; +} + +static struct brw_cache_item *search_cache( struct brw_cache *cache, + unsigned hash, + const void *key, + unsigned key_size) +{ + struct brw_cache_item *c; + + for (c = cache->items[hash % cache->size]; c; c = c->next) { + if (c->hash == hash && + c->key_size == key_size && + memcmp(c->key, key, key_size) == 0) + return c; + } + + return NULL; +} + + +static void rehash( struct brw_cache *cache ) +{ + struct brw_cache_item **items; + struct brw_cache_item *c, *next; + unsigned size, i; + + size = cache->size * 3; + items = (struct brw_cache_item**) MALLOC(size * sizeof(*items)); + memset(items, 0, size * sizeof(*items)); + + for (i = 0; i < cache->size; i++) + for (c = cache->items[i]; c; c = next) { + next = c->next; + c->next = items[c->hash % size]; + items[c->hash % size] = c; + } + + FREE(cache->items); + cache->items = items; + cache->size = size; +} + + +boolean brw_search_cache( struct brw_cache *cache, + const void *key, + unsigned key_size, + void *aux_return, + unsigned *offset_return) +{ + struct brw_cache_item *item; + unsigned addr = 0; + unsigned hash = hash_key(key, key_size); + + item = search_cache(cache, hash, key, key_size); + + if (item) { + if (aux_return) + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + + *offset_return = addr = item->offset; + } + + if (item == NULL || addr != cache->last_addr) { + cache->brw->state.dirty.cache |= 1<<cache->id; + cache->last_addr = addr; + } + + return item != NULL; +} + +unsigned brw_upload_cache( struct brw_cache *cache, + const void *key, + unsigned key_size, + const void *data, + unsigned data_size, + const void *aux, + void *aux_return ) +{ + unsigned offset; + struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); + unsigned hash = hash_key(key, key_size); + void *tmp = MALLOC(key_size + cache->aux_size); + + if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) { + /* Should not be possible: + */ + debug_printf("brw_pool_alloc failed\n"); + exit(1); + } + + memcpy(tmp, key, key_size); + + if (cache->aux_size) + memcpy(tmp+key_size, aux, cache->aux_size); + + item->key = tmp; + item->hash = hash; + item->key_size = key_size; + item->offset = offset; + item->data_size = data_size; + + if (++cache->n_items > cache->size * 1.5) + rehash(cache); + + hash %= cache->size; + item->next = cache->items[hash]; + cache->items[hash] = item; + + if (aux_return) { + assert(cache->aux_size); + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + } + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("upload %s: %d bytes to pool buffer %p offset %x\n", + cache->name, + data_size, + (void*)cache->pool->buffer, + offset); + + /* Copy data to the buffer: + */ + cache->brw->winsys->buffer_subdata_typed(cache->brw->winsys, + cache->pool->buffer, + offset, + data_size, + data, + cache->id); + + cache->brw->state.dirty.cache |= 1<<cache->id; + cache->last_addr = offset; + + return offset; +} + +/* This doesn't really work with aux data. Use search/upload instead + */ +unsigned brw_cache_data_sz(struct brw_cache *cache, + const void *data, + unsigned data_size) +{ + unsigned addr; + + if (!brw_search_cache(cache, data, data_size, NULL, &addr)) { + addr = brw_upload_cache(cache, + data, data_size, + data, data_size, + NULL, NULL); + } + + return addr; +} + +unsigned brw_cache_data(struct brw_cache *cache, + const void *data) +{ + return brw_cache_data_sz(cache, data, cache->key_size); +} + +enum pool_type { + DW_SURFACE_STATE, + DW_GENERAL_STATE +}; + +static void brw_init_cache( struct brw_context *brw, + const char *name, + unsigned id, + unsigned key_size, + unsigned aux_size, + enum pool_type pool_type) +{ + struct brw_cache *cache = &brw->cache[id]; + cache->brw = brw; + cache->id = id; + cache->name = name; + cache->items = NULL; + + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + CALLOC(cache->size, sizeof(struct brw_cache_item)); + + + cache->key_size = key_size; + cache->aux_size = aux_size; + switch (pool_type) { + case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break; + case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break; + default: assert(0); break; + } +} + +void brw_init_caches( struct brw_context *brw ) +{ + + brw_init_cache(brw, + "CC_VP", + BRW_CC_VP, + sizeof(struct brw_cc_viewport), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "CC_UNIT", + BRW_CC_UNIT, + sizeof(struct brw_cc_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "WM_PROG", + BRW_WM_PROG, + sizeof(struct brw_wm_prog_key), + sizeof(struct brw_wm_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SAMPLER_DEFAULT_COLOR", + BRW_SAMPLER_DEFAULT_COLOR, + sizeof(struct brw_sampler_default_color), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SAMPLER", + BRW_SAMPLER, + 0, /* variable key/data size */ + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "WM_UNIT", + BRW_WM_UNIT, + sizeof(struct brw_wm_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SF_PROG", + BRW_SF_PROG, + sizeof(struct brw_sf_prog_key), + sizeof(struct brw_sf_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SF_VP", + BRW_SF_VP, + sizeof(struct brw_sf_viewport), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SF_UNIT", + BRW_SF_UNIT, + sizeof(struct brw_sf_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "VS_UNIT", + BRW_VS_UNIT, + sizeof(struct brw_vs_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "VS_PROG", + BRW_VS_PROG, + sizeof(struct brw_vs_prog_key), + sizeof(struct brw_vs_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "CLIP_UNIT", + BRW_CLIP_UNIT, + sizeof(struct brw_clip_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "CLIP_PROG", + BRW_CLIP_PROG, + sizeof(struct brw_clip_prog_key), + sizeof(struct brw_clip_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "GS_UNIT", + BRW_GS_UNIT, + sizeof(struct brw_gs_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "GS_PROG", + BRW_GS_PROG, + sizeof(struct brw_gs_prog_key), + sizeof(struct brw_gs_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SS_SURFACE", + BRW_SS_SURFACE, + sizeof(struct brw_surface_state), + 0, + DW_SURFACE_STATE); + + brw_init_cache(brw, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + sizeof(struct brw_surface_binding_table), + 0, + DW_SURFACE_STATE); +} + + +/* When we lose hardware context, need to invalidate the surface cache + * as these structs must be explicitly re-uploaded. They are subject + * to fixup by the memory manager as they contain absolute agp + * offsets, so we need to ensure there is a fresh version of the + * struct available to receive the fixup. + * + * XXX: Need to ensure that there aren't two versions of a surface or + * bufferobj with different backing data active in the same buffer at + * once? Otherwise the cache could confuse them. Maybe better not to + * cache at all? + * + * --> Isn't this the same as saying need to ensure batch is flushed + * before new data is uploaded to an existing buffer? We + * already try to make sure of that. + */ +static void clear_cache( struct brw_cache *cache ) +{ + struct brw_cache_item *c, *next; + unsigned i; + + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { + next = c->next; + free((void *)c->key); + free(c); + } + cache->items[i] = NULL; + } + + cache->n_items = 0; +} + +void brw_clear_all_caches( struct brw_context *brw ) +{ + int i; + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); + + for (i = 0; i < BRW_MAX_CACHE; i++) + clear_cache(&brw->cache[i]); + + if (brw->curbe.last_buf) { + FREE(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + } + + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + + + + + +void brw_destroy_caches( struct brw_context *brw ) +{ + unsigned i; + + for (i = 0; i < BRW_MAX_CACHE; i++) + clear_cache(&brw->cache[i]); +} diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c new file mode 100644 index 00000000000..f3174bfe0ae --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -0,0 +1,137 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +/** @file brw_state_pool.c + * Implements the state pool allocator. + * + * For the 965, we create two state pools for state cache entries. Objects + * will be allocated into the pools depending on which state base address + * their pointer is relative to in other 965 state. + * + * The state pools are relatively simple: As objects are allocated, increment + * the offset to allocate space. When the pool is "full" (rather, close to + * full), we reset the pool and reset the state cache entries that point into + * the pool. + */ + +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "brw_context.h" +#include "brw_state.h" + +boolean brw_pool_alloc( struct brw_mem_pool *pool, + unsigned size, + unsigned alignment, + unsigned *offset_return) +{ + unsigned fixup = align(pool->offset, alignment) - pool->offset; + + size = align(size, 4); + + if (pool->offset + fixup + size >= pool->size) { + debug_printf("%s failed\n", __FUNCTION__); + assert(0); + exit(0); + } + + pool->offset += fixup; + *offset_return = pool->offset; + pool->offset += size; + + return TRUE; +} + +static +void brw_invalidate_pool( struct brw_mem_pool *pool ) +{ + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("\n\n\n %s \n\n\n", __FUNCTION__); + + pool->offset = 0; + + brw_clear_all_caches(pool->brw); +} + + +static void brw_init_pool( struct brw_context *brw, + unsigned pool_id, + unsigned size ) +{ + struct brw_mem_pool *pool = &brw->pool[pool_id]; + + pool->size = size; + pool->brw = brw; + + pool->buffer = brw->pipe.winsys->buffer_create(brw->pipe.winsys, + 4096, + 0 /* DRM_BO_FLAG_MEM_TT */, + size); +} + +static void brw_destroy_pool( struct brw_context *brw, + unsigned pool_id ) +{ + struct brw_mem_pool *pool = &brw->pool[pool_id]; + + pipe_buffer_reference( pool->brw->pipe.winsys, + &pool->buffer, + NULL ); +} + + +void brw_pool_check_wrap( struct brw_context *brw, + struct brw_mem_pool *pool ) +{ + if (pool->offset > (pool->size * 3) / 4) { + brw->state.dirty.brw |= BRW_NEW_SCENE; + } + +} + +void brw_init_pools( struct brw_context *brw ) +{ + brw_init_pool(brw, BRW_GS_POOL, 0x80000); + brw_init_pool(brw, BRW_SS_POOL, 0x80000); +} + +void brw_destroy_pools( struct brw_context *brw ) +{ + brw_destroy_pool(brw, BRW_GS_POOL); + brw_destroy_pool(brw, BRW_SS_POOL); +} + + +void brw_invalidate_pools( struct brw_context *brw ) +{ + brw_invalidate_pool(&brw->pool[BRW_GS_POOL]); + brw_invalidate_pool(&brw->pool[BRW_SS_POOL]); +} diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c new file mode 100644 index 00000000000..e727601e1e7 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_upload.c @@ -0,0 +1,202 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" + +#include "pipe/p_util.h" + +/* This is used to initialize brw->state.atoms[]. We could use this + * list directly except for a single atom, brw_constant_buffer, which + * has a .dirty value which changes according to the parameters of the + * current fragment and vertex programs, and so cannot be a static + * value. + */ +const struct brw_tracked_state *atoms[] = +{ + &brw_vs_prog, + &brw_gs_prog, + &brw_clip_prog, + &brw_sf_prog, + &brw_wm_prog, + + /* Once all the programs are done, we know how large urb entry + * sizes need to be and can decide if we need to change the urb + * layout. + */ + &brw_curbe_offsets, + &brw_recalculate_urb_fence, + + + &brw_cc_vp, + &brw_cc_unit, + + &brw_wm_surfaces, /* must do before samplers */ + &brw_wm_samplers, + + &brw_wm_unit, + &brw_sf_vp, + &brw_sf_unit, + &brw_vs_unit, /* always required, enabled or not */ + &brw_clip_unit, + &brw_gs_unit, + + /* Command packets: + */ + &brw_invarient_state, + &brw_state_base_address, + &brw_pipe_control, + + &brw_binding_table_pointers, + &brw_blend_constant_color, + + &brw_drawing_rect, + &brw_depthbuffer, + + &brw_polygon_stipple, + &brw_line_stipple, + + &brw_psp_urb_cbs, + + &brw_constant_buffer +}; + + +void brw_init_state( struct brw_context *brw ) +{ + brw_init_pools(brw); + brw_init_caches(brw); + + brw->state.dirty.brw = ~0; + brw->emit_state_always = 0; +} + + +void brw_destroy_state( struct brw_context *brw ) +{ + brw_destroy_caches(brw); + brw_destroy_batch_cache(brw); + brw_destroy_pools(brw); +} + +/*********************************************************************** + */ + +static boolean check_state( const struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + return ((a->brw & b->brw) || + (a->cache & b->cache)); +} + +static void accumulate_state( struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + a->brw |= b->brw; + a->cache |= b->cache; +} + + +static void xor_states( struct brw_state_flags *result, + const struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + result->brw = a->brw ^ b->brw; + result->cache = a->cache ^ b->cache; +} + + +/*********************************************************************** + * Emit all state: + */ +void brw_validate_state( struct brw_context *brw ) +{ + struct brw_state_flags *state = &brw->state.dirty; + unsigned i; + + if (brw->emit_state_always) + state->brw |= ~0; + + if (state->cache == 0 && + state->brw == 0) + return; + + if (brw->state.dirty.brw & BRW_NEW_SCENE) + brw_clear_batch_cache_flush(brw); + + if (BRW_DEBUG) { + /* Debug version which enforces various sanity checks on the + * state flags which are generated and checked to help ensure + * state atoms are ordered correctly in the list. + */ + struct brw_state_flags examined, prev; + memset(&examined, 0, sizeof(examined)); + prev = *state; + + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; + struct brw_state_flags generated; + + assert(atom->dirty.brw || + atom->dirty.cache); + assert(atom->update); + + if (check_state(state, &atom->dirty)) { + atom->update( brw ); + } + + accumulate_state(&examined, &atom->dirty); + + /* generated = (prev ^ state) + * if (examined & generated) + * fail; + */ + xor_states(&generated, &prev, state); + assert(!check_state(&examined, &generated)); + prev = *state; + } + } + else { + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; + + assert(atom->dirty.brw || + atom->dirty.cache); + assert(atom->update); + + if (check_state(state, &atom->dirty)) + atom->update( brw ); + } + } + + memset(state, 0, sizeof(*state)); +} diff --git a/src/gallium/drivers/i965simple/brw_strings.c b/src/gallium/drivers/i965simple/brw_strings.c new file mode 100644 index 00000000000..29a41ed1e9b --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_strings.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "brw_context.h" +#include "brw_reg.h" + + +static const char *brw_get_vendor( struct pipe_context *pipe ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char *brw_get_name( struct pipe_context *pipe ) +{ + static char buffer[128]; + const char *chipset; + + switch (brw_context(pipe)->pci_id) { + case PCI_CHIP_I965_Q: + chipset = "Intel(R) 965Q"; + break; + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_G_1: + chipset = "Intel(R) 965G"; + break; + case PCI_CHIP_I965_GM: + chipset = "Intel(R) 965GM"; + break; + case PCI_CHIP_I965_GME: + chipset = "Intel(R) 965GME/GLE"; + break; + default: + chipset = "unknown"; + break; + } + + sprintf(buffer, "pipe/i965 (chipset: %s)", chipset); + return buffer; +} + + +void +brw_init_string_functions(struct brw_context *brw) +{ + brw->pipe.get_name = brw_get_name; + brw->pipe.get_vendor = brw_get_vendor; +} diff --git a/src/gallium/drivers/i965simple/brw_structs.h b/src/gallium/drivers/i965simple/brw_structs.h new file mode 100644 index 00000000000..bbb087e95d6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_structs.h @@ -0,0 +1,1348 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + +#include "pipe/p_compiler.h" + +/* Command packets: + */ +struct header +{ + unsigned length:16; + unsigned opcode:16; +}; + + +union header_union +{ + struct header bits; + unsigned dword; +}; + +struct brw_3d_control +{ + struct + { + unsigned length:8; + unsigned notify_enable:1; + unsigned pad:3; + unsigned wc_flush_enable:1; + unsigned depth_stall_enable:1; + unsigned operation:2; + unsigned opcode:16; + } header; + + struct + { + unsigned pad:2; + unsigned dest_addr_type:1; + unsigned dest_addr:29; + } dest; + + unsigned dword2; + unsigned dword3; +}; + + +struct brw_3d_primitive +{ + struct + { + unsigned length:8; + unsigned pad:2; + unsigned topology:5; + unsigned indexed:1; + unsigned opcode:16; + } header; + + unsigned verts_per_instance; + unsigned start_vert_location; + unsigned instance_count; + unsigned start_instance_location; + unsigned base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct brw_mi_flush +{ + unsigned flags:4; + unsigned pad:12; + unsigned opcode:16; +}; + +struct brw_vf_statistics +{ + unsigned statistics_enable:1; + unsigned pad:15; + unsigned opcode:16; +}; + + + +struct brw_binding_table_pointers +{ + struct header header; + unsigned vs; + unsigned gs; + unsigned clp; + unsigned sf; + unsigned wm; +}; + + +struct brw_blend_constant_color +{ + struct header header; + float blend_constant_color[4]; +}; + + +struct brw_depthbuffer +{ + union header_union header; + + union { + struct { + unsigned pitch:18; + unsigned format:3; + unsigned pad:4; + unsigned depth_offset_disable:1; + unsigned tile_walk:1; + unsigned tiled_surface:1; + unsigned pad2:1; + unsigned surface_type:3; + } bits; + unsigned dword; + } dword1; + + unsigned dword2_base_addr; + + union { + struct { + unsigned pad:1; + unsigned mipmap_layout:1; + unsigned lod:4; + unsigned width:13; + unsigned height:13; + } bits; + unsigned dword; + } dword3; + + union { + struct { + unsigned pad:12; + unsigned min_array_element:9; + unsigned depth:11; + } bits; + unsigned dword; + } dword4; +}; + +struct brw_drawrect +{ + struct header header; + unsigned xmin:16; + unsigned ymin:16; + unsigned xmax:16; + unsigned ymax:16; + unsigned xorg:16; + unsigned yorg:16; +}; + + + + +struct brw_global_depth_offset_clamp +{ + struct header header; + float depth_offset_clamp; +}; + +struct brw_indexbuffer +{ + union { + struct + { + unsigned length:8; + unsigned index_format:2; + unsigned cut_index_enable:1; + unsigned pad:5; + unsigned opcode:16; + } bits; + unsigned dword; + + } header; + + unsigned buffer_start; + unsigned buffer_end; +}; + + +struct brw_line_stipple +{ + struct header header; + + struct + { + unsigned pattern:16; + unsigned pad:16; + } bits0; + + struct + { + unsigned repeat_count:9; + unsigned pad:7; + unsigned inverse_repeat_count:16; + } bits1; +}; + + +struct brw_pipelined_state_pointers +{ + struct header header; + + struct { + unsigned pad:5; + unsigned offset:27; + } vs; + + struct + { + unsigned enable:1; + unsigned pad:4; + unsigned offset:27; + } gs; + + struct + { + unsigned enable:1; + unsigned pad:4; + unsigned offset:27; + } clp; + + struct + { + unsigned pad:5; + unsigned offset:27; + } sf; + + struct + { + unsigned pad:5; + unsigned offset:27; + } wm; + + struct + { + unsigned pad:5; + unsigned offset:27; /* KW: check me! */ + } cc; +}; + + +struct brw_polygon_stipple_offset +{ + struct header header; + + struct { + unsigned y_offset:5; + unsigned pad:3; + unsigned x_offset:5; + unsigned pad0:19; + } bits0; +}; + + + +struct brw_polygon_stipple +{ + struct header header; + unsigned stipple[32]; +}; + + + +struct brw_pipeline_select +{ + struct + { + unsigned pipeline_select:1; + unsigned pad:15; + unsigned opcode:16; + } header; +}; + + +struct brw_pipe_control +{ + struct + { + unsigned length:8; + unsigned notify_enable:1; + unsigned pad:2; + unsigned instruction_state_cache_flush_enable:1; + unsigned write_cache_flush_enable:1; + unsigned depth_stall_enable:1; + unsigned post_sync_operation:2; + + unsigned opcode:16; + } header; + + struct + { + unsigned pad:2; + unsigned dest_addr_type:1; + unsigned dest_addr:29; + } bits1; + + unsigned data0; + unsigned data1; +}; + + +struct brw_urb_fence +{ + struct + { + unsigned length:8; + unsigned vs_realloc:1; + unsigned gs_realloc:1; + unsigned clp_realloc:1; + unsigned sf_realloc:1; + unsigned vfe_realloc:1; + unsigned cs_realloc:1; + unsigned pad:2; + unsigned opcode:16; + } header; + + struct + { + unsigned vs_fence:10; + unsigned gs_fence:10; + unsigned clp_fence:10; + unsigned pad:2; + } bits0; + + struct + { + unsigned sf_fence:10; + unsigned vf_fence:10; + unsigned cs_fence:10; + unsigned pad:2; + } bits1; +}; + +struct brw_constant_buffer_state /* previously brw_command_streamer */ +{ + struct header header; + + struct + { + unsigned nr_urb_entries:3; + unsigned pad:1; + unsigned urb_entry_size:5; + unsigned pad0:23; + } bits0; +}; + +struct brw_constant_buffer +{ + struct + { + unsigned length:8; + unsigned valid:1; + unsigned pad:7; + unsigned opcode:16; + } header; + + struct + { + unsigned buffer_length:6; + unsigned buffer_address:26; + } bits0; +}; + +struct brw_state_base_address +{ + struct header header; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned general_state_address:27; + } bits0; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned surface_state_address:27; + } bits1; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned indirect_object_state_address:27; + } bits2; + + struct + { + unsigned modify_enable:1; + unsigned pad:11; + unsigned general_state_upper_bound:20; + } bits3; + + struct + { + unsigned modify_enable:1; + unsigned pad:11; + unsigned indirect_object_state_upper_bound:20; + } bits4; +}; + +struct brw_state_prefetch +{ + struct header header; + + struct + { + unsigned prefetch_count:3; + unsigned pad:3; + unsigned prefetch_pointer:26; + } bits0; +}; + +struct brw_system_instruction_pointer +{ + struct header header; + + struct + { + unsigned pad:4; + unsigned system_instruction_pointer:28; + } bits0; +}; + + + + +/* State structs for the various fixed function units: + */ + + +struct thread0 +{ + unsigned pad0:1; + unsigned grf_reg_count:3; + unsigned pad1:2; + unsigned kernel_start_pointer:26; +}; + +struct thread1 +{ + unsigned ext_halt_exception_enable:1; + unsigned sw_exception_enable:1; + unsigned mask_stack_exception_enable:1; + unsigned timeout_exception_enable:1; + unsigned illegal_op_exception_enable:1; + unsigned pad0:3; + unsigned depth_coef_urb_read_offset:6; /* WM only */ + unsigned pad1:2; + unsigned floating_point_mode:1; + unsigned thread_priority:1; + unsigned binding_table_entry_count:8; + unsigned pad3:5; + unsigned single_program_flow:1; +}; + +struct thread2 +{ + unsigned per_thread_scratch_space:4; + unsigned pad0:6; + unsigned scratch_space_base_pointer:22; +}; + + +struct thread3 +{ + unsigned dispatch_grf_start_reg:4; + unsigned urb_entry_read_offset:6; + unsigned pad0:1; + unsigned urb_entry_read_length:6; + unsigned pad1:1; + unsigned const_urb_entry_read_offset:6; + unsigned pad2:1; + unsigned const_urb_entry_read_length:6; + unsigned pad3:1; +}; + + + +struct brw_clip_unit_state +{ + struct thread0 thread0; + struct + { + unsigned pad0:7; + unsigned sw_exception_enable:1; + unsigned pad1:3; + unsigned mask_stack_exception_enable:1; + unsigned pad2:1; + unsigned illegal_op_exception_enable:1; + unsigned pad3:2; + unsigned floating_point_mode:1; + unsigned thread_priority:1; + unsigned binding_table_entry_count:8; + unsigned pad4:5; + unsigned single_program_flow:1; + } thread1; + + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:9; + unsigned gs_output_stats:1; /* not always */ + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:1; /* may be less */ + unsigned pad3:6; + } thread4; + + struct + { + unsigned pad0:13; + unsigned clip_mode:3; + unsigned userclip_enable_flags:8; + unsigned userclip_must_clip:1; + unsigned pad1:1; + unsigned guard_band_enable:1; + unsigned viewport_z_clip_enable:1; + unsigned viewport_xy_clip_enable:1; + unsigned vertex_position_space:1; + unsigned api_mode:1; + unsigned pad2:1; + } clip5; + + struct + { + unsigned pad0:5; + unsigned clipper_viewport_state_ptr:27; + } clip6; + + + float viewport_xmin; + float viewport_xmax; + float viewport_ymin; + float viewport_ymax; +}; + + + +struct brw_cc_unit_state +{ + struct + { + unsigned pad0:3; + unsigned bf_stencil_pass_depth_pass_op:3; + unsigned bf_stencil_pass_depth_fail_op:3; + unsigned bf_stencil_fail_op:3; + unsigned bf_stencil_func:3; + unsigned bf_stencil_enable:1; + unsigned pad1:2; + unsigned stencil_write_enable:1; + unsigned stencil_pass_depth_pass_op:3; + unsigned stencil_pass_depth_fail_op:3; + unsigned stencil_fail_op:3; + unsigned stencil_func:3; + unsigned stencil_enable:1; + } cc0; + + + struct + { + unsigned bf_stencil_ref:8; + unsigned stencil_write_mask:8; + unsigned stencil_test_mask:8; + unsigned stencil_ref:8; + } cc1; + + + struct + { + unsigned logicop_enable:1; + unsigned pad0:10; + unsigned depth_write_enable:1; + unsigned depth_test_function:3; + unsigned depth_test:1; + unsigned bf_stencil_write_mask:8; + unsigned bf_stencil_test_mask:8; + } cc2; + + + struct + { + unsigned pad0:8; + unsigned alpha_test_func:3; + unsigned alpha_test:1; + unsigned blend_enable:1; + unsigned ia_blend_enable:1; + unsigned pad1:1; + unsigned alpha_test_format:1; + unsigned pad2:16; + } cc3; + + struct + { + unsigned pad0:5; + unsigned cc_viewport_state_offset:27; + } cc4; + + struct + { + unsigned pad0:2; + unsigned ia_dest_blend_factor:5; + unsigned ia_src_blend_factor:5; + unsigned ia_blend_function:3; + unsigned statistics_enable:1; + unsigned logicop_func:4; + unsigned pad1:11; + unsigned dither_enable:1; + } cc5; + + struct + { + unsigned clamp_post_alpha_blend:1; + unsigned clamp_pre_alpha_blend:1; + unsigned clamp_range:2; + unsigned pad0:11; + unsigned y_dither_offset:2; + unsigned x_dither_offset:2; + unsigned dest_blend_factor:5; + unsigned src_blend_factor:5; + unsigned blend_function:3; + } cc6; + + struct { + union { + float f; + ubyte ub[4]; + } alpha_ref; + } cc7; +}; + + + +struct brw_sf_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:6; + unsigned pad3:1; + } thread4; + + struct + { + unsigned front_winding:1; + unsigned viewport_transform:1; + unsigned pad0:3; + unsigned sf_viewport_state_offset:27; + } sf5; + + struct + { + unsigned pad0:9; + unsigned dest_org_vbias:4; + unsigned dest_org_hbias:4; + unsigned scissor:1; + unsigned disable_2x2_trifilter:1; + unsigned disable_zero_pix_trifilter:1; + unsigned point_rast_rule:2; + unsigned line_endcap_aa_region_width:2; + unsigned line_width:4; + unsigned fast_scissor_disable:1; + unsigned cull_mode:2; + unsigned aa_enable:1; + } sf6; + + struct + { + unsigned point_size:11; + unsigned use_point_size_state:1; + unsigned subpixel_precision:1; + unsigned sprite_point:1; + unsigned pad0:11; + unsigned trifan_pv:2; + unsigned linestrip_pv:2; + unsigned tristrip_pv:2; + unsigned line_last_pixel_enable:1; + } sf7; + +}; + + +struct brw_gs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:1; + unsigned pad3:6; + } thread4; + + struct + { + unsigned sampler_count:3; + unsigned pad0:2; + unsigned sampler_state_pointer:27; + } gs5; + + + struct + { + unsigned max_vp_index:4; + unsigned pad0:26; + unsigned reorder_enable:1; + unsigned pad1:1; + } gs6; +}; + + +struct brw_vs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:4; + unsigned pad3:3; + } thread4; + + struct + { + unsigned sampler_count:3; + unsigned pad0:2; + unsigned sampler_state_pointer:27; + } vs5; + + struct + { + unsigned vs_enable:1; + unsigned vert_cache_disable:1; + unsigned pad0:30; + } vs6; +}; + + +struct brw_wm_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned stats_enable:1; + unsigned pad0:1; + unsigned sampler_count:3; + unsigned sampler_state_pointer:27; + } wm4; + + struct + { + unsigned enable_8_pix:1; + unsigned enable_16_pix:1; + unsigned enable_32_pix:1; + unsigned pad0:7; + unsigned legacy_global_depth_bias:1; + unsigned line_stipple:1; + unsigned depth_offset:1; + unsigned polygon_stipple:1; + unsigned line_aa_region_width:2; + unsigned line_endcap_aa_region_width:2; + unsigned early_depth_test:1; + unsigned thread_dispatch_enable:1; + unsigned program_uses_depth:1; + unsigned program_computes_depth:1; + unsigned program_uses_killpixel:1; + unsigned legacy_line_rast: 1; + unsigned pad1:1; + unsigned max_threads:6; + unsigned pad2:1; + } wm5; + + float global_depth_offset_constant; + float global_depth_offset_scale; +}; + +struct brw_sampler_default_color { + float color[4]; +}; + +struct brw_sampler_state +{ + + struct + { + unsigned shadow_function:3; + unsigned lod_bias:11; + unsigned min_filter:3; + unsigned mag_filter:3; + unsigned mip_filter:2; + unsigned base_level:5; + unsigned pad:1; + unsigned lod_preclamp:1; + unsigned default_color_mode:1; + unsigned pad0:1; + unsigned disable:1; + } ss0; + + struct + { + unsigned r_wrap_mode:3; + unsigned t_wrap_mode:3; + unsigned s_wrap_mode:3; + unsigned pad:3; + unsigned max_lod:10; + unsigned min_lod:10; + } ss1; + + + struct + { + unsigned pad:5; + unsigned default_color_pointer:27; + } ss2; + + struct + { + unsigned pad:19; + unsigned max_aniso:3; + unsigned chroma_key_mode:1; + unsigned chroma_key_index:2; + unsigned chroma_key_enable:1; + unsigned monochrome_filter_width:3; + unsigned monochrome_filter_height:3; + } ss3; +}; + + +struct brw_clipper_viewport +{ + float xmin; + float xmax; + float ymin; + float ymax; +}; + +struct brw_cc_viewport +{ + float min_depth; + float max_depth; +}; + +struct brw_sf_viewport +{ + struct { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; + } viewport; + + struct { + short xmin; + short ymin; + short xmax; + short ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... + */ +struct brw_surface_state +{ + struct { + unsigned cube_pos_z:1; + unsigned cube_neg_z:1; + unsigned cube_pos_y:1; + unsigned cube_neg_y:1; + unsigned cube_pos_x:1; + unsigned cube_neg_x:1; + unsigned pad:4; + unsigned mipmap_layout_mode:1; + unsigned vert_line_stride_ofs:1; + unsigned vert_line_stride:1; + unsigned color_blend:1; + unsigned writedisable_blue:1; + unsigned writedisable_green:1; + unsigned writedisable_red:1; + unsigned writedisable_alpha:1; + unsigned surface_format:9; + unsigned data_return_format:1; + unsigned pad0:1; + unsigned surface_type:3; + } ss0; + + struct { + unsigned base_addr; + } ss1; + + struct { + unsigned pad:2; + unsigned mip_count:4; + unsigned width:13; + unsigned height:13; + } ss2; + + struct { + unsigned tile_walk:1; + unsigned tiled_surface:1; + unsigned pad:1; + unsigned pitch:18; + unsigned depth:11; + } ss3; + + struct { + unsigned pad:19; + unsigned min_array_elt:9; + unsigned min_lod:4; + } ss4; +}; + + + +struct brw_vertex_buffer_state +{ + struct { + unsigned pitch:11; + unsigned pad:15; + unsigned access_type:1; + unsigned vb_index:5; + } vb0; + + unsigned start_addr; + unsigned max_index; +#if 1 + unsigned instance_data_step_rate; /* not included for sequential/random vertices? */ +#endif +}; + +#define BRW_VBP_MAX 17 + +struct brw_vb_array_state { + struct header header; + struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + + +struct brw_vertex_element_state +{ + struct + { + unsigned src_offset:11; + unsigned pad:5; + unsigned src_format:9; + unsigned pad0:1; + unsigned valid:1; + unsigned vertex_buffer_index:5; + } ve0; + + struct + { + unsigned dst_offset:8; + unsigned pad:8; + unsigned vfcomponent3:4; + unsigned vfcomponent2:4; + unsigned vfcomponent1:4; + unsigned vfcomponent0:4; + } ve1; +}; + +#define BRW_VEP_MAX 18 + +struct brw_vertex_element_packet { + struct header header; + struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ +}; + + +struct brw_urb_immediate { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; +}; + +/* Instruction format for the execution units: + */ + +struct brw_instruction +{ + struct + { + unsigned opcode:7; + unsigned pad:1; + unsigned access_mode:1; + unsigned mask_control:1; + unsigned dependency_control:2; + unsigned compression_control:2; + unsigned thread_control:2; + unsigned predicate_control:4; + unsigned predicate_inverse:1; + unsigned execution_size:3; + unsigned destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + unsigned pad0:2; + unsigned debug_control:1; + unsigned saturate:1; + } header; + + union { + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_subreg_nr:5; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da1; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad:6; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia1; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad0:1; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:1; + unsigned dest_reg_nr:8; + unsigned pad1:2; + unsigned dest_address_mode:1; + } da16; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad0:6; + unsigned dest_writemask:4; + int dest_indirect_offset:6; + unsigned dest_subreg_nr:3; + unsigned pad1:2; + unsigned dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct + { + unsigned src0_subreg_nr:5; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad:6; + } da1; + + struct + { + int src0_indirect_offset:10; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad:6; + } ia1; + + struct + { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + unsigned src0_subreg_nr:1; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } da16; + + struct + { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + int src0_indirect_offset:6; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } ia16; + + } bits2; + + union + { + struct + { + unsigned src1_subreg_nr:5; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned pad0:7; + } da1; + + struct + { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + unsigned src1_subreg_nr:1; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned pad2:7; + } da16; + + struct + { + int src1_indirect_offset:10; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } ia1; + + struct + { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + int src1_indirect_offset:6; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad2:6; + } ia16; + + + struct + { + int jump_count:16; /* note: signed */ + unsigned pop_count:4; + unsigned pad0:12; + } if_else; + + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned pad0:8; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } math; + + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned return_format:2; + unsigned msg_type:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler; + + struct brw_urb_immediate urb; + + struct { + unsigned binding_table_index:8; + unsigned msg_control:4; + unsigned msg_type:2; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read; + + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned pixel_scoreboard_clear:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_write; + + struct { + unsigned pad:16; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } generic; + + int d; + unsigned ud; + } bits3; +}; + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c new file mode 100644 index 00000000000..518845e4b2e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -0,0 +1,210 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "brw_blit.h" +#include "brw_context.h" +#include "brw_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/util/p_tile.h" + + +/* + * XXX note: same as code in sp_surface.c + */ +static struct pipe_surface * +brw_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct brw_texture *tex = (struct brw_texture *)pt; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + offset = tex->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset += tex->image_offset[level][face] * pt->cpp; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset += tex->image_offset[level][zslice] * pt->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->format); + assert(ps->refcount); + pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = tex->pitch; + ps->offset = offset; + } + return ps; +} + + +/* Upload data to a rectangular sub-region. Lots of choices how to do this: + * + * - memcpy by span to current destination + * - upload data as new buffer and blit + * + * Currently always memcpy. + */ +static void +brw_surface_data(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + const void *src, unsigned src_pitch, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + pipe_copy_rect(pipe_surface_map(dst) + dst->offset, + dst->cpp, dst->pitch, + dstx, dsty, width, height, src, src_pitch, srcx, srcy); + + pipe_surface_unmap(dst); +} + + +/* Assumes all values are within bounds -- no checking at this level - + * do it higher up if required. + */ +static void +brw_surface_copy(struct pipe_context *pipe, + unsigned do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + assert(dst != src); + assert(dst->cpp == src->cpp); + + if (0) { + pipe_copy_rect(pipe_surface_map(dst) + dst->offset, + dst->cpp, + dst->pitch, + dstx, dsty, + width, height, + pipe_surface_map(src) + src->offset, + do_flip ? -src->pitch : src->pitch, + srcx, do_flip ? 1 - srcy - height : srcy); + + pipe_surface_unmap(src); + pipe_surface_unmap(dst); + } + else { + brw_copy_blit(brw_context(pipe), + do_flip, + dst->cpp, + (short) src->pitch, src->buffer, src->offset, FALSE, + (short) dst->pitch, dst->buffer, dst->offset, FALSE, + (short) srcx, (short) srcy, (short) dstx, (short) dsty, + (short) width, (short) height, PIPE_LOGICOP_COPY); + } +} + +/* Fill a rectangular sub-region. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; +} + + +static void +brw_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + if (0) { + unsigned i, j; + void *dst_map = pipe_surface_map(dst); + + switch (dst->cpp) { + case 1: { + ubyte *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + memset(row, value, width); + row += dst->pitch; + } + } + break; + case 2: { + ushort *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = (ushort) value; + row += dst->pitch; + } + } + break; + case 4: { + unsigned *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = value; + row += dst->pitch; + } + } + break; + default: + assert(0); + break; + } + + pipe_surface_unmap( dst ); + } + else { + brw_fill_blit(brw_context(pipe), + dst->cpp, + (short) dst->pitch, + dst->buffer, dst->offset, FALSE, + (short) dstx, (short) dsty, + (short) width, (short) height, + value); + } +} + +void +brw_init_surface_functions(struct brw_context *brw) +{ + brw->pipe.get_tex_surface = brw_get_tex_surface; + brw->pipe.surface_copy = brw_surface_copy; + brw->pipe.surface_fill = brw_surface_fill; +} diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c new file mode 100644 index 00000000000..90561f1307d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -0,0 +1,353 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +/* Code to layout images in a mipmap tree for i965. + */ + +#include "brw_tex_layout.h" + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" + +#include "brw_context.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +#if 0 +unsigned intel_compressed_alignment(unsigned internalFormat) +{ + unsigned alignment = 4; + + switch (internalFormat) { + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + alignment = 8; + break; + + default: + break; + } + + return alignment; +} +#endif + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static boolean brw_miptree_layout(struct pipe_context *, struct brw_texture *); + +static void intel_miptree_set_image_offset(struct brw_texture *tex, + unsigned level, + unsigned img, + unsigned x, unsigned y) +{ + struct pipe_texture *pt = &tex->base; + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + assert(img < tex->nr_images[level]); + + tex->image_offset[level][img] = (x + y * tex->pitch) * pt->cpp; +} + +static void intel_miptree_set_level_info(struct brw_texture *tex, + unsigned level, + unsigned nr_images, + unsigned x, unsigned y, + unsigned w, unsigned h, unsigned d) +{ + struct pipe_texture *pt = &tex->base; + + assert(level < PIPE_MAX_TEXTURE_LEVELS); + + pt->width[level] = w; + pt->height[level] = h; + pt->depth[level] = d; + + tex->level_offset[level] = (x + y * tex->pitch) * pt->cpp; + tex->nr_images[level] = nr_images; + + /* + DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + */ + + /* Not sure when this would happen, but anyway: + */ + if (tex->image_offset[level]) { + FREE(tex->image_offset[level]); + tex->image_offset[level] = NULL; + } + + assert(nr_images); + assert(!tex->image_offset[level]); + + tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); + tex->image_offset[level][0] = 0; +} + +static void i945_miptree_layout_2d(struct brw_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned align_h = 2, align_w = 4; + unsigned level; + unsigned x = 0; + unsigned y = 0; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + + tex->pitch = pt->width[0]; + +#if 0 + if (pt->compressed) { + align_w = intel_compressed_alignment(pt->internal_format); + tex->pitch = ALIGN(pt->width[0], align_w); + } +#endif + + /* May need to adjust pitch to accomodate the placement of + * the 2nd mipmap. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap out past the width of its parent. + */ + if (pt->last_level > 0) { + unsigned mip1_width; + + if (pt->compressed) { + mip1_width = align(minify(pt->width[0]), align_w) + + align(minify(minify(pt->width[0])), align_w); + } else { + mip1_width = align(minify(pt->width[0]), align_w) + + minify(minify(pt->width[0])); + } + + if (mip1_width > tex->pitch) { + tex->pitch = mip1_width; + } + } + + /* Pitch must be a whole number of dwords, even though we + * express it in texels. + */ + tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp; + tex->total_height = 0; + + for (level = 0; level <= pt->last_level; level++) { + unsigned img_height; + + intel_miptree_set_level_info(tex, level, 1, x, y, width, + height, 1); + + if (pt->compressed) + img_height = MAX2(1, height/4); + else + img_height = align(height, align_h); + + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_height = MAX2(tex->total_height, y + img_height); + + /* Layout_below: step right after second mipmap. + */ + if (level == 1) { + x += align(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } +} + +static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + /* XXX: these vary depending on image format: + */ +/* int align_w = 4; */ + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_3D: { + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned pack_x_pitch, pack_x_nr; + unsigned pack_y_pitch; + unsigned level; + unsigned align_h = 2; + unsigned align_w = 4; + + tex->total_height = 0; +#if 0 + if (pt->compressed) { + align_w = intel_compressed_alignment(pt->internal_format); + pt->pitch = align(width, align_w); + pack_y_pitch = (height + 3) / 4; + } else +#endif + { + tex->pitch = align(pt->width[0] * pt->cpp, 4) / pt->cpp; + pack_y_pitch = align(pt->height[0], align_h); + } + + pack_x_pitch = tex->pitch; + pack_x_nr = 1; + + for (level = 0; level <= pt->last_level; level++) { + unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6; + int x = 0; + int y = 0; + uint q, j; + + intel_miptree_set_level_info(tex, level, nr_images, + 0, tex->total_height, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + intel_miptree_set_image_offset(tex, level, q, x, y); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + tex->total_height += y; + width = minify(width); + height = minify(height); + depth = minify(depth); + + if (pt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > align(width, align_w)) { + pack_x_pitch = align(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= tex->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = align(pack_y_pitch, align_h); + } + } + + } + break; + } + + default: + i945_miptree_layout_2d(tex); + break; + } +#if 0 + PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + pt->pitch, + pt->total_height, + pt->cpp, + pt->pitch * pt->total_height * pt->cpp ); +#endif + + return TRUE; +} + + +struct pipe_texture * +brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) +{ + struct brw_texture *tex = CALLOC_STRUCT(brw_texture); + + if (tex) { + tex->base = *templat; + + if (brw_miptree_layout(pipe, tex)) + tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); + + if (!tex->buffer) { + FREE(tex); + return NULL; + } + } + + return &tex->base; +} + +void +brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct brw_texture *tex = (struct brw_texture *)*pt; + uint i; + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); + */ + + pipe_buffer_reference(pipe->winsys, &tex->buffer, NULL); + + for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) + if (tex->image_offset[i]) + free(tex->image_offset[i]); + + free(tex); + } + *pt = NULL; +} diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h new file mode 100644 index 00000000000..cfd6b1ef3ae --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_tex_layout.h @@ -0,0 +1,15 @@ +#ifndef BRW_TEX_LAYOUT_H +#define BRW_TEX_LAYOUT_H + +#include "pipe/p_compiler.h" + +struct pipe_context; +struct pipe_texture; + +extern struct pipe_texture * +brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat); + +extern void +brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_urb.c b/src/gallium/drivers/i965simple/brw_urb.c new file mode 100644 index 00000000000..101a4367b90 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_urb.c @@ -0,0 +1,186 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +//#include "brw_state.h" +#include "brw_batch.h" +#include "brw_defines.h" + +#define VS 0 +#define GS 1 +#define CLP 2 +#define SF 3 +#define CS 4 + +/* XXX: Are the min_entry_size numbers useful? + * XXX: Verify min_nr_entries, esp for VS. + * XXX: Verify SF min_entry_size. + */ +static const struct { + unsigned min_nr_entries; + unsigned preferred_nr_entries; + unsigned min_entry_size; + unsigned max_entry_size; +} limits[CS+1] = { + { 8, 32, 1, 5 }, /* vs */ + { 4, 8, 1, 5 }, /* gs */ + { 6, 8, 1, 5 }, /* clp */ + { 1, 8, 1, 12 }, /* sf */ + { 1, 4, 1, 32 } /* cs */ +}; + + +static boolean check_urb_layout( struct brw_context *brw ) +{ + brw->urb.vs_start = 0; + brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize; + brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize; + brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; + brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; + + return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256; +} + +/* Most minimal update, forces re-emit of URB fence packet after GS + * unit turned on/off. + */ +static void recalculate_urb_fence( struct brw_context *brw ) +{ + unsigned csize = brw->curbe.total_size; + unsigned vsize = brw->vs.prog_data->urb_entry_size; + unsigned sfsize = brw->sf.prog_data->urb_entry_size; + + if (csize < limits[CS].min_entry_size) + csize = limits[CS].min_entry_size; + + if (vsize < limits[VS].min_entry_size) + vsize = limits[VS].min_entry_size; + + if (sfsize < limits[SF].min_entry_size) + sfsize = limits[SF].min_entry_size; + + if (brw->urb.vsize < vsize || + brw->urb.sfsize < sfsize || + brw->urb.csize < csize || + (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize || + brw->urb.sfsize > brw->urb.sfsize || + brw->urb.csize > brw->urb.csize))) { + + + brw->urb.csize = csize; + brw->urb.sfsize = sfsize; + brw->urb.vsize = vsize; + + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; + brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; + brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; + + if (!check_urb_layout(brw)) { + brw->urb.nr_vs_entries = limits[VS].min_nr_entries; + brw->urb.nr_gs_entries = limits[GS].min_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; + brw->urb.nr_sf_entries = limits[SF].min_nr_entries; + brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + + brw->urb.constrained = 1; + + if (!check_urb_layout(brw)) { + /* This is impossible, given the maximal sizes of urb + * entries and the values for minimum nr of entries + * provided above. + */ + debug_printf("couldn't calculate URB layout!\n"); + exit(1); + } + + if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) + debug_printf("URB CONSTRAINED\n"); + } + else + brw->urb.constrained = 0; + + if (BRW_DEBUG & DEBUG_URB) + debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + brw->urb.vs_start, + brw->urb.gs_start, + brw->urb.clip_start, + brw->urb.sf_start, + brw->urb.cs_start, + 256); + + brw->state.dirty.brw |= BRW_NEW_URB_FENCE; + } +} + + +const struct brw_tracked_state brw_recalculate_urb_fence = { + .dirty = { + .brw = BRW_NEW_CURBE_OFFSETS, + .cache = (CACHE_NEW_VS_PROG | + CACHE_NEW_SF_PROG) + }, + .update = recalculate_urb_fence +}; + + + + + +void brw_upload_urb_fence(struct brw_context *brw) +{ + struct brw_urb_fence uf; + memset(&uf, 0, sizeof(uf)); + + uf.header.opcode = CMD_URB_FENCE; + uf.header.length = sizeof(uf)/4-2; + uf.header.vs_realloc = 1; + uf.header.gs_realloc = 1; + uf.header.clp_realloc = 1; + uf.header.sf_realloc = 1; + uf.header.vfe_realloc = 1; + uf.header.cs_realloc = 1; + + /* The ordering below is correct, not the layout in the + * instruction. + * + * There are 256 urb reg pairs in total. + */ + uf.bits0.vs_fence = brw->urb.gs_start; + uf.bits0.gs_fence = brw->urb.clip_start; + uf.bits0.clp_fence = brw->urb.sf_start; + uf.bits1.sf_fence = brw->urb.cs_start; + uf.bits1.cs_fence = 256; + + BRW_BATCH_STRUCT(brw, &uf); +} diff --git a/src/gallium/drivers/i965simple/brw_util.c b/src/gallium/drivers/i965simple/brw_util.c new file mode 100644 index 00000000000..42391d7c8c5 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_util.c @@ -0,0 +1,104 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_util.h" +#include "brw_defines.h" + +#include "pipe/p_defines.h" + +unsigned brw_count_bits( unsigned val ) +{ + unsigned i; + for (i = 0; val ; val >>= 1) + if (val & 1) + i++; + return i; +} + + +unsigned brw_translate_blend_equation( int mode ) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BRW_BLENDFUNCTION_ADD; + case PIPE_BLEND_MIN: + return BRW_BLENDFUNCTION_MIN; + case PIPE_BLEND_MAX: + return BRW_BLENDFUNCTION_MAX; + case PIPE_BLEND_SUBTRACT: + return BRW_BLENDFUNCTION_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; + default: + assert(0); + return BRW_BLENDFUNCTION_ADD; + } +} + +unsigned brw_translate_blend_factor( int factor ) +{ + switch(factor) { + case PIPE_BLENDFACTOR_ZERO: + return BRW_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BRW_BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BRW_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BRW_BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BRW_BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BRW_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BRW_BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BRW_BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BRW_BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BRW_BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BRW_BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BRW_BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BRW_BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BRW_BLENDFACTOR_INV_CONST_ALPHA; + default: + assert(0); + return BRW_BLENDFACTOR_ZERO; + } +} diff --git a/src/gallium/drivers/i965simple/brw_util.h b/src/gallium/drivers/i965simple/brw_util.h new file mode 100644 index 00000000000..d60e5934dba --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_util.h @@ -0,0 +1,43 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_UTIL_H +#define BRW_UTIL_H + +#include "pipe/p_state.h" + +extern unsigned brw_count_bits( unsigned val ); +extern unsigned brw_translate_blend_factor( int factor ); +extern unsigned brw_translate_blend_equation( int mode ); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_vs.c b/src/gallium/drivers/i965simple/brw_vs.c new file mode 100644 index 00000000000..738c6346d5e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs.c @@ -0,0 +1,120 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_vs.h" +#include "brw_util.h" +#include "brw_state.h" + + +static void do_vs_prog( struct brw_context *brw, + const struct brw_vertex_program *vp, + struct brw_vs_prog_key *key ) +{ + unsigned program_size; + const unsigned *program; + struct brw_vs_compile c; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + + brw_init_compile(&c.func); + c.vp = vp; + + c.prog_data.outputs_written = vp->program.num_outputs; + c.prog_data.inputs_read = vp->program.num_inputs; + +#if 0 + if (c.key.copy_edgeflag) { + c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE; + c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; + } +#endif + + /* Emit GEN4 code. + */ + brw_vs_emit(&c); + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* + */ + brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->vs.prog_data); +} + + +static void brw_upload_vs_prog( struct brw_context *brw ) +{ + struct brw_vs_prog_key key; + const struct brw_vertex_program *vp = brw->attribs.VertexProgram; + + assert(vp); + + memset(&key, 0, sizeof(key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. + */ + key.program_string_id = vp->id; + key.nr_userclip = brw->attribs.Clip.nr; + key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || + brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL); + + /* Make an early check for the key. + */ + if (brw_search_cache(&brw->cache[BRW_VS_PROG], + &key, sizeof(key), + &brw->vs.prog_data, + &brw->vs.prog_gs_offset)) + return; + + do_vs_prog(brw, vp, &key); +} + + +/* See brw_vs.c: + */ +const struct brw_tracked_state brw_vs_prog = { + .dirty = { + .brw = BRW_NEW_VS, + .cache = 0 + }, + .update = brw_upload_vs_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_vs.h b/src/gallium/drivers/i965simple/brw_vs.h new file mode 100644 index 00000000000..0e58f043b02 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs.h @@ -0,0 +1,82 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_VS_H +#define BRW_VS_H + + +#include "brw_context.h" +#include "brw_eu.h" + + +struct brw_vs_prog_key { + unsigned program_string_id; + unsigned nr_userclip:4; + unsigned copy_edgeflag:1; + unsigned know_w_is_one:1; + unsigned pad:26; +}; + + +struct brw_vs_compile { + struct brw_compile func; + struct brw_vs_prog_key key; + struct brw_vs_prog_data prog_data; + + struct brw_vertex_program *vp; + + unsigned nr_inputs; + + unsigned first_output; + unsigned nr_outputs; + + unsigned first_tmp; + unsigned last_tmp; + + struct brw_reg r0; + struct brw_reg r1; + struct brw_reg regs[12][128]; + struct brw_reg tmp; + struct brw_reg stack; + + struct { + boolean used_in_src; + struct brw_reg reg; + } output_regs[128]; + + struct brw_reg userplane[6]; + +}; + +void brw_vs_emit( struct brw_vs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c new file mode 100644 index 00000000000..98915ba1016 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -0,0 +1,1332 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_context.h" +#include "brw_vs.h" + +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + +struct brw_prog_info { + unsigned num_temps; + unsigned num_addrs; + unsigned num_consts; + + unsigned writes_psize; + + unsigned pos_idx; + unsigned result_edge_idx; + unsigned edge_flag_idx; + unsigned psize_idx; +}; + +/* Do things as simply as possible. Allocate and populate all regs + * ahead of time. + */ +static void brw_vs_alloc_regs( struct brw_vs_compile *c, + struct brw_prog_info *info ) +{ + unsigned i, reg = 0, mrf; + unsigned nr_params; + + /* r0 -- reserved as usual + */ + c->r0 = brw_vec8_grf(reg, 0); reg++; + + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); + } + + /* Deal with curbe alignment: + */ + reg += ((6+c->key.nr_userclip+3)/4)*2; + } + + /* Vertex program parameters from curbe: + */ + nr_params = c->prog_data.max_const; + for (i = 0; i < nr_params; i++) { + c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params+1)/2; + c->prog_data.curb_read_length = reg - 1; + + + + /* Allocate input regs: + */ + c->nr_inputs = c->vp->program.num_inputs; + for (i = 0; i < c->nr_inputs; i++) { + c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + + + /* Allocate outputs: TODO: could organize the non-position outputs + * to go straight into message regs. + */ + c->nr_outputs = 0; + c->first_output = reg; + mrf = 4; + for (i = 0; i < c->vp->program.num_outputs; i++) { + c->nr_outputs++; +#if 0 + if (i == VERT_RESULT_HPOS) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + else if (i == VERT_RESULT_PSIZ) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + mrf++; /* just a placeholder? XXX fix later stages & remove this */ + } + else { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } +#else + /*treat pos differently for now */ + if (i == info->pos_idx) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } else { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } +#endif + } + + /* Allocate program temporaries: + */ + for (i = 0; i < info->num_temps; i++) { + c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0); + reg++; + } + + /* Address reg(s). Don't try to use the internal address reg until + * deref time. + */ + for (i = 0; i < info->num_addrs; i++) { + c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE, + reg, + 0, + BRW_REGISTER_TYPE_D, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XXXX, + TGSI_WRITEMASK_X); + reg++; + } + + for (i = 0; i < 128; i++) { + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + + + /* Some opcodes need an internal temporary: + */ + c->first_tmp = reg; + c->last_tmp = reg; /* for allocation purposes */ + + /* Each input reg holds data from two vertices. The + * urb_read_length is the number of registers read from *each* + * vertex urb, so is half the amount: + */ + c->prog_data.urb_read_length = (c->nr_inputs+1)/2; + + c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; + c->prog_data.total_grf = reg; +} + + +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +static void unalias1( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg )) +{ + if (dst.file == arg0.file && dst.nr == arg0.nr) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0); + brw_MOV(p, dst, tmp); + } + else { + func(c, dst, arg0); + } +} + +static void unalias2( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg )) +{ + if ((dst.file == arg0.file && dst.nr == arg0.nr) || + (dst.file == arg1.file && dst.nr == arg1.nr)) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0, arg1); + brw_MOV(p, dst, tmp); + } + else { + func(c, dst, arg0, arg1); + } +} + +static void emit_sop( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + unsigned cond) +{ + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), cond, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(0.0f)); + brw_pop_insn_state(p); +} + +static void emit_seq( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); +} + +static void emit_sne( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); +} +static void emit_slt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); +} + +static void emit_sle( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); +} + +static void emit_sgt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); +} + +static void emit_sge( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); +} + +static void emit_max( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg1, arg0); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + +static void emit_min( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + + +static void emit_math1( struct brw_vs_compile *c, + unsigned function, + struct brw_reg dst, + struct brw_reg arg0, + unsigned precision) +{ + /* There are various odd behaviours with SEND on the simulator. In + * addition there are documented issues with the fact that the GEN4 + * processor doesn't do dependency control properly on SEND + * results. So, on balance, this kludge to get around failures + * with writemasked math results looks like it might be necessary + * whether that turns out to be a simulator bug or not: + */ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + +static void emit_math2( struct brw_vs_compile *c, + unsigned function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + unsigned precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_message_reg(3), arg1); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + +static void emit_exp_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) { + struct brw_reg tmp = get_tmp(c); + struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); + + /* tmp_d = floor(arg0.x) */ + brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0)); + + /* result[0] = 2.0 ^ tmp */ + + /* Adjust exponent for floating point: + * exp += 127 + */ + brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127)); + + /* Install exponent and sign. + * Excess drops off the edge: + */ + brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X), + tmp_d, brw_imm_d(23)); + + release_tmp(c, tmp); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) { + /* result[1] = arg0.x - floor(arg0.x) */ + brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { + /* As with the LOG instruction, we might be better off just + * doing a taylor expansion here, seeing as we have to do all + * the prep work. + * + * If mathbox partial precision is too low, consider also: + * result[3] = result[0] * EXP(result[1]) + */ + emit_math1(c, + BRW_MATH_FUNCTION_EXP, + brw_writemask(dst, TGSI_WRITEMASK_Z), + brw_swizzle1(arg0, 0), + BRW_MATH_PRECISION_PARTIAL); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { + /* result[3] = 1.0; */ + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1)); + } +} + + +static void emit_log_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); + struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD); + boolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) { + tmp = get_tmp(c); + tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); + } + + /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt + * according to spec: + * + * These almost look likey they could be joined up, but not really + * practical: + * + * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127 + * result[1].i = (x.i & ((1<<23)-1) + (127<<23) + */ + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) { + brw_AND(p, + brw_writemask(tmp_ud, TGSI_WRITEMASK_X), + brw_swizzle1(arg0_ud, 0), + brw_imm_ud((1U<<31)-1)); + + brw_SHR(p, + brw_writemask(tmp_ud, TGSI_WRITEMASK_X), + tmp_ud, + brw_imm_ud(23)); + + brw_ADD(p, + brw_writemask(tmp, TGSI_WRITEMASK_X), + retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */ + brw_imm_d(-127)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) { + brw_AND(p, + brw_writemask(tmp_ud, TGSI_WRITEMASK_Y), + brw_swizzle1(arg0_ud, 0), + brw_imm_ud((1<<23)-1)); + + brw_OR(p, + brw_writemask(tmp_ud, TGSI_WRITEMASK_Y), + tmp_ud, + brw_imm_ud(127<<23)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { + /* result[2] = result[0] + LOG2(result[1]); */ + + /* Why bother? The above is just a hint how to do this with a + * taylor series. Maybe we *should* use a taylor series as by + * the time all the above has been done it's almost certainly + * quicker than calling the mathbox, even with low precision. + * + * Options are: + * - result[0] + mathbox.LOG2(result[1]) + * - mathbox.LOG2(arg0.x) + * - result[0] + inline_taylor_approx(result[1]) + */ + emit_math1(c, + BRW_MATH_FUNCTION_LOG, + brw_writemask(tmp, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 1), + BRW_MATH_PRECISION_FULL); + + brw_ADD(p, + brw_writemask(tmp, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(tmp, 0)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { + /* result[3] = 1.0; */ + brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1)); + } + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + + +/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1 + */ +static void emit_dst_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1) +{ + struct brw_compile *p = &c->func; + + /* There must be a better way to do this: + */ + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0)); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) + brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1); +} + +static void emit_xpd( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg t, + struct brw_reg u) +{ + brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3)); + brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3)); +} + + + +static void emit_lit_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0)); + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1)); + + /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order + * to get all channels active inside the IF. In the clipping code + * we run with NoMask, so it's not an option and we can use + * BRW_EXECUTE_1 for all comparisions. + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); + if_insn = brw_IF(p, BRW_EXECUTE_8); + { + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0)); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); + brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + emit_math2(c, + BRW_MATH_FUNCTION_POW, + brw_writemask(dst, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(arg0, 3), + BRW_MATH_PRECISION_PARTIAL); + } + + brw_ENDIF(p, if_insn); +} + + + + + +/* TODO: relative addressing! + */ +static struct brw_reg get_reg( struct brw_vs_compile *c, + unsigned file, + unsigned index ) +{ + switch (file) { + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + case TGSI_FILE_CONSTANT: + assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0); + return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm]; + case TGSI_FILE_IMMEDIATE: + assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0); + return c->regs[TGSI_FILE_CONSTANT][index]; + case TGSI_FILE_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case TGSI_FILE_NULL: /* undef values */ + return brw_null_reg(); + + default: + assert(0); + return brw_null_reg(); + } +} + + + +static struct brw_reg deref( struct brw_vs_compile *c, + struct brw_reg arg, + int offset) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = vec4(get_tmp(c)); + struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16; + struct brw_reg indirect = brw_vec4_indirect(0,0); + + { + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + + /* This is pretty clunky - load the address register twice and + * fetch each 4-dword value in turn. There must be a way to do + * this in a single pass, but I couldn't get it to work. + */ + brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); + brw_MOV(p, tmp, indirect); + + brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); + brw_MOV(p, suboffset(tmp, 4), indirect); + + brw_pop_insn_state(p); + } + + return vec8(tmp); +} + + +static void emit_arl( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_RNDD(p, tmp, arg0); + brw_MUL(p, dst, tmp, brw_imm_d(16)); + + if (need_tmp) + release_tmp(c, tmp); +} + + +/* Will return mangled results for SWZ op. The emit_swz() function + * ignores this result and recalculates taking extended swizzles into + * account. + */ +static struct brw_reg get_arg( struct brw_vs_compile *c, + struct tgsi_src_register *src ) +{ + struct brw_reg reg; + + if (src->File == TGSI_FILE_NULL) + return brw_null_reg(); + +#if 0 + if (src->RelAddr) + reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); + else +#endif + reg = get_reg(c, src->File, src->Index); + + /* Convert 3-bit swizzle to 2-bit. + */ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX, + src->SwizzleY, + src->SwizzleZ, + src->SwizzleW); + + /* Note this is ok for non-swizzle instructions: + */ + reg.negate = src->Negate ? 1 : 0; + + return reg; +} + + +static struct brw_reg get_dst( struct brw_vs_compile *c, + const struct tgsi_dst_register *dst ) +{ + struct brw_reg reg = get_reg(c, dst->File, dst->Index); + + reg.dw1.bits.writemask = dst->WriteMask; + + return reg; +} + + + + +static void emit_swz( struct brw_vs_compile *c, + struct brw_reg dst, + struct tgsi_src_register src ) +{ + struct brw_compile *p = &c->func; + unsigned zeros_mask = 0; + unsigned ones_mask = 0; + unsigned src_mask = 0; + ubyte src_swz[4]; + boolean need_tmp = (src.Negate && + dst.file != BRW_GENERAL_REGISTER_FILE); + struct brw_reg tmp = dst; + unsigned i; + + if (need_tmp) + tmp = get_tmp(c); + + for (i = 0; i < 4; i++) { + if (dst.dw1.bits.writemask & (1<<i)) { + ubyte s = 0; + switch(i) { + case 0: + s = src.SwizzleX; + break; + s = src.SwizzleY; + case 1: + break; + s = src.SwizzleZ; + case 2: + break; + s = src.SwizzleW; + case 3: + break; + } + switch (s) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: + src_mask |= 1<<i; + src_swz[i] = s; + break; + case TGSI_EXTSWIZZLE_ZERO: + zeros_mask |= 1<<i; + break; + case TGSI_EXTSWIZZLE_ONE: + ones_mask |= 1<<i; + break; + } + } + } + + /* Do src first, in case dst aliases src: + */ + if (src_mask) { + struct brw_reg arg0; + +#if 0 + if (src.RelAddr) + arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); + else +#endif + arg0 = get_reg(c, src.File, src.Index); + + arg0 = brw_swizzle(arg0, + src_swz[0], src_swz[1], + src_swz[2], src_swz[3]); + + brw_MOV(p, brw_writemask(tmp, src_mask), arg0); + } + + if (zeros_mask) + brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0)); + + if (ones_mask) + brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); + + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + +/* Post-vertex-program processing. Send the results to the URB. + */ +static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info) +{ + struct brw_compile *p = &c->func; + struct brw_reg m0 = brw_message_reg(0); + struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx]; + struct brw_reg ndc; + + if (c->key.copy_edgeflag) { + brw_MOV(p, + get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx), + get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx)); + } + + + /* Build ndc coords? TODO: Shortcircuit when w is known to be one. + */ + if (!c->key.know_w_is_one) { + ndc = get_tmp(c); + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc); + } + else { + ndc = pos; + } + + /* This includes the workaround for -ve rhw, so is no longer an + * optional step: + */ + if (info->writes_psize || + c->key.nr_userclip || + !c->key.know_w_is_one) + { + struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + unsigned i; + + brw_MOV(p, header1, brw_imm_ud(0)); + + brw_set_access_mode(p, BRW_ALIGN_16); + + if (info->writes_psize) { + struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx]; + brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W), + brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, + brw_imm_ud(0x7ff<<8)); + } + + + for (i = 0; i < c->key.nr_userclip; i++) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); + brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + + /* i965 clipping workaround: + * 1) Test for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. + */ + if (!c->key.know_w_is_one) { + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + brw_set_access_mode(p, BRW_ALIGN_1); /* why? */ + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); + brw_set_access_mode(p, BRW_ALIGN_16); + + release_tmp(c, header1); + } + else { + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + } + + + /* Emit the (interleaved) headers for the two vertices - an 8-reg + * of zeros followed by two sets of NDC coordinates: + */ + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 0, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + c->nr_outputs + 3, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + +} + +static void +post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +{ + struct tgsi_parse_context parse; + const struct tgsi_token *tokens = c->vp->program.tokens; + tgsi_parse_init(&parse, tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { +#if 0 + struct brw_instruction *brw_inst1, *brw_inst2; + const struct tgsi_full_instruction *inst1, *inst2; + int offset; + inst1 = &parse.FullToken.FullInstruction; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_BRA: + target_insn = inst1->BranchTarget; + inst2 = &c->vp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + case TGSI_OPCODE_END: + offset = end_inst - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } +#endif + } + } + tgsi_parse_free(&parse); +} + +static void process_declaration(const struct tgsi_full_declaration *decl, + struct brw_prog_info *info) +{ + int first = decl->u.DeclarationRange.First; + int last = decl->u.DeclarationRange.Last; + + assert (decl->Declaration.Declare != TGSI_DECLARE_MASK); + + switch(decl->Declaration.File) { + case TGSI_FILE_CONSTANT: + info->num_consts += last - first + 1; + break; + case TGSI_FILE_INPUT: { + } + break; + case TGSI_FILE_OUTPUT: { + assert(last == first); /* for now */ + if (decl->Declaration.Semantic) { + switch (decl->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: { + info->pos_idx = first; + } + break; + case TGSI_SEMANTIC_COLOR: + break; + case TGSI_SEMANTIC_BCOLOR: + break; + case TGSI_SEMANTIC_FOG: + break; + case TGSI_SEMANTIC_PSIZE: { + info->writes_psize = TRUE; + info->psize_idx = first; + } + break; + case TGSI_SEMANTIC_GENERIC: + break; + } + } + } + break; + case TGSI_FILE_TEMPORARY: { + info->num_temps += (last - first) + 1; + } + break; + case TGSI_FILE_SAMPLER: { + } + break; + case TGSI_FILE_ADDRESS: { + info->num_addrs += (last - first) + 1; + } + break; + case TGSI_FILE_IMMEDIATE: { + } + break; + case TGSI_FILE_NULL: { + } + break; + } +} + +static void process_instruction(struct brw_vs_compile *c, + struct tgsi_full_instruction *inst, + struct brw_prog_info *info) +{ + struct brw_reg args[3], dst; + struct brw_compile *p = &c->func; + struct brw_indirect stack_index = brw_indirect(0, 0); + unsigned i; + unsigned index; + unsigned file; + /*FIXME: might not be the only one*/ + const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister; + /* + struct brw_instruction *if_inst[MAX_IFSN]; + unsigned insn, if_insn = 0; + */ + + for (i = 0; i < 3; i++) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + index = src->SrcRegister.Index; + file = src->SrcRegister.File; + if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) + args[i] = c->output_regs[index].reg; + else + args[i] = get_arg(c, &src->SrcRegister); + } + + /* Get dest regs. Note that it is possible for a reg to be both + * dst and arg, given the static allocation of registers. So + * care needs to be taken emitting multi-operation instructions. + */ + index = dst_reg->Index; + file = dst_reg->File; + if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) + dst = c->output_regs[index].reg; + else + dst = get_dst(c, dst_reg); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + brw_MOV(p, dst, brw_abs(args[0])); + break; + case TGSI_OPCODE_ADD: + brw_ADD(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DP3: + brw_DP3(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DP4: + brw_DP4(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DPH: + brw_DPH(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DST: + unalias2(c, dst, args[0], args[1], emit_dst_noalias); + break; + case TGSI_OPCODE_EXP: + unalias1(c, dst, args[0], emit_exp_noalias); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_ARL: + emit_arl(c, dst, args[0]); + break; + case TGSI_OPCODE_FLR: + brw_RNDD(p, dst, args[0]); + break; + case TGSI_OPCODE_FRC: + brw_FRC(p, dst, args[0]); + break; + case TGSI_OPCODE_LOG: + unalias1(c, dst, args[0], emit_log_noalias); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_LIT: + unalias1(c, dst, args[0], emit_lit_noalias); + break; + case TGSI_OPCODE_MAD: + brw_MOV(p, brw_acc_reg(), args[2]); + brw_MAC(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MAX: + emit_max(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MIN: + emit_min(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MOV: +#if 0 + case TGSI_OPCODE_SWZ: + /* The args[0] value can't be used here as it won't have + * correctly encoded the full swizzle: + */ + emit_swz(c, dst, inst->SrcReg[0] ); +#endif + brw_MOV(p, dst, args[0]); + break; + case TGSI_OPCODE_MUL: + brw_MUL(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_POW: + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + + case TGSI_OPCODE_SEQ: + emit_seq(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SNE: + emit_sne(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGE: + emit_sge(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGT: + emit_sgt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLT: + emit_slt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLE: + emit_sle(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SUB: + brw_ADD(p, dst, args[0], negate(args[1])); + break; + case TGSI_OPCODE_XPD: + emit_xpd(p, dst, args[0], args[1]); + break; +#if 0 + case TGSI_OPCODE_IF: + assert(if_insn < MAX_IFSN); + if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_ELSE: + if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(if_insn > 0); + brw_ENDIF(p, if_inst[--if_insn]); + break; + case TGSI_OPCODE_BRA: + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_set_predicate_control_flag_value(p, 0xff); + break; + case TGSI_OPCODE_CAL: + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + inst->Data = &p->store[p->nr_insn]; + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; +#endif + case TGSI_OPCODE_RET: +#if 0 + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); +#else + /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/ +#endif + break; + case TGSI_OPCODE_END: + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + break; + default: + debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode); + break; + } + + if (dst_reg->File == TGSI_FILE_OUTPUT + && dst_reg->Index != info->pos_idx + && c->output_regs[dst_reg->Index].used_in_src) + brw_MOV(p, get_dst(c, dst_reg), dst); + + release_tmps(c); +} + +/* Emit the fragment program instructions here. + */ +void brw_vs_emit(struct brw_vs_compile *c) +{ +#define MAX_IFSN 32 + struct brw_compile *p = &c->func; + struct brw_instruction *end_inst; + struct tgsi_parse_context parse; + struct brw_indirect stack_index = brw_indirect(0, 0); + const struct tgsi_token *tokens = c->vp->program.tokens; + struct brw_prog_info prog_info; + unsigned allocated_registers = 0; + memset(&prog_info, 0, sizeof(struct brw_prog_info)); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_access_mode(p, BRW_ALIGN_16); + + tgsi_parse_init(&parse, tokens); + /* Message registers can't be read, so copy the output into GRF register + if they are used in source registers */ + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + unsigned i; + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: { + const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; + for (i = 0; i < 3; ++i) { + const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister; + unsigned index = src->Index; + unsigned file = src->File; + if (file == TGSI_FILE_OUTPUT) + c->output_regs[index].used_in_src = TRUE; + } + } + break; + default: + /* nothing */ + break; + } + } + tgsi_parse_free(&parse); + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: { + struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + process_declaration(decl, &prog_info); + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: { + struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate; + /*assert(imm->Immediate.Size == 4);*/ + c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float; + c->prog_data.num_imm++; + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: { + struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; + if (!allocated_registers) { + /* first instruction (declerations finished). + * now that we know what vars are being used allocate + * registers for them.*/ + c->prog_data.num_consts = prog_info.num_consts; + c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm; + brw_vs_alloc_regs(c, &prog_info); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + brw_set_access_mode(p, BRW_ALIGN_16); + allocated_registers = 1; + } + process_instruction(c, inst, &prog_info); + } + break; + } + } + + end_inst = &p->store[p->nr_insn]; + emit_vertex_write(c, &prog_info); + post_vs_emit(c, end_inst); + tgsi_parse_free(&parse); + +} diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c new file mode 100644 index 00000000000..c73469929ce --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs_state.c @@ -0,0 +1,102 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "pipe/p_util.h" + +static void upload_vs_unit( struct brw_context *brw ) +{ + struct brw_vs_unit_state vs; + + memset(&vs, 0, sizeof(vs)); + + /* CACHE_NEW_VS_PROG */ + vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; + vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1; + vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; + vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; + vs.thread3.dispatch_grf_start_reg = 1; + + + /* BRW_NEW_URB_FENCE */ + vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; + vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + vs.thread4.max_threads = MIN2( + MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), + 15); + + + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + vs.thread4.max_threads = 0; + + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ + if (0 /*brw->attribs.Clip->ClipPlanesEnabled*/) { + /* Note that we read in the userclip planes as well, hence + * clip_start: + */ + vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + } + else { + vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; + } + + vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + vs.thread3.urb_entry_read_offset = 0; + + /* No samplers for ARB_vp programs: + */ + vs.vs5.sampler_count = 0; + + if (BRW_DEBUG & DEBUG_STATS) + vs.thread4.stats_enable = 1; + + /* Vertex program always enabled: + */ + vs.vs6.vs_enable = 1; + + brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); +} + + +const struct brw_tracked_state brw_vs_unit = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_vs_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h new file mode 100644 index 00000000000..3523a58614f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_winsys.h @@ -0,0 +1,205 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * This is the interface that i965simple requires any window system + * hosting it to implement. This is the only include file in i965simple + * which is public. + * + */ + +#ifndef BRW_WINSYS_H +#define BRW_WINSYS_H + + +#include "pipe/p_defines.h" + + +/* Pipe drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. + */ + +struct pipe_buffer; +struct pipe_fence_handle; +struct pipe_winsys; + +/* The pipe driver currently understands the following chipsets: + */ +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + + +/* These are the names of all the state caches managed by the driver. + * + * When data is uploaded to a buffer with buffer_subdata, we use the + * special version of that function below so that information about + * what type of data this is can be passed to the winsys backend. + * That in turn allows the correct flags to be set in the aub file + * dump to allow human-readable file dumps later on. + */ + +enum brw_cache_id { + BRW_CC_VP, + BRW_CC_UNIT, + BRW_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER, + BRW_WM_UNIT, + BRW_SF_PROG, + BRW_SF_VP, + BRW_SF_UNIT, + BRW_VS_UNIT, + BRW_VS_PROG, + BRW_GS_UNIT, + BRW_GS_PROG, + BRW_CLIP_VP, + BRW_CLIP_UNIT, + BRW_CLIP_PROG, + BRW_SS_SURFACE, + BRW_SS_SURF_BIND, + + BRW_MAX_CACHE +}; + +#define BRW_CONSTANT_BUFFER BRW_MAX_CACHE + +/** + * Additional winsys interface for i965simple. + * + * It is an over-simple batchbuffer mechanism. Will want to improve the + * performance of this, perhaps based on the cmdstream stuff. It + * would be pretty impossible to implement swz on top of this + * interface. + * + * Will also need additions/changes to implement static/dynamic + * indirect state. + */ +struct brw_winsys { + + /** + * Reserve space on batch buffer. + * + * Returns a null pointer if there is insufficient space in the batch buffer + * to hold the requested number of dwords and relocations. + * + * The number of dwords should also include the number of relocations. + */ + unsigned *(*batch_start)(struct brw_winsys *sws, + unsigned dwords, + unsigned relocs); + + void (*batch_dword)(struct brw_winsys *sws, + unsigned dword); + + /** + * Emit a relocation to a buffer. + * + * Used not only when the buffer addresses are not pinned, but also to + * ensure refered buffers will not be destroyed until the current batch + * buffer execution is finished. + * + * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and + * I915_BUFFER_ACCESS_READ macros. + */ + void (*batch_reloc)(struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta); + + + /* Not used yet, but really want this: + */ + void (*batch_end)( struct brw_winsys *sws ); + + /** + * Flush the batch buffer. + * + * Fence argument must point to NULL or to a previous fence, and the caller + * must call fence_reference when done with the fence. + */ + void (*batch_flush)(struct brw_winsys *sws, + struct pipe_fence_handle **fence); + + + /* A version of buffer_subdata that includes information for the + * simulator: + */ + void (*buffer_subdata_typed)(struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned data_type); + + + /* A cheat so we don't have to think about relocations in a couple + * of places yet: + */ + unsigned (*get_buffer_offset)( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned flags ); + +}; + +#define BRW_BUFFER_ACCESS_WRITE 0x1 +#define BRW_BUFFER_ACCESS_READ 0x2 + +#define BRW_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) + + +struct pipe_context *brw_create(struct pipe_winsys *, + struct brw_winsys *, + unsigned pci_id); + +static inline boolean brw_batchbuffer_data(struct brw_winsys *winsys, + const void *data, + unsigned bytes) +{ + static const unsigned incr = sizeof(unsigned); + uint i; + const unsigned *udata = (const unsigned*)(data); + unsigned size = bytes/incr; + + winsys->batch_start(winsys, size, 0); + for (i = 0; i < size; ++i) { + winsys->batch_dword(winsys, udata[i]); + } + winsys->batch_end(winsys); + + return (i == size); +} +#endif diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c new file mode 100644 index 00000000000..539b1707444 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -0,0 +1,210 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "brw_eu.h" +#include "brw_state.h" +#include "pipe/p_util.h" + + + +static void do_wm_prog( struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile); + const unsigned *program; + unsigned program_size; + + c->key = *key; + c->fp = fp; + + c->delta_xy[0] = brw_null_reg(); + c->delta_xy[1] = brw_null_reg(); + c->pixel_xy[0] = brw_null_reg(); + c->pixel_xy[1] = brw_null_reg(); + c->pixel_w = brw_null_reg(); + + + debug_printf("XXXXXXXX FP\n"); + + brw_wm_glsl_emit(c); + + /* get the program + */ + program = brw_get_program(&c->func, &program_size); + + /* + */ + brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], + &c->key, + sizeof(c->key), + program, + program_size, + &c->prog_data, + &brw->wm.prog_data ); + + FREE(c); +} + + + +static void brw_wm_populate_key( struct brw_context *brw, + struct brw_wm_prog_key *key ) +{ + /* BRW_NEW_FRAGMENT_PROGRAM */ + struct brw_fragment_program *fp = + (struct brw_fragment_program *)brw->attribs.FragmentProgram; + unsigned lookup = 0; + unsigned line_aa; + + memset(key, 0, sizeof(*key)); + + /* Build the index for table lookup + */ + /* BRW_NEW_DEPTH_STENCIL */ + if (fp->UsesKill || + brw->attribs.DepthStencil->alpha.enabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->ComputesDepth) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + if (brw->attribs.DepthStencil->depth.enabled) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (brw->attribs.DepthStencil->depth.enabled && + brw->attribs.DepthStencil->depth.writemask) /* ?? */ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + if (brw->attribs.DepthStencil->stencil[0].enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (brw->attribs.DepthStencil->stencil[0].write_mask || + brw->attribs.DepthStencil->stencil[1].write_mask) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + + /* XXX: when should this be disabled? + */ + if (1) + lookup |= IZ_EARLY_DEPTH_TEST_BIT; + + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (brw->attribs.Raster->line_smooth) { + if (brw->reduced_primitive == PIPE_PRIM_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) { + if (brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_SOMETIMES; + + if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE || + (brw->attribs.Raster->cull_mode == PIPE_WINDING_CW)) + line_aa = AA_ALWAYS; + } + else if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_SOMETIMES; + + if (brw->attribs.Raster->cull_mode == PIPE_WINDING_CCW) + line_aa = AA_ALWAYS; + } + } + } + + brw_wm_lookup_iz(line_aa, + lookup, + key); + + +#if 0 + /* BRW_NEW_SAMPLER + * + * Not doing any of this at the moment: + */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct pipe_sampler_state *unit = brw->attribs.Samplers[i]; + + if (unit) { + + if (unit->compare && + unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + key->shadowtex_mask |= 1<<i; + } + if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) + key->yuvtex_mask |= 1<<i; + } + } +#endif + + + /* Extra info: + */ + key->program_string_id = fp->id; + +} + + +static void brw_upload_wm_prog( struct brw_context *brw ) +{ + struct brw_wm_prog_key key; + struct brw_fragment_program *fp = (struct brw_fragment_program *) + brw->attribs.FragmentProgram; + + brw_wm_populate_key(brw, &key); + + /* Make an early check for the key. + */ + if (brw_search_cache(&brw->cache[BRW_WM_PROG], + &key, sizeof(key), + &brw->wm.prog_data, + &brw->wm.prog_gs_offset)) + return; + + do_wm_prog(brw, fp, &key); +} + + +const struct brw_tracked_state brw_wm_prog = { + .dirty = { + .brw = (BRW_NEW_FS | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = 0 + }, + .update = brw_upload_wm_prog +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm.h b/src/gallium/drivers/i965simple/brw_wm.h new file mode 100644 index 00000000000..a1ac0f504a6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm.h @@ -0,0 +1,142 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_WM_H +#define BRW_WM_H + + +#include "brw_context.h" +#include "brw_eu.h" + +/* A big lookup table is used to figure out which and how many + * additional regs will inserted before the main payload in the WM + * program execution. These mainly relate to depth and stencil + * processing and the early-depth-test optimization. + */ +#define IZ_PS_KILL_ALPHATEST_BIT 0x1 +#define IZ_PS_COMPUTES_DEPTH_BIT 0x2 +#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4 +#define IZ_DEPTH_TEST_ENABLE_BIT 0x8 +#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 +#define IZ_STENCIL_TEST_ENABLE_BIT 0x20 +#define IZ_EARLY_DEPTH_TEST_BIT 0x40 +#define IZ_BIT_MAX 0x80 + +#define AA_NEVER 0 +#define AA_SOMETIMES 1 +#define AA_ALWAYS 2 + +struct brw_wm_prog_key { + unsigned source_depth_reg:3; + unsigned aa_dest_stencil_reg:3; + unsigned dest_depth_reg:3; + unsigned nr_depth_regs:3; + unsigned shadowtex_mask:8; + unsigned computes_depth:1; /* could be derived from program string */ + unsigned source_depth_to_render_target:1; + unsigned runtime_check_aads_emit:1; + + unsigned yuvtex_mask:8; + + unsigned program_string_id; +}; + + + + + +#define PROGRAM_INTERNAL_PARAM +#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ +#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_ATTRIB_MAX + 3) +#define BRW_WM_MAX_GRF 128 /* hardware limit */ +#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) +#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) +#define BRW_WM_MAX_PARAM 256 +#define BRW_WM_MAX_CONST 256 +#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS + +#define PAYLOAD_DEPTH (PIPE_ATTRIB_MAX) + +#define MAX_IFSN 32 +#define MAX_LOOP_DEPTH 32 + +struct brw_wm_compile { + struct brw_compile func; + struct brw_wm_prog_key key; + struct brw_wm_prog_data prog_data; /* result */ + + struct brw_fragment_program *fp; + + unsigned grf_limit; + unsigned max_wm_grf; + + + struct brw_reg pixel_xy[2]; + struct brw_reg delta_xy[2]; + struct brw_reg pixel_w; + + + struct brw_reg wm_regs[8][32][4]; + + struct brw_reg payload_depth[4]; + struct brw_reg payload_coef[16]; + + struct brw_reg emit_mask_reg; + + struct brw_instruction *if_inst[MAX_IFSN]; + int if_insn; + + struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; + int loop_insn; + + struct brw_instruction *inst0; + struct brw_instruction *inst1; + + struct brw_reg stack; + struct brw_indirect stack_index; + + unsigned reg_index; + + unsigned tmp_start; + unsigned tmp_index; +}; + + + +void brw_wm_lookup_iz( unsigned line_aa, + unsigned lookup, + struct brw_wm_prog_key *key ); + +void brw_wm_glsl_emit(struct brw_wm_compile *c); +void brw_wm_emit_decls(struct brw_wm_compile *c); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c new file mode 100644 index 00000000000..b45a333a2e9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -0,0 +1,383 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 0; +} + + + +static int is_null( struct brw_reg reg ) +{ + return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.nr == BRW_ARF_NULL); +} + +static void emit_pixel_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_xy[0])) { + + struct brw_compile *p = &c->func; + struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + + c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + brw_ADD(p, + c->pixel_xy[0], + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + + brw_ADD(p, + c->pixel_xy[1], + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } +} + + + + + + +static void emit_delta_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->delta_xy[0])) { + struct brw_compile *p = &c->func; + struct brw_reg r1 = brw_vec1_grf(1, 0); + + emit_pixel_xy(c); + + c->delta_xy[0] = alloc_tmp(c); + c->delta_xy[1] = alloc_tmp(c); + + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + brw_ADD(p, + c->delta_xy[0], + retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + + brw_ADD(p, + c->delta_xy[1], + retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + } +} + + + +#if 0 +static void emit_pixel_w( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_w)) { + struct brw_compile *p = &c->func; + + struct brw_reg interp_wpos = c->coef_wpos; + + c->pixel_w = alloc_tmp(c); + + emit_delta_xy(c); + + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4); + brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]); + + /* Calc w */ + brw_math_16( p, + c->pixel_w, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} +#endif + + +static void emit_cinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_MOV(p, dst, suboffset(interp[i],3)); + } + } +} + +static void emit_linterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + emit_delta_xy(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + } + } +} + +#if 0 +static void emit_pinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + get_delta_xy(c); + get_pixel_w(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i); + brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + brw_MUL(p, dst, dst, c->pixel_w); + } + } +} +#endif + + + +#if 0 +static void emit_wpos( ) +{ + struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); + struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + struct tgsi_full_src_register deltas = get_delta_xy(c); + struct tgsi_full_src_register arg2; + unsigned opcode; + + opcode = WM_LINTERP; + arg2 = src_undef(); + + /* Have to treat wpos.xy specially: + */ + emit_op(c, + WM_WPOSXY, + dst_mask(dst, WRITEMASK_XY), + 0, 0, 0, + get_pixel_xy(c), + src_undef(), + src_undef()); + + dst = dst_mask(dst, WRITEMASK_ZW); + + /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + */ + emit_op(c, + WM_LINTERP, + dst, + 0, 0, 0, + interp, + deltas, + arg2); +} +#endif + + + + +/* Perform register allocation: + * + * -- r0??? + * -- passthrough depth regs (and stencil/aa??) + * -- curbe ?? + * -- inputs (coefficients) + * + * Use a totally static register allocation. This will perform poorly + * but is an easy way to get started (again). + */ +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + int nr_curbe_regs = 0; + + /* R0, then some depth related regs: + */ + for (i = 0; i < c->key.nr_depth_regs; i++) { + c->payload_depth[i] = brw_vec8_grf(i*2, 0); + c->reg_index += 2; + } + + + /* Then a copy of our part of the CURBE entry: + */ + { + int nr_constants = c->fp->info.nr_regs[TGSI_FILE_CONSTANT]; + int index = 0; + + c->prog_data.max_const = 4*nr_constants; + for (i = 0; i < nr_constants; i++) { + for (j = 0; j < 4; j++, index++) + c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8, + index%8); + } + + nr_curbe_regs = 2*((4*nr_constants+15)/16); + c->reg_index += nr_curbe_regs; + } + + /* Adjust for parameter coefficients for position, which are + * currently always provided. + */ +// c->position_coef[i] = brw_vec8_grf(c->reg_index, 0); + c->reg_index += 2; + + /* Next we receive the plane coefficients for parameter + * interpolation: + */ + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) { + c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); + c->reg_index += 2; + } + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = (c->fp->program.num_inputs + 1) * 2; + c->prog_data.curb_read_length = nr_curbe_regs; + + /* That's the end of the payload, now we can start allocating registers. + */ + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index++; + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index += 2; + + /* Now allocate room for the interpolated inputs and staging + * registers for the outputs: + */ + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_OUTPUT]; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + /* Beyond this we should only need registers for internal temporaries: + */ + c->tmp_start = c->reg_index; +} + + + + + +/* Need to interpolate fragment program inputs in as a preamble to the + * shader. A more sophisticated compiler would do this on demand, but + * we'll do it up front: + */ +void brw_wm_emit_decls(struct brw_wm_compile *c) +{ + struct tgsi_parse_context parse; + int done = 0; + + prealloc_reg(c); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned first = decl->u.DeclarationRange.First; + unsigned last = decl->u.DeclarationRange.Last; + unsigned mask = decl->Declaration.UsageMask; /* ? */ + unsigned i; + + if (decl->Declaration.File != TGSI_FILE_INPUT) + break; + + assert(decl->Declaration.Interpolate); + + for( i = first; i <= last; i++ ) { + switch (decl->Interpolation.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + emit_cinterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_linterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + //emit_pinterp(c, i, mask); + emit_linterp(c, i, mask); + break; + } + } + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + + release_tmps(c); +} diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c new file mode 100644 index 00000000000..d95645d1085 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -0,0 +1,1079 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" + + + +static int get_scalar_dst_index(struct tgsi_full_instruction *inst) +{ + struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister; + int i; + for (i = 0; i < 4; i++) + if (dst.WriteMask & (1<<i)) + break; + return i; +} + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 0; +} + + +static struct brw_reg +get_reg(struct brw_wm_compile *c, int file, int index, int component ) +{ + switch (file) { + case TGSI_FILE_NULL: + return brw_null_reg(); + + case TGSI_FILE_SAMPLER: + /* Should never get here: + */ + assert (0); + return brw_null_reg(); + + case TGSI_FILE_IMMEDIATE: + /* These need a different path: + */ + assert(0); + return brw_null_reg(); + + + case TGSI_FILE_CONSTANT: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + return c->wm_regs[file][index][component]; + + default: + assert(0); + return brw_null_reg(); + } +} + + +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + int component) +{ + return get_reg(c, + inst->FullDstRegisters[0].DstRegister.File, + inst->FullDstRegisters[0].DstRegister.Index, + component); +} + +static int get_swz( struct tgsi_src_register src, int index ) +{ + switch (index & 3) { + case 0: return src.SwizzleX; + case 1: return src.SwizzleY; + case 2: return src.SwizzleZ; + case 3: return src.SwizzleW; + default: return 0; + } +} + +static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index ) +{ + switch (index & 3) { + case 0: return src.ExtSwizzleX; + case 1: return src.ExtSwizzleY; + case 2: return src.ExtSwizzleZ; + case 3: return src.ExtSwizzleW; + default: return 0; + } +} + +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + struct tgsi_full_src_register *src, + int index) +{ + struct brw_reg reg; + int component = index; + int neg = 0; + int abs = 0; + + if (src->SrcRegister.Negate) + neg = 1; + + component = get_swz(src->SrcRegister, component); + + /* Yes, there are multiple negates: + */ + switch (component & 3) { + case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break; + case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break; + case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break; + case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break; + } + + /* And multiple swizzles, fun isn't it: + */ + component = get_ext_swz(src->SrcRegisterExtSwz, component); + + /* Can't handle this, don't know if we need to: + */ + assert(src->SrcRegisterExtSwz.ExtDivide == TGSI_EXTSWIZZLE_ONE); + + /* Not handling indirect lookups yet: + */ + assert(src->SrcRegister.Indirect == 0); + + /* Don't know what dimension means: + */ + assert(src->SrcRegister.Dimension == 0); + + /* Will never handle any of this stuff: + */ + assert(src->SrcRegisterExtMod.Complement == 0); + assert(src->SrcRegisterExtMod.Bias == 0); + assert(src->SrcRegisterExtMod.Scale2X == 0); + + if (src->SrcRegisterExtMod.Absolute) + abs = 1; + + /* Another negate! This is a post-absolute negate, which we + * can't do. Need to clean the crap out of tgsi somehow. + */ + assert(src->SrcRegisterExtMod.Negate == 0); + + switch( component ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + reg = get_reg(c, + src->SrcRegister.File, + src->SrcRegister.Index, + component ); + + if (neg) + reg = negate(reg); + + if (abs) + reg = brw_abs(reg); + + break; + + /* XXX: this won't really work in the general case, but we know + * that the extended swizzle is only allowed in the SWZ + * instruction (right??), in which case using an immediate + * directly will work. + */ + case TGSI_EXTSWIZZLE_ZERO: + reg = brw_imm_f(0); + break; + + case TGSI_EXTSWIZZLE_ONE: + if (neg && !abs) + reg = brw_imm_f(-1.0); + else + reg = brw_imm_f(1.0); + break; + + default: + assert(0); + break; + } + + + return reg; +} + +static void emit_abs( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + int i; + struct brw_compile *p = &c->func; + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, &inst->FullSrcRegisters[0], i); + brw_MOV(p, dst, brw_abs(src)); /* NOTE */ + } + } + brw_set_saturate(p, 0); +} + + +static void emit_xpd(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + for (i = 0; i < 4; i++) { + unsigned i2 = (i+2)%3; + unsigned i1 = (i+1)%3; + if (mask & (1<<i)) { + struct brw_reg src0, src1, dst; + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, &inst->FullSrcRegisters[0], i2)); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1); + brw_MUL(p, brw_null_reg(), src0, src1); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_dp3(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); +} + +static void emit_dp4(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_dph(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_ADD(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_math1(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, unsigned func) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MOV(p, brw_message_reg(2), src0); + brw_math(p, + dst, + func, + ((inst->Instruction.Saturate != TGSI_SAT_NONE) + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + + +static void emit_alu2(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_alu2(p, opcode, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + + +static void emit_alu1(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + brw_alu1(p, opcode, dst, src0); + } + } + if (inst->Instruction.Saturate != TGSI_SAT_NONE) + brw_set_saturate(p, 0); +} + + +static void emit_max(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_min(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_pow(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0); + + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); + + brw_math(p, + dst, + BRW_MATH_FUNCTION_POW, + (inst->Instruction.Saturate != TGSI_SAT_NONE + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_lrp(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + + if (src1.nr == dst.nr) { + tmp1 = alloc_tmp(c); + brw_MOV(p, tmp1, src1); + } else + tmp1 = src1; + + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + if (src2.nr == dst.nr) { + tmp2 = alloc_tmp(c); + brw_MOV(p, tmp2, src2); + } else + tmp2 = src2; + + brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, tmp2); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c); + } +} + +static void emit_kil(struct brw_wm_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); +} + +static void emit_mad(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + brw_MUL(p, dst, src0, src1); + + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } +} + +static void emit_sop(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, unsigned cond) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_CMP(p, brw_null_reg(), cond, src0, src1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_pop_insn_state(p); +} + + +static void emit_ddx(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + nr = src0.nr; + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, interp[i]); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +static void emit_ddy(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + nr = src0.nr; + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, suboffset(interp[i], 1)); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +/* TODO + BIAS on SIMD8 not workind yet... +*/ +static void emit_txb(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned i; + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + default: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + } +#else + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); +#endif + + brw_MOV(p, brw_message_reg(5), src[3]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + 4, + 4, + 0); +#endif +} + +static void emit_tex(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned msg_len; + unsigned i, nr; + unsigned emit; + boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0; + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + default: + emit = WRITEMASK_XYZ; + nr = 3; + break; + } +#else + emit = WRITEMASK_XY; + nr = 2; +#endif + + msg_len = 1; + + for (i = 0; i < nr; i++) { + static const unsigned swz[4] = {0,1,2,2}; + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); + else + brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); + msg_len += 1; + } + + if (shadow) { + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(6), src[2]); + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, + 4, + shadow ? 6 : 4, + 0); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +#endif +} + + + + + + + + +static void emit_fb_write(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + int nr = 2; + int channel; + int base_reg = 0; + + // src0 = output color + // src1 = payload_depth[0] + // src2 = output depth + // dst = ??? + + + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + { + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel]; + + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); + } + + + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + 0, /* render surface always 0 */ + nr, + 0, + 1); + +} + + +static void brw_wm_emit_instruction( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst ) +{ + struct brw_compile *p = &c->func; + +#if 0 + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#endif + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + emit_abs(c, inst); + break; + case TGSI_OPCODE_ADD: + emit_alu2(c, inst, BRW_OPCODE_ADD); + break; + case TGSI_OPCODE_SUB: + assert(0); +// emit_alu2(c, inst, BRW_OPCODE_SUB); + break; + case TGSI_OPCODE_FRC: + emit_alu1(c, inst, BRW_OPCODE_FRC); + break; + case TGSI_OPCODE_FLR: + assert(0); +// emit_alu1(c, inst, BRW_OPCODE_FLR); + break; + case TGSI_OPCODE_LRP: + emit_lrp(c, inst); + break; + case TGSI_OPCODE_INT: + emit_alu1(c, inst, BRW_OPCODE_RNDD); + break; + case TGSI_OPCODE_MOV: + emit_alu1(c, inst, BRW_OPCODE_MOV); + break; + case TGSI_OPCODE_DP3: + emit_dp3(c, inst); + break; + case TGSI_OPCODE_DP4: + emit_dp4(c, inst); + break; + case TGSI_OPCODE_XPD: + emit_xpd(c, inst); + break; + case TGSI_OPCODE_DPH: + emit_dph(c, inst); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); + break; + case TGSI_OPCODE_SIN: + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); + break; + case TGSI_OPCODE_COS: + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); + break; + case TGSI_OPCODE_MAX: + emit_max(c, inst); + break; + case TGSI_OPCODE_MIN: + emit_min(c, inst); + break; + case TGSI_OPCODE_DDX: + emit_ddx(c, inst); + break; + case TGSI_OPCODE_DDY: + emit_ddy(c, inst); + break; + case TGSI_OPCODE_SLT: + emit_sop(c, inst, BRW_CONDITIONAL_L); + break; + case TGSI_OPCODE_SLE: + emit_sop(c, inst, BRW_CONDITIONAL_LE); + break; + case TGSI_OPCODE_SGT: + emit_sop(c, inst, BRW_CONDITIONAL_G); + break; + case TGSI_OPCODE_SGE: + emit_sop(c, inst, BRW_CONDITIONAL_GE); + break; + case TGSI_OPCODE_SEQ: + emit_sop(c, inst, BRW_CONDITIONAL_EQ); + break; + case TGSI_OPCODE_SNE: + emit_sop(c, inst, BRW_CONDITIONAL_NEQ); + break; + case TGSI_OPCODE_MUL: + emit_alu2(c, inst, BRW_OPCODE_MUL); + break; + case TGSI_OPCODE_POW: + emit_pow(c, inst); + break; + case TGSI_OPCODE_MAD: + emit_mad(c, inst); + break; + case TGSI_OPCODE_TEX: + emit_tex(c, inst); + break; + case TGSI_OPCODE_TXB: + emit_txb(c, inst); + break; + case TGSI_OPCODE_TEXKILL: + emit_kil(c); + break; + case TGSI_OPCODE_IF: + assert(c->if_insn < MAX_IFSN); + c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_ELSE: + c->if_inst[c->if_insn-1] = brw_ELSE(p, c->if_inst[c->if_insn-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(c->if_insn > 0); + brw_ENDIF(p, c->if_inst[--c->if_insn]); + break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + break; + case TGSI_OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(4)); +// orig_inst = inst->Data; +// orig_inst->Data = &p->store[p->nr_insn]; + assert(0); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case TGSI_OPCODE_RET: +#if 0 + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); +#else + emit_fb_write(c, inst); +#endif + + break; + case TGSI_OPCODE_LOOP: + c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_ENDLOOP: + c->loop_insn--; + c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); + /* patch all the BREAK instructions from + last BEGINLOOP */ + while (c->inst0 > c->loop_inst[c->loop_insn]) { + c->inst0--; + if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1; + c->inst0->bits3.if_else.pop_count = 0; + } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0; + c->inst0->bits3.if_else.pop_count = 0; + } + } + break; + case TGSI_OPCODE_END: + emit_fb_write(c, inst); + break; + + default: + debug_printf("unsupported IR in fragment shader %d\n", + inst->Instruction.Opcode); + } +#if 0 + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif +} + + + + + + +void brw_wm_glsl_emit(struct brw_wm_compile *c) +{ + struct tgsi_parse_context parse; + struct brw_compile *p = &c->func; + + brw_init_compile(&c->func); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + c->reg_index = 0; + c->if_insn = 0; + c->loop_insn = 0; + c->stack_index = brw_indirect(0,0); + + /* Do static register allocation and parameter interpolation: + */ + brw_wm_emit_decls( c ); + + /* Emit the actual program. All done with very direct translation, + * hopefully we can improve on this shortly... + */ + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* already done */ + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* not handled yet */ + assert(0); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + } + + tgsi_parse_free (&parse); + + /* Fix up call targets: + */ +#if 0 + { + unsigned nr_insns = c->fp->program.Base.NumInstructions; + unsigned insn, target_insn; + struct tgsi_full_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->fp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case TGSI_OPCODE_CAL: + target_insn = inst1->BranchTarget; + inst2 = &c->fp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } + } +#endif + + c->prog_data.total_grf = c->reg_index; + c->prog_data.total_scratch = 0; +} diff --git a/src/gallium/drivers/i965simple/brw_wm_iz.c b/src/gallium/drivers/i965simple/brw_wm_iz.c new file mode 100644 index 00000000000..6c5f25bf39e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_iz.c @@ -0,0 +1,214 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_wm.h" + + +#undef P /* prompted depth */ +#undef C /* computed */ +#undef N /* non-promoted? */ + +#define P 0 +#define C 1 +#define N 2 + +const struct { + unsigned mode:2; + unsigned sd_present:1; + unsigned sd_to_rt:1; + unsigned dd_present:1; + unsigned ds_present:1; +} wm_iz_table[IZ_BIT_MAX] = +{ + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 0, 0, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 0, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 } +}; + +void brw_wm_lookup_iz( unsigned line_aa, + unsigned lookup, + struct brw_wm_prog_key *key ) +{ + unsigned reg = 2; + + assert (lookup < IZ_BIT_MAX); + + if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) + key->computes_depth = 1; + + if (wm_iz_table[lookup].sd_present) { + key->source_depth_reg = reg; + reg += 2; + } + + if (wm_iz_table[lookup].sd_to_rt) + key->source_depth_to_render_target = 1; + + if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { + key->aa_dest_stencil_reg = reg; + key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && + line_aa == AA_SOMETIMES); + reg++; + } + + if (wm_iz_table[lookup].dd_present) { + key->dest_depth_reg = reg; + reg+=2; + } + + key->nr_depth_regs = (reg+1)/2; +} + diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c new file mode 100644 index 00000000000..de42ffc5b1e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -0,0 +1,273 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "pipe/p_util.h" + + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +/* Samplers aren't strictly wm state from the hardware's perspective, + * but that is the only situation in which we use them in this driver. + */ + +static int intel_translate_shadow_compare_func(unsigned func) +{ + switch(func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_ALWAYS; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LESS; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_NEVER; + } + + debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_NEVER; +} + +/* The brw (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static unsigned translate_wrap_mode( int wrap ) +{ + switch( wrap ) { + case PIPE_TEX_WRAP_REPEAT: + return BRW_TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return BRW_TEXCOORDMODE_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return BRW_TEXCOORDMODE_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return BRW_TEXCOORDMODE_MIRROR; + default: + return BRW_TEXCOORDMODE_WRAP; + } +} + + +static unsigned U_FIXED(float value, unsigned frac_bits) +{ + value *= (1<<frac_bits); + return value < 0 ? 0 : value; +} + +static int S_FIXED(float value, unsigned frac_bits) +{ + return value * (1<<frac_bits); +} + + +static unsigned upload_default_color( struct brw_context *brw, + const float *color ) +{ + struct brw_sampler_default_color sdc; + + COPY_4V(sdc.color, color); + + return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); +} + + +/* + */ +static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_sampler, + unsigned sdc_gs_offset, + struct brw_sampler_state *sampler) +{ + memset(sampler, 0, sizeof(*sampler)); + + switch (pipe_sampler->min_mip_filter) { + case PIPE_TEX_FILTER_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; + } + + switch (pipe_sampler->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + default: + break; + } + /* Set Anisotropy: + */ + if (pipe_sampler->max_anisotropy > 1.0) { + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; + + if (pipe_sampler->max_anisotropy > 2.0) { + sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, + BRW_ANISORATIO_16); + } + } + else { + switch (pipe_sampler->mag_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; + } + } + + sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s); + sampler->ss1.r_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_r); + sampler->ss1.t_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_t); + + /* Fulsim complains if I don't do this. Hardware doesn't mind: + */ +#if 0 + if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } +#endif + + /* Set shadow function: + */ + if (pipe_sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* Shadowing is "enabled" by emitting a particular sampler + * message (sample_c). So need to recompile WM program when + * shadow comparison is enabled on each/any texture unit. + */ + sampler->ss0.shadow_function = intel_translate_shadow_compare_func(pipe_sampler->compare_func); + } + + /* Set LOD bias: + */ + sampler->ss0.lod_bias = S_FIXED(CLAMP(pipe_sampler->lod_bias, -16, 15), 6); + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + /* Set BaseMipLevel, MaxLOD, MinLOD: + * + * XXX: I don't think that using firstLevel, lastLevel works, + * because we always setup the surface state as if firstLevel == + * level zero. Probably have to subtract firstLevel from each of + * these: + */ + sampler->ss0.base_level = U_FIXED(0, 1); + + sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(pipe_sampler->max_lod, 0), 13), 6); + sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(pipe_sampler->min_lod, 0), 13), 6); + + sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; +} + + + +/* All samplers must be uploaded in a single contiguous array, which + * complicates various things. However, this is still too confusing - + * FIXME: simplify all the different new texture state flags. + */ +static void upload_wm_samplers(struct brw_context *brw) +{ + unsigned unit; + unsigned sampler_count = 0; + + /* BRW_NEW_SAMPLER */ + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + /* determine unit enable/disable by looking for a bound texture */ + if (brw->attribs.Texture[unit]) { + const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; + unsigned sdc_gs_offset = upload_default_color(brw, sampler->border_color); + + brw_update_sampler_state(sampler, + sdc_gs_offset, + &brw->wm.sampler[unit]); + + sampler_count = unit + 1; + } + } + + if (brw->wm.sampler_count != sampler_count) { + brw->wm.sampler_count = sampler_count; + brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + } + + brw->wm.sampler_gs_offset = 0; + + if (brw->wm.sampler_count) + brw->wm.sampler_gs_offset = + brw_cache_data_sz(&brw->cache[BRW_SAMPLER], + brw->wm.sampler, + sizeof(struct brw_sampler_state) * brw->wm.sampler_count); +} + +const struct brw_tracked_state brw_wm_samplers = { + .dirty = { + .brw = BRW_NEW_SAMPLER, + .cache = 0 + }, + .update = upload_wm_samplers +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c new file mode 100644 index 00000000000..5ccd4888423 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_state.c @@ -0,0 +1,194 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_wm.h" +#include "pipe/p_util.h" + +/*********************************************************************** + * WM unit - fragment programs and rasterization + */ +static void upload_wm_unit(struct brw_context *brw ) +{ + struct brw_wm_unit_state wm; + unsigned max_threads; + unsigned per_thread; + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + max_threads = 0; + else + max_threads = 31; + + + memset(&wm, 0, sizeof(wm)); + + /* CACHE_NEW_WM_PROG */ + wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1; + wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; + wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; + wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; + + wm.wm5.max_threads = max_threads; + + per_thread = align(brw->wm.prog_data->total_scratch, 1024); + assert(per_thread <= 12 * 1024); + +#if 0 + if (brw->wm.prog_data->total_scratch) { + unsigned total = per_thread * (max_threads + 1); + + /* Scratch space -- just have to make sure there is sufficient + * allocated for the active program and current number of threads. + */ + brw->wm.scratch_buffer_size = total; + if (brw->wm.scratch_buffer && + brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) { + dri_bo_unreference(brw->wm.scratch_buffer); + brw->wm.scratch_buffer = NULL; + } + if (!brw->wm.scratch_buffer) { + brw->wm.scratch_buffer = dri_bo_alloc(intel->intelScreen->bufmgr, + "wm scratch", + brw->wm.scratch_buffer_size, + 4096, DRM_BO_FLAG_MEM_TT); + } + } + /* XXX: Scratch buffers are not implemented correectly. + * + * The scratch offset to be programmed into wm is relative to the general + * state base address. However, using dri_bo_alloc/dri_bo_emit_reloc (or + * the previous bmGenBuffers scheme), we get an offset relative to the + * start of framebuffer. Even before then, it was broken in other ways, + * so just fail for now if we hit that path. + */ + assert(brw->wm.prog_data->total_scratch == 0); +#endif + + /* CACHE_NEW_SURFACE */ + wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; + + /* BRW_NEW_CURBE_OFFSETS */ + wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; + + wm.thread3.urb_entry_read_offset = 0; + wm.thread1.depth_coef_urb_read_offset = 1; + wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + + /* CACHE_NEW_SAMPLER */ + wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; + wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + { + const struct brw_fragment_program *fp = brw->attribs.FragmentProgram; + + if (fp->UsesDepth) + wm.wm5.program_uses_depth = 1; /* as far as we can tell */ + + if (fp->ComputesDepth) + wm.wm5.program_computes_depth = 1; + + /* BRW_NEW_ALPHA_TEST */ + if (fp->UsesKill || + brw->attribs.DepthStencil->alpha.enabled) + wm.wm5.program_uses_killpixel = 1; + + wm.wm5.enable_8_pix = 1; + } + + wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ + wm.wm5.legacy_line_rast = 0; + wm.wm5.legacy_global_depth_bias = 0; + wm.wm5.early_depth_test = 1; /* never need to disable */ + wm.wm5.line_aa_region_width = 0; + wm.wm5.line_endcap_aa_region_width = 1; + + /* BRW_NEW_RASTERIZER */ + if (brw->attribs.Raster->poly_stipple_enable) + wm.wm5.polygon_stipple = 1; + +#if 0 + if (brw->attribs.Polygon->OffsetFill) { + wm.wm5.depth_offset = 1; + /* Something wierd going on with legacy_global_depth_bias, + * offset_constant, scaling and MRD. This value passes glean + * but gives some odd results elsewere (eg. the + * quad-offset-units test). + */ + wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2; + + /* This is the only value that passes glean: + */ + wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; + } +#endif + + if (brw->attribs.Raster->line_stipple_enable) { + wm.wm5.line_stipple = 1; + } + + if (BRW_DEBUG & DEBUG_STATS) + wm.wm4.stats_enable = 1; + + brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); + + if (brw->wm.prog_data->total_scratch) { + /* + dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + (per_thread / 1024) - 1, + brw->wm.state_gs_offset + + ((char *)&wm.thread2 - (char *)&wm), + brw->wm.scratch_buffer); + */ + } else { + wm.thread2.scratch_space_base_pointer = 0; + } +} + +const struct brw_tracked_state brw_wm_unit = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_ALPHA_TEST | + BRW_NEW_FS | + BRW_NEW_CURBE_OFFSETS), + + .cache = (CACHE_NEW_SURFACE | + CACHE_NEW_WM_PROG | + CACHE_NEW_SAMPLER) + }, + .update = upload_wm_unit +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c new file mode 100644 index 00000000000..d16d919bce9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -0,0 +1,304 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +static unsigned translate_tex_target( enum pipe_texture_target target ) +{ + switch (target) { + case PIPE_TEXTURE_1D: + return BRW_SURFACE_1D; + + case PIPE_TEXTURE_2D: + return BRW_SURFACE_2D; + + case PIPE_TEXTURE_3D: + return BRW_SURFACE_3D; + + case PIPE_TEXTURE_CUBE: + return BRW_SURFACE_CUBE; + + default: + assert(0); + return 0; + } +} + +static unsigned translate_tex_format( enum pipe_format pipe_format ) +{ + switch( pipe_format ) { + case PIPE_FORMAT_U_L8: + return BRW_SURFACEFORMAT_L8_UNORM; + + case PIPE_FORMAT_U_I8: + return BRW_SURFACEFORMAT_I8_UNORM; + + case PIPE_FORMAT_U_A8: + return BRW_SURFACEFORMAT_A8_UNORM; + + case PIPE_FORMAT_U_A8_L8: + return BRW_SURFACEFORMAT_L8A8_UNORM; + + case PIPE_FORMAT_R8G8B8_UNORM: + assert(0); /* not supported for sampling */ + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R5G6B5_UNORM: + return BRW_SURFACEFORMAT_B5G6R5_UNORM; + + case PIPE_FORMAT_A1R5G5B5_UNORM: + return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + + case PIPE_FORMAT_A4R4G4B4_UNORM: + return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + + case PIPE_FORMAT_YCBCR_REV: + return BRW_SURFACEFORMAT_YCRCB_NORMAL; + + case PIPE_FORMAT_YCBCR: + return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; +#if 0 + case PIPE_FORMAT_RGB_FXT1: + case PIPE_FORMAT_RGBA_FXT1: + return BRW_SURFACEFORMAT_FXT1; +#endif + + case PIPE_FORMAT_Z16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; +#if 0 + case PIPE_FORMAT_RGB_DXT1: + return BRW_SURFACEFORMAT_DXT1_RGB; + + case PIPE_FORMAT_RGBA_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case PIPE_FORMAT_RGBA_DXT3: + return BRW_SURFACEFORMAT_BC2_UNORM; + + case PIPE_FORMAT_RGBA_DXT5: + return BRW_SURFACEFORMAT_BC3_UNORM; + + case PIPE_FORMAT_SRGBA8: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case PIPE_FORMAT_SRGB_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; +#endif + + default: + assert(0); + return 0; + } +} + +static unsigned brw_buffer_offset(struct brw_context *brw, + struct pipe_buffer *buffer) +{ + return brw->winsys->get_buffer_offset(brw->winsys, + buffer, + 0); +} + +static +void brw_update_texture_surface( struct brw_context *brw, + unsigned unit ) +{ + const struct brw_texture *tObj = brw->attribs.Texture[unit]; + struct brw_surface_state surf; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = translate_tex_target(tObj->base.target); + surf.ss0.surface_format = translate_tex_format(tObj->base.format); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + /* Updated in emit_reloc */ + surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer ); + + surf.ss2.mip_count = tObj->base.last_level; + surf.ss2.width = tObj->base.width[0] - 1; + surf.ss2.height = tObj->base.height[0] - 1; + + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = 0; /* always zero */ + surf.ss3.pitch = tObj->pitch - 1; + surf.ss3.depth = tObj->base.depth[0] - 1; + + surf.ss4.min_lod = 0; + + if (tObj->base.target == PIPE_TEXTURE_CUBE) { + surf.ss0.cube_pos_x = 1; + surf.ss0.cube_pos_y = 1; + surf.ss0.cube_pos_z = 1; + surf.ss0.cube_neg_x = 1; + surf.ss0.cube_neg_y = 1; + surf.ss0.cube_neg_z = 1; + } + + brw->wm.bind.surf_ss_offset[unit + 1] = + brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); +} + + + +#define OFFSET(TYPE, FIELD) ( (unsigned)&(((TYPE *)0)->FIELD) ) + + +static void upload_wm_surfaces(struct brw_context *brw ) +{ + unsigned i; + + { + struct brw_surface_state surf; + + /* BRW_NEW_FRAMEBUFFER + */ + struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ + + memset(&surf, 0, sizeof(surf)); + + if (pipe_surface != NULL) { + if (pipe_surface->cpp == 4) + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + else + surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + + surf.ss0.surface_type = BRW_SURFACE_2D; + + surf.ss1.base_addr = brw_buffer_offset( brw, pipe_surface->buffer ); + + surf.ss2.width = pipe_surface->width - 1; + surf.ss2.height = pipe_surface->height - 1; + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = 0; + surf.ss3.pitch = (pipe_surface->pitch * pipe_surface->cpp) - 1; + } else { + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + surf.ss0.surface_type = BRW_SURFACE_NULL; + } + + /* BRW_NEW_BLEND */ + surf.ss0.color_blend = (!brw->attribs.Blend->logicop_enable && + brw->attribs.Blend->blend_enable); + + + surf.ss0.writedisable_red = !(brw->attribs.Blend->colormask & PIPE_MASK_R); + surf.ss0.writedisable_green = !(brw->attribs.Blend->colormask & PIPE_MASK_G); + surf.ss0.writedisable_blue = !(brw->attribs.Blend->colormask & PIPE_MASK_B); + surf.ss0.writedisable_alpha = !(brw->attribs.Blend->colormask & PIPE_MASK_A); + + + + + brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); + + brw->wm.nr_surfaces = 1; + } + + + /* BRW_NEW_TEXTURE + */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct brw_texture *texUnit = brw->attribs.Texture[i]; + + if (texUnit && + texUnit->base.refcount/*(texUnit->refcount > 0) == really used */) { + + brw_update_texture_surface(brw, i); + + brw->wm.nr_surfaces = i+2; + } + else { + brw->wm.bind.surf_ss_offset[i+1] = 0; + } + } + + brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], + &brw->wm.bind ); +} + + +/* KW: Will find a different way to acheive this, see for example the + * state caches with relocs in the i915 swz driver. + */ +#if 0 +static void emit_reloc_wm_surfaces(struct brw_context *brw) +{ + int unit; + + if (brw->state.draw_region != NULL) { + /* Emit framebuffer relocation */ + dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + 0, + brw->wm.bind.surf_ss_offset[0] + + offsetof(struct brw_surface_state, ss1), + brw->state.draw_region->buffer); + } + + /* Emit relocations for texture buffers */ + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + + if (texUnit->_ReallyEnabled && intelObj->mt != NULL) { + dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + brw->wm.bind.surf_ss_offset[unit + 1] + + offsetof(struct brw_surface_state, ss1), + intelObj->mt->region->buffer); + } + } +} +#endif + +const struct brw_tracked_state brw_wm_surfaces = { + .dirty = { + .brw = (BRW_NEW_FRAMEBUFFER | + BRW_NEW_BLEND | + BRW_NEW_TEXTURE), + .cache = 0 + }, + .update = upload_wm_surfaces, +}; diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile new file mode 100644 index 00000000000..31438a882e6 --- /dev/null +++ b/src/gallium/drivers/softpipe/Makefile @@ -0,0 +1,50 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = softpipe + +DRIVER_SOURCES = \ + sp_clear.c \ + sp_flush.c \ + sp_query.c \ + sp_context.c \ + sp_draw_arrays.c \ + sp_prim_setup.c \ + sp_prim_vbuf.c \ + sp_quad.c \ + sp_quad_alpha_test.c \ + sp_quad_blend.c \ + sp_quad_bufloop.c \ + sp_quad_colormask.c \ + sp_quad_coverage.c \ + sp_quad_depth_test.c \ + sp_quad_earlyz.c \ + sp_quad_fs.c \ + sp_quad_occlusion.c \ + sp_quad_output.c \ + sp_quad_stencil.c \ + sp_quad_stipple.c \ + sp_state_blend.c \ + sp_state_clip.c \ + sp_state_derived.c \ + sp_state_fs.c \ + sp_state_sampler.c \ + sp_state_rasterizer.c \ + sp_state_surface.c \ + sp_state_vertex.c \ + sp_texture.c \ + sp_tex_sample.c \ + sp_tile_cache.c \ + sp_surface.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript new file mode 100644 index 00000000000..d581ee8d3ca --- /dev/null +++ b/src/gallium/drivers/softpipe/SConscript @@ -0,0 +1,42 @@ +Import('*') + +env = env.Clone() + +softpipe = env.ConvenienceLibrary( + target = 'softpipe', + source = [ + 'sp_clear.c', + 'sp_context.c', + 'sp_draw_arrays.c', + 'sp_flush.c', + 'sp_prim_setup.c', + 'sp_prim_vbuf.c', + 'sp_quad_alpha_test.c', + 'sp_quad_blend.c', + 'sp_quad_bufloop.c', + 'sp_quad.c', + 'sp_quad_colormask.c', + 'sp_quad_coverage.c', + 'sp_quad_depth_test.c', + 'sp_quad_earlyz.c', + 'sp_quad_fs.c', + 'sp_quad_occlusion.c', + 'sp_quad_output.c', + 'sp_quad_stencil.c', + 'sp_quad_stipple.c', + 'sp_query.c', + 'sp_state_blend.c', + 'sp_state_clip.c', + 'sp_state_derived.c', + 'sp_state_fs.c', + 'sp_state_rasterizer.c', + 'sp_state_sampler.c', + 'sp_state_surface.c', + 'sp_state_vertex.c', + 'sp_surface.c', + 'sp_tex_sample.c', + 'sp_texture.c', + 'sp_tile_cache.c', + ]) + +Export('softpipe') \ No newline at end of file diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c new file mode 100644 index 00000000000..8d295a30ca6 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_state.h" +#include "sp_tile_cache.h" + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + */ +void +softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + +#if 0 + softpipe_update_derived(softpipe); /* not needed?? */ +#endif + + if (ps == sp_tile_cache_get_surface(softpipe->zsbuf_cache)) { + sp_tile_cache_clear(softpipe->zsbuf_cache, clearValue); +#if TILE_CLEAR_OPTIMIZATION + return; +#endif + } + + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) { + sp_tile_cache_clear(softpipe->cbuf_cache[i], clearValue); + } + } + +#if !TILE_CLEAR_OPTIMIZATION + /* non-cached surface */ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +#endif +} diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h new file mode 100644 index 00000000000..a8ed1c4ecc4 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + +#ifndef SP_CLEAR_H +#define SP_CLEAR_H + +#include "pipe/p_state.h" +struct pipe_context; + +extern void +softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + +#endif /* SP_CLEAR_H */ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c new file mode 100644 index 00000000000..cea6b90104f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -0,0 +1,333 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_flush.h" +#include "sp_prim_setup.h" +#include "sp_prim_vbuf.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_query.h" + + + +/** + * Query format support for creating a texture, drawing surface, etc. + * \param format the format to test + * \param type one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +softpipe_is_format_supported( struct pipe_context *pipe, + enum pipe_format format, uint type ) +{ + switch (type) { + case PIPE_TEXTURE: + /* softpipe supports all texture formats */ + return TRUE; + case PIPE_SURFACE: + /* softpipe supports all (off-screen) surface formats */ + return TRUE; + default: + assert(0); + return FALSE; + } +} + + +/** + * Map any drawing surfaces which aren't already mapped + */ +void +softpipe_map_surfaces(struct softpipe_context *sp) +{ + unsigned i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); + } + + sp_tile_cache_map_surfaces(sp->zsbuf_cache); +} + + +/** + * Unmap any mapped drawing surfaces + */ +void +softpipe_unmap_surfaces(struct softpipe_context *sp) +{ + uint i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); + } + sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); +} + + +static void softpipe_destroy( struct pipe_context *pipe ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct pipe_winsys *ws = pipe->winsys; + uint i; + + draw_destroy( softpipe->draw ); + + softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); + softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); + softpipe->quad.shade->destroy( softpipe->quad.shade ); + softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); + softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); + softpipe->quad.coverage->destroy( softpipe->quad.coverage ); + softpipe->quad.bufloop->destroy( softpipe->quad.bufloop ); + softpipe->quad.blend->destroy( softpipe->quad.blend ); + softpipe->quad.colormask->destroy( softpipe->quad.colormask ); + softpipe->quad.output->destroy( softpipe->quad.output ); + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + sp_destroy_tile_cache(softpipe->cbuf_cache[i]); + sp_destroy_tile_cache(softpipe->zsbuf_cache); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + sp_destroy_tile_cache(softpipe->tex_cache[i]); + + for (i = 0; i < Elements(softpipe->constants); i++) { + if (softpipe->constants[i].buffer) { + pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + } + } + + FREE( softpipe ); +} + + +static const char *softpipe_get_name( struct pipe_context *pipe ) +{ + return "softpipe"; +} + +static const char *softpipe_get_vendor( struct pipe_context *pipe ) +{ + return "Tungsten Graphics, Inc."; +} + +static int softpipe_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + +static float softpipe_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + default: + return 0; + } +} + +struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, + struct softpipe_winsys *softpipe_winsys ) +{ + struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); + uint i; + +#if defined(__i386__) || defined(__386__) + softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; +#else + softpipe->use_sse = FALSE; +#endif + + softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; + + softpipe->pipe.winsys = pipe_winsys; + softpipe->pipe.destroy = softpipe_destroy; + + /* queries */ + softpipe->pipe.is_format_supported = softpipe_is_format_supported; + softpipe->pipe.get_name = softpipe_get_name; + softpipe->pipe.get_vendor = softpipe_get_vendor; + softpipe->pipe.get_param = softpipe_get_param; + softpipe->pipe.get_paramf = softpipe_get_paramf; + + /* state setters */ + softpipe->pipe.create_blend_state = softpipe_create_blend_state; + softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; + softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; + + softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; + softpipe->pipe.bind_sampler_state = softpipe_bind_sampler_state; + softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; + + softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; + softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; + softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; + + softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; + softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; + softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; + + softpipe->pipe.create_fs_state = softpipe_create_fs_state; + softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; + softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; + + softpipe->pipe.create_vs_state = softpipe_create_vs_state; + softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; + softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + + softpipe->pipe.set_blend_color = softpipe_set_blend_color; + softpipe->pipe.set_clip_state = softpipe_set_clip_state; + softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; + softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; + softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; + softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; + softpipe->pipe.set_sampler_texture = softpipe_set_sampler_texture; + softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; + + softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; + softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; + + softpipe->pipe.draw_arrays = softpipe_draw_arrays; + softpipe->pipe.draw_elements = softpipe_draw_elements; + + softpipe->pipe.clear = softpipe_clear; + softpipe->pipe.flush = softpipe_flush; + + softpipe_init_query_funcs( softpipe ); + + /* textures */ + softpipe->pipe.texture_create = softpipe_texture_create; + softpipe->pipe.texture_release = softpipe_texture_release; + softpipe->pipe.get_tex_surface = softpipe_get_tex_surface; + + /* + * Alloc caches for accessing drawing surfaces and textures. + * Must be before quad stage setup! + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + softpipe->cbuf_cache[i] = sp_create_tile_cache(); + softpipe->zsbuf_cache = sp_create_tile_cache(); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + softpipe->tex_cache[i] = sp_create_tile_cache(); + + + /* setup quad rendering stages */ + softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad.shade = sp_quad_shade_stage(softpipe); + softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe); + softpipe->quad.blend = sp_quad_blend_stage(softpipe); + softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad.output = sp_quad_output_stage(softpipe); + + softpipe->winsys = softpipe_winsys; + + /* + * Create drawing context and plug our rendering stage into it. + */ + softpipe->draw = draw_create(); + assert(softpipe->draw); + softpipe->setup = sp_draw_render_stage(softpipe); + + if (GETENV( "SP_VBUF" ) != NULL) { + sp_init_vbuf(softpipe); + } + else { + draw_set_rasterize_stage(softpipe->draw, softpipe->setup); + } + + sp_init_surface_functions(softpipe); + + return &softpipe->pipe; +} diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h new file mode 100644 index 00000000000..aff8c2cc5dc --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -0,0 +1,152 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_CONTEXT_H +#define SP_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +#include "pipe/draw/draw_vertex.h" + +#include "sp_quad.h" + + +struct softpipe_winsys; +struct softpipe_vbuf_render; +struct draw_context; +struct draw_stage; +struct softpipe_tile_cache; +struct sp_fragment_shader_state; +struct sp_vertex_shader_state; + + +struct softpipe_context { + struct pipe_context pipe; /**< base class */ + struct softpipe_winsys *winsys; /**< window system interface */ + + + /* The most recent drawing state as set by the driver: + */ + const struct pipe_blend_state *blend; + const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_rasterizer_state *rasterizer; + const struct sp_fragment_shader_state *fs; + const struct sp_vertex_shader_state *vs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[2]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct softpipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + unsigned dirty; + + /* Counter for occlusion queries. Note this supports overlapping + * queries. + */ + uint64 occlusion_count; + + /* + * Mapped vertex buffers + */ + ubyte *mapped_vbuffer[PIPE_ATTRIB_MAX]; + + /** Mapped constant buffers */ + void *mapped_constants[PIPE_SHADER_TYPES]; + + /** Vertex format */ + struct vertex_info vertex_info; + struct vertex_info vertex_info_vbuf; + + int psize_slot; + +#if 0 + /* Stipple derived state: + */ + ubyte stipple_masks[16][16]; +#endif + + /** Derived from scissor and surface bounds: */ + struct pipe_scissor_state cliprect; + + unsigned line_stipple_counter; + + /** Software quad rendering pipeline */ + struct { + struct quad_stage *polygon_stipple; + struct quad_stage *earlyz; + struct quad_stage *shade; + struct quad_stage *alpha_test; + struct quad_stage *stencil_test; + struct quad_stage *depth_test; + struct quad_stage *occlusion; + struct quad_stage *coverage; + struct quad_stage *bufloop; + struct quad_stage *blend; + struct quad_stage *colormask; + struct quad_stage *output; + + struct quad_stage *first; /**< points to one of the above stages */ + } quad; + + /** The primitive drawing context */ + struct draw_context *draw; + struct draw_stage *setup; + struct draw_stage *vbuf; + struct softpipe_vbuf_render *vbuf_render; + + uint current_cbuf; /**< current color buffer being written to */ + + struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; + struct softpipe_tile_cache *zsbuf_cache; + + struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + + int use_sse : 1; + int dump_fs : 1; +}; + + + + +static INLINE struct softpipe_context * +softpipe_context( struct pipe_context *pipe ) +{ + return (struct softpipe_context *)pipe; +} + + +#endif /* SP_CONTEXT_H */ diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c new file mode 100644 index 00000000000..71a303a8b58 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#include "sp_context.h" +#include "sp_state.h" + +#include "pipe/draw/draw_context.h" + + + +static void +softpipe_map_constant_buffers(struct softpipe_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + } + + draw_set_mapped_constant_buffer(sp->draw, + sp->mapped_constants[PIPE_SHADER_VERTEX]); +} + +static void +softpipe_unmap_constant_buffers(struct softpipe_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + ws->buffer_unmap(ws, sp->constants[i].buffer); + sp->mapped_constants[i] = NULL; + } +} + + +boolean +softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? + */ +boolean +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + /* first, check that the primitive is not malformed. It is the + * state tracker's responsibility to do send only correctly formed + * primitives down. It currently isn't doing that though... + */ +#if 1 + count = draw_trim_prim( mode, count ); +#else + if (!draw_validate_prim( mode, count )) + assert(0); +#endif + + + if (sp->dirty) + softpipe_update_derived( sp ); + + softpipe_map_surfaces(sp); + softpipe_map_constant_buffers(sp); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (sp->vertex_buffer[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + /* draw! */ + draw_arrays(draw, mode, start, count); + + /* + * unmap vertex/index buffers - will cause draw module to flush + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (sp->vertex_buffer[i].buffer) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); + } + } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + } + + + /* Note: leave drawing surfaces mapped */ + softpipe_unmap_constant_buffers(sp); + + return TRUE; +} diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c new file mode 100644 index 00000000000..ced0d5d0983 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "pipe/draw/draw_context.h" +#include "sp_flush.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_state.h" +#include "sp_tile_cache.h" +#include "sp_winsys.h" + + +/* There will be actual work to do here. In future we may want a + * fence-like interface instead of finish, and perhaps flush will take + * flags to indicate what type of flush is required. + */ +void +softpipe_flush( struct pipe_context *pipe, + unsigned flags ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + draw_flush(softpipe->draw); + + /* - flush the quad pipeline + * - flush the texture cache + * - flush the render cache + */ + + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) + if (softpipe->cbuf_cache[i]) + sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); + + if (softpipe->zsbuf_cache) + sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); + + /* Need this call for hardware buffers before swapbuffers. + * + * there should probably be another/different flush-type function + * that's called before swapbuffers because we don't always want + * to unmap surfaces when flushing. + */ + softpipe_unmap_surfaces(softpipe); +} + diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h new file mode 100644 index 00000000000..34ec617866b --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_FLUSH_H +#define SP_FLUSH_H + +struct pipe_context; + +void softpipe_flush(struct pipe_context *pipe, unsigned flags ); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h new file mode 100644 index 00000000000..0ae31d87961 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_HEADERS_H +#define SP_HEADERS_H + +#include "pipe/tgsi/exec/tgsi_exec.h" + +#define PRIM_POINT 1 +#define PRIM_LINE 2 +#define PRIM_TRI 3 + + +/* The rasterizer generates 2x2 quads of fragment and feeds them to + * the current fp_machine (see below). + * Remember that Y=0=top with Y increasing down the window. + */ +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 + +#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL 0xf + + +/** + * Encodes everything we need to know about a 2x2 pixel block. Uses + * "Channel-Serial" or "SoA" layout. + */ +struct quad_header { + int x0; + int y0; + unsigned mask:4; + unsigned facing:1; /**< Front (0) or back (1) facing? */ + unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ + + struct { + float color[NUM_CHANNELS][QUAD_SIZE]; /* rrrr, gggg, bbbb, aaaa */ + float depth[QUAD_SIZE]; + } outputs; + + float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ + + const struct tgsi_interp_coef *coef; + const struct tgsi_interp_coef *posCoef; + + unsigned nr_attrs; +}; + + +#endif /* SP_HEADERS_H */ diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c new file mode 100644 index 00000000000..27720486615 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -0,0 +1,1247 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Primitive rasterization/rendering (points, lines, triangles) + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Brian Paul + */ + + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "pipe/draw/draw_private.h" +#include "pipe/draw/draw_vertex.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + +#define DEBUG_VERTS 0 + +/** + * Triangle edge info + */ +struct edge { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct softpipe_context *softpipe; + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. + */ + const struct vertex_header *vmax; + const struct vertex_header *vmid; + const struct vertex_header *vmin; + const struct vertex_header *vprovoke; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneoverarea; + + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef posCoef; /* For Z, W */ + struct quad_header quad; + + struct { + int left[2]; /**< [0] = row0, [1] = row1 */ + int right[2]; + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; +}; + + + +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} + + +/** + * Clip setup->quad against the scissor/surface bounds. + */ +static INLINE void +quad_clip(struct setup_stage *setup) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + + if (setup->quad.x0 >= maxx || + setup->quad.y0 >= maxy || + setup->quad.x0 + 1 < minx || + setup->quad.y0 + 1 < miny) { + /* totally clipped */ + setup->quad.mask = 0x0; + return; + } + if (setup->quad.x0 < minx) + setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (setup->quad.y0 < miny) + setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (setup->quad.x0 == maxx - 1) + setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (setup->quad.y0 == maxy - 1) + setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} + + +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad(struct setup_stage *setup) +{ + quad_clip(setup); + if (setup->quad.mask) { + struct softpipe_context *sp = setup->softpipe; + sp->quad.first->run(sp->quad.first, &setup->quad); + } +} + + +/** + * Emit a quad (pass to next stage). No clipping is done. + */ +static INLINE void +emit_quad( struct setup_stage *setup, int x, int y, unsigned mask ) +{ + struct softpipe_context *sp = setup->softpipe; + setup->quad.x0 = x; + setup->quad.y0 = y; + setup->quad.mask = mask; + sp->quad.first->run(sp->quad.first, &setup->quad); +} + + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int block( int x ) +{ + return x & ~1; +} + + +/** + * Compute mask which indicates which pixels in the 2x2 quad are actually inside + * the triangle's bounds. + * + * this is pretty nasty... may need to rework flush_spans again to + * fix it, if possible. + */ +static unsigned calculate_mask( struct setup_stage *setup, int x ) +{ + unsigned mask = 0x0; + + if (x >= setup->span.left[0] && x < setup->span.right[0]) + mask |= MASK_TOP_LEFT; + + if (x >= setup->span.left[1] && x < setup->span.right[1]) + mask |= MASK_BOTTOM_LEFT; + + if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0]) + mask |= MASK_TOP_RIGHT; + + if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1]) + mask |= MASK_BOTTOM_RIGHT; + + return mask; +} + + +/** + * Render a horizontal span of quads + */ +static void flush_spans( struct setup_stage *setup ) +{ + int minleft, maxright; + int x; + + switch (setup->span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = MIN2(setup->span.left[0], setup->span.left[1]); + maxright = MAX2(setup->span.right[0], setup->span.right[1]); + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = setup->span.left[0]; + maxright = setup->span.right[0]; + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = setup->span.left[1]; + maxright = setup->span.right[1]; + break; + + default: + return; + } + + /* XXX this loop could be moved into the above switch cases and + * calculate_mask() could be simplified a bit... + */ + for (x = block(minleft); x <= block(maxright); x += 2) { + emit_quad( setup, x, setup->span.y, + calculate_mask( setup, x ) ); + } + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; +} + +#if DEBUG_VERTS +static void print_vertex(const struct setup_stage *setup, + const struct vertex_header *v) +{ + int i; + debug_printf("Vertex: (%p)\n", v); + for (i = 0; i < setup->quad.nr_attrs; i++) { + debug_printf(" %d: %f %f %f %f\n", i, + v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); + } +} +#endif + +static boolean setup_sort_vertices( struct setup_stage *setup, + const struct prim_header *prim ) +{ + const struct vertex_header *v0 = prim->v[0]; + const struct vertex_header *v1 = prim->v[1]; + const struct vertex_header *v2 = prim->v[2]; + +#if DEBUG_VERTS + debug_printf("Triangle:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); + print_vertex(setup, v2); +#endif + + setup->vprovoke = v2; + + /* determine bottom to top order of vertices */ + { + float y0 = v0->data[0][1]; + float y1 = v1->data[0][1]; + float y2 = v2->data[0][1]; + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup->vmin = v0; + setup->vmid = v1; + setup->vmax = v2; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup->vmin = v2; + setup->vmid = v0; + setup->vmax = v1; + } + else { + /* y0<=y2<=y1 */ + setup->vmin = v0; + setup->vmid = v2; + setup->vmax = v1; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup->vmin = v1; + setup->vmid = v0; + setup->vmax = v2; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup->vmin = v2; + setup->vmid = v1; + setup->vmax = v0; + } + else { + /* y1<=y2<=y0 */ + setup->vmin = v1; + setup->vmid = v2; + setup->vmax = v0; + } + } + } + + setup->ebot.dx = setup->vmid->data[0][0] - setup->vmin->data[0][0]; + setup->ebot.dy = setup->vmid->data[0][1] - setup->vmin->data[0][1]; + setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; + setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; + setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0]; + setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1]; + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + * + * The area will be the same as prim->det, but the sign may be + * different depending on how the vertices get sorted above. + * + * To determine whether the primitive is front or back facing we + * use the prim->det value because its sign is correct. + */ + { + const float area = (setup->emaj.dx * setup->ebot.dy - + setup->ebot.dx * setup->emaj.dy); + + setup->oneoverarea = 1.0f / area; + /* + debug_printf("%s one-over-area %f area %f det %f\n", + __FUNCTION__, setup->oneoverarea, area, prim->det ); + */ + } + + /* We need to know if this is a front or back-facing triangle for: + * - the GLSL gl_FrontFacing fragment attribute (bool) + * - two-sided stencil test + */ + setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex->data[slot][i]. + * The result will be put into setup->coef[slot].a0[i]. + * \param slot which attribute slot + * \param i which component of the slot (0..3) + */ +static void const_coeff( struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + + coef->dadx[i] = 0; + coef->dady[i] = 0; + + /* need provoking vertex info! + */ + coef->a0[i] = setup->vprovoke->data[vertSlot][i]; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void tri_linear_coeff( struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; + float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); + + /* + debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", + slot, "xyzw"[i], + setup->coef[slot].a0[i], + setup->coef[slot].dadx[i], + setup->coef[slot].dady[i]); + */ +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void tri_persp_coeff( struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* premultiply by 1/w (v->data[0][3] is always W): + */ + float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; + float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3]; + float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; + float botda = mida - mina; + float majda = maxa - mina; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + /* + debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, + setup->vmin->data[vertSlot][i], + setup->vmid->data[vertSlot][i], + setup->vmax->data[vertSlot][i] + ); + */ + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (mina - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from posCoef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coeff(struct setup_stage *setup) +{ + /*X*/ + setup->coef[0].a0[0] = 0; + setup->coef[0].dadx[0] = 1.0; + setup->coef[0].dady[0] = 0.0; + /*Y*/ + if (setup->softpipe->rasterizer->origin_lower_left) { + /* y=0=bottom */ + const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height; + setup->coef[0].a0[1] = (float) (winHeight - 1); + setup->coef[0].dady[1] = -1.0; + } + else { + /* y=0=top */ + setup->coef[0].a0[1] = 0.0; + setup->coef[0].dady[1] = 1.0; + } + setup->coef[0].dadx[1] = 0.0; + /*Z*/ + setup->coef[0].a0[2] = setup->posCoef.a0[2]; + setup->coef[0].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[0].dady[2] = setup->posCoef.dady[2]; + /*W*/ + setup->coef[0].a0[3] = setup->posCoef.a0[3]; + setup->coef[0].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[0].dady[3] = setup->posCoef.dady[3]; +} + + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. + */ +static void setup_tri_coefficients( struct setup_stage *setup ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* z and w are done by linear interpolation: + */ + tri_linear_coeff(setup, &setup->posCoef, 0, 2); + tri_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + assert(fragSlot == 0); + setup_fragcoord_coeff(setup); + break; + default: + assert(0); + } + + if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + + +static void setup_tri_edges( struct setup_stage *setup ) +{ + float vmin_x = setup->vmin->data[0][0] + 0.5f; + float vmid_x = setup->vmid->data[0][0] + 0.5f; + + float vmin_y = setup->vmin->data[0][1] - 0.5f; + float vmid_y = setup->vmid->data[0][1] - 0.5f; + float vmax_y = setup->vmax->data[0][1] - 0.5f; + + setup->emaj.sy = CEILF(vmin_y); + setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy); + setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; + setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; + + setup->etop.sy = CEILF(vmid_y); + setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy); + setup->etop.dxdy = setup->etop.dx / setup->etop.dy; + setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; + + setup->ebot.sy = CEILF(vmin_y); + setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy); + setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; + setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. + */ +static void subtriangle( struct setup_stage *setup, + struct edge *eleft, + struct edge *eright, + unsigned lines ) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + assert((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup->span.y) { + flush_spans(setup); + setup->span.y = block(_y); + } + + setup->span.left[_y&1] = left; + setup->span.right[_y&1] = right; + setup->span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Do setup for triangle rasterization, then render the triangle. + */ +static void setup_tri( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct setup_stage *setup = setup_stage( stage ); + + /* + debug_printf("%s\n", __FUNCTION__ ); + */ + + setup_sort_vertices( setup, prim ); + setup_tri_coefficients( setup ); + setup_tri_edges( setup ); + + setup->quad.prim = PRIM_TRI; + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; + /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ + + /* init_constant_attribs( setup ); */ + + if (setup->oneoverarea < 0.0) { + /* emaj on left: + */ + subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); + subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); + } + else { + /* emaj on right: + */ + subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); + subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); + } + + flush_spans( setup ); +} + + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a line. + */ +static void +line_linear_coeff(struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a line. + */ +static void +line_persp_coeff(struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* XXX double-check/verify this arithmetic */ + const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; + const float a1 = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; + const float da = a1 - a0; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); +} + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmax are initialized. + */ +static INLINE void +setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* use setup->vmin, vmax to point to vertices */ + setup->vprovoke = prim->v[1]; + setup->vmin = prim->v[0]; + setup->vmax = prim->v[1]; + + setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; + setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; + /* NOTE: this is not really 1/area */ + setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx + + setup->emaj.dy * setup->emaj.dy); + + /* z and w are done by linear interpolation: + */ + line_linear_coeff(setup, &setup->posCoef, 0, 2); + line_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + assert(fragSlot == 0); + assert(0); /* XXX fix this: */ + setup_fragcoord_coeff(setup); + break; + default: + assert(0); + } + + if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + +/** + * Plot a pixel in a line segment. + */ +static INLINE void +plot(struct setup_stage *setup, int x, int y) +{ + const int iy = y & 1; + const int ix = x & 1; + const int quadX = x - ix; + const int quadY = y - iy; + const int mask = (1 << ix) << (2 * iy); + + if (quadX != setup->quad.x0 || + quadY != setup->quad.y0) + { + /* flush prev quad, start new quad */ + + if (setup->quad.x0 != -1) + clip_emit_quad(setup); + + setup->quad.x0 = quadX; + setup->quad.y0 = quadY; + setup->quad.mask = 0x0; + } + + setup->quad.mask |= mask; +} + + +/** + * Do setup for line rasterization, then render the line. + * Single-pixel width, no stipple, etc. We rely on the 'draw' module + * to handle stippling and wide lines. + */ +static void +setup_line(struct draw_stage *stage, struct prim_header *prim) +{ + const struct vertex_header *v0 = prim->v[0]; + const struct vertex_header *v1 = prim->v[1]; + struct setup_stage *setup = setup_stage( stage ); + int x0 = (int) v0->data[0][0]; + int x1 = (int) v1->data[0][0]; + int y0 = (int) v0->data[0][1]; + int y1 = (int) v1->data[0][1]; + int dx = x1 - x0; + int dy = y1 - y0; + int xstep, ystep; + + if (dx == 0 && dy == 0) + return; + + setup_line_coefficients(setup, prim); + + if (dx < 0) { + dx = -dx; /* make positive */ + xstep = -1; + } + else { + xstep = 1; + } + + if (dy < 0) { + dy = -dy; /* make positive */ + ystep = -1; + } + else { + ystep = 1; + } + + assert(dx >= 0); + assert(dy >= 0); + + setup->quad.x0 = setup->quad.y0 = -1; + setup->quad.mask = 0x0; + setup->quad.prim = PRIM_LINE; + /* XXX temporary: set coverage to 1.0 so the line appears + * if AA mode happens to be enabled. + */ + setup->quad.coverage[0] = + setup->quad.coverage[1] = + setup->quad.coverage[2] = + setup->quad.coverage[3] = 1.0; + + if (dx > dy) { + /*** X-major line ***/ + int i; + const int errorInc = dy + dy; + int error = errorInc - dx; + const int errorDec = error - dx; + + for (i = 0; i < dx; i++) { + plot(setup, x0, y0); + + x0 += xstep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + y0 += ystep; + } + } + } + else { + /*** Y-major line ***/ + int i; + const int errorInc = dx + dx; + int error = errorInc - dy; + const int errorDec = error - dy; + + for (i = 0; i < dy; i++) { + plot(setup, x0, y0); + + y0 += ystep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + x0 += xstep; + } + } + } + + /* draw final quad */ + if (setup->quad.mask) { + clip_emit_quad(setup); + } +} + + +static void +point_persp_coeff(struct setup_stage *setup, + const struct vertex_header *vert, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + coef->dadx[i] = 0.0F; + coef->dady[i] = 0.0F; + coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3]; +} + + +/** + * Do setup for point rasterization, then render the point. + * Round or square points... + * XXX could optimize a lot for 1-pixel points. + */ +static void +setup_point(struct draw_stage *stage, struct prim_header *prim) +{ + struct setup_stage *setup = setup_stage( stage ); + struct softpipe_context *softpipe = setup->softpipe; + const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct vertex_header *v0 = prim->v[0]; + const int sizeAttr = setup->softpipe->psize_slot; + const float size + = sizeAttr > 0 ? v0->data[sizeAttr][0] + : setup->softpipe->rasterizer->point_size; + const float halfSize = 0.5F * size; + const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; + const float x = v0->data[0][0]; /* Note: data[0] is always position */ + const float y = v0->data[0][1]; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* For points, all interpolants are constant-valued. + * However, for point sprites, we'll need to setup texcoords appropriately. + * XXX: which coefficients are the texcoords??? + * We may do point sprites as textured quads... + * + * KW: We don't know which coefficients are texcoords - ultimately + * the choice of what interpolation mode to use for each attribute + * should be determined by the fragment program, using + * per-attribute declaration statements that include interpolation + * mode as a parameter. So either the fragment program will have + * to be adjusted for pointsprite vs normal point behaviour, or + * otherwise a special interpolation mode will have to be defined + * which matches the required behaviour for point sprites. But - + * the latter is not a feature of normal hardware, and as such + * probably should be ruled out on that basis. + */ + setup->vprovoke = prim->v[0]; + + /* setup Z, W */ + const_coeff(setup, &setup->posCoef, 0, 2); + const_coeff(setup, &setup->posCoef, 0, 3); + + for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + /* fall-through */ + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + point_persp_coeff(setup, setup->vprovoke, + &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + assert(fragSlot == 0); + assert(0); /* XXX fix this: */ + setup_fragcoord_coeff(setup); + break; + default: + assert(0); + } + + if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } + + setup->quad.prim = PRIM_POINT; + + if (halfSize <= 0.5 && !round) { + /* special case for 1-pixel points */ + const int ix = ((int) x) & 1; + const int iy = ((int) y) & 1; + setup->quad.x0 = (int) x - ix; + setup->quad.y0 = (int) y - iy; + setup->quad.mask = (1 << ix) << (2 * iy); + clip_emit_quad(setup); + } + else { + if (round) { + /* rounded points */ + const int ixmin = block((int) (x - halfSize)); + const int ixmax = block((int) (x + halfSize)); + const int iymin = block((int) (y - halfSize)); + const int iymax = block((int) (y + halfSize)); + const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ + const float rmax = halfSize + 0.7071F; + const float rmin2 = MAX2(0.0F, rmin * rmin); + const float rmax2 = rmax * rmax; + const float cscale = 1.0F / (rmax2 - rmin2); + int ix, iy; + + for (iy = iymin; iy <= iymax; iy += 2) { + for (ix = ixmin; ix <= ixmax; ix += 2) { + float dx, dy, dist2, cover; + + setup->quad.mask = 0x0; + + dx = (ix + 0.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_TOP_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_TOP_RIGHT; + } + + dx = (ix + 0.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_BOTTOM_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_BOTTOM_RIGHT; + } + + if (setup->quad.mask) { + setup->quad.x0 = ix; + setup->quad.y0 = iy; + clip_emit_quad(setup); + } + } + } + } + else { + /* square points */ + const int xmin = (int) (x + 0.75 - halfSize); + const int ymin = (int) (y + 0.25 - halfSize); + const int xmax = xmin + (int) size; + const int ymax = ymin + (int) size; + /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ + const int ixmin = block(xmin); + const int ixmax = block(xmax - 1); + const int iymin = block(ymin); + const int iymax = block(ymax - 1); + int ix, iy; + + /* + debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); + */ + for (iy = iymin; iy <= iymax; iy += 2) { + uint rowMask = 0xf; + if (iy < ymin) { + /* above the top edge */ + rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + } + if (iy + 1 >= ymax) { + /* below the bottom edge */ + rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + } + + for (ix = ixmin; ix <= ixmax; ix += 2) { + uint mask = rowMask; + + if (ix < xmin) { + /* fragment is past left edge of point, turn off left bits */ + mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + } + if (ix + 1 >= xmax) { + /* past the right edge */ + mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + } + + setup->quad.mask = mask; + setup->quad.x0 = ix; + setup->quad.y0 = iy; + clip_emit_quad(setup); + } + } + } + } +} + + + +static void setup_begin( struct draw_stage *stage ) +{ + struct setup_stage *setup = setup_stage(stage); + struct softpipe_context *sp = setup->softpipe; + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + + setup->quad.nr_attrs = fs->num_inputs; + + sp->quad.first->begin(sp->quad.first); + + stage->point = setup_point; + stage->line = setup_line; + stage->tri = setup_tri; +} + + +static void setup_first_point( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->point( stage, header ); +} + +static void setup_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->line( stage, header ); +} + + +static void setup_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->tri( stage, header ); +} + + + +static void setup_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->point = setup_first_point; + stage->line = setup_first_line; + stage->tri = setup_first_tri; +} + + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage. + */ +struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) +{ + struct setup_stage *setup = CALLOC_STRUCT(setup_stage); + + setup->softpipe = softpipe; + setup->stage.draw = softpipe->draw; + setup->stage.point = setup_first_point; + setup->stage.line = setup_first_line; + setup->stage.tri = setup_first_tri; + setup->stage.flush = setup_flush; + setup->stage.reset_stipple_counter = reset_stipple_counter; + setup->stage.destroy = render_destroy; + + setup->quad.coef = setup->coef; + setup->quad.posCoef = &setup->posCoef; + + return &setup->stage; +} diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h new file mode 100644 index 00000000000..f3e8a79dd9e --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_setup.h @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SP_PRIM_SETUP_H +#define SP_PRIM_SETUP_H + + +/** + * vbuf is a special stage to gather the stream of triangles, lines, points + * together and reconstruct vertex buffers for hardware upload. + * + * First attempt, work in progress. + * + * TODO: + * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. + * - tell vbuf stage how to build hw vertices directly + * - pass vbuf stage a buffer pointer for direct emit to agp/vram. + * + * + * + * Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. + */ + + +struct draw_stage; +struct softpipe_context; + + +typedef void (*vbuf_draw_func)( struct pipe_context *pipe, + unsigned prim, + const ushort *elements, + unsigned nr_elements, + const void *vertex_buffer, + unsigned nr_vertices ); + + +extern struct draw_stage * +sp_draw_render_stage( struct softpipe_context *softpipe ); + + +extern struct draw_stage * +sp_draw_vbuf_stage( struct draw_context *draw_context, + struct pipe_context *pipe, + vbuf_draw_func draw ); + + +#endif /* SP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c new file mode 100644 index 00000000000..7f71fdb6a9a --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -0,0 +1,221 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Post-transform vertex buffering. This is an optional part of the + * softpipe rendering pipeline. + * Probably not desired in general, but useful for testing/debuggin. + * Enabled/Disabled with SP_VBUF env var. + * + * Authors + * Brian Paul + */ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_prim_vbuf.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" +#include "pipe/draw/draw_vbuf.h" + + +#define SP_MAX_VBUF_INDEXES 1024 +#define SP_MAX_VBUF_SIZE 4096 + + +/** + * Subclass of vbuf_render. + */ +struct softpipe_vbuf_render +{ + struct vbuf_render base; + struct softpipe_context *softpipe; + uint prim; + uint vertex_size; + void *vertex_buffer; +}; + + +/** cast wrapper */ +static struct softpipe_vbuf_render * +softpipe_vbuf_render(struct vbuf_render *vbr) +{ + return (struct softpipe_vbuf_render *) vbr; +} + + + +static const struct vertex_info * +sp_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + return softpipe_get_vbuf_vertex_info(cvbr->softpipe); +} + + +static void * +sp_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + assert(!cvbr->vertex_buffer); + cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16); + cvbr->vertex_size = vertex_size; + return cvbr->vertex_buffer; +} + + +static void +sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned vertices_used) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + align_free(vertices); + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + +static void +sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + cvbr->prim = prim; +} + + +/** + * Recalculate prim's determinant. + * XXX is this needed? + */ +static void +calc_det(struct prim_header *header) +{ + /* Window coords: */ + const float *v0 = header->v[0]->data[0]; + const float *v1 = header->v[1]->data[0]; + const float *v2 = header->v[2]->data[0]; + + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0] - v2[0]; + const float ey = v0[1] - v2[1]; + const float fx = v1[0] - v2[0]; + const float fy = v1[1] - v2[1]; + + /* det = cross(e,f).z */ + header->det = ex * fy - ey * fx; +} + + +static void +sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + struct softpipe_context *softpipe = cvbr->softpipe; + struct draw_stage *setup = softpipe->setup; + struct prim_header prim; + unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); + unsigned i, j; + void *vertex_buffer = cvbr->vertex_buffer; + + prim.det = 0; + prim.reset_line_stipple = 0; + prim.edgeflags = 0; + prim.pad = 0; + + switch (cvbr->prim) { + case PIPE_PRIM_TRIANGLES: + for (i = 0; i < nr_indices; i += 3) { + for (j = 0; j < 3; j++) + prim.v[j] = (struct vertex_header *)((char *)vertex_buffer + + indices[i+j] * vertex_size); + + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i < nr_indices; i += 2) { + for (j = 0; j < 2; j++) + prim.v[j] = (struct vertex_header *)((char *)vertex_buffer + + indices[i+j] * vertex_size); + + setup->line( setup, &prim ); + } + break; + + case PIPE_PRIM_POINTS: + for (i = 0; i < nr_indices; i++) { + prim.v[0] = (struct vertex_header *)((char *)vertex_buffer + + indices[i] * vertex_size); + setup->point( setup, &prim ); + } + break; + } + + setup->flush( setup, 0 ); +} + + +static void +sp_vbuf_destroy(struct vbuf_render *vbr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + cvbr->softpipe->vbuf_render = NULL; + FREE(cvbr); +} + + +/** + * Initialize the post-transform vertex buffer information for the given + * context. + */ +void +sp_init_vbuf(struct softpipe_context *sp) +{ + assert(sp->draw); + + sp->vbuf_render = CALLOC_STRUCT(softpipe_vbuf_render); + + sp->vbuf_render->base.max_indices = SP_MAX_VBUF_INDEXES; + sp->vbuf_render->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; + + sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info; + sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices; + sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive; + sp->vbuf_render->base.draw = sp_vbuf_draw; + sp->vbuf_render->base.release_vertices = sp_vbuf_release_vertices; + sp->vbuf_render->base.destroy = sp_vbuf_destroy; + + sp->vbuf_render->softpipe = sp; + + sp->vbuf = draw_vbuf_stage(sp->draw, &sp->vbuf_render->base); + + draw_set_rasterize_stage(sp->draw, sp->vbuf); +} diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.h b/src/gallium/drivers/softpipe/sp_prim_vbuf.h new file mode 100644 index 00000000000..1de9cc2a894 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_VBUF_H +#define SP_VBUF_H + + +struct softpipe_context; + +extern void +sp_init_vbuf(struct softpipe_context *softpipe); + + +#endif /* SP_VBUF_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c new file mode 100644 index 00000000000..6bd468a51cf --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "pipe/p_shader_tokens.h" + +static void +sp_push_quad_first( + struct softpipe_context *sp, + struct quad_stage *quad ) +{ + quad->next = sp->quad.first; + sp->quad.first = quad; +} + +static void +sp_build_depth_stencil( + struct softpipe_context *sp ) +{ + if (sp->depth_stencil->stencil[0].enabled || + sp->depth_stencil->stencil[1].enabled) { + sp_push_quad_first( sp, sp->quad.stencil_test ); + } + else if (sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf) { + sp_push_quad_first( sp, sp->quad.depth_test ); + } +} + +void +sp_build_quad_pipeline(struct softpipe_context *sp) +{ + boolean early_depth_test = + sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf && + !sp->depth_stencil->alpha.enabled && + sp->fs->shader.output_semantic_name[0] != TGSI_SEMANTIC_POSITION; + + /* build up the pipeline in reverse order... */ + + sp->quad.first = sp->quad.output; + + if (sp->blend->colormask != 0xf) { + sp_push_quad_first( sp, sp->quad.colormask ); + } + + if (sp->blend->blend_enable || + sp->blend->logicop_enable) { + sp_push_quad_first( sp, sp->quad.blend ); + } + + if (sp->framebuffer.num_cbufs == 1) { + /* the usual case: write to exactly one colorbuf */ + sp->current_cbuf = 0; + } + else { + /* insert bufloop stage */ + sp_push_quad_first( sp, sp->quad.bufloop ); + } + + if (sp->depth_stencil->depth.occlusion_count) { + sp_push_quad_first( sp, sp->quad.occlusion ); + } + + if (sp->rasterizer->poly_smooth || + sp->rasterizer->line_smooth || + sp->rasterizer->point_smooth) { + sp_push_quad_first( sp, sp->quad.coverage ); + } + + if (!early_depth_test) { + sp_build_depth_stencil( sp ); + } + + if (sp->depth_stencil->alpha.enabled) { + sp_push_quad_first( sp, sp->quad.alpha_test ); + } + + /* XXX always enable shader? */ + if (1) { + sp_push_quad_first( sp, sp->quad.shade ); + } + + if (early_depth_test) { + sp_build_depth_stencil( sp ); + sp_push_quad_first( sp, sp->quad.earlyz ); + } + + if (sp->rasterizer->poly_stipple_enable) { + sp_push_quad_first( sp, sp->quad.polygon_stipple ); + } +} diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h new file mode 100644 index 00000000000..f1e0281764f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -0,0 +1,70 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_QUAD_H +#define SP_QUAD_H + + +struct softpipe_context; +struct quad_header; + + +struct quad_stage { + struct softpipe_context *softpipe; + + struct quad_stage *next; + + void (*begin)(struct quad_stage *qs); + + /** the stage action */ + void (*run)(struct quad_stage *qs, struct quad_header *quad); + + void (*destroy)(struct quad_stage *qs); +}; + + +struct quad_stage *sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_earlyz_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_alpha_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); + +void sp_build_quad_pipeline(struct softpipe_context *sp); + +void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); + +#endif /* SP_QUAD_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c new file mode 100644 index 00000000000..4ffeac35e16 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -0,0 +1,108 @@ + +/** + * quad alpha test + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + + +static void +alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + const float ref = softpipe->depth_stencil->alpha.ref; + unsigned passMask = 0x0, j; + const float *aaaa = quad->outputs.color[3]; + + switch (softpipe->depth_stencil->alpha.func) { + case PIPE_FUNC_NEVER: + quad->mask = 0x0; + break; + case PIPE_FUNC_LESS: + /* + * If mask were an array [4] we could do this SIMD-style: + * passMask = (quad->outputs.color[3] <= vec4(ref)); + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] < ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] == ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] <= ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] > ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] != ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] >= ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + abort(); + } + + quad->mask &= passMask; + + if (quad->mask) + qs->next->run(qs->next, quad); +} + + +static void alpha_test_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void alpha_test_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage * +sp_quad_alpha_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = alpha_test_begin; + stage->run = alpha_test_quad; + stage->destroy = alpha_test_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c new file mode 100644 index 00000000000..17f3ecd0b83 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -0,0 +1,749 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * quad blending + * \author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_quad.h" + + +#define VEC4_COPY(DST, SRC) \ +do { \ + DST[0] = SRC[0]; \ + DST[1] = SRC[1]; \ + DST[2] = SRC[2]; \ + DST[3] = SRC[3]; \ +} while(0) + +#define VEC4_SCALAR(DST, SRC) \ +do { \ + DST[0] = SRC; \ + DST[1] = SRC; \ + DST[2] = SRC; \ + DST[3] = SRC; \ +} while(0) + +#define VEC4_ADD(R, A, B) \ +do { \ + R[0] = A[0] + B[0]; \ + R[1] = A[1] + B[1]; \ + R[2] = A[2] + B[2]; \ + R[3] = A[3] + B[3]; \ +} while (0) + +#define VEC4_SUB(R, A, B) \ +do { \ + R[0] = A[0] - B[0]; \ + R[1] = A[1] - B[1]; \ + R[2] = A[2] - B[2]; \ + R[3] = A[3] - B[3]; \ +} while (0) + +#define VEC4_MUL(R, A, B) \ +do { \ + R[0] = A[0] * B[0]; \ + R[1] = A[1] * B[1]; \ + R[2] = A[2] * B[2]; \ + R[3] = A[3] * B[3]; \ +} while (0) + +#define VEC4_MIN(R, A, B) \ +do { \ + R[0] = (A[0] < B[0]) ? A[0] : B[0]; \ + R[1] = (A[1] < B[1]) ? A[1] : B[1]; \ + R[2] = (A[2] < B[2]) ? A[2] : B[2]; \ + R[3] = (A[3] < B[3]) ? A[3] : B[3]; \ +} while (0) + +#define VEC4_MAX(R, A, B) \ +do { \ + R[0] = (A[0] > B[0]) ? A[0] : B[0]; \ + R[1] = (A[1] > B[1]) ? A[1] : B[1]; \ + R[2] = (A[2] > B[2]) ? A[2] : B[2]; \ + R[3] = (A[3] > B[3]) ? A[3] : B[3]; \ +} while (0) + + + +static void +logicop_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + float dest[4][QUAD_SIZE]; + ubyte src[4][4], dst[4][4], res[4][4]; + uint *src4 = (uint *) src; + uint *dst4 = (uint *) dst; + uint *res4 = (uint *) res; + struct softpipe_cached_tile * + tile = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[softpipe->current_cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* convert to ubyte */ + for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */ + + UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */ + } + + switch (softpipe->blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: + for (j = 0; j < 4; j++) + res4[j] = 0; + break; + case PIPE_LOGICOP_NOR: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] | dst4[j]); + break; + case PIPE_LOGICOP_AND_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_COPY_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j]; + break; + case PIPE_LOGICOP_AND_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & ~dst4[j]; + break; + case PIPE_LOGICOP_INVERT: + for (j = 0; j < 4; j++) + res4[j] = ~dst4[j]; + break; + case PIPE_LOGICOP_XOR: + for (j = 0; j < 4; j++) + res4[j] = dst4[j] ^ src4[j]; + break; + case PIPE_LOGICOP_NAND: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] & dst4[j]); + break; + case PIPE_LOGICOP_AND: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_EQUIV: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] ^ dst4[j]); + break; + case PIPE_LOGICOP_NOOP: + for (j = 0; j < 4; j++) + res4[j] = dst4[j]; + break; + case PIPE_LOGICOP_OR_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_COPY: + for (j = 0; j < 4; j++) + res4[j] = src4[j]; + break; + case PIPE_LOGICOP_OR_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | ~dst4[j]; + break; + case PIPE_LOGICOP_OR: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_SET: + for (j = 0; j < 4; j++) + res4[j] = ~0; + break; + default: + assert(0); + } + + for (j = 0; j < 4; j++) { + quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]); + quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]); + quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]); + quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]); + } + + /* pass quad to next stage */ + qs->next->run(qs->next, quad); +} + + + + +static void +blend_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + static const float zero[4] = { 0, 0, 0, 0 }; + static const float one[4] = { 1, 1, 1, 1 }; + float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[softpipe->current_cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color; + uint i, j; + + if (softpipe->blend->logicop_enable) { + logicop_quad(qs, quad); + return; + } + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* + * Compute src/first term RGB + */ + switch (softpipe->blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + { + const float *alpha = dest[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(source[0], alpha, diff); /* R */ + VEC4_MIN(source[1], alpha, diff); /* G */ + VEC4_MIN(source[2], alpha, diff); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float alpha[4]; + VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(source[0], quadColor[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(source[1], quadColor[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(source[2], quadColor[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_alpha[4]; + VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + assert(0); /* to do */ + break; + default: + abort(); + } + + /* + * Compute src/first term A + */ + switch (softpipe->blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(source[3], alpha, diff); /* A */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(source[3], quadColor[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + /* A */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[3], quadColor[3], inv_comp); + } + break; + default: + abort(); + } + + + /* + * Compute dest/second term RGB + */ + switch (softpipe->blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ + VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ + VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ + VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ + VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[3]); /* A */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(dest[0], dest[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(dest[1], dest[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + default: + assert(0); + } + + /* + * Compute dest/second term A + */ + switch (softpipe->blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[3], dest[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[3], dest[3], inv_comp); + } + break; + default: + assert(0); + } + + /* + * Combine RGB terms + */ + switch (softpipe->blend->rgb_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */ + VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */ + VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */ + VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */ + VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + break; + default: + assert(0); + } + + /* + * Combine A terms + */ + switch (softpipe->blend->alpha_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + break; + default: + abort(); + } + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quad); +} + + +static void blend_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void blend_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = blend_begin; + stage->run = blend_quad; + stage->destroy = blend_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c new file mode 100644 index 00000000000..2ae4e22a7de --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -0,0 +1,72 @@ + +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" + + +/** + * Loop over colorbuffers, passing quad to next stage each time. + */ +static void +cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + float tmp[4][QUAD_SIZE]; + unsigned i; + + assert(sizeof(quad->outputs.color) == sizeof(tmp)); + assert(softpipe->framebuffer.num_cbufs <= PIPE_MAX_COLOR_BUFS); + + /* make copy of original colors since they can get modified + * by blending and masking. + * XXX we won't have to do this if the fragment program actually emits + * N separate colors and we're drawing to N color buffers (MRT). + * But if we emitted one color and glDrawBuffer(GL_FRONT_AND_BACK) is + * in effect, we need to save/restore colors like this. + */ + memcpy(tmp, quad->outputs.color, sizeof(tmp)); + + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + /* set current cbuffer */ + softpipe->current_cbuf = i; + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quad); + + /* restore quad's colors for next buffer */ + memcpy(quad->outputs.color, tmp, sizeof(tmp)); + } +} + + +static void cbuf_loop_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void cbuf_loop_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +/** + * Create the colorbuffer loop stage. + * This is used to implement multiple render targets and GL_FRONT_AND_BACK + * rendering. + */ +struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = cbuf_loop_begin; + stage->run = cbuf_loop_quad; + stage->destroy = cbuf_loop_destroy; + + return stage; +} + diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c new file mode 100644 index 00000000000..1f09d900ca8 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief quad colormask stage + * \author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + + +/** + * XXX colormask could be rolled into blending... + */ +static void +colormask_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + float dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[softpipe->current_cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* R */ + if (!(softpipe->blend->colormask & PIPE_MASK_R)) + COPY_4V(quadColor[0], dest[0]); + + /* G */ + if (!(softpipe->blend->colormask & PIPE_MASK_G)) + COPY_4V(quadColor[1], dest[1]); + + /* B */ + if (!(softpipe->blend->colormask & PIPE_MASK_B)) + COPY_4V(quadColor[2], dest[2]); + + /* A */ + if (!(softpipe->blend->colormask & PIPE_MASK_A)) + COPY_4V(quadColor[3], dest[3]); + + /* pass quad to next stage */ + qs->next->run(qs->next, quad); +} + + +static void colormask_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void colormask_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = colormask_begin; + stage->run = colormask_quad; + stage->destroy = colormask_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c new file mode 100644 index 00000000000..b3d3fae22fd --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -0,0 +1,88 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \brief Apply AA coverage to quad alpha valus + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" + + +/** + * Multiply quad's alpha values by the fragment coverage. + */ +static void +coverage_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + + if ((softpipe->rasterizer->poly_smooth && quad->prim == PRIM_TRI) || + (softpipe->rasterizer->line_smooth && quad->prim == PRIM_LINE) || + (softpipe->rasterizer->point_smooth && quad->prim == PRIM_POINT)) { + float (*quadColor)[4] = quad->outputs.color; + unsigned j; + for (j = 0; j < QUAD_SIZE; j++) { + assert(quad->coverage[j] >= 0.0); + assert(quad->coverage[j] <= 1.0); + quadColor[3][j] *= quad->coverage[j]; + } + } + + qs->next->run(qs->next, quad); +} + + +static void coverage_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void coverage_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = coverage_begin; + stage->run = coverage_quad; + stage->destroy = coverage_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c new file mode 100644 index 00000000000..a9a0754f279 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -0,0 +1,276 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Quad depth testing + */ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + +/** + * Do depth testing for a quad. + * Not static since it's used by the stencil code. + */ + +/* + * To increase efficiency, we should probably have multiple versions + * of this function that are specifically for Z16, Z32 and FP Z buffers. + * Try to effectively do that with codegen... + */ + +void +sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + struct pipe_surface *ps = softpipe->framebuffer.zsbuf; + const enum pipe_format format = ps->format; + unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ + unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ + unsigned zmask = 0; + unsigned j; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->x0, quad->y0); + + assert(ps); /* shouldn't get here if there's no zbuffer */ + + /* + * Convert quad's float depth values to int depth values (qzzzz). + * If the Z buffer stores integer values, we _have_ to do the depth + * compares with integers (not floats). Otherwise, the float->int->float + * conversion of Z values (which isn't an identity function) will cause + * Z-fighting errors. + * + * Also, get the zbuffer values (bzzzz) from the cached tile. + */ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + { + float scale = 65535.0; + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth16[y][x]; + } + } + break; + case PIPE_FORMAT_Z32_UNORM: + { + double scale = (double) (uint) ~0UL; + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth32[y][x]; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + { + float scale = (float) ((1 << 24) - 1); + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; + } + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + { + float scale = (float) ((1 << 24) - 1); + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth32[y][x] >> 8; + } + } + break; + default: + assert(0); + } + + switch (softpipe->depth_stencil->depth.func) { + case PIPE_FUNC_NEVER: + /* zmask = 0 */ + break; + case PIPE_FUNC_LESS: + /* Note this is pretty much a single sse or cell instruction. + * Like this: quad->mask &= (quad->outputs.depth < zzzz); + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] < bzzzz[j]) + zmask |= 1 << j; + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] == bzzzz[j]) + zmask |= 1 << j; + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] <= bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] > bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] != bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] >= bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_ALWAYS: + zmask = MASK_ALL; + break; + default: + abort(); + } + + quad->mask &= zmask; + + if (softpipe->depth_stencil->depth.writemask) { + + /* This is also efficient with sse / spe instructions: + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->mask & (1 << j)) { + bzzzz[j] = qzzzz[j]; + } + } + + /* put updated Z values back into cached tile */ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + tile->data.depth16[y][x] = (ushort) bzzzz[j]; + } + break; + case PIPE_FORMAT_Z32_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = bzzzz[j]; + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + uint s8z24 = tile->data.depth32[y][x]; + s8z24 = (s8z24 & 0xff000000) | bzzzz[j]; + tile->data.depth32[y][x] = s8z24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + uint z24s8 = tile->data.depth32[y][x]; + z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 24); + tile->data.depth32[y][x] = z24s8; + } + break; + default: + assert(0); + } + } +} + + +static void +depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + sp_depth_test_quad(qs, quad); + + if (quad->mask) + qs->next->run(qs->next, quad); +} + + +static void depth_test_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void depth_test_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = depth_test_begin; + stage->run = depth_test_quad; + stage->destroy = depth_test_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c new file mode 100644 index 00000000000..22ea99049fe --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -0,0 +1,88 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Quad early-z testing + */ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_headers.h" +#include "sp_quad.h" + + +/** + * All this stage does is compute the quad's Z values (which is normally + * done by the shading stage). + * The next stage will do the actual depth test. + */ +static void +earlyz_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + const float fx = (float) quad->x0; + const float fy = (float) quad->y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->outputs.depth[0] = z0; + quad->outputs.depth[1] = z0 + dzdx; + quad->outputs.depth[2] = z0 + dzdy; + quad->outputs.depth[3] = z0 + dzdx + dzdy; + + qs->next->run( qs->next, quad ); +} + +static void +earlyz_begin( + struct quad_stage *qs ) +{ + qs->next->begin( qs->next ); +} + +static void +earlyz_destroy( + struct quad_stage *qs ) +{ + FREE( qs ); +} + +struct quad_stage * +sp_quad_earlyz_stage( + struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT( quad_stage ); + + stage->softpipe = softpipe; + stage->begin = earlyz_begin; + stage->run = earlyz_quad; + stage->destroy = earlyz_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c new file mode 100644 index 00000000000..33168584137 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -0,0 +1,390 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "x86/rtasm/x86sse.h" + +#ifdef MESA_LLVM +#include "pipe/llvm/gallivm.h" +#endif + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_texture.h" +#include "sp_tex_sample.h" + + +struct quad_shade_stage +{ + struct quad_stage stage; + struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; + struct tgsi_exec_machine machine; + struct tgsi_exec_vector *inputs, *outputs; + int colorOutSlot, depthOutSlot; +#ifdef MESA_LLVM + struct gallivm_prog *llvm_prog; +#endif +}; + + +/** cast wrapper */ +static INLINE struct quad_shade_stage * +quad_shade_stage(struct quad_stage *qs) +{ + return (struct quad_shade_stage *) qs; +} + + +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * Note that we only need to "compute" X and Y for the upper-left fragment. + * We could do less work if we're not depth testing, or there's no + * perspective-corrected attributes, but that's seldom. + */ +static void +setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) +{ + uint chan; + /* do X */ + quadpos->xyzw[0].f[0] = x; + /* do Y */ + quadpos->xyzw[1].f[0] = y; + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + quadpos->xyzw[chan].f[0] = a0; + quadpos->xyzw[chan].f[1] = a0 + dadx; + quadpos->xyzw[chan].f[2] = a0 + dady; + quadpos->xyzw[chan].f[3] = a0 + dadx + dady; + } +} + + +typedef void (XSTDCALL *codegen_function)( + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], + struct tgsi_exec_vector *temporary, + const struct tgsi_interp_coef *coef +#if 0 + ,const struct tgsi_exec_vector *quadPos +#endif + ); + + +/** + * Execute fragment shader for the four fragments in the quad. + */ +static void +shade_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + struct quad_shade_stage *qss = quad_shade_stage( qs ); + struct softpipe_context *softpipe = qs->softpipe; + struct tgsi_exec_machine *machine = &qss->machine; + + /* Consts do not require 16 byte alignment. */ + machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + + machine->InterpCoefs = quad->coef; + + /* Compute X, Y, Z, W vals for this quad */ + setup_pos_vector(quad->posCoef, (float) quad->x0, (float) quad->y0, &machine->QuadPos); + + /* run shader */ +#if defined(__i386__) || defined(__386__) + if( softpipe->use_sse ) { + codegen_function func = (codegen_function) x86_get_func( &softpipe->fs->sse2_program ); + func( + machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps, + machine->InterpCoefs +#if 0 + ,machine->QuadPos +#endif + ); + quad->mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); + } + else +#endif + { + quad->mask &= tgsi_exec_machine_run( machine ); + } + + /* store result color */ + if (qss->colorOutSlot >= 0) { + /* XXX need to handle multiple color outputs someday */ + assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot] + == TGSI_SEMANTIC_COLOR); + memcpy( + quad->outputs.color, + &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], + sizeof( quad->outputs.color ) ); + } + + /* + * XXX the following code for updating quad->outputs.depth + * isn't really needed if we did early z testing. + */ + + /* store result Z */ + if (qss->depthOutSlot >= 0) { + /* output[slot] is new Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; + } + } + else { + /* copy input Z (which was interpolated by the executor) to output Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; + /* XXX not sure the above line is always correct. The following + * might be better: + quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; + */ + } + } + + /* shader may cull fragments */ + if( quad->mask ) { + qs->next->run( qs->next, quad ); + } +} + +#if 0 +#ifdef MESA_LLVM +#define DLLVM 0 +static void +shade_quad_llvm(struct quad_stage *qs, + struct quad_header *quad) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + float dests[4][16][4] ALIGN16_ATTRIB; + float inputs[4][16][4] ALIGN16_ATTRIB; + const float fx = (float) quad->x0; + const float fy = (float) quad->y0; + struct gallivm_prog *llvm = qss->llvm_prog; + + inputs[0][0][0] = fx; + inputs[1][0][0] = fx + 1.0f; + inputs[2][0][0] = fx; + inputs[3][0][0] = fx + 1.0f; + + inputs[0][0][1] = fy; + inputs[1][0][1] = fy; + inputs[2][0][1] = fy + 1.0f; + inputs[3][0][1] = fy + 1.0f; +#if DLLVM + debug_printf("MASK = %d\n", quad->mask); +#endif + gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef); +#if DLLVM + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 2; ++j) { + debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j, + inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]); + } + } +#endif + + quad->mask &= + gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs, + softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], + qss->samplers); +#if DLLVM + debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n", + dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3], + dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]); +#endif + + /* store result color */ + if (qss->colorOutSlot >= 0) { + unsigned i; + /* XXX need to handle multiple color outputs someday */ + assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot] + == TGSI_SEMANTIC_COLOR); + for (i = 0; i < QUAD_SIZE; ++i) { + quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0]; + quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1]; + quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2]; + quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3]; + } + } +#if DLLVM + for (int i = 0; i < QUAD_SIZE; ++i) { + debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, + quad->outputs.color[0][i], + quad->outputs.color[1][i], + quad->outputs.color[2][i], + quad->outputs.color[3][i]); + } +#endif + + /* store result Z */ + if (qss->depthOutSlot >= 0) { + /* output[slot] is new Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = dests[i][0][2]; + } + } + else { + /* copy input Z (which was interpolated by the executor) to output Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = inputs[i][0][2]; + } + } +#if DLLVM + debug_printf("D [%f, %f, %f, %f] mask = %d\n", + quad->outputs.depth[0], + quad->outputs.depth[1], + quad->outputs.depth[2], + quad->outputs.depth[3], quad->mask); +#endif + + /* shader may cull fragments */ + if( quad->mask ) { + qs->next->run( qs->next, quad ); + } +} +#endif /*MESA_LLVM*/ +#endif + +/** + * Per-primitive (or per-begin?) setup + */ +static void shade_begin(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + unsigned i; + + /* set TGSI sampler state that varies */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + qss->samplers[i].state = softpipe->sampler[i]; + qss->samplers[i].texture = &softpipe->texture[i]->base; + } + +#ifdef MESA_LLVM + qss->llvm_prog = softpipe->fs->llvm_prog; +#endif + /* XXX only do this if the fragment shader changes... */ + tgsi_exec_machine_init(&qss->machine, + softpipe->fs->shader.tokens, + PIPE_MAX_SAMPLERS, + qss->samplers ); + + /* find output slots for depth, color */ + qss->colorOutSlot = -1; + qss->depthOutSlot = -1; + for (i = 0; i < qss->stage.softpipe->fs->shader.num_outputs; i++) { + switch (qss->stage.softpipe->fs->shader.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + qss->depthOutSlot = i; + break; + case TGSI_SEMANTIC_COLOR: + qss->colorOutSlot = i; + break; + } + } + + qs->next->begin(qs->next); +} + + +static void shade_destroy(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; + + tgsi_exec_machine_free_data(&qss->machine); + FREE( qss->inputs ); + FREE( qss->outputs ); + FREE( qs ); +} + + +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) +{ + struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); + uint i; + + /* allocate storage for program inputs/outputs, aligned to 16 bytes */ + qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); + qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); + qss->machine.Inputs = align16(qss->inputs); + qss->machine.Outputs = align16(qss->outputs); + + qss->stage.softpipe = softpipe; + qss->stage.begin = shade_begin; +#ifdef MESA_LLVM + /* disable until ported to accept + * x/y and soa layout + qss->stage.run = shade_quad_llvm; + */ + softpipe->use_sse = FALSE; + qss->stage.run = shade_quad; +#else + qss->stage.run = shade_quad; +#endif + qss->stage.destroy = shade_destroy; + + /* set TGSI sampler state that's constant */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + assert(softpipe->tex_cache[i]); + qss->samplers[i].get_samples = sp_get_samples; + qss->samplers[i].pipe = &softpipe->pipe; + qss->samplers[i].cache = softpipe->tex_cache[i]; + } + + return &qss->stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c new file mode 100644 index 00000000000..54254df1f11 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \brief Quad occlusion counter stage + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" + +static unsigned count_bits( unsigned val ) +{ + unsigned i; + + for (i = 0; val ; val >>= 1) + i += (val & 1); + + return i; +} + +static void +occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + + softpipe->occlusion_count += count_bits(quad->mask); + + qs->next->run(qs->next, quad); +} + + +static void occlusion_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void occlusion_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = occlusion_begin; + stage->run = occlusion_count_quad; + stage->destroy = occlusion_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c new file mode 100644 index 00000000000..cfe8f118081 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -0,0 +1,90 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + +/** + * Write quad to framebuffer, taking mask into account. + * + * Note that surfaces support only full quad reads and writes. + */ +static void +output_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[softpipe->current_cbuf], + quad->x0, quad->y0); + /* in-tile pos: */ + const int itx = quad->x0 % TILE_SIZE; + const int ity = quad->y0 % TILE_SIZE; + float (*quadColor)[4] = quad->outputs.color; + int i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + } + } +} + + +static void output_begin(struct quad_stage *qs) +{ + assert(qs->next == NULL); +} + + +static void output_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = output_begin; + stage->run = output_quad; + stage->destroy = output_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c new file mode 100644 index 00000000000..92a0da00833 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -0,0 +1,352 @@ + +/** + * \brief Quad stencil testing + */ + + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + + +/** Only 8-bit stencil supported */ +#define STENCIL_MAX 0xff + + +/** + * Do the basic stencil test (compare stencil buffer values against the + * reference value. + * + * \param stencilVals the stencil values from the stencil buffer + * \param func the stencil func (PIPE_FUNC_x) + * \param ref the stencil reference value + * \param valMask the stencil value mask indicating which bits of the stencil + * values and ref value are to be used. + * \return mask indicating which pixels passed the stencil test + */ +static unsigned +do_stencil_test(const ubyte stencilVals[QUAD_SIZE], unsigned func, + unsigned ref, unsigned valMask) +{ + unsigned passMask = 0x0; + unsigned j; + + ref &= valMask; + + switch (func) { + case PIPE_FUNC_NEVER: + /* passMask = 0x0 */ + break; + case PIPE_FUNC_LESS: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref < (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref == (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref <= (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref > (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref != (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref >= (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + assert(0); + } + + return passMask; +} + + +/** + * Apply the stencil operator to stencil values. + * + * \param stencilVals the stencil buffer values (read and written) + * \param mask indicates which pixels to update + * \param op the stencil operator (PIPE_STENCIL_OP_x) + * \param ref the stencil reference value + * \param wrtMask writemask controlling which bits are changed in the + * stencil values + */ +static void +apply_stencil_op(ubyte stencilVals[QUAD_SIZE], + unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) +{ + unsigned j; + ubyte newstencil[QUAD_SIZE]; + + for (j = 0; j < QUAD_SIZE; j++) { + newstencil[j] = stencilVals[j]; + } + + switch (op) { + case PIPE_STENCIL_OP_KEEP: + /* no-op */ + break; + case PIPE_STENCIL_OP_ZERO: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = 0; + } + } + break; + case PIPE_STENCIL_OP_REPLACE: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ref; + } + } + break; + case PIPE_STENCIL_OP_INCR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (stencilVals[j] < STENCIL_MAX) { + newstencil[j] = stencilVals[j] + 1; + } + } + } + break; + case PIPE_STENCIL_OP_DECR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (stencilVals[j] > 0) { + newstencil[j] = stencilVals[j] - 1; + } + } + } + break; + case PIPE_STENCIL_OP_INCR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = stencilVals[j] + 1; + } + } + break; + case PIPE_STENCIL_OP_DECR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = stencilVals[j] - 1; + } + } + break; + case PIPE_STENCIL_OP_INVERT: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ~stencilVals[j]; + } + } + break; + default: + assert(0); + } + + /* + * update the stencil values + */ + if (wrtMask != STENCIL_MAX) { + /* apply bit-wise stencil buffer writemask */ + for (j = 0; j < QUAD_SIZE; j++) { + stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & stencilVals[j]); + } + } + else { + for (j = 0; j < QUAD_SIZE; j++) { + stencilVals[j] = newstencil[j]; + } + } +} + + +/** + * Do stencil (and depth) testing. Stenciling depends on the outcome of + * depth testing. + */ +static void +stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + struct pipe_surface *ps = softpipe->framebuffer.zsbuf; + unsigned func, zFailOp, zPassOp, failOp; + ubyte ref, wrtMask, valMask; + ubyte stencilVals[QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->x0, quad->y0); + uint j; + uint face = quad->facing; + + if (!softpipe->depth_stencil->stencil[1].enabled) { + /* single-sided stencil test, use front (face=0) state */ + face = 0; + } + + /* choose front or back face function, operator, etc */ + /* XXX we could do these initializations once per primitive */ + func = softpipe->depth_stencil->stencil[face].func; + failOp = softpipe->depth_stencil->stencil[face].fail_op; + zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; + zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; + ref = softpipe->depth_stencil->stencil[face].ref_value; + wrtMask = softpipe->depth_stencil->stencil[face].write_mask; + valMask = softpipe->depth_stencil->stencil[face].value_mask; + + assert(ps); /* shouldn't get here if there's no stencil buffer */ + + /* get stencil values from cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] >> 24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] & 0xff; + } + break; + case PIPE_FORMAT_U_S8: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.stencil8[y][x]; + } + break; + default: + assert(0); + } + + /* do the stencil test first */ + { + unsigned passMask, failMask; + passMask = do_stencil_test(stencilVals, func, ref, valMask); + failMask = quad->mask & ~passMask; + quad->mask &= passMask; + + if (failOp != PIPE_STENCIL_OP_KEEP) { + apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); + } + } + + if (quad->mask) { + /* now the pixels that passed the stencil test are depth tested */ + if (softpipe->depth_stencil->depth.enabled) { + const unsigned origMask = quad->mask; + + sp_depth_test_quad(qs, quad); /* quad->mask is updated */ + + /* update stencil buffer values according to z pass/fail result */ + if (zFailOp != PIPE_STENCIL_OP_KEEP) { + const unsigned failMask = origMask & ~quad->mask; + apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); + } + + if (zPassOp != PIPE_STENCIL_OP_KEEP) { + const unsigned passMask = origMask & quad->mask; + apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); + } + } + else { + /* no depth test, apply Zpass operator to stencil buffer values */ + apply_stencil_op(stencilVals, quad->mask, zPassOp, ref, wrtMask); + } + + } + + /* put new stencil values into cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + uint s8z24 = tile->data.depth32[y][x]; + s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); + tile->data.depth32[y][x] = s8z24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + uint z24s8 = tile->data.depth32[y][x]; + z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; + tile->data.depth32[y][x] = z24s8; + } + break; + case PIPE_FORMAT_U_S8: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + tile->data.stencil8[y][x] = stencilVals[j]; + } + break; + default: + assert(0); + } + + if (quad->mask) + qs->next->run(qs->next, quad); +} + + +static void stencil_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void stencil_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = stencil_begin; + stage->run = stencil_test_quad; + stage->destroy = stencil_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c new file mode 100644 index 00000000000..8660432259c --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -0,0 +1,94 @@ + +/** + * quad polygon stipple stage + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + + +/** + * Apply polygon stipple to quads produced by triangle rasterization + */ +static void +stipple_quad(struct quad_stage *qs, struct quad_header *quad) +{ + static const uint bit31 = 1 << 31; + static const uint bit30 = 1 << 30; + + if (quad->prim == PRIM_TRI) { + struct softpipe_context *softpipe = qs->softpipe; + /* need to invert Y to index into OpenGL's stipple pattern */ + int y0, y1; + uint stipple0, stipple1; + if (softpipe->rasterizer->origin_lower_left) { + y0 = softpipe->framebuffer.cbufs[0]->height - 1 - quad->y0; + y1 = y0 - 1; + } + else { + y0 = quad->y0; + y1 = y0 + 1; + } + stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + +#if 1 + { + const int col0 = quad->x0 % 32; + if ((stipple0 & (bit31 >> col0)) == 0) + quad->mask &= ~MASK_TOP_LEFT; + + if ((stipple0 & (bit30 >> col0)) == 0) + quad->mask &= ~MASK_TOP_RIGHT; + + if ((stipple1 & (bit31 >> col0)) == 0) + quad->mask &= ~MASK_BOTTOM_LEFT; + + if ((stipple1 & (bit30 >> col0)) == 0) + quad->mask &= ~MASK_BOTTOM_RIGHT; + } +#else + /* We'd like to use this code, but we'd need to redefine + * MASK_TOP_LEFT to be (1 << 1) and MASK_TOP_RIGHT to be (1 << 0), + * and similarly for the BOTTOM bits. But that may have undesirable + * side effects elsewhere. + */ + const int col0 = 30 - (quad->x0 % 32); + quad->mask &= (((stipple0 >> col0) & 0x3) | + (((stipple1 >> col0) & 0x3) << 2)); +#endif + if (!quad->mask) + return; + } + + qs->next->run(qs->next, quad); +} + + +static void stipple_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void stipple_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage * +sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = stipple_begin; + stage->run = stipple_quad; + stage->destroy = stipple_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c new file mode 100644 index 00000000000..6a8a43aedaf --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_query.h" + +struct softpipe_query { + uint64 start; + uint64 end; +}; + + +static struct softpipe_query *softpipe_query( struct pipe_query *p ) +{ + return (struct softpipe_query *)p; +} + +static struct pipe_query * +softpipe_create_query(struct pipe_context *pipe, + unsigned type) +{ + assert(type == PIPE_QUERY_OCCLUSION_COUNTER); + return (struct pipe_query *)CALLOC_STRUCT( softpipe_query ); +} + + +static void +softpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) +{ + FREE(q); +} + + +static void +softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct softpipe_query *sq = softpipe_query(q); + + sq->start = softpipe->occlusion_count; +} + + +static void +softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct softpipe_query *sq = softpipe_query(q); + + sq->end = softpipe->occlusion_count; +} + + +static boolean +softpipe_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64 *result ) +{ + struct softpipe_query *sq = softpipe_query(q); + *result = sq->end - sq->start; + return TRUE; +} + + +void softpipe_init_query_funcs(struct softpipe_context *softpipe ) +{ + softpipe->pipe.create_query = softpipe_create_query; + softpipe->pipe.destroy_query = softpipe_destroy_query; + softpipe->pipe.begin_query = softpipe_begin_query; + softpipe->pipe.end_query = softpipe_end_query; + softpipe->pipe.get_query_result = softpipe_get_query_result; +} + + diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h new file mode 100644 index 00000000000..05060a45759 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + +#ifndef SP_QUERY_H +#define SP_QUERY_H + +struct softpipe_context; +extern void softpipe_init_query_funcs(struct softpipe_context * ); + + +#endif /* SP_QUERY_H */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h new file mode 100644 index 00000000000..b79db0d1f19 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -0,0 +1,187 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_STATE_H +#define SP_STATE_H + +#include "pipe/p_state.h" + +#include "x86/rtasm/x86sse.h" + + +#define SP_NEW_VIEWPORT 0x1 +#define SP_NEW_RASTERIZER 0x2 +#define SP_NEW_FS 0x4 +#define SP_NEW_BLEND 0x8 +#define SP_NEW_CLIP 0x10 +#define SP_NEW_SCISSOR 0x20 +#define SP_NEW_STIPPLE 0x40 +#define SP_NEW_FRAMEBUFFER 0x80 +#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 +#define SP_NEW_CONSTANTS 0x200 +#define SP_NEW_SAMPLER 0x400 +#define SP_NEW_TEXTURE 0x800 +#define SP_NEW_VERTEX 0x1000 +#define SP_NEW_VS 0x2000 +#define SP_NEW_QUERY 0x4000 + + + +#ifdef MESA_LLVM +struct gallivm_prog; +#endif + + +struct vertex_info; + + +/** Subclass of pipe_shader_state */ +struct sp_fragment_shader_state { + struct pipe_shader_state shader; +#if defined(__i386__) || defined(__386__) + struct x86_function sse2_program; +#endif +#ifdef MESA_LLVM + struct gallivm_prog *llvm_prog; +#endif +}; + + +/** Subclass of pipe_shader_state */ +struct sp_vertex_shader_state { + struct pipe_shader_state shader; + struct draw_vertex_shader *draw_data; +}; + + + +void * +softpipe_create_blend_state(struct pipe_context *, + const struct pipe_blend_state *); +void softpipe_bind_blend_state(struct pipe_context *, + void *); +void softpipe_delete_blend_state(struct pipe_context *, + void *); + +void * +softpipe_create_sampler_state(struct pipe_context *, + const struct pipe_sampler_state *); +void softpipe_bind_sampler_state(struct pipe_context *, unsigned, void *); +void softpipe_delete_sampler_state(struct pipe_context *, void *); + +void * +softpipe_create_depth_stencil_state(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); +void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); +void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); + +void * +softpipe_create_rasterizer_state(struct pipe_context *, + const struct pipe_rasterizer_state *); +void softpipe_bind_rasterizer_state(struct pipe_context *, void *); +void softpipe_delete_rasterizer_state(struct pipe_context *, void *); + +void softpipe_set_framebuffer_state( struct pipe_context *, + const struct pipe_framebuffer_state * ); + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ); + +void softpipe_set_clip_state( struct pipe_context *, + const struct pipe_clip_state * ); + +void softpipe_set_constant_buffer(struct pipe_context *, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + +void *softpipe_create_fs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_fs_state(struct pipe_context *, void *); +void softpipe_delete_fs_state(struct pipe_context *, void *); +void *softpipe_create_vs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_vs_state(struct pipe_context *, void *); +void softpipe_delete_vs_state(struct pipe_context *, void *); + +void softpipe_set_polygon_stipple( struct pipe_context *, + const struct pipe_poly_stipple * ); + +void softpipe_set_scissor_state( struct pipe_context *, + const struct pipe_scissor_state * ); + +void softpipe_set_sampler_texture( struct pipe_context *, + unsigned unit, + struct pipe_texture * ); + +void softpipe_set_viewport_state( struct pipe_context *, + const struct pipe_viewport_state * ); + +void softpipe_set_vertex_element(struct pipe_context *, + unsigned index, + const struct pipe_vertex_element *); + +void softpipe_set_vertex_buffer(struct pipe_context *, + unsigned index, + const struct pipe_vertex_buffer *); + + +void softpipe_update_derived( struct softpipe_context *softpipe ); + + +boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +boolean softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); + + +void +softpipe_map_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_surfaces(struct softpipe_context *sp); + +void +softpipe_map_texture_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_texture_surfaces(struct softpipe_context *sp); + + +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe); + +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); + + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c new file mode 100644 index 00000000000..2d40d6bd8f5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -0,0 +1,98 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_state.h" + + +void * +softpipe_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + return mem_dup(blend, sizeof(*blend)); +} + +void softpipe_bind_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->blend = (const struct pipe_blend_state *)blend; + + softpipe->dirty |= SP_NEW_BLEND; +} + +void softpipe_delete_blend_state(struct pipe_context *pipe, + void *blend) +{ + FREE( blend ); +} + + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->blend_color = *blend_color; + + softpipe->dirty |= SP_NEW_BLEND; +} + + +/** XXX move someday? Or consolidate all these simple state setters + * into one file. + */ + + +void * +softpipe_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + +void +softpipe_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; +} + +void +softpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) +{ + FREE( depth ); +} diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c new file mode 100644 index 00000000000..08c5f06d05d --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ +#include "sp_context.h" +#include "sp_state.h" +#include "pipe/draw/draw_context.h" + + +void softpipe_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(softpipe->draw, clip); +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +void softpipe_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->viewport = *viewport; /* struct copy */ + softpipe->dirty |= SP_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(softpipe->draw, viewport); + + /* Using tnl/ and vf/ modules is temporary while getting started. + * Full pipe will have vertex shader, vertex fetch of its own. + */ +} + + +void softpipe_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + memcpy( &softpipe->scissor, scissor, sizeof(*scissor) ); + softpipe->dirty |= SP_NEW_SCISSOR; +} + + +void softpipe_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + memcpy( &softpipe->poly_stipple, stipple, sizeof(*stipple) ); + softpipe->dirty |= SP_NEW_STIPPLE; +} diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c new file mode 100644 index 00000000000..372597869f3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -0,0 +1,235 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_vertex.h" +#include "pipe/draw/draw_private.h" +#include "sp_context.h" +#include "sp_state.h" + + +/** + * Search vertex program's outputs to find a match for the given + * semantic name/index. Return the index of the output slot. + * + * Return 0 if not found. This will cause the fragment program to use + * vertex attrib 0 (position) in the cases where the fragment program + * attempts to use a missing vertex program output. This is an undefined + * condition that users shouldn't hit anyway. + */ +static int +find_vs_output(const struct pipe_shader_state *vs, + uint semantic_name, + uint semantic_index) +{ + uint i; + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == semantic_name && + vs->output_semantic_index[i] == semantic_index) + return i; + } + return 0; +} + + +/** + * Mark the current vertex layout as "invalid". + * We'll validate the vertex layout later, when we start to actually + * render a point or line or tri. + */ +static void +invalidate_vertex_layout(struct softpipe_context *softpipe) +{ + softpipe->vertex_info.num_attribs = 0; +} + + +/** + * The vertex info describes how to convert the post-transformed vertices + * (simple float[][4]) used by the 'draw' module into vertices for + * rasterization. + * + * This function validates the vertex layout and returns a pointer to a + * vertex_info object. + */ +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe) +{ + struct vertex_info *vinfo = &softpipe->vertex_info; + + if (vinfo->num_attribs == 0) { + /* compute vertex layout now */ + const struct pipe_shader_state *vs = &softpipe->vs->shader; + const struct pipe_shader_state *fs = &softpipe->fs->shader; + const enum interp_mode colorInterp + = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + uint i; + + if (softpipe->vbuf) { + /* if using the post-transform vertex buffer, tell draw_vbuf to + * simply emit the whole post-xform vertex as-is: + */ + struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; + vinfo_vbuf->num_attribs = 0; + draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); + vinfo_vbuf->size = 4 * vs->num_outputs + + sizeof(struct vertex_header) / 4; + } + + /* + * Loop over fragment shader inputs, searching for the matching output + * from the vertex shader. + */ + vinfo->num_attribs = 0; + for (i = 0; i < fs->num_inputs; i++) { + int src; + switch (fs->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + break; + + case TGSI_SEMANTIC_COLOR: + src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, + fs->input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + break; + + case TGSI_SEMANTIC_FOG: + src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + case TGSI_SEMANTIC_GENERIC: + /* this includes texcoords and varying vars */ + src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, + fs->input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + default: + assert(0); + } + } + + softpipe->psize_slot = find_vs_output(vs, TGSI_SEMANTIC_PSIZE, 0); + if (softpipe->psize_slot > 0) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, + softpipe->psize_slot); + } + + draw_compute_vertex_size(vinfo); + } + + return vinfo; +} + + +/** + * Called from vbuf module. + * + * Note that there's actually two different vertex layouts in softpipe. + * + * The normal one is computed in softpipe_get_vertex_info() above and is + * used by the point/line/tri "setup" code. + * + * The other one (this one) is only used by the vbuf module (which is + * not normally used by default but used in testing). For the vbuf module, + * we basically want to pass-through the draw module's vertex layout as-is. + * When the softpipe vbuf code begins drawing, the normal vertex layout + * will come into play again. + */ +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe) +{ + (void) softpipe_get_vertex_info(softpipe); + return &softpipe->vertex_info_vbuf; +} + + +/** + * Recompute cliprect from scissor bounds, scissor enable and surface size. + */ +static void +compute_cliprect(struct softpipe_context *sp) +{ + unsigned surfWidth, surfHeight; + + if (sp->framebuffer.num_cbufs > 0) { + surfWidth = sp->framebuffer.cbufs[0]->width; + surfHeight = sp->framebuffer.cbufs[0]->height; + } + else { + /* no surface? */ + surfWidth = sp->scissor.maxx; + surfHeight = sp->scissor.maxy; + } + + if (sp->rasterizer->scissor) { + /* clip to scissor rect */ + sp->cliprect.minx = MAX2(sp->scissor.minx, 0); + sp->cliprect.miny = MAX2(sp->scissor.miny, 0); + sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); + sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); + } + else { + /* clip to surface bounds */ + sp->cliprect.minx = 0; + sp->cliprect.miny = 0; + sp->cliprect.maxx = surfWidth; + sp->cliprect.maxy = surfHeight; + } +} + + +/* Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. + */ +void softpipe_update_derived( struct softpipe_context *softpipe ) +{ + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_FS | + SP_NEW_VS)) + invalidate_vertex_layout( softpipe ); + + if (softpipe->dirty & (SP_NEW_SCISSOR | + SP_NEW_DEPTH_STENCIL_ALPHA | + SP_NEW_FRAMEBUFFER)) + compute_cliprect(softpipe); + + if (softpipe->dirty & (SP_NEW_BLEND | + SP_NEW_DEPTH_STENCIL_ALPHA | + SP_NEW_FRAMEBUFFER | + SP_NEW_RASTERIZER | + SP_NEW_FS | + SP_NEW_QUERY)) + sp_build_quad_pipeline(softpipe); + + softpipe->dirty = 0; +} diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c new file mode 100644 index 00000000000..0b814fc2847 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -0,0 +1,179 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "sp_context.h" +#include "sp_state.h" + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/draw/draw_context.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/llvm/gallivm.h" +#include "pipe/tgsi/util/tgsi_dump.h" +#include "pipe/tgsi/exec/tgsi_sse2.h" + + +void * +softpipe_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_fragment_shader_state *state; + + /* Decide whether we'll be codegenerating this shader and if so do + * that now. + */ + + state = CALLOC_STRUCT(sp_fragment_shader_state); + if (!state) + return NULL; + + state->shader = *templ; + + if (softpipe->dump_fs) { + tgsi_dump(state->shader.tokens, 0); + } + +#ifdef MESA_LLVM + state->llvm_prog = 0; + +#if 0 + if (!gallivm_global_cpu_engine()) { + gallivm_cpu_engine_create(state->llvm_prog); + } + else + gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); +#endif + +#elif defined(__i386__) || defined(__386__) + if (softpipe->use_sse) { + x86_init_func( &state->sse2_program ); + tgsi_emit_sse2_fs( state->shader.tokens, &state->sse2_program ); + } +#endif + + return state; +} + + +void +softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->fs = (struct sp_fragment_shader_state *) fs; + + softpipe->dirty |= SP_NEW_FS; +} + + +void +softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct sp_fragment_shader_state *state = fs; + +#if defined(__i386__) || defined(__386__) + x86_release_func( &state->sse2_program ); +#endif + + FREE( state ); +} + + +void * +softpipe_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_vertex_shader_state *state; + + state = CALLOC_STRUCT(sp_vertex_shader_state); + if (state == NULL ) { + return NULL; + } + + state->shader = *templ; + + state->draw_data = draw_create_vertex_shader(softpipe->draw, + &state->shader); + if (state->draw_data == NULL) { + FREE( state ); + return NULL; + } + + return state; +} + + +void +softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->vs = (const struct sp_vertex_shader_state *)vs; + + draw_bind_vertex_shader(softpipe->draw, softpipe->vs->draw_data); + + softpipe->dirty |= SP_NEW_VS; +} + + +void +softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + struct sp_vertex_shader_state *state = + (struct sp_vertex_shader_state *)vs; + + draw_delete_vertex_shader(softpipe->draw, state->draw_data); + FREE( state ); +} + + + +void +softpipe_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* note: reference counting */ + pipe_buffer_reference(ws, + &softpipe->constants[shader].buffer, + buf->buffer); + softpipe->constants[shader].size = buf->size; + + softpipe->dirty |= SP_NEW_CONSTANTS; +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c new file mode 100644 index 00000000000..53755099dde --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "sp_context.h" +#include "sp_state.h" +#include "pipe/draw/draw_context.h" + + + +void * +softpipe_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rast) +{ + return mem_dup(rast, sizeof(*rast)); +} + +void softpipe_bind_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + /* pass-through to draw module */ + draw_set_rasterizer_state(softpipe->draw, setup); + + softpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + + softpipe->dirty |= SP_NEW_RASTERIZER; +} + +void softpipe_delete_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) +{ + FREE( rasterizer ); +} + + diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c new file mode 100644 index 00000000000..ea348c7e958 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -0,0 +1,93 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: + * Brian Paul + */ + +#include "pipe/p_util.h" + +#include "pipe/draw/draw_context.h" + +#include "sp_context.h" +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" +#include "pipe/draw/draw_context.h" + + + +void * +softpipe_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + return mem_dup(sampler, sizeof(*sampler)); +} + +void +softpipe_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + draw_flush(softpipe->draw); + + assert(unit < PIPE_MAX_SAMPLERS); + softpipe->sampler[unit] = (struct pipe_sampler_state *)sampler; + + softpipe->dirty |= SP_NEW_SAMPLER; +} + + +void +softpipe_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + +void +softpipe_set_sampler_texture(struct pipe_context *pipe, + unsigned unit, + struct pipe_texture *texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + draw_flush(softpipe->draw); + + assert(unit < PIPE_MAX_SAMPLERS); + softpipe->texture[unit] = softpipe_texture(texture); /* ptr, not struct */ + + sp_tile_cache_set_texture(softpipe->tex_cache[unit], texture); + + softpipe->dirty |= SP_NEW_TEXTURE; +} + + + diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c new file mode 100644 index 00000000000..e2c6893e9f5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -0,0 +1,109 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ +#include "p_inlines.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" + + +/** + * XXX this might get moved someday + * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. + * Here, we flush the old surfaces and update the tile cache to point to the new + * surfaces. + */ +void +softpipe_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct softpipe_context *sp = softpipe_context(pipe); + uint i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + /* check if changing cbuf */ + if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { + /* flush old */ + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + + /* assign new */ + sp->framebuffer.cbufs[i] = fb->cbufs[i]; + + /* update cache */ + sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]); + } + } + + sp->framebuffer.num_cbufs = fb->num_cbufs; + + /* zbuf changing? */ + if (sp->framebuffer.zsbuf != fb->zsbuf) { + /* flush old */ + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + /* assign new */ + sp->framebuffer.zsbuf = fb->zsbuf; + + /* update cache */ + sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); + } + +#if 0 + /* XXX combined depth/stencil here */ + + /* sbuf changing? */ + if (sp->framebuffer.sbuf != fb->sbuf) { + /* flush old */ + sp_flush_tile_cache(sp, sp->sbuf_cache_sep); + + /* assign new */ + sp->framebuffer.sbuf = fb->sbuf; + + /* update cache */ + if (fb->sbuf != fb->zbuf) { + /* separate stencil buf */ + sp->sbuf_cache = sp->sbuf_cache_sep; + sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); + } + else { + /* combined depth/stencil */ + sp->sbuf_cache = sp->zbuf_cache; + sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); + } + } +#endif + + sp->dirty |= SP_NEW_FRAMEBUFFER; +} + + + + diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c new file mode 100644 index 00000000000..09ff540ccfd --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -0,0 +1,64 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_surface.h" + +#include "pipe/draw/draw_context.h" + + +void +softpipe_set_vertex_element(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *attrib) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + assert(index < PIPE_ATTRIB_MAX); + softpipe->vertex_element[index] = *attrib; /* struct copy */ + softpipe->dirty |= SP_NEW_VERTEX; + + draw_set_vertex_element(softpipe->draw, index, attrib); +} + + +void +softpipe_set_vertex_buffer(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + assert(index < PIPE_ATTRIB_MAX); + softpipe->vertex_buffer[index] = *buffer; /* struct copy */ + softpipe->dirty |= SP_NEW_VERTEX; + + draw_set_vertex_buffer(softpipe->draw, index, buffer); +} diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c new file mode 100644 index 00000000000..8802ced1874 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -0,0 +1,159 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/util/p_tile.h" +#include "sp_context.h" +#include "sp_surface.h" + + + +/* Assumes all values are within bounds -- no checking at this level - + * do it higher up if required. + */ +static void +sp_surface_copy(struct pipe_context *pipe, + unsigned do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + assert( dst->cpp == src->cpp ); + + pipe_copy_rect(pipe_surface_map(dst), + dst->cpp, + dst->pitch, + dstx, dsty, + width, height, + pipe_surface_map(src), + do_flip ? -(int) src->pitch : src->pitch, + srcx, do_flip ? 1 - srcy - height : srcy); + + pipe_surface_unmap(src); + pipe_surface_unmap(dst); +} + + +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; +} + + +#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) + + +/** + * Fill a rectangular sub-region. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +static void +sp_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + unsigned i, j; + void *dst_map = pipe_surface_map(dst); + + assert(dst->pitch > 0); + assert(width <= dst->pitch); + + + switch (dst->cpp) { + case 1: + { + ubyte *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + memset(row, value, width); + row += dst->pitch; + } + } + break; + case 2: + { + ushort *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = (ushort) value; + row += dst->pitch; + } + } + break; + case 4: + { + unsigned *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = value; + row += dst->pitch; + } + } + break; + case 8: + { + /* expand the 4-byte clear value to an 8-byte value */ + ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); + ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); + ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); + ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); + ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); + val0 = (val0 << 8) | val0; + val1 = (val1 << 8) | val1; + val2 = (val2 << 8) | val2; + val3 = (val3 << 8) | val3; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + row[j*4+0] = val0; + row[j*4+1] = val1; + row[j*4+2] = val2; + row[j*4+3] = val3; + } + row += dst->pitch * 4; + } + } + break; + default: + assert(0); + break; + } + + pipe_surface_unmap( dst ); +} + + +void +sp_init_surface_functions(struct softpipe_context *sp) +{ + sp->pipe.surface_copy = sp_surface_copy; + sp->pipe.surface_fill = sp_surface_fill; +} diff --git a/src/gallium/drivers/softpipe/sp_surface.h b/src/gallium/drivers/softpipe/sp_surface.h new file mode 100644 index 00000000000..22de3ba43f6 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_SURFACE_H +#define SP_SURFACE_H + + +struct softpipe_context; + + +extern void +sp_init_surface_functions(struct softpipe_context *sp); + + +#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c new file mode 100644 index 00000000000..325bdb86da5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -0,0 +1,916 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling + * + * Authors: + * Brian Paul + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tex_sample.h" +#include "sp_tile_cache.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/tgsi/exec/tgsi_exec.h" + + +/* + * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes + * see 1-pixel bands of improperly weighted linear-filtered textures. + * The tests/texwrap.c demo is a good test. + * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. + * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). + */ +#define FRAC(f) ((f) - ifloor(f)) + + +/** + * Linear interpolation macro + */ +#define LERP(T, A, B) ( (A) + (T) * ((B) - (A)) ) + + +/** + * Do 2D/biliner interpolation of float values. + * v00, v10, v01 and v11 are typically four texture samples in a square/box. + * a and b are the horizontal and vertical interpolants. + * It's important that this function is inlined when compiled with + * optimization! If we find that's not true on some systems, convert + * to a macro. + */ +static INLINE float +lerp_2d(float a, float b, + float v00, float v10, float v01, float v11) +{ + const float temp0 = LERP(a, v00, v10); + const float temp1 = LERP(a, v01, v11); + return LERP(b, temp0, temp1); +} + + +/** + * If A is a signed integer, A % B doesn't give the right value for A < 0 + * (in terms of texture repeat). Just casting to unsigned fixes that. + */ +#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) + + +/** + * Apply texture coord wrapping mode and return integer texture index. + * \param wrapMode PIPE_TEX_WRAP_x + * \param s the texcoord + * \param size the texture image size + * \return integer texture index + */ +static INLINE int +nearest_texcoord(unsigned wrapMode, float s, unsigned size) +{ + int i; + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + /* s limited to [0,1) */ + /* i limited to [0,size-1] */ + i = ifloor(s * size); + i = REMAINDER(i, size); + return i; + case PIPE_TEX_WRAP_CLAMP: + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + if (s <= 0.0F) + i = 0; + else if (s >= 1.0F) + i = size - 1; + else + i = ifloor(s * size); + return i; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + if (s < min) + i = 0; + else if (s > max) + i = size - 1; + else + i = ifloor(s * size); + } + return i; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + /* s limited to [min,max] */ + /* i limited to [-1, size] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + if (s <= min) + i = -1; + else if (s >= max) + i = size; + else + i = ifloor(s * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + { + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + const int flr = ifloor(s); + float u; + if (flr & 1) + u = 1.0F - (s - (float) flr); + else + u = s - (float) flr; + if (u < min) + i = 0; + else if (u > max) + i = size - 1; + else + i = ifloor(u * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + { + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + const float u = FABSF(s); + if (u <= 0.0F) + i = 0; + else if (u >= 1.0F) + i = size - 1; + else + i = ifloor(u * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + const float u = FABSF(s); + if (u < min) + i = 0; + else if (u > max) + i = size - 1; + else + i = ifloor(u * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + const float u = FABSF(s); + if (u < min) + i = -1; + else if (u > max) + i = size; + else + i = ifloor(u * size); + } + return i; + default: + assert(0); + return 0; + } +} + + +/** + * Used to compute texel locations for linear sampling. + * \param wrapMode PIPE_TEX_WRAP_x + * \param s the texcoord + * \param size the texture image size + * \param i0 returns first texture index + * \param i1 returns second texture index (usually *i0 + 1) + * \param a returns blend factor/weight between texture indexes + */ +static INLINE void +linear_texcoord(unsigned wrapMode, float s, unsigned size, + int *i0, int *i1, float *a) +{ + float u; + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + u = s * size - 0.5F; + *i0 = REMAINDER(ifloor(u), size); + *i1 = REMAINDER(*i0 + 1, size); + break; + case PIPE_TEX_WRAP_CLAMP: + if (s <= 0.0F) + u = 0.0F; + else if (s >= 1.0F) + u = (float) size; + else + u = s * size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + if (s <= 0.0F) + u = 0.0F; + else if (s >= 1.0F) + u = (float) size; + else + u = s * size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + if (*i0 < 0) + *i0 = 0; + if (*i1 >= (int) size) + *i1 = size - 1; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + if (s <= min) + u = min * size; + else if (s >= max) + u = max * size; + else + u = s * size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + } + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + { + const int flr = ifloor(s); + if (flr & 1) + u = 1.0F - (s - (float) flr); + else + u = s - (float) flr; + u = (u * size) - 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + if (*i0 < 0) + *i0 = 0; + if (*i1 >= (int) size) + *i1 = size - 1; + } + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + u = FABSF(s); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + u = FABSF(s); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + if (*i0 < 0) + *i0 = 0; + if (*i1 >= (int) size) + *i1 = size - 1; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + u = FABSF(s); + if (u <= min) + u = min * size; + else if (u >= max) + u = max * size; + else + u *= size; + u -= 0.5F; + *i0 = ifloor(u); + *i1 = *i0 + 1; + } + break; + default: + assert(0); + } + *a = FRAC(u); +} + + +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + const float arx = FABSF(rx), ary = FABSF(ry), arz = FABSF(rz); + unsigned face; + float sc, tc, ma; + + if (arx > ary && arx > arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; + } + } + else if (ary > arx && ary > arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + + *newS = ( sc / ma + 1.0F ) * 0.5F; + *newT = ( tc / ma + 1.0F ) * 0.5F; + + return face; +} + + +/** + * Examine the quad's texture coordinates to compute the partial + * derivatives w.r.t X and Y, then compute lambda (level of detail). + * + * This is only done for fragment shaders, not vertex shaders. + */ +static float +compute_lambda(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + float rho, lambda; + + assert(s); + { + float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; + float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; + dsdx = FABSF(dsdx); + dsdy = FABSF(dsdy); + rho = MAX2(dsdx, dsdy); + if (sampler->state->normalized_coords) + rho *= sampler->texture->width[0]; + } + if (t) { + float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; + float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; + float max; + dtdx = FABSF(dtdx); + dtdy = FABSF(dtdy); + max = MAX2(dtdx, dtdy); + if (sampler->state->normalized_coords) + max *= sampler->texture->height[0]; + rho = MAX2(rho, max); + } + if (p) { + float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; + float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; + float max; + dpdx = FABSF(dpdx); + dpdy = FABSF(dpdy); + max = MAX2(dpdx, dpdy); + if (sampler->state->normalized_coords) + max *= sampler->texture->depth[0]; + rho = MAX2(rho, max); + } + + lambda = LOG2(rho); + lambda += lodbias + sampler->state->lod_bias; + lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod); + + return lambda; +} + + +/** + * Do several things here: + * 1. Compute lambda from the texcoords, if needed + * 2. Determine if we're minifying or magnifying + * 3. If minifying, choose mipmap levels + * 4. Return image filter to use within mipmap images + */ +static void +choose_mipmap_levels(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + unsigned *level0, unsigned *level1, float *levelBlend, + unsigned *imgFilter) +{ + if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* no mipmap selection needed */ + *imgFilter = sampler->state->mag_img_filter; + *level0 = *level1 = (int) sampler->state->min_lod; + } + else { + float lambda; + + if (1) + /* fragment shader */ + lambda = compute_lambda(sampler, s, t, p, lodbias); + else + /* vertex shader */ + lambda = lodbias; /* not really a bias, but absolute LOD */ + + if (lambda < 0.0) { /* XXX threshold depends on the filter */ + /* magnifying */ + *imgFilter = sampler->state->mag_img_filter; + *level0 = *level1 = 0; + } + else { + /* minifying */ + *imgFilter = sampler->state->min_img_filter; + + /* choose mipmap level(s) and compute the blend factor between them */ + if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + /* Nearest mipmap level */ + const int lvl = (int) (lambda + 0.5); + *level0 = + *level1 = CLAMP(lvl, 0, (int) sampler->texture->last_level); + } + else { + /* Linear interpolation between mipmap levels */ + const int lvl = (int) lambda; + *level0 = CLAMP(lvl, 0, (int) sampler->texture->last_level); + *level1 = CLAMP(lvl + 1, 0, (int) sampler->texture->last_level); + *levelBlend = FRAC(lambda); /* blending weight between levels */ + } + } + } +} + + +/** + * Get a texel from a texture, using the texture tile cache. + * + * \param face the cube face in 0..5 + * \param level the mipmap level + * \param x the x coord of texel within 2D image + * \param y the y coord of texel within 2D image + * \param z which slice of a 3D texture + * \param rgba the quad to put the texel/color into + * \param j which element of the rgba quad to write to + * + * XXX maybe move this into sp_tile_cache.c and merge with the + * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... + */ +static void +get_texel(struct tgsi_sampler *sampler, + unsigned face, unsigned level, int x, int y, int z, + float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) +{ + const int tx = x % TILE_SIZE; + const int ty = y % TILE_SIZE; + const struct softpipe_cached_tile *tile + = sp_get_cached_tile_tex(sampler->pipe, sampler->cache, + x, y, z, face, level); + rgba[0][j] = tile->data.color[ty][tx][0]; + rgba[1][j] = tile->data.color[ty][tx][1]; + rgba[2][j] = tile->data.color[ty][tx][2]; + rgba[3][j] = tile->data.color[ty][tx][3]; +} + + +/** + * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' + * When we sampled the depth texture, the depth value was put into all + * RGBA channels. We look at the red channel here. + */ +static INLINE void +shadow_compare(uint compare_func, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const float p[QUAD_SIZE], + uint j) +{ + int k; + switch (compare_func) { + case PIPE_FUNC_LESS: + k = p[j] < rgba[0][j]; + break; + case PIPE_FUNC_LEQUAL: + k = p[j] <= rgba[0][j]; + break; + case PIPE_FUNC_GREATER: + k = p[j] > rgba[0][j]; + break; + case PIPE_FUNC_GEQUAL: + k = p[j] >= rgba[0][j]; + break; + case PIPE_FUNC_EQUAL: + k = p[j] == rgba[0][j]; + break; + case PIPE_FUNC_NOTEQUAL: + k = p[j] != rgba[0][j]; + break; + case PIPE_FUNC_ALWAYS: + k = 1; + break; + case PIPE_FUNC_NEVER: + k = 0; + break; + default: + assert(0); + } + + rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; +} + + +/** + * Common code for sampling 1D/2D/cube textures. + * Could probably extend for 3D... + */ +static void +sp_get_samples_2d_common(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const unsigned faces[4]) +{ + const uint compare_func = sampler->state->compare_func; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + choose_mipmap_levels(sampler, s, t, p, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + if (sampler->state->normalized_coords) { + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + } + else { + width = height = 1; + } + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + for (j = 0; j < QUAD_SIZE; j++) { + int x = nearest_texcoord(sampler->state->wrap_s, s[j], width); + int y = nearest_texcoord(sampler->state->wrap_t, t[j], height); + get_texel(sampler, faces[j], level0, x, y, 0, rgba, j); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, rgba, p, j); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x = x / 2; + y = y / 2; + get_texel(sampler, faces[j], level1, x, y, 0, rgba2, j); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(compare_func, rgba2, p, j); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4], a, b; + int x0, y0, x1, y1, c; + linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &a); + linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &b); + get_texel(sampler, faces[j], level0, x0, y0, 0, tx, 0); + get_texel(sampler, faces[j], level0, x1, y0, 0, tx, 1); + get_texel(sampler, faces[j], level0, x0, y1, 0, tx, 2); + get_texel(sampler, faces[j], level0, x1, y1, 0, tx, 3); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0 = x0 / 2; + y0 = y0 / 2; + x1 = x1 / 2; + y1 = y1 / 2; + get_texel(sampler, faces[j], level1, x0, y0, 0, tx, 0); + get_texel(sampler, faces[j], level1, x1, y0, 0, tx, 1); + get_texel(sampler, faces[j], level1, x0, y1, 0, tx, 2); + get_texel(sampler, faces[j], level1, x1, y1, 0, tx, 3); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_2d(a, b, + tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + break; + default: + assert(0); + } +} + + +static void +sp_get_samples_1d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + static const float tzero[4] = {0, 0, 0, 0}; + sp_get_samples_2d_common(sampler, s, tzero, NULL, lodbias, rgba, faces); +} + + +static void +sp_get_samples_2d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces); +} + + +static void +sp_get_samples_3d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + /* get/map pipe_surfaces corresponding to 3D tex slices */ + unsigned level0, level1, j, imgFilter; + int width, height, depth; + float levelBlend; + const uint face = 0; + + choose_mipmap_levels(sampler, s, t, p, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + if (sampler->state->normalized_coords) { + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + depth = sampler->texture->depth[level0]; + } + else { + width = height = depth = 1; + } + + assert(width > 0); + assert(height > 0); + assert(depth > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + for (j = 0; j < QUAD_SIZE; j++) { + int x = nearest_texcoord(sampler->state->wrap_s, s[j], width); + int y = nearest_texcoord(sampler->state->wrap_t, t[j], height); + int z = nearest_texcoord(sampler->state->wrap_r, p[j], depth); + get_texel(sampler, face, level0, x, y, z, rgba, j); + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x /= 2; + y /= 2; + z /= 2; + get_texel(sampler, face, level1, x, y, z, rgba2, j); + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba2[c][j], rgba[c][j]); + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + for (j = 0; j < QUAD_SIZE; j++) { + float texel0[4][4], texel1[4][4]; + float xw, yw, zw; /* interpolation weights */ + int x0, x1, y0, y1, z0, z1, c; + linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &xw); + linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &yw); + linear_texcoord(sampler->state->wrap_r, p[j], depth, &z0, &z1, &zw); + get_texel(sampler, face, level0, x0, y0, z0, texel0, 0); + get_texel(sampler, face, level0, x1, y0, z0, texel0, 1); + get_texel(sampler, face, level0, x0, y1, z0, texel0, 2); + get_texel(sampler, face, level0, x1, y1, z0, texel0, 3); + get_texel(sampler, face, level0, x0, y0, z1, texel1, 0); + get_texel(sampler, face, level0, x1, y0, z1, texel1, 1); + get_texel(sampler, face, level0, x0, y1, z1, texel1, 2); + get_texel(sampler, face, level0, x1, y1, z1, texel1, 3); + + /* 3D lerp */ + for (c = 0; c < 4; c++) { + float ctemp0[4][4], ctemp1[4][4]; + ctemp0[c][j] = lerp_2d(xw, yw, + texel0[c][0], texel0[c][1], + texel0[c][2], texel0[c][3]); + ctemp1[c][j] = lerp_2d(xw, yw, + texel1[c][0], texel1[c][1], + texel1[c][2], texel1[c][3]); + rgba[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0 /= 2; + y0 /= 2; + z0 /= 2; + x1 /= 2; + y1 /= 2; + z1 /= 2; + get_texel(sampler, face, level1, x0, y0, z0, texel0, 0); + get_texel(sampler, face, level1, x1, y0, z0, texel0, 1); + get_texel(sampler, face, level1, x0, y1, z0, texel0, 2); + get_texel(sampler, face, level1, x1, y1, z0, texel0, 3); + get_texel(sampler, face, level1, x0, y0, z1, texel1, 0); + get_texel(sampler, face, level1, x1, y0, z1, texel1, 1); + get_texel(sampler, face, level1, x0, y1, z1, texel1, 2); + get_texel(sampler, face, level1, x1, y1, z1, texel1, 3); + + /* 3D lerp */ + for (c = 0; c < 4; c++) { + float ctemp0[4][4], ctemp1[4][4]; + ctemp0[c][j] = lerp_2d(xw, yw, + texel0[c][0], texel0[c][1], + texel0[c][2], texel0[c][3]); + ctemp1[c][j] = lerp_2d(xw, yw, + texel1[c][0], texel1[c][1], + texel1[c][2], texel1[c][3]); + rgba2[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]); + } + + /* blend mipmap levels */ + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + break; + default: + assert(0); + } +} + + +static void +sp_get_samples_cube(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + unsigned faces[QUAD_SIZE], j; + float ssss[4], tttt[4]; + for (j = 0; j < QUAD_SIZE; j++) { + faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); + } + sp_get_samples_2d_common(sampler, ssss, tttt, NULL, lodbias, rgba, faces); +} + + +/** + * Called via tgsi_sampler::get_samples() + * Use the sampler's state setting to get a filtered RGBA value + * from the sampler's texture. + * + * XXX we can implement many versions of this function, each + * tightly coded for a specific combination of sampler state + * (nearest + repeat), (bilinear mipmap + clamp), etc. + * + * The update_samplers() function in st_atom_sampler.c could create + * a new tgsi_sampler object for each state combo it finds.... + */ +void +sp_get_samples(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + if (!sampler->texture) + return; + + switch (sampler->texture->target) { + case PIPE_TEXTURE_1D: + sp_get_samples_1d(sampler, s, t, p, lodbias, rgba); + break; + case PIPE_TEXTURE_2D: + sp_get_samples_2d(sampler, s, t, p, lodbias, rgba); + break; + case PIPE_TEXTURE_3D: + sp_get_samples_3d(sampler, s, t, p, lodbias, rgba); + break; + case PIPE_TEXTURE_CUBE: + sp_get_samples_cube(sampler, s, t, p, lodbias, rgba); + break; + default: + assert(0); + } +} + diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h new file mode 100644 index 00000000000..404bfd0c365 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -0,0 +1,17 @@ +#ifndef SP_TEX_SAMPLE_H +#define SP_TEX_SAMPLE_H + + +struct tgsi_sampler; + + +extern void +sp_get_samples(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + + +#endif /* SP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c new file mode 100644 index 00000000000..6de7a9b543b --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -0,0 +1,166 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" + + +/* Simple, maximally packed layout. + */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static void +softpipe_texture_layout(struct softpipe_texture * spt) +{ + struct pipe_texture *pt = &spt->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + spt->buffer_size = 0; + + for (level = 0; level <= pt->last_level; level++) { + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + + spt->level_offset[level] = spt->buffer_size; + + spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + width * pt->cpp; + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + + +struct pipe_texture * +softpipe_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat) +{ + struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *templat; + + softpipe_texture_layout(spt); + + spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); + if (!spt->buffer) { + FREE(spt); + return NULL; + } + + return &spt->base; +} + + +void +softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct softpipe_texture *spt = softpipe_texture(*pt); + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); + */ + + pipe_buffer_reference(pipe->winsys, &spt->buffer, NULL); + + FREE(spt); + } + *pt = NULL; +} + + +/** + * Called via pipe->get_tex_surface() + */ +struct pipe_surface * +softpipe_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct softpipe_texture *spt = softpipe_texture(pt); + struct pipe_surface *ps; + + assert(level <= pt->last_level); + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(pipe->winsys, &ps->buffer, spt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = ps->width; + ps->offset = spt->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + (pt->compressed ? ps->height/4 : ps->height) * + ps->width * ps->cpp; + } else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h new file mode 100644 index 00000000000..fa646c0de9f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_TEXTURE_H +#define SP_TEXTURE_H + + +struct pipe_context; +struct pipe_texture; + + +struct softpipe_texture +{ + struct pipe_texture base; + + unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; + unsigned long buffer_size; +}; + + +/** cast wrapper */ +static INLINE struct softpipe_texture * +softpipe_texture(struct pipe_texture *pt) +{ + return (struct softpipe_texture *) pt; +} + + + +extern struct pipe_texture * +softpipe_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat); + +extern void +softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); + +extern struct pipe_surface * +softpipe_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice); + + +#endif /* SP_TEXTURE */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c new file mode 100644 index 00000000000..1597361b82f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -0,0 +1,585 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Framebuffer/surface tile caching. + * + * Author: + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/util/p_tile.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" + +#define NUM_ENTRIES 30 + + +/** XXX move these */ +#define MAX_WIDTH 2048 +#define MAX_HEIGHT 2048 + + +struct softpipe_tile_cache +{ + struct pipe_surface *surface; /**< the surface we're caching */ + void *surface_map; + struct pipe_texture *texture; /**< if caching a texture */ + struct softpipe_cached_tile entries[NUM_ENTRIES]; + uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; + float clear_color[4]; + uint clear_val; + boolean depth_stencil; /** Is the surface a depth/stencil format? */ + + struct pipe_surface *tex_surf; + void *tex_surf_map; + int tex_face, tex_level, tex_z; + + struct softpipe_cached_tile tile; /**< scratch tile for clears */ +}; + + +/** + * Return the position in the cache for the tile that contains win pos (x,y). + * We currently use a direct mapped cache so this is like a hack key. + * At some point we should investige something more sophisticated, like + * a LRU replacement policy. + */ +#define CACHE_POS(x, y) \ + (((x) / TILE_SIZE + ((y) / TILE_SIZE) * 5) % NUM_ENTRIES) + + + +/** + * Is the tile at (x,y) in cleared state? + */ +static INLINE uint +is_clear_flag_set(const uint *bitvec, int x, int y) +{ + int pos, bit; + x /= TILE_SIZE; + y /= TILE_SIZE; + pos = y * (MAX_WIDTH / TILE_SIZE) + x; + assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); + bit = bitvec[pos / 32] & (1 << (pos & 31)); + return bit; +} + + +/** + * Mark the tile at (x,y) as not cleared. + */ +static INLINE void +clear_clear_flag(uint *bitvec, int x, int y) +{ + int pos; + x /= TILE_SIZE; + y /= TILE_SIZE; + pos = y * (MAX_WIDTH / TILE_SIZE) + x; + assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); + bitvec[pos / 32] &= ~(1 << (pos & 31)); +} + + +struct softpipe_tile_cache * +sp_create_tile_cache(void) +{ + struct softpipe_tile_cache *tc; + uint pos; + + tc = CALLOC_STRUCT( softpipe_tile_cache ); + if (tc) { + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].x = + tc->entries[pos].y = -1; + } + } + return tc; +} + + +void +sp_destroy_tile_cache(struct softpipe_tile_cache *tc) +{ + uint pos; + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + //assert(tc->entries[pos].x < 0); + } + if (tc->surface) { + pipe_surface_reference(&tc->surface, NULL); + } + if (tc->tex_surf) { + pipe_surface_reference(&tc->tex_surf, NULL); + } + + FREE( tc ); +} + + +/** + * Specify the surface to cache. + */ +void +sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, + struct pipe_surface *ps) +{ + assert(!tc->texture); + + if (tc->surface_map) { + /*assert(tc->surface != ps);*/ + pipe_surface_unmap(tc->surface); + } + + pipe_surface_reference(&tc->surface, ps); + + if (ps) { + if (tc->surface_map) + tc->surface_map = pipe_surface_map(ps); + + tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || + ps->format == PIPE_FORMAT_Z16_UNORM || + ps->format == PIPE_FORMAT_Z32_UNORM || + ps->format == PIPE_FORMAT_U_S8); + } +} + + +/** + * Return the surface being cached. + */ +struct pipe_surface * +sp_tile_cache_get_surface(struct softpipe_tile_cache *tc) +{ + return tc->surface; +} + + +void +sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc) +{ + if (tc->surface && !tc->surface_map) + tc->surface_map = pipe_surface_map(tc->surface); + + if (tc->tex_surf && !tc->tex_surf_map) + tc->tex_surf_map = pipe_surface_map(tc->tex_surf); +} + + +void +sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc) +{ + if (tc->surface_map) { + pipe_surface_unmap(tc->surface); + tc->surface_map = NULL; + } + + if (tc->tex_surf_map) { + pipe_surface_unmap(tc->tex_surf); + tc->tex_surf_map = NULL; + } +} + + +/** + * Specify the texture to cache. + */ +void +sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, + struct pipe_texture *texture) +{ + uint i; + + assert(!tc->surface); + + tc->texture = texture; + + if (tc->tex_surf_map) { + pipe_surface_unmap(tc->tex_surf); + tc->tex_surf_map = NULL; + } + pipe_surface_reference(&tc->tex_surf, NULL); + + /* mark as entries as invalid/empty */ + /* XXX we should try to avoid this when the teximage hasn't changed */ + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].x = -1; + } + + tc->tex_face = -1; /* any invalid value here */ +} + + +/** + * Set pixels in a tile to the given clear color/value, float. + */ +static void +clear_tile_rgba(struct softpipe_cached_tile *tile, + enum pipe_format format, + const float clear_value[4]) +{ + if (clear_value[0] == 0.0 && + clear_value[1] == 0.0 && + clear_value[2] == 0.0 && + clear_value[3] == 0.0) { + memset(tile->data.color, 0, sizeof(tile->data.color)); + } + else { + uint i, j; + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.color[i][j][0] = clear_value[0]; + tile->data.color[i][j][1] = clear_value[1]; + tile->data.color[i][j][2] = clear_value[2]; + tile->data.color[i][j][3] = clear_value[3]; + } + } + } +} + + +/** + * Set a tile to a solid value/color. + */ +static void +clear_tile(struct softpipe_cached_tile *tile, + enum pipe_format format, + uint clear_value) +{ + uint i, j; + + switch (pf_get_size(format)) { + case 1: + memset(tile->data.any, 0, TILE_SIZE * TILE_SIZE); + break; + case 2: + if (clear_value == 0) { + memset(tile->data.any, 0, 2 * TILE_SIZE * TILE_SIZE); + } + else { + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.depth16[i][j] = (ushort) clear_value; + } + } + } + break; + case 4: + if (clear_value == 0) { + memset(tile->data.any, 0, 4 * TILE_SIZE * TILE_SIZE); + } + else { + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.color32[i][j] = clear_value; + } + } + } + break; + default: + assert(0); + } +} + + +/** + * Actually clear the tiles which were flagged as being in a clear state. + */ +static void +sp_tile_cache_flush_clear(struct pipe_context *pipe, + struct softpipe_tile_cache *tc) +{ + struct pipe_surface *ps = tc->surface; + const uint w = tc->surface->width; + const uint h = tc->surface->height; + uint x, y; + uint numCleared = 0; + + /* clear the scratch tile to the clear value */ + clear_tile(&tc->tile, ps->format, tc->clear_val); + + /* push the tile to all positions marked as clear */ + for (y = 0; y < h; y += TILE_SIZE) { + for (x = 0; x < w; x += TILE_SIZE) { + if (is_clear_flag_set(tc->clear_flags, x, y)) { + pipe_put_tile_raw(pipe, ps, + x, y, TILE_SIZE, TILE_SIZE, + tc->tile.data.color32, 0/*STRIDE*/); + + /* do this? */ + clear_clear_flag(tc->clear_flags, x, y); + + numCleared++; + } + } + } +#if 0 + debug_printf("num cleared: %u\n", numCleared); +#endif +} + + +/** + * Flush the tile cache: write all dirty tiles back to the surface. + * any tiles "flagged" as cleared will be "really" cleared. + */ +void +sp_flush_tile_cache(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc) +{ + struct pipe_context *pipe = &softpipe->pipe; + struct pipe_surface *ps = tc->surface; + int inuse = 0, pos; + + if (!ps || !ps->buffer) + return; + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + struct softpipe_cached_tile *tile = tc->entries + pos; + if (tile->x >= 0) { + if (tc->depth_stencil) { + pipe_put_tile_raw(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + tile->x = tile->y = -1; /* mark as empty */ + inuse++; + } + } + +#if TILE_CLEAR_OPTIMIZATION + sp_tile_cache_flush_clear(&softpipe->pipe, tc); +#endif + +#if 0 + debug_printf("flushed tiles in use: %d\n", inuse); +#endif +} + + +/** + * Get a tile from the cache. + * \param x, y position of tile, in pixels + */ +struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc, int x, int y) +{ + struct pipe_context *pipe = &softpipe->pipe; + struct pipe_surface *ps = tc->surface; + + /* tile pos in framebuffer: */ + const int tile_x = x & ~(TILE_SIZE - 1); + const int tile_y = y & ~(TILE_SIZE - 1); + + /* cache pos/entry: */ + const int pos = CACHE_POS(x, y); + struct softpipe_cached_tile *tile = tc->entries + pos; + + if (tile_x != tile->x || + tile_y != tile->y) { + + if (tile->x != -1) { + /* put dirty tile back in framebuffer */ + if (tc->depth_stencil) { + pipe_put_tile_raw(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + } + + tile->x = tile_x; + tile->y = tile_y; + + if (is_clear_flag_set(tc->clear_flags, x, y)) { + /* don't get tile from framebuffer, just clear it */ + if (tc->depth_stencil) { + clear_tile(tile, ps->format, tc->clear_val); + } + else { + clear_tile_rgba(tile, ps->format, tc->clear_color); + } + clear_clear_flag(tc->clear_flags, x, y); + } + else { + /* get new tile data from surface */ + if (tc->depth_stencil) { + pipe_get_tile_raw(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_get_tile_rgba(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + } + } + + return tile; +} + + +/** + * Given the texture face, level, zslice, x and y values, compute + * the cache entry position/index where we'd hope to find the + * cached texture tile. + * This is basically a direct-map cache. + * XXX There's probably lots of ways in which we can improve this. + */ +static INLINE uint +tex_cache_pos(int x, int y, int z, int face, int level) +{ + uint entry = x + y * 5 + z * 4 + face + level; + return entry % NUM_ENTRIES; +} + + +/** + * Similar to sp_get_cached_tile() but for textures. + * Tiles are read-only and indexed with more params. + */ +const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, int x, int y, int z, + int face, int level) +{ + /* tile pos in framebuffer: */ + const int tile_x = x & ~(TILE_SIZE - 1); + const int tile_y = y & ~(TILE_SIZE - 1); + /* cache pos/entry: */ + const uint pos = tex_cache_pos(x / TILE_SIZE, y / TILE_SIZE, z, + face, level); + struct softpipe_cached_tile *tile = tc->entries + pos; + + if (tile_x != tile->x || + tile_y != tile->y || + z != tile->z || + face != tile->face || + level != tile->level) { + /* cache miss */ + + /* check if we need to get a new surface */ + if (!tc->tex_surf || + tc->tex_face != face || + tc->tex_level != level || + tc->tex_z != z) { + /* get new surface (view into texture) */ + + if (tc->tex_surf_map) + pipe_surface_unmap(tc->tex_surf); + + tc->tex_surf = pipe->get_tex_surface(pipe, tc->texture, face, level, z); + tc->tex_surf_map = pipe_surface_map(tc->tex_surf); + + tc->tex_face = face; + tc->tex_level = level; + tc->tex_z = z; + } + + /* get tile from the surface (view into texture) */ + pipe_get_tile_rgba(pipe, tc->tex_surf, + tile_x, tile_y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + tile->x = tile_x; + tile->y = tile_y; + tile->z = z; + tile->face = face; + tile->level = level; + } + + return tile; +} + + +/** + * When a whole surface is being cleared to a value we can avoid + * fetching tiles above. + * Save the color and set a 'clearflag' for each tile of the screen. + */ +void +sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue) +{ + uint r, g, b, a; + uint pos; + + tc->clear_val = clearValue; + + switch (tc->surface->format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + r = (clearValue >> 24) & 0xff; + g = (clearValue >> 16) & 0xff; + b = (clearValue >> 8) & 0xff; + a = (clearValue ) & 0xff; + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + r = (clearValue >> 16) & 0xff; + g = (clearValue >> 8) & 0xff; + b = (clearValue ) & 0xff; + a = (clearValue >> 24) & 0xff; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + r = (clearValue >> 8) & 0xff; + g = (clearValue >> 16) & 0xff; + b = (clearValue >> 24) & 0xff; + a = (clearValue ) & 0xff; + break; + default: + r = g = b = a = 0; + } + + tc->clear_color[0] = r / 255.0f; + tc->clear_color[1] = g / 255.0f; + tc->clear_color[2] = b / 255.0f; + tc->clear_color[3] = a / 255.0f; + +#if TILE_CLEAR_OPTIMIZATION + /* set flags to indicate all the tiles are cleared */ + memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); +#else + /* disable the optimization */ + memset(tc->clear_flags, 0, sizeof(tc->clear_flags)); +#endif + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + struct softpipe_cached_tile *tile = tc->entries + pos; + tile->x = tile->y = -1; + } +} diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h new file mode 100644 index 00000000000..7fd10812863 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -0,0 +1,104 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_TILE_CACHE_H +#define SP_TILE_CACHE_H + +#define TILE_CLEAR_OPTIMIZATION 1 + + +#include "pipe/p_compiler.h" + + +struct softpipe_context; +struct softpipe_tile_cache; + + +/** + * Cache tile size (width and height). This needs to be a power of two. + */ +#define TILE_SIZE 64 + + + +struct softpipe_cached_tile +{ + int x, y; /**< pos of tile in window coords */ + int z, face, level; /**< Extra texture indexes */ + union { + float color[TILE_SIZE][TILE_SIZE][4]; + uint color32[TILE_SIZE][TILE_SIZE]; + uint depth32[TILE_SIZE][TILE_SIZE]; + ushort depth16[TILE_SIZE][TILE_SIZE]; + ubyte stencil8[TILE_SIZE][TILE_SIZE]; + ubyte any[1]; + } data; +}; + + +extern struct softpipe_tile_cache * +sp_create_tile_cache(void); + +extern void +sp_destroy_tile_cache(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, + struct pipe_surface *sps); + +extern struct pipe_surface * +sp_tile_cache_get_surface(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, + struct pipe_texture *texture); + +extern void +sp_flush_tile_cache(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue); + +extern struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc, int x, int y); + +extern const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, int x, int y, int z, + int face, int level); + + +#endif /* SP_TILE_CACHE_H */ + diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h new file mode 100644 index 00000000000..d6b379f58c6 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* This is the interface that softpipe requires any window system + * hosting it to implement. This is the only include file in softpipe + * which is public. + */ + + +#ifndef SP_WINSYS_H +#define SP_WINSYS_H + + +#include "pipe/p_compiler.h" /* for boolean */ + +enum pipe_format; + +struct softpipe_winsys { + /** test if the given format is supported for front/back color bufs */ + boolean (*is_format_supported)( struct softpipe_winsys *sws, + enum pipe_format format ); + +}; + +struct pipe_winsys; +struct pipe_context; + + +struct pipe_context *softpipe_create( struct pipe_winsys *, + struct softpipe_winsys * ); + + +#endif /* SP_WINSYS_H */ -- cgit v1.2.3 From 6acd63a4980951727939c0dd545a0324965b3834 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 15 Feb 2008 17:50:12 +0900 Subject: Code reorganization: update build. Update the Makefiles and includes for the new paths. Note that there hasn't been no separation of the Makefiles yet, and make is jumping all over the place. That will be taken care shortly. But for now, make should work. It was tested with linux and linux-dri. Linux-cell and linux-llvm might require some minor tweaks. --- configs/beos | 2 +- configs/darwin | 2 +- configs/darwin-x86ppc | 2 +- configs/default | 2 +- configs/freebsd-dri | 2 +- configs/linux-cell | 2 +- configs/linux-directfb | 2 +- configs/linux-dri | 6 +- configs/linux-dri-xcb | 4 +- configs/linux-fbdev | 2 +- configs/linux-osmesa | 2 +- configs/linux-osmesa16 | 2 +- configs/linux-osmesa16-static | 2 +- configs/linux-osmesa32 | 2 +- configs/linux-solo | 2 +- src/gallium/Makefile | 12 +-- src/gallium/Makefile.template | 7 +- src/gallium/aux/Makefile | 24 +++++ src/gallium/aux/draw/draw_private.h | 2 +- src/gallium/aux/draw/draw_vertex.c | 4 +- src/gallium/aux/draw/draw_vertex_shader.c | 4 +- src/gallium/aux/llvm/Makefile | 4 +- src/gallium/aux/llvm/gallivm.cpp | 4 +- src/gallium/aux/llvm/gallivm_cpu.cpp | 4 +- src/gallium/aux/llvm/tgsitollvm.cpp | 10 +- src/gallium/aux/pipebuffer/Makefile | 2 +- src/gallium/aux/tgsi/exec/tgsi_exec.c | 4 +- src/gallium/aux/tgsi/exec/tgsi_sse2.c | 4 +- src/gallium/aux/tgsi/util/tgsi_transform.h | 4 +- src/gallium/drivers/Makefile | 24 +++++ src/gallium/drivers/cell/ppu/Makefile | 7 +- src/gallium/drivers/cell/ppu/cell_clear.c | 2 +- src/gallium/drivers/cell/ppu/cell_context.c | 6 +- src/gallium/drivers/cell/ppu/cell_context.h | 6 +- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 2 +- src/gallium/drivers/cell/ppu/cell_flush.c | 2 +- src/gallium/drivers/cell/ppu/cell_render.c | 2 +- src/gallium/drivers/cell/ppu/cell_spu.c | 2 +- src/gallium/drivers/cell/ppu/cell_spu.h | 2 +- src/gallium/drivers/cell/ppu/cell_state_blend.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_clip.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_derived.c | 4 +- src/gallium/drivers/cell/ppu/cell_state_fs.c | 8 +- .../drivers/cell/ppu/cell_state_rasterizer.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_sampler.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 2 +- src/gallium/drivers/cell/ppu/cell_surface.c | 2 +- src/gallium/drivers/cell/ppu/cell_vbuf.c | 2 +- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 6 +- src/gallium/drivers/cell/spu/Makefile | 6 +- src/gallium/drivers/cell/spu/spu_exec.c | 4 +- src/gallium/drivers/cell/spu/spu_exec.h | 2 +- src/gallium/drivers/cell/spu/spu_main.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 4 +- src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_render.h | 2 +- src/gallium/drivers/cell/spu/spu_tile.h | 2 +- src/gallium/drivers/cell/spu/spu_util.c | 4 +- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 6 +- src/gallium/drivers/failover/Makefile | 2 +- src/gallium/drivers/i915simple/Makefile | 2 +- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_context.h | 2 +- .../drivers/i915simple/i915_fpc_translate.c | 4 +- src/gallium/drivers/i915simple/i915_prim_emit.c | 2 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- src/gallium/drivers/i915simple/i915_state.c | 2 +- .../drivers/i915simple/i915_state_derived.c | 4 +- src/gallium/drivers/i915simple/i915_strings.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 2 +- src/gallium/drivers/i965simple/Makefile | 2 +- src/gallium/drivers/i965simple/brw_shader_info.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 2 +- src/gallium/drivers/i965simple/brw_strings.c | 2 +- src/gallium/drivers/i965simple/brw_surface.c | 2 +- src/gallium/drivers/i965simple/brw_vs_emit.c | 2 +- src/gallium/drivers/i965simple/brw_wm_decl.c | 2 +- src/gallium/drivers/i965simple/brw_wm_glsl.c | 2 +- src/gallium/drivers/softpipe/Makefile | 2 +- src/gallium/drivers/softpipe/sp_context.c | 2 +- src/gallium/drivers/softpipe/sp_context.h | 2 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 2 +- src/gallium/drivers/softpipe/sp_flush.c | 2 +- src/gallium/drivers/softpipe/sp_headers.h | 2 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 4 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 6 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 2 +- src/gallium/drivers/softpipe/sp_query.c | 2 +- src/gallium/drivers/softpipe/sp_state_clip.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 6 +- src/gallium/drivers/softpipe/sp_state_fs.c | 8 +- src/gallium/drivers/softpipe/sp_state_rasterizer.c | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 4 +- src/gallium/drivers/softpipe/sp_state_vertex.c | 2 +- src/gallium/drivers/softpipe/sp_surface.c | 2 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- src/gallium/drivers/softpipe/sp_tile_cache.c | 2 +- src/gallium/winsys/dri/Makefile | 38 +++++++ src/gallium/winsys/dri/Makefile.template | 113 +++++++++++++++++++++ src/gallium/winsys/dri/intel/Makefile | 8 +- src/gallium/winsys/dri/intel/intel_winsys_i915.c | 2 +- .../winsys/dri/intel/intel_winsys_softpipe.c | 2 +- src/gallium/winsys/xlib/xm_winsys.c | 6 +- src/gallium/winsys/xlib/xm_winsys_aub.c | 2 +- src/mesa/Makefile | 16 ++- src/mesa/drivers/x11/xm_api.c | 2 +- src/mesa/drivers/x11/xm_dd.c | 2 +- src/mesa/drivers/x11/xm_surface.c | 8 +- src/mesa/drivers/x11/xm_winsys.c | 2 +- src/mesa/drivers/x11/xmesaP.h | 4 +- src/mesa/sources | 83 +++++++-------- src/mesa/state_tracker/st_atom_shader.c | 2 +- src/mesa/state_tracker/st_cache.c | 4 +- src/mesa/state_tracker/st_cache.h | 2 +- src/mesa/state_tracker/st_cb_accum.c | 2 +- src/mesa/state_tracker/st_cb_drawpixels.c | 2 +- src/mesa/state_tracker/st_cb_feedback.c | 6 +- src/mesa/state_tracker/st_cb_program.c | 4 +- src/mesa/state_tracker/st_cb_rasterpos.c | 4 +- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/state_tracker/st_cb_texture.c | 2 +- src/mesa/state_tracker/st_context.c | 4 +- src/mesa/state_tracker/st_debug.c | 4 +- src/mesa/state_tracker/st_draw.c | 4 +- src/mesa/state_tracker/st_gen_mipmap.c | 2 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +- src/mesa/state_tracker/st_program.c | 4 +- 127 files changed, 445 insertions(+), 241 deletions(-) create mode 100644 src/gallium/aux/Makefile create mode 100644 src/gallium/drivers/Makefile create mode 100644 src/gallium/winsys/dri/Makefile create mode 100644 src/gallium/winsys/dri/Makefile.template (limited to 'src/gallium/drivers') diff --git a/configs/beos b/configs/beos index f07973d0c78..2b74af739d0 100644 --- a/configs/beos +++ b/configs/beos @@ -86,7 +86,7 @@ else endif # Directories -SRC_DIRS = mesa glu glut/beos +SRC_DIRS = gallium mesa glu glut/beos GLU_DIRS = sgi DRIVER_DIRS = beos PROGRAM_DIRS = beos samples redbook demos tests diff --git a/configs/darwin b/configs/darwin index 7826ecc605e..bba78026962 100644 --- a/configs/darwin +++ b/configs/darwin @@ -25,5 +25,5 @@ GLW_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXt $(TOP)/lib/GL.dylib APP_LIB_DEPS = -L$(TOP)/lib -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm # omit glw lib for now: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx diff --git a/configs/darwin-x86ppc b/configs/darwin-x86ppc index 13172327a76..ebeb25051f1 100644 --- a/configs/darwin-x86ppc +++ b/configs/darwin-x86ppc @@ -29,5 +29,5 @@ GLW_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXt $(TOP)/lib/GL.dylib APP_LIB_DEPS = -L$(TOP)/lib -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm # omit glw lib for now: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx diff --git a/configs/default b/configs/default index 166205a1d31..25a87e66a1b 100644 --- a/configs/default +++ b/configs/default @@ -60,7 +60,7 @@ GLW_SOURCES = GLwDrawA.c # Directories to build LIB_DIR = lib -SRC_DIRS = mesa glu glut/glx glw +SRC_DIRS = gallium mesa glu glut/glx glw GLU_DIRS = sgi DRIVER_DIRS = x11 osmesa # Which subdirs under $(TOP)/progs/ to enter: diff --git a/configs/freebsd-dri b/configs/freebsd-dri index 402883d1de0..67d253b8695 100644 --- a/configs/freebsd-dri +++ b/configs/freebsd-dri @@ -36,7 +36,7 @@ GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -L/usr/X11R6/lib -lGL -lXt -lX11 # Directories -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw DRIVER_DIRS = dri PROGRAM_DIRS = WINDOW_SYSTEM=dri diff --git a/configs/linux-cell b/configs/linux-cell index 3d874491e41..fdf20deeeb0 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -21,7 +21,7 @@ CFLAGS = $(OPT_FLAGS) -Wall -Winline -fPIC -m32 -mabi=altivec -maltivec -I. -I$( CXXFLAGS = $(CFLAGS) # Omitting glw here: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx MKDEP_OPTIONS = -fdepend -Y diff --git a/configs/linux-directfb b/configs/linux-directfb index 09332f48081..dff27f78503 100644 --- a/configs/linux-directfb +++ b/configs/linux-directfb @@ -22,7 +22,7 @@ ifeq ($(HAVE_X86), yes) endif # Directories -SRC_DIRS = mesa glu glut/directfb +SRC_DIRS = gallium mesa glu glut/directfb GLU_DIRS = sgi DRIVER_DIRS = directfb PROGRAM_DIRS = demos directfb diff --git a/configs/linux-dri b/configs/linux-dri index 936fce99829..e6135856fc1 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -54,10 +54,10 @@ USING_EGL=0 # Directories ifeq ($(USING_EGL), 1) -SRC_DIRS = egl glx/x11 mesa glu glut/glx glw +SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = egl else -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = endif @@ -66,4 +66,4 @@ WINDOW_SYSTEM=dri # gamma are missing because they have not been converted to use the new # interface. -DRI_DIRS = intel_winsys +DRI_DIRS = intel diff --git a/configs/linux-dri-xcb b/configs/linux-dri-xcb index aa292a13ec1..ea4bdf1864c 100644 --- a/configs/linux-dri-xcb +++ b/configs/linux-dri-xcb @@ -53,10 +53,10 @@ USING_EGL=0 # Directories ifeq ($(USING_EGL), 1) -SRC_DIRS = egl glx/x11 mesa glu glut/glx glw +SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = egl else -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = endif diff --git a/configs/linux-fbdev b/configs/linux-fbdev index e36d20a702c..1ddccb3f52b 100644 --- a/configs/linux-fbdev +++ b/configs/linux-fbdev @@ -6,7 +6,7 @@ CONFIG_NAME = linux-fbdev CFLAGS = -O3 -ffast-math -ansi -pedantic -fPIC -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DPTHREADS -DUSE_GLFBDEV_DRIVER -SRC_DIRS = mesa glu glut/fbdev +SRC_DIRS = gallium mesa glu glut/fbdev DRIVER_DIRS = fbdev osmesa PROGRAM_DIRS = fbdev demos redbook samples diff --git a/configs/linux-osmesa b/configs/linux-osmesa index cc1fbbd109a..0382a19553a 100644 --- a/configs/linux-osmesa +++ b/configs/linux-osmesa @@ -14,7 +14,7 @@ CXXFLAGS = -O3 -ansi -pedantic -fPIC -ffast-math -D_POSIX_SOURCE -D_POSIX_C_SOUR # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = osdemos diff --git a/configs/linux-osmesa16 b/configs/linux-osmesa16 index 1fb0186d315..9a527592f1d 100644 --- a/configs/linux-osmesa16 +++ b/configs/linux-osmesa16 @@ -17,7 +17,7 @@ OSMESA_LIB_NAME = libOSMesa16.so # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-osmesa16-static b/configs/linux-osmesa16-static index 6645504478e..1e6380b02e0 100644 --- a/configs/linux-osmesa16-static +++ b/configs/linux-osmesa16-static @@ -18,7 +18,7 @@ OSMESA_LIB_NAME = libOSMesa16.a # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-osmesa32 b/configs/linux-osmesa32 index a1e5a358d60..f0ef1831b09 100644 --- a/configs/linux-osmesa32 +++ b/configs/linux-osmesa32 @@ -17,7 +17,7 @@ OSMESA_LIB_NAME = libOSMesa32.so # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-solo b/configs/linux-solo index 220fe58b9a4..d49b9722282 100644 --- a/configs/linux-solo +++ b/configs/linux-solo @@ -43,7 +43,7 @@ GLUT_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLU_LIB) -l$(GL_LIB) -lm APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -lm -lpthread # Directories -SRC_DIRS = glx/mini mesa glu glut/mini +SRC_DIRS = glx/mini gallium mesa glu glut/mini DRIVER_DIRS = dri PROGRAM_DIRS = miniglx diff --git a/src/gallium/Makefile b/src/gallium/Makefile index d880d090c15..a13b9a52d36 100644 --- a/src/gallium/Makefile +++ b/src/gallium/Makefile @@ -1,16 +1,8 @@ -TOP = ../../.. +TOP = ../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-cell) -CELL_DIR = cell -endif - -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_DIR = llvm -endif - -SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) $(LLVM_DIR) +SUBDIRS = aux drivers default: subdirs diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 8e84f8eb2d7..0717ed8dd24 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -15,7 +15,10 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ### Include directories INCLUDES = \ -I. \ - -I$(TOP)/src/mesa/pipe \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/include/pipe \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ -I$(TOP)/include \ $(DRIVER_INCLUDES) @@ -38,7 +41,7 @@ INCLUDES = \ default: depend symlinks $(LIBNAME) -$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/mesa/pipe/Makefile.template +$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/gallium/Makefile.template $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS) diff --git a/src/gallium/aux/Makefile b/src/gallium/aux/Makefile new file mode 100644 index 00000000000..da68498aa1f --- /dev/null +++ b/src/gallium/aux/Makefile @@ -0,0 +1,24 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +ifeq ($(CONFIG_NAME), linux-llvm) +LLVM_DIR = llvm +endif + +SUBDIRS = pipebuffer $(LLVM_DIR) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/aux/draw/draw_private.h b/src/gallium/aux/draw/draw_private.h index b17eaaed65e..3d09aef87c1 100644 --- a/src/gallium/aux/draw/draw_private.h +++ b/src/gallium/aux/draw/draw_private.h @@ -45,7 +45,7 @@ #include "pipe/p_defines.h" #include "x86/rtasm/x86sse.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" struct gallivm_prog; diff --git a/src/gallium/aux/draw/draw_vertex.c b/src/gallium/aux/draw/draw_vertex.c index 2d6592150fc..daf1ef4b80f 100644 --- a/src/gallium/aux/draw/draw_vertex.c +++ b/src/gallium/aux/draw/draw_vertex.c @@ -34,8 +34,8 @@ */ -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" /** diff --git a/src/gallium/aux/draw/draw_vertex_shader.c b/src/gallium/aux/draw/draw_vertex_shader.c index c824c1407e9..377ecbb931c 100644 --- a/src/gallium/aux/draw/draw_vertex_shader.c +++ b/src/gallium/aux/draw/draw_vertex_shader.c @@ -34,13 +34,13 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #if defined(__i386__) || defined(__386__) -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "tgsi/exec/tgsi_sse2.h" #endif #include "draw_private.h" #include "draw_context.h" #include "x86/rtasm/x86sse.h" -#include "pipe/llvm/gallivm.h" +#include "llvm/gallivm.h" #define DBG_VS 0 diff --git a/src/gallium/aux/llvm/Makefile b/src/gallium/aux/llvm/Makefile index 9c6e16d86b7..e6ac399d088 100644 --- a/src/gallium/aux/llvm/Makefile +++ b/src/gallium/aux/llvm/Makefile @@ -30,7 +30,9 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ### Include directories INCLUDES = \ -I. \ - -I$(TOP)/src/mesa/pipe \ + -I$(TOP)/src/gallium/drivers + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/include \ -I$(TOP)/src/mesa \ -I$(TOP)/include diff --git a/src/gallium/aux/llvm/gallivm.cpp b/src/gallium/aux/llvm/gallivm.cpp index da0105c2c98..d14bb3b99a8 100644 --- a/src/gallium/aux/llvm/gallivm.cpp +++ b/src/gallium/aux/llvm/gallivm.cpp @@ -42,8 +42,8 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" #include <llvm/Module.h> #include <llvm/CallingConv.h> diff --git a/src/gallium/aux/llvm/gallivm_cpu.cpp b/src/gallium/aux/llvm/gallivm_cpu.cpp index dc4d92a72a6..8f9830d0b1e 100644 --- a/src/gallium/aux/llvm/gallivm_cpu.cpp +++ b/src/gallium/aux/llvm/gallivm_cpu.cpp @@ -42,8 +42,8 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" #include <llvm/Module.h> #include <llvm/CallingConv.h> diff --git a/src/gallium/aux/llvm/tgsitollvm.cpp b/src/gallium/aux/llvm/tgsitollvm.cpp index 0de595e6789..2cb4acce32f 100644 --- a/src/gallium/aux/llvm/tgsitollvm.cpp +++ b/src/gallium/aux/llvm/tgsitollvm.cpp @@ -10,11 +10,11 @@ #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_util.h" -#include "pipe/tgsi/util/tgsi_build.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_dump.h" #include <llvm/Module.h> diff --git a/src/gallium/aux/pipebuffer/Makefile b/src/gallium/aux/pipebuffer/Makefile index 75764a9a188..588629e8701 100644 --- a/src/gallium/aux/pipebuffer/Makefile +++ b/src/gallium/aux/pipebuffer/Makefile @@ -17,7 +17,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/aux/tgsi/exec/tgsi_exec.c b/src/gallium/aux/tgsi/exec/tgsi_exec.c index 37e60070686..a8f64c2287f 100644 --- a/src/gallium/aux/tgsi/exec/tgsi_exec.c +++ b/src/gallium/aux/tgsi/exec/tgsi_exec.c @@ -54,8 +54,8 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "tgsi_exec.h" #define TILE_TOP_LEFT 0 diff --git a/src/gallium/aux/tgsi/exec/tgsi_sse2.c b/src/gallium/aux/tgsi/exec/tgsi_sse2.c index 1e56e4afb69..593464db3ed 100755 --- a/src/gallium/aux/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/aux/tgsi/exec/tgsi_sse2.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "tgsi_exec.h" #include "tgsi_sse2.h" diff --git a/src/gallium/aux/tgsi/util/tgsi_transform.h b/src/gallium/aux/tgsi/util/tgsi_transform.h index 365d8c298c7..fcf85d603be 100644 --- a/src/gallium/aux/tgsi/util/tgsi_transform.h +++ b/src/gallium/aux/tgsi/util/tgsi_transform.h @@ -31,8 +31,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile new file mode 100644 index 00000000000..c0345a9cb54 --- /dev/null +++ b/src/gallium/drivers/Makefile @@ -0,0 +1,24 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +ifeq ($(CONFIG_NAME), linux-cell) +CELL_DIR = cell +endif + +SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 50060f5cd30..011863c11e1 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -40,8 +40,11 @@ SOURCES = \ OBJECTS = $(SOURCES:.c=.o) \ -INCLUDE_DIRS = -I$(TOP)/src/mesa - +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers .c.o: $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 07b908eec59..e588a30d5bc 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -35,7 +35,7 @@ #include <stdint.h> #include "pipe/p_inlines.h" #include "pipe/p_util.h" -#include "pipe/cell/common.h" +#include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_batch.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index bbe1fd7a111..e1eb22f4685 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -37,9 +37,9 @@ #include "pipe/p_format.h" #include "pipe/p_util.h" #include "pipe/p_winsys.h" -#include "pipe/cell/common.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 3b63419b5eb..6196c0c72f9 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -32,10 +32,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/draw/draw_vertex.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_vbuf.h" #include "cell_winsys.h" -#include "pipe/cell/common.h" +#include "cell/common.h" struct cell_vbuf_render; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 717cd8370f9..f12613649b9 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -39,7 +39,7 @@ #include "cell_draw_arrays.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index f62bc4650ce..20f27531fce 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -31,7 +31,7 @@ #include "cell_flush.h" #include "cell_spu.h" #include "cell_render.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index 4ab277a4b24..b663b376222 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -34,7 +34,7 @@ #include "cell_render.h" #include "cell_spu.h" #include "pipe/p_util.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_private.h" struct render_stage { diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 7c83a47e574..419e74dc402 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -31,7 +31,7 @@ #include "cell_spu.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/cell/common.h" +#include "cell/common.h" /* diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h index 19eff94f967..137f26612e4 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -31,7 +31,7 @@ #include <libspe2.h> #include <libmisc.h> -#include "pipe/cell/common.h" +#include "cell/common.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_blend.c b/src/gallium/drivers/cell/ppu/cell_state_blend.c index 4fc60548c85..b6d6d71f0ce 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_blend.c +++ b/src/gallium/drivers/cell/ppu/cell_state_blend.c @@ -29,7 +29,7 @@ */ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_clip.c b/src/gallium/drivers/cell/ppu/cell_state_clip.c index 4f43665941d..0482f87e889 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_clip.c +++ b/src/gallium/drivers/cell/ppu/cell_state_clip.c @@ -30,7 +30,7 @@ #include "cell_context.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void cell_set_clip_state( struct pipe_context *pipe, diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 56daf5dfde0..0c468292584 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" #include "cell_context.h" #include "cell_batch.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_fs.c index 3f46a87d189..b2ed699a5be 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_fs.c +++ b/src/gallium/drivers/cell/ppu/cell_state_fs.c @@ -29,12 +29,12 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #if 0 #include "pipe/p_shader_tokens.h" -#include "pipe/llvm/gallivm.h" -#include "pipe/tgsi/util/tgsi_dump.h" -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "llvm/gallivm.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_sse2.h" #endif #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c index d8128ece54d..7eca5b57656 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c +++ b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c @@ -27,7 +27,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_sampler.c b/src/gallium/drivers/cell/ppu/cell_state_sampler.c index ade6cc8338e..a33421a4ad0 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_sampler.c +++ b/src/gallium/drivers/cell/ppu/cell_state_sampler.c @@ -30,7 +30,7 @@ */ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" #include "cell_texture.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 0f01e920f95..563831b62db 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -32,7 +32,7 @@ #include "cell_context.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index fca93e47424..a35db0ef991 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "cell_context.h" #include "cell_surface.h" diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index e9fafe492ee..cc727ff4edb 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -36,7 +36,7 @@ #include "cell_flush.h" #include "cell_spu.h" #include "cell_vbuf.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vbuf.h" /** Allow vertex data to be inlined after RENDER command */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 80dd500b345..0ba4506505e 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -38,9 +38,9 @@ #include "cell_spu.h" #include "cell_batch.h" -#include "pipe/cell/common.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" /** * Run the vertex shader on all vertices in the vertex queue. diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index f202971d738..7aa947299e7 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -31,7 +31,11 @@ SPU_OBJECTS = $(SOURCES:.c=.o) \ SPU_ASM_OUT = $(SOURCES:.c=.s) \ -INCLUDE_DIRS = -I$(TOP)/src/mesa +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers .c.o: diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index e51008b9b3c..109540b1f7b 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -67,8 +67,8 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "spu_exec.h" #include "spu_main.h" #include "spu_vertex_shader.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index b4c7661ef67..3e17c490d20 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -29,7 +29,7 @@ #define SPU_EXEC_H #include "pipe/p_compiler.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" #if defined __cplusplus extern "C" { diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index e375197fe60..1e7243b8639 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -38,7 +38,7 @@ #include "spu_tile.h" //#include "spu_test.h" #include "spu_vertex_shader.h" -#include "pipe/cell/common.h" +#include "cell/common.h" #include "pipe/p_defines.h" diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 1710a175123..5c95d112ac1 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -31,8 +31,8 @@ #include <spu_mfcio.h> -#include "pipe/cell/common.h" -#include "pipe/draw/draw_vertex.h" +#include "cell/common.h" +#include "draw/draw_vertex.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 932fb500b3f..20e77aa2e63 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -34,7 +34,7 @@ #include "spu_render.h" #include "spu_tri.h" #include "spu_tile.h" -#include "pipe/cell/common.h" +#include "cell/common.h" diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h index fbcdc5ec316..493434f0878 100644 --- a/src/gallium/drivers/cell/spu/spu_render.h +++ b/src/gallium/drivers/cell/spu/spu_render.h @@ -29,7 +29,7 @@ #ifndef SPU_RENDER_H #define SPU_RENDER_H -#include "pipe/cell/common.h" +#include "cell/common.h" extern void cmd_render(const struct cell_command_render *render, uint *pos_incr); diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h index e53340a55a4..3105b848fdc 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -32,7 +32,7 @@ #include <libmisc.h> #include <spu_mfcio.h> #include "spu_main.h" -#include "pipe/cell/common.h" +#include "cell/common.h" diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index ac373240c1f..ea4274a0a7b 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,8 +1,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" //#include "tgsi_build.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_util.h" unsigned tgsi_util_get_src_register_swizzle( diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index c1cbbb6d1e9..3f5bf41aa2f 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -39,9 +39,9 @@ #include "pipe/p_shader_tokens.h" #include "spu_vertex_shader.h" #include "spu_exec.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_context.h" -#include "pipe/cell/common.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" +#include "cell/common.h" #include "spu_main.h" static INLINE unsigned diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile index 72d0895c74d..14389bd0551 100644 --- a/src/gallium/drivers/failover/Makefile +++ b/src/gallium/drivers/failover/Makefile @@ -15,7 +15,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile index 2f91de3afc6..ee22ba86f9b 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915simple/Makefile @@ -32,7 +32,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 497623a7006..7f71f8fd4f5 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -32,7 +32,7 @@ #include "i915_texture.h" #include "i915_reg.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index b4ea63c3e74..2d876925b2c 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" #define I915_TEX_UNITS 8 diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 868f0c7e046..6c1524c768e 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -33,9 +33,9 @@ #include "i915_fpc.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" /** diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index c4a706c37d8..44c43259369 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/draw/draw_private.h" +#include "draw/draw_private.h" #include "pipe/p_util.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index e069773fd4e..c5bf6174f68 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -38,7 +38,7 @@ */ -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vbuf.h" #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index abd5571b885..294e6fad035 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -29,7 +29,7 @@ */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 653983e4a99..4767584fc60 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_strings.c b/src/gallium/drivers/i915simple/i915_strings.c index c713bf72086..301fedea197 100644 --- a/src/gallium/drivers/i915simple/i915_strings.c +++ b/src/gallium/drivers/i915simple/i915_strings.c @@ -70,7 +70,7 @@ static const char *i915_get_name( struct pipe_context *pipe ) break; } - sprintf(buffer, "pipe/i915 (chipset: %s)", chipset); + sprintf(buffer, "i915 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index de0cc5fe06a..17fd27895a2 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -33,7 +33,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" /* diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index 48c00ab50b8..1dec1f97495 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -61,6 +61,6 @@ ASM_SOURCES = DRIVER_DEFINES = -I. -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index 431b45466a6..a762a870fe9 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -3,7 +3,7 @@ #include "brw_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 95dfce88e4a..f746d1cc57c 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -33,7 +33,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_dump.h" #include "brw_context.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965simple/brw_strings.c b/src/gallium/drivers/i965simple/brw_strings.c index 29a41ed1e9b..3d9c50961fa 100644 --- a/src/gallium/drivers/i965simple/brw_strings.c +++ b/src/gallium/drivers/i965simple/brw_strings.c @@ -59,7 +59,7 @@ static const char *brw_get_name( struct pipe_context *pipe ) break; } - sprintf(buffer, "pipe/i965 (chipset: %s)", chipset); + sprintf(buffer, "i965 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 518845e4b2e..376a42b1a6a 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -32,7 +32,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" /* diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 98915ba1016..05df4860eda 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -33,7 +33,7 @@ #include "brw_vs.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" struct brw_prog_info { unsigned num_temps; diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index b45a333a2e9..97418a52e7f 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index d95645d1085..44f946ea748 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 31438a882e6..2304ea4246a 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -44,7 +44,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index cea6b90104f..5e98f190bbf 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -29,7 +29,7 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index aff8c2cc5dc..8c79cb3ce4f 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -34,7 +34,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" #include "sp_quad.h" diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 71a303a8b58..2049afda34f 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -38,7 +38,7 @@ #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index ced0d5d0983..2cbd0d7cabe 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -31,7 +31,7 @@ #include "pipe/p_defines.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "sp_flush.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h index 0ae31d87961..9cf82221333 100644 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -31,7 +31,7 @@ #ifndef SP_HEADERS_H #define SP_HEADERS_H -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" #define PRIM_POINT 1 #define PRIM_LINE 2 diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 27720486615..d73521ccbe5 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -38,8 +38,8 @@ #include "sp_quad.h" #include "sp_state.h" #include "sp_prim_setup.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 7f71fdb6a9a..69bea8a8f5c 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -39,9 +39,9 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_prim_vbuf.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" #define SP_MAX_VBUF_INDEXES 1024 diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 33168584137..b4c01a7ea8c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -42,7 +42,7 @@ #include "x86/rtasm/x86sse.h" #ifdef MESA_LLVM -#include "pipe/llvm/gallivm.h" +#include "llvm/gallivm.h" #endif #include "sp_context.h" diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 6a8a43aedaf..adf9ccf64c6 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -29,7 +29,7 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c index 08c5f06d05d..c797c0dd3b1 100644 --- a/src/gallium/drivers/softpipe/sp_state_clip.c +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -29,7 +29,7 @@ */ #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void softpipe_set_clip_state( struct pipe_context *pipe, diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 372597869f3..9d8fd8b750f 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -27,9 +27,9 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_private.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 0b814fc2847..1e3cadd43d1 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -32,11 +32,11 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/llvm/gallivm.h" -#include "pipe/tgsi/util/tgsi_dump.h" -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "llvm/gallivm.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_sse2.h" void * diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 53755099dde..98e04352dbc 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index ea348c7e958..460adccec4f 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -31,14 +31,14 @@ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "sp_context.h" #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" #include "sp_tile_cache.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 09ff540ccfd..f01a10de3b4 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -33,7 +33,7 @@ #include "sp_state.h" #include "sp_surface.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 8802ced1874..653449c4f18 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 325bdb86da5..2f82fd6abea 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -40,7 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" /* diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 1597361b82f..dde3fabc81e 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -34,7 +34,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "sp_context.h" #include "sp_surface.h" #include "sp_tile_cache.h" diff --git a/src/gallium/winsys/dri/Makefile b/src/gallium/winsys/dri/Makefile new file mode 100644 index 00000000000..f466ce6c3cc --- /dev/null +++ b/src/gallium/winsys/dri/Makefile @@ -0,0 +1,38 @@ +# src/mesa/drivers/dri/Makefile + +TOP = ../../../.. + +include $(TOP)/configs/current + + + +default: $(TOP)/$(LIB_DIR) subdirs + + +$(TOP)/$(LIB_DIR): + -mkdir $(TOP)/$(LIB_DIR) + + +subdirs: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +install: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) install) || exit 1 ; \ + fi \ + done + + +clean: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) clean) ; \ + fi \ + done + -rm -f common/*.o diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template new file mode 100644 index 00000000000..b96305c0940 --- /dev/null +++ b/src/gallium/winsys/dri/Makefile.template @@ -0,0 +1,113 @@ +# -*-makefile-*- + +MESA_MODULES = $(TOP)/src/mesa/libmesa.a + +COMMON_GALLIUM_SOURCES = \ + $(TOP)/src/mesa/drivers/dri/common/utils.c \ + $(TOP)/src/mesa/drivers/dri/common/vblank.c \ + $(TOP)/src/mesa/drivers/dri/common/dri_util.c \ + $(TOP)/src/mesa/drivers/dri/common/xmlconfig.c + +COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \ + $(TOP)/src/mesa/drivers/common/driverfuncs.c \ + $(TOP)/src/mesa/drivers/dri/common/texmem.c \ + $(TOP)/src/mesa/drivers/dri/common/drirenderbuffer.c + +COMMON_BM_SOURCES = \ + $(TOP)/src/mesa/drivers/dri/common/dri_bufmgr.c \ + $(TOP)/src/mesa/drivers/dri/common/dri_drmpool.c + + +ifeq ($(WINDOW_SYSTEM),dri) +WINOBJ= +WINLIB= +INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) + +else +# miniglx +WINOBJ= +WINLIB=-L$(MESA)/src/glx/mini +MINIGLX_INCLUDES = -I$(TOP)/src/glx/mini +INCLUDES = $(MINIGLX_INCLUDES) \ + $(SHARED_INCLUDES) \ + $(PCIACCESS_CFLAGS) + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(MINIGLX_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) +endif + + +### Include directories +SHARED_INCLUDES = \ + -I. \ + -I$(TOP)/src/mesa/drivers/dri/common \ + -Iserver \ + -I$(TOP)/include \ + -I$(TOP)/include/GL/internal \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/mesa/glapi \ + -I$(TOP)/src/mesa/math \ + -I$(TOP)/src/mesa/transform \ + -I$(TOP)/src/mesa/shader \ + -I$(TOP)/src/mesa/swrast \ + -I$(TOP)/src/mesa/swrast_setup \ + -I$(TOP)/src/egl/main \ + -I$(TOP)/src/egl/drivers/dri \ + $(LIBDRM_CFLAGS) + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + + +##### TARGETS ##### + +default: depend symlinks $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) + + +$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template + $(TOP)/bin/mklib -noprefix -o $@ \ + $(OBJECTS) $(PIPE_DRIVERS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) + + +$(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) + $(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR) + + +depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ + $(ASM_SOURCES) 2> /dev/null + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean: + -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS) + -rm -f depend depend.bak + + +install: $(LIBNAME) + $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) + $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + + +include depend diff --git a/src/gallium/winsys/dri/intel/Makefile b/src/gallium/winsys/dri/intel/Makefile index 9ae0f013256..40654bb2ac3 100644 --- a/src/gallium/winsys/dri/intel/Makefile +++ b/src/gallium/winsys/dri/intel/Makefile @@ -7,8 +7,8 @@ LIBNAME = i915tex_dri.so MINIGLX_SOURCES = server/intel_dri.c PIPE_DRIVERS = \ - $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a \ - $(TOP)/src/mesa/pipe/i915simple/libi915simple.a + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/i915simple/libi915simple.a DRIVER_SOURCES = \ intel_winsys_pipe.c \ @@ -28,11 +28,11 @@ C_SOURCES = \ ASM_SOURCES = -DRIVER_DEFINES = -I../intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ +DRIVER_DEFINES = -I$(TOP)/src/mesa/drivers/dri/intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") include ../Makefile.template -intel_tex_layout.o: ../intel/intel_tex_layout.c +intel_tex_layout.o: $(TOP)/src/mesa/drivers/dri/intel/intel_tex_layout.c symlinks: diff --git a/src/gallium/winsys/dri/intel/intel_winsys_i915.c b/src/gallium/winsys/dri/intel/intel_winsys_i915.c index 1ba6a9e1b25..0ed3890e936 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_i915.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_i915.c @@ -39,7 +39,7 @@ #include "intel_winsys.h" #include "pipe/p_util.h" -#include "pipe/i915simple/i915_winsys.h" +#include "i915simple/i915_winsys.h" struct intel_i915_winsys { diff --git a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c index cec3437831c..9e483bdc9f5 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c @@ -34,7 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_format.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" struct intel_softpipe_winsys { diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index c3cd22eea3f..8da596d419e 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -41,11 +41,11 @@ #include "pipe/p_context.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" #ifdef GALLIUM_CELL -#include "pipe/cell/ppu/cell_context.h" -#include "pipe/cell/ppu/cell_winsys.h" +#include "cell/ppu/cell_context.h" +#include "cell/ppu/cell_winsys.h" #else #define TILE_SIZE 32 /* avoid compilation errors */ #endif diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index bf415702570..dbfd37bda29 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -39,7 +39,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/i965simple/brw_winsys.h" +#include "i965simple/brw_winsys.h" #include "brw_aub.h" #include "xm_winsys_aub.h" diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 720f1b2e026..561608fedd6 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -12,16 +12,16 @@ GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) PIPE_LIB = \ - $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a \ - $(TOP)/src/mesa/pipe/i965simple/libi965simple.a + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/i965simple/libi965simple.a ifeq ($(CONFIG_NAME), linux-cell) -CELL_LIB = $(TOP)/src/mesa/pipe/cell/ppu/libcell.a -CELL_LIB_SPU = $(TOP)/src/mesa/pipe/cell/spu/g3d_spu.a +CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a +CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a endif ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_LIB = $(TOP)/src/mesa/pipe/llvm/libgallivm.a +LLVM_LIB = $(TOP)/src/gallium/aux/llvm/libgallivm.a endif .SUFFIXES : .cpp @@ -71,7 +71,7 @@ libmesa.a: $(SOLO_OBJECTS) fi linux-solo: depend subdirs libmesa.a - cd drivers/dri ; $(MAKE) + cd $(TOP)/src/gallium/winsys/dri ; $(MAKE) ##################################################################### @@ -165,7 +165,6 @@ depend: $(ALL_SOURCES) subdirs: @ (cd x86 ; $(MAKE)) @ (cd x86-64 ; $(MAKE)) - (cd pipe ; $(MAKE)) install: default $(INSTALL) -d $(INSTALL_DIR)/include/GL @@ -178,7 +177,7 @@ install: default $(INSTALL) $(TOP)/$(LIB_DIR)/libOSMesa* $(INSTALL_DIR)/$(LIB_DIR); \ fi @if [ "${DRIVER_DIRS}" = "dri" ] ; then \ - cd drivers/dri ; $(MAKE) install ; \ + cd $(TOP)/gallium/winsys/dri ; $(MAKE) install ; \ fi ## NOT INSTALLED YET: @@ -198,7 +197,6 @@ clean: (cd drivers/dri && $(MAKE) clean) (cd x86 && $(MAKE) clean) (cd x86-64 && $(MAKE) clean) - (cd pipe ; $(MAKE) clean ) include depend diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 08c98eab486..18b033666f0 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -85,7 +85,7 @@ #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" -#include "pipe/softpipe/sp_context.h" +#include "softpipe/sp_context.h" #include "pipe/p_defines.h" /** diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 8ae243ae662..34287effe1c 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -53,7 +53,7 @@ #include "tnl/tnl.h" #include "tnl/t_context.h" -#include "pipe/softpipe/sp_context.h" +#include "softpipe/sp_context.h" #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" #include "state_tracker/st_draw.h" diff --git a/src/mesa/drivers/x11/xm_surface.c b/src/mesa/drivers/x11/xm_surface.c index 5533158ece0..81616b92d96 100644 --- a/src/mesa/drivers/x11/xm_surface.c +++ b/src/mesa/drivers/x11/xm_surface.c @@ -45,10 +45,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/softpipe/sp_context.h" -#include "pipe/softpipe/sp_clear.h" -#include "pipe/softpipe/sp_tile_cache.h" -#include "pipe/softpipe/sp_surface.h" +#include "softpipe/sp_context.h" +#include "softpipe/sp_clear.h" +#include "softpipe/sp_tile_cache.h" +#include "softpipe/sp_surface.h" #include "state_tracker/st_context.h" diff --git a/src/mesa/drivers/x11/xm_winsys.c b/src/mesa/drivers/x11/xm_winsys.c index a690df27727..2edc6976933 100644 --- a/src/mesa/drivers/x11/xm_winsys.c +++ b/src/mesa/drivers/x11/xm_winsys.c @@ -38,7 +38,7 @@ #include "main/macros.h" #include "pipe/p_winsys.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" /** diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h index 4709d633942..fd2dfcd79aa 100644 --- a/src/mesa/drivers/x11/xmesaP.h +++ b/src/mesa/drivers/x11/xmesaP.h @@ -37,8 +37,8 @@ #include "xm_image.h" #endif #include "state_tracker/st_cb_fbo.h" -#include "pipe/softpipe/sp_context.h" -#include "pipe/softpipe/sp_surface.h" +#include "softpipe/sp_context.h" +#include "softpipe/sp_surface.h" extern _glthread_Mutex _xmesa_lock; diff --git a/src/mesa/sources b/src/mesa/sources index 1165425183a..2d07738210d 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -158,45 +158,45 @@ VF_SOURCES = \ DRAW_SOURCES = \ - pipe/draw/draw_clip.c \ - pipe/draw/draw_context.c\ - pipe/draw/draw_cull.c \ - pipe/draw/draw_debug.c \ - pipe/draw/draw_flatshade.c \ - pipe/draw/draw_offset.c \ - pipe/draw/draw_prim.c \ - pipe/draw/draw_stipple.c \ - pipe/draw/draw_twoside.c \ - pipe/draw/draw_unfilled.c \ - pipe/draw/draw_validate.c \ - pipe/draw/draw_vbuf.c \ - pipe/draw/draw_vertex.c \ - pipe/draw/draw_vertex_cache.c \ - pipe/draw/draw_vertex_fetch.c \ - pipe/draw/draw_vertex_shader.c \ - pipe/draw/draw_vf.c \ - pipe/draw/draw_vf_generic.c \ - pipe/draw/draw_vf_sse.c \ - pipe/draw/draw_wide_prims.c + $(TOP)/src/gallium/aux/draw/draw_clip.c \ + $(TOP)/src/gallium/aux/draw/draw_context.c\ + $(TOP)/src/gallium/aux/draw/draw_cull.c \ + $(TOP)/src/gallium/aux/draw/draw_debug.c \ + $(TOP)/src/gallium/aux/draw/draw_flatshade.c \ + $(TOP)/src/gallium/aux/draw/draw_offset.c \ + $(TOP)/src/gallium/aux/draw/draw_prim.c \ + $(TOP)/src/gallium/aux/draw/draw_stipple.c \ + $(TOP)/src/gallium/aux/draw/draw_twoside.c \ + $(TOP)/src/gallium/aux/draw/draw_unfilled.c \ + $(TOP)/src/gallium/aux/draw/draw_validate.c \ + $(TOP)/src/gallium/aux/draw/draw_vbuf.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_cache.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_fetch.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_shader.c \ + $(TOP)/src/gallium/aux/draw/draw_vf.c \ + $(TOP)/src/gallium/aux/draw/draw_vf_generic.c \ + $(TOP)/src/gallium/aux/draw/draw_vf_sse.c \ + $(TOP)/src/gallium/aux/draw/draw_wide_prims.c TGSIEXEC_SOURCES = \ - pipe/tgsi/exec/tgsi_exec.c \ - pipe/tgsi/exec/tgsi_sse2.c + $(TOP)/src/gallium/aux/tgsi/exec/tgsi_exec.c \ + $(TOP)/src/gallium/aux/tgsi/exec/tgsi_sse2.c TGSIUTIL_SOURCES = \ - pipe/tgsi/util/tgsi_build.c \ - pipe/tgsi/util/tgsi_dump.c \ - pipe/tgsi/util/tgsi_parse.c \ - pipe/tgsi/util/tgsi_util.c + $(TOP)/src/gallium/aux/tgsi/util/tgsi_build.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_dump.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_parse.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_util.c STATECACHE_SOURCES = \ - pipe/cso_cache/cso_hash.c \ - pipe/cso_cache/cso_cache.c + $(TOP)/src/gallium/aux/cso_cache/cso_hash.c \ + $(TOP)/src/gallium/aux/cso_cache/cso_cache.c PIPEUTIL_SOURCES = \ - pipe/util/p_debug.c \ - pipe/util/p_tile.c \ - pipe/util/p_util.c + $(TOP)/src/gallium/aux/util/p_debug.c \ + $(TOP)/src/gallium/aux/util/p_tile.c \ + $(TOP)/src/gallium/aux/util/p_util.c STATETRACKER_SOURCES = \ state_tracker/st_atom.c \ @@ -331,13 +331,13 @@ __COMMON_DRIVER_SOURCES = \ drivers/common/driverfuncs.c X11_DRIVER_SOURCES = \ - pipe/xlib/glxapi.c \ - pipe/xlib/fakeglx.c \ - pipe/xlib/xfonts.c \ - pipe/xlib/xm_api.c \ - pipe/xlib/xm_winsys.c \ - pipe/xlib/xm_winsys_aub.c \ - pipe/xlib/brw_aub.c + $(TOP)/src/gallium/winsys/xlib/glxapi.c \ + $(TOP)/src/gallium/winsys/xlib/fakeglx.c \ + $(TOP)/src/gallium/winsys/xlib/xfonts.c \ + $(TOP)/src/gallium/winsys/xlib/xm_api.c \ + $(TOP)/src/gallium/winsys/xlib/xm_winsys.c \ + $(TOP)/src/gallium/winsys/xlib/xm_winsys_aub.c \ + $(TOP)/src/gallium/winsys/xlib/brw_aub.c OSMESA_DRIVER_SOURCES = \ drivers/osmesa/osmesa.c @@ -425,7 +425,10 @@ FBDEV_DRIVER_OBJECTS = $(FBDEV_DRIVER_SOURCES:.c=.o) INCLUDE_DIRS = \ -I$(TOP)/include \ -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/aux OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/tnl \ @@ -435,4 +438,4 @@ OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/shader \ -I$(TOP)/src/mesa/shader/grammar \ -I$(TOP)/src/mesa/shader/slang \ - -I$(TOP)/src/mesa/pipe/tgsi + -I$(TOP)/s$(TOP)/src/gallium/aux/tgsi diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 2c6ec8421b0..b67b620eaa5 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -43,7 +43,7 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_cache.h" diff --git a/src/mesa/state_tracker/st_cache.c b/src/mesa/state_tracker/st_cache.c index e0965b217ab..2979e7fae54 100644 --- a/src/mesa/state_tracker/st_cache.c +++ b/src/mesa/state_tracker/st_cache.c @@ -36,8 +36,8 @@ #include "pipe/p_state.h" -#include "pipe/cso_cache/cso_cache.h" -#include "pipe/cso_cache/cso_hash.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" /* Those function will either find the state of the given template diff --git a/src/mesa/state_tracker/st_cache.h b/src/mesa/state_tracker/st_cache.h index e0c176b0ffc..b81de316ec9 100644 --- a/src/mesa/state_tracker/st_cache.h +++ b/src/mesa/state_tracker/st_cache.h @@ -33,7 +33,7 @@ #ifndef ST_CACHE_H #define ST_CACHE_H -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" struct pipe_blend_state; struct pipe_sampler_state; diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 3a3bf9016dd..663c4f205d8 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -43,7 +43,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index f13199a3c0f..e2d4e06da19 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -56,7 +56,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "shader/prog_instruction.h" diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c index 31744151f1d..5315294c07b 100644 --- a/src/mesa/state_tracker/st_cb_feedback.c +++ b/src/mesa/state_tracker/st_cb_feedback.c @@ -53,10 +53,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" /** diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index af3ee655048..61d4f4c41c6 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -39,8 +39,8 @@ #include "shader/programopt.h" #include "shader/shader_api.h" -#include "pipe/cso_cache/cso_cache.h" -#include "pipe/draw/draw_context.h" +#include "cso_cache/cso_cache.h" +#include "draw/draw_context.h" #include "st_context.h" #include "st_program.h" diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 7e347c48938..5b0eb6022be 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -44,8 +44,8 @@ #include "st_draw.h" #include "st_cb_rasterpos.h" #include "st_draw.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" #include "shader/prog_instruction.h" #include "vbo/vbo.h" diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 868c5f3c5f9..c89c74229e5 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -40,7 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "st_context.h" #include "st_cb_readpixels.h" #include "st_cb_fbo.h" diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 91a40288cc7..03dbb30b0fe 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -47,7 +47,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #define DBG if (0) printf diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index bf4618bed8e..09e389f9dc7 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -54,8 +54,8 @@ #include "pipe/p_context.h" #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "pipe/draw/draw_context.h" -#include "pipe/cso_cache/cso_cache.h" +#include "draw/draw_context.h" +#include "cso_cache/cso_cache.h" /** diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 57450e52bf4..5888bcb98a3 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -31,9 +31,9 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_dump.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_debug.h" diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index ae9f5c8b117..1c0fa8c6aad 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -47,8 +47,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" static GLuint double_types[4] = { diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 459941cca87..6c09b86033b 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -37,7 +37,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_draw.h" diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 325aa201734..97206752af3 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -32,9 +32,9 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_build.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_util.h" #include "st_mesa_to_tgsi.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index c8297badedd..dc992ee9c24 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -38,8 +38,8 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "draw/draw_context.h" +#include "tgsi/util/tgsi_dump.h" #include "st_context.h" #include "st_cache.h" -- cgit v1.2.3 From 66f22aa3bf7fa546e946b45156aa578e202982c9 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 15 Feb 2008 20:11:40 +0900 Subject: Code reorganization: s/aux/auxiliary/ -- update build. --- src/gallium/Makefile | 2 +- src/gallium/Makefile.template | 2 +- src/gallium/auxiliary/llvm/Makefile | 2 +- src/gallium/drivers/cell/ppu/Makefile | 2 +- src/gallium/drivers/cell/spu/Makefile | 2 +- src/gallium/winsys/dri/Makefile.template | 2 +- src/mesa/Makefile | 2 +- src/mesa/sources | 66 ++++++++++++++++---------------- 8 files changed, 40 insertions(+), 40 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/Makefile b/src/gallium/Makefile index a13b9a52d36..89e068a4492 100644 --- a/src/gallium/Makefile +++ b/src/gallium/Makefile @@ -2,7 +2,7 @@ TOP = ../.. include $(TOP)/configs/current -SUBDIRS = aux drivers +SUBDIRS = auxiliary drivers default: subdirs diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 0717ed8dd24..83b25f9b47c 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -17,7 +17,7 @@ INCLUDES = \ -I. \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/include/pipe \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ -I$(TOP)/include \ diff --git a/src/gallium/auxiliary/llvm/Makefile b/src/gallium/auxiliary/llvm/Makefile index e6ac399d088..e0abf860c17 100644 --- a/src/gallium/auxiliary/llvm/Makefile +++ b/src/gallium/auxiliary/llvm/Makefile @@ -31,7 +31,7 @@ OBJECTS = $(C_SOURCES:.c=.o) \ INCLUDES = \ -I. \ -I$(TOP)/src/gallium/drivers - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/mesa \ -I$(TOP)/include diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 011863c11e1..a4c3f29e8a4 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -43,7 +43,7 @@ OBJECTS = $(SOURCES:.c=.o) \ INCLUDE_DIRS = \ -I$(TOP)/src/mesa \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers .c.o: diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index 7aa947299e7..30ef2450ece 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -34,7 +34,7 @@ SPU_ASM_OUT = $(SOURCES:.c=.s) \ INCLUDE_DIRS = \ -I$(TOP)/src/mesa \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template index b96305c0940..2a261ed6694 100644 --- a/src/gallium/winsys/dri/Makefile.template +++ b/src/gallium/winsys/dri/Makefile.template @@ -49,7 +49,7 @@ SHARED_INCLUDES = \ -I$(TOP)/include \ -I$(TOP)/include/GL/internal \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ -I$(TOP)/src/mesa/main \ diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 561608fedd6..c8cb2b592fe 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -21,7 +21,7 @@ CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a endif ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_LIB = $(TOP)/src/gallium/aux/llvm/libgallivm.a +LLVM_LIB = $(TOP)/src/gallium/auxiliary/llvm/libgallivm.a endif .SUFFIXES : .cpp diff --git a/src/mesa/sources b/src/mesa/sources index 2d07738210d..cecd8a830fe 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -158,45 +158,45 @@ VF_SOURCES = \ DRAW_SOURCES = \ - $(TOP)/src/gallium/aux/draw/draw_clip.c \ - $(TOP)/src/gallium/aux/draw/draw_context.c\ - $(TOP)/src/gallium/aux/draw/draw_cull.c \ - $(TOP)/src/gallium/aux/draw/draw_debug.c \ - $(TOP)/src/gallium/aux/draw/draw_flatshade.c \ - $(TOP)/src/gallium/aux/draw/draw_offset.c \ - $(TOP)/src/gallium/aux/draw/draw_prim.c \ - $(TOP)/src/gallium/aux/draw/draw_stipple.c \ - $(TOP)/src/gallium/aux/draw/draw_twoside.c \ - $(TOP)/src/gallium/aux/draw/draw_unfilled.c \ - $(TOP)/src/gallium/aux/draw/draw_validate.c \ - $(TOP)/src/gallium/aux/draw/draw_vbuf.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex_cache.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex_fetch.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex_shader.c \ - $(TOP)/src/gallium/aux/draw/draw_vf.c \ - $(TOP)/src/gallium/aux/draw/draw_vf_generic.c \ - $(TOP)/src/gallium/aux/draw/draw_vf_sse.c \ - $(TOP)/src/gallium/aux/draw/draw_wide_prims.c + $(TOP)/src/gallium/auxiliary/draw/draw_clip.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_context.c\ + $(TOP)/src/gallium/auxiliary/draw/draw_cull.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_debug.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_flatshade.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_offset.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_prim.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_stipple.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_twoside.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_unfilled.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_validate.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vbuf.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex_cache.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex_fetch.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex_shader.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vf.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vf_generic.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vf_sse.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_wide_prims.c TGSIEXEC_SOURCES = \ - $(TOP)/src/gallium/aux/tgsi/exec/tgsi_exec.c \ - $(TOP)/src/gallium/aux/tgsi/exec/tgsi_sse2.c + $(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c \ + $(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c TGSIUTIL_SOURCES = \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_build.c \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_dump.c \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_parse.c \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_util.c + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_build.c \ + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_dump.c \ + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_parse.c \ + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_util.c STATECACHE_SOURCES = \ - $(TOP)/src/gallium/aux/cso_cache/cso_hash.c \ - $(TOP)/src/gallium/aux/cso_cache/cso_cache.c + $(TOP)/src/gallium/auxiliary/cso_cache/cso_hash.c \ + $(TOP)/src/gallium/auxiliary/cso_cache/cso_cache.c PIPEUTIL_SOURCES = \ - $(TOP)/src/gallium/aux/util/p_debug.c \ - $(TOP)/src/gallium/aux/util/p_tile.c \ - $(TOP)/src/gallium/aux/util/p_util.c + $(TOP)/src/gallium/auxiliary/util/p_debug.c \ + $(TOP)/src/gallium/auxiliary/util/p_tile.c \ + $(TOP)/src/gallium/auxiliary/util/p_util.c STATETRACKER_SOURCES = \ state_tracker/st_atom.c \ @@ -428,7 +428,7 @@ INCLUDE_DIRS = \ -I$(TOP)/src/mesa/main \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/aux + -I$(TOP)/src/gallium/auxiliary OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/tnl \ @@ -438,4 +438,4 @@ OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/shader \ -I$(TOP)/src/mesa/shader/grammar \ -I$(TOP)/src/mesa/shader/slang \ - -I$(TOP)/s$(TOP)/src/gallium/aux/tgsi + -I$(TOP)/s$(TOP)/src/gallium/auxiliary/tgsi -- cgit v1.2.3 From e822e09b89407d6cb8cd4a79e1c5c1e0955caf64 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Fri, 15 Feb 2008 13:35:46 +0000 Subject: softpipe: rename some functions to disambiguate --- src/gallium/drivers/softpipe/sp_fs_sse.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 28c5d8c556d..d90066e0257 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -100,7 +100,7 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef, static void -sse_prepare( struct sp_fragment_shader *base, +fs_sse_prepare( struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers ) { @@ -113,7 +113,7 @@ sse_prepare( struct sp_fragment_shader *base, * TODO: process >1 quad at a time */ static unsigned -sse_run( struct sp_fragment_shader *base, +fs_sse_run( struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct quad_header *quad ) { @@ -137,7 +137,7 @@ sse_run( struct sp_fragment_shader *base, static void -sse_delete( struct sp_fragment_shader *base ) +fs_sse_delete( struct sp_fragment_shader *base ) { struct sp_sse_fragment_shader *shader = (struct sp_sse_fragment_shader *) base; @@ -170,9 +170,9 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, assert(shader->func); shader->base.shader = *templ; - shader->base.prepare = sse_prepare; - shader->base.run = sse_run; - shader->base.delete = sse_delete; + shader->base.prepare = fs_sse_prepare; + shader->base.run = fs_sse_run; + shader->base.delete = fs_sse_delete; return &shader->base; } -- cgit v1.2.3 From 397b81bd1c7984b1667af7ef954e053263a7a661 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Fri, 15 Feb 2008 09:43:13 -0800 Subject: Move cell_vertex_fetch.c for recent code reorg. --- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 392 +++++++++++++++++++++++ src/mesa/pipe/cell/ppu/cell_vertex_fetch.c | 392 ----------------------- 2 files changed, 392 insertions(+), 392 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_vertex_fetch.c delete mode 100644 src/mesa/pipe/cell/ppu/cell_vertex_fetch.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c new file mode 100644 index 00000000000..f2432f4ff52 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -0,0 +1,392 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <inttypes.h> +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_format.h" + +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" + +#include "pipe/cell/ppu/cell_context.h" +#include "ppc/rtasm/spe_asm.h" + +typedef uint64_t register_mask; + +int allocate_available_register(register_mask *m) +{ + unsigned i; + for (i = 0; i < 64; i++) { + const uint64_t mask = (1ULL << i); + + if ((m[0] & mask) != 0) { + m[0] &= ~mask; + return i; + } + } + + return -1; +} + + +int allocate_register(register_mask *m, unsigned reg) +{ + assert((m[0] & (1ULL << reg)) != 0); + + m[0] &= ~(1ULL << reg); + return reg; +} + + +void release_register(register_mask *m, unsigned reg) +{ + assert((m[0] & (1ULL << reg)) == 0); + + m[0] |= (1ULL << reg); +} + + +/** + * Emit a 4x4 matrix transpose operation + * + * \param p Function that the transpose operation is to be appended to + * \param m Live register mask + * \param row0 Register containing row 0 of the source matrix + * \param row1 Register containing row 1 of the source matrix + * \param row2 Register containing row 2 of the source matrix + * \param row3 Register containing row 3 of the source matrix + * \param dest_ptr Register containing the address of the destination matrix + * \param shuf_ptr Register containing the address of the shuffled data + * \param count Number of colums to actually be written to the destination + * + * \note + * This function assumes that the registers named by \c row0, \c row1, + * \c row2, and \c row3 are scratch and can be modified by the generated code. + * Furthermore, these registers will be released, via calls to + * \c release_register, by this function. + * + * \note + * This function requires that four temporary are available on entry. + */ +static void +emit_matrix_transpose(struct spe_function *p, register_mask *m, + unsigned row0, unsigned row1, unsigned row2, + unsigned row3, unsigned dest_ptr, + unsigned shuf_ptr, unsigned count) +{ + int shuf_hi = allocate_available_register(m); + int shuf_lo = allocate_available_register(m); + int t1 = allocate_available_register(m); + int t2 = allocate_available_register(m); + int t3; + int t4; + int col0; + int col1; + int col2; + int col3; + + + spe_lqd(p, shuf_hi, shuf_ptr, 3); + spe_lqd(p, shuf_lo, shuf_ptr, 4); + spe_shufb(p, t1, row0, row2, shuf_hi); + spe_shufb(p, t2, row0, row2, shuf_lo); + + + /* row0 and row2 are now no longer needed. Re-use those registers as + * temporaries. + */ + t3 = row0; + t4 = row2; + + spe_shufb(p, t3, row1, row3, shuf_hi); + spe_shufb(p, t4, row1, row3, shuf_lo); + + + /* row1 and row3 are now no longer needed. Re-use those registers as + * temporaries. + */ + col0 = row1; + col1 = row3; + + spe_shufb(p, col0, t1, t3, shuf_hi); + if (count > 1) { + spe_shufb(p, col1, t1, t3, shuf_lo); + } + + /* t1 and t3 are now no longer needed. Re-use those registers as + * temporaries. + */ + col2 = t1; + col3 = t3; + + if (count > 2) { + spe_shufb(p, col2, t2, t4, shuf_hi); + } + + if (count > 3) { + spe_shufb(p, col3, t2, t4, shuf_lo); + } + + + /* Store the results. Remember that the stqd instruction is encoded using + * the qword offset (stand-alone assemblers to the byte-offset to + * qword-offset conversion for you), so the byte-offset needs be divided by + * 16. + */ + switch (count) { + case 4: + spe_stqd(p, col3, dest_ptr, 3); + case 3: + spe_stqd(p, col2, dest_ptr, 2); + case 2: + spe_stqd(p, col1, dest_ptr, 1); + case 1: + spe_stqd(p, col0, dest_ptr, 0); + } + + + /* Release all of the temporary registers used. + */ + release_register(m, col0); + release_register(m, col1); + release_register(m, col2); + release_register(m, col3); + release_register(m, shuf_hi); + release_register(m, shuf_lo); + release_register(m, t2); + release_register(m, t4); +} + + +static void +emit_fetch(struct spe_function *p, register_mask *m, + unsigned in_ptr, unsigned *offset, + unsigned out_ptr, unsigned shuf_ptr, + enum pipe_format format) +{ + const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) + + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); + const unsigned type = pf_type(format); + const unsigned bytes = pf_size_x(format); + + int v0 = allocate_available_register(m); + int v1 = allocate_available_register(m); + int v2 = allocate_available_register(m); + int v3 = allocate_available_register(m); + int tmp = allocate_available_register(m); + int float_zero = -1; + int float_one = -1; + float scale_signed = 0.0; + float scale_unsigned = 0.0; + + spe_lqd(p, v0, in_ptr, 0 + offset[0]); + spe_lqd(p, v1, in_ptr, 1 + offset[0]); + spe_lqd(p, v2, in_ptr, 2 + offset[0]); + spe_lqd(p, v3, in_ptr, 3 + offset[0]); + offset[0] += 4; + + switch (bytes) { + case 1: + scale_signed = 1.0f / 127.0f; + scale_unsigned = 1.0f / 255.0f; + spe_lqd(p, tmp, shuf_ptr, 1); + spe_shufb(p, v0, v0, v0, tmp); + spe_shufb(p, v1, v1, v1, tmp); + spe_shufb(p, v2, v2, v2, tmp); + spe_shufb(p, v3, v3, v3, tmp); + break; + case 2: + scale_signed = 1.0f / 32767.0f; + scale_unsigned = 1.0f / 65535.0f; + spe_lqd(p, tmp, shuf_ptr, 2); + spe_shufb(p, v0, v0, v0, tmp); + spe_shufb(p, v1, v1, v1, tmp); + spe_shufb(p, v2, v2, v2, tmp); + spe_shufb(p, v3, v3, v3, tmp); + break; + case 4: + scale_signed = 1.0f / 2147483647.0f; + scale_unsigned = 1.0f / 4294967295.0f; + break; + default: + assert(0); + break; + } + + switch (type) { + case PIPE_FORMAT_TYPE_FLOAT: + break; + case PIPE_FORMAT_TYPE_UNORM: + spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); + spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); + spe_cuflt(p, v0, v0, 0); + spe_fm(p, v0, v0, tmp); + break; + case PIPE_FORMAT_TYPE_SNORM: + spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); + spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); + spe_csflt(p, v0, v0, 0); + spe_fm(p, v0, v0, tmp); + break; + case PIPE_FORMAT_TYPE_USCALED: + spe_cuflt(p, v0, v0, 0); + break; + case PIPE_FORMAT_TYPE_SSCALED: + spe_csflt(p, v0, v0, 0); + break; + } + + + if (count < 4) { + float_one = allocate_available_register(m); + spe_il(p, float_one, 1); + spe_cuflt(p, float_one, float_one, 0); + + if (count < 3) { + float_zero = allocate_available_register(m); + spe_il(p, float_zero, 0); + } + } + + release_register(m, tmp); + + emit_matrix_transpose(p, m, v0, v1, v2, v3, out_ptr, shuf_ptr, count); + + switch (count) { + case 1: + spe_stqd(p, float_zero, out_ptr, 1); + case 2: + spe_stqd(p, float_zero, out_ptr, 2); + case 3: + spe_stqd(p, float_one, out_ptr, 3); + } + + if (float_zero != -1) { + release_register(m, float_zero); + } + + if (float_one != -1) { + release_register(m, float_one); + } +} + + +void cell_update_vertex_fetch(struct draw_context *draw) +{ + struct cell_context *const cell = + (struct cell_context *) draw->driver_private; + register_mask m = ~0; + struct spe_function *p = &cell->attrib_fetch; + unsigned function_index[PIPE_ATTRIB_MAX]; + unsigned unique_attr_formats; + int out_ptr; + int in_ptr; + int shuf_ptr; + unsigned i; + unsigned j; + + + /* Determine how many unique input attribute formats there are. At the + * same time, store the index of the lowest numbered attribute that has + * the same format as any non-unique format. + */ + unique_attr_formats = 1; + function_index[0] = 0; + for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { + const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; + + for (j = 0; j < i; j++) { + if (curr_fmt == draw->vertex_element[j].src_format) { + break; + } + } + + if (j == i) { + unique_attr_formats++; + } + + function_index[i] = j; + } + + + /* Each fetch function can be a maximum of 34 instructions (note: this is + * actually a slight over-estimate). That means (34 * 4) = 136 bytes + * each maximum. + */ + spe_init_func(p, 136 * unique_attr_formats); + + + /* Registers 0, 1, and 2 are reserved by the ABI. + */ + allocate_register(&m, 0); + allocate_register(&m, 1); + allocate_register(&m, 2); + + + /* Allocate registers for the function's input parameters. + */ + out_ptr = allocate_register(&m, 3); + in_ptr = allocate_register(&m, 4); + shuf_ptr = allocate_register(&m, 5); + + + /* Generate code for the individual attribute fetch functions. + */ + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + unsigned offset; + + if (function_index[i] == i) { + cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr + - (void *) p->store); + + offset = 0; + emit_fetch(p, & m, in_ptr, &offset, out_ptr, shuf_ptr, + draw->vertex_element[i].src_format); + spe_bi(p, 0, 0, 0); + + /* Round up to the next 16-byte boundary. + */ + if ((((unsigned) p->store) & 0x0f) != 0) { + const unsigned align = ((unsigned) p->store) & 0x0f; + p->store = (uint32_t *) (((void *) p->store) + align); + } + } else { + /* Use the same function entry-point as a previously seen attribute + * with the same format. + */ + cell->attrib_fetch_offsets[i] = + cell->attrib_fetch_offsets[function_index[i]]; + } + } + + static first_time = 1; + if (first_time) { + first_time = 0; + const unsigned instructions = p->csr - p->store; + for (i = 0; i < instructions; i++) { + printf("\t.long\t0x%08x\n", p->store[i]); + } + } +} diff --git a/src/mesa/pipe/cell/ppu/cell_vertex_fetch.c b/src/mesa/pipe/cell/ppu/cell_vertex_fetch.c deleted file mode 100644 index f2432f4ff52..00000000000 --- a/src/mesa/pipe/cell/ppu/cell_vertex_fetch.c +++ /dev/null @@ -1,392 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <inttypes.h> -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "pipe/p_format.h" - -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" - -#include "pipe/cell/ppu/cell_context.h" -#include "ppc/rtasm/spe_asm.h" - -typedef uint64_t register_mask; - -int allocate_available_register(register_mask *m) -{ - unsigned i; - for (i = 0; i < 64; i++) { - const uint64_t mask = (1ULL << i); - - if ((m[0] & mask) != 0) { - m[0] &= ~mask; - return i; - } - } - - return -1; -} - - -int allocate_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) != 0); - - m[0] &= ~(1ULL << reg); - return reg; -} - - -void release_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) == 0); - - m[0] |= (1ULL << reg); -} - - -/** - * Emit a 4x4 matrix transpose operation - * - * \param p Function that the transpose operation is to be appended to - * \param m Live register mask - * \param row0 Register containing row 0 of the source matrix - * \param row1 Register containing row 1 of the source matrix - * \param row2 Register containing row 2 of the source matrix - * \param row3 Register containing row 3 of the source matrix - * \param dest_ptr Register containing the address of the destination matrix - * \param shuf_ptr Register containing the address of the shuffled data - * \param count Number of colums to actually be written to the destination - * - * \note - * This function assumes that the registers named by \c row0, \c row1, - * \c row2, and \c row3 are scratch and can be modified by the generated code. - * Furthermore, these registers will be released, via calls to - * \c release_register, by this function. - * - * \note - * This function requires that four temporary are available on entry. - */ -static void -emit_matrix_transpose(struct spe_function *p, register_mask *m, - unsigned row0, unsigned row1, unsigned row2, - unsigned row3, unsigned dest_ptr, - unsigned shuf_ptr, unsigned count) -{ - int shuf_hi = allocate_available_register(m); - int shuf_lo = allocate_available_register(m); - int t1 = allocate_available_register(m); - int t2 = allocate_available_register(m); - int t3; - int t4; - int col0; - int col1; - int col2; - int col3; - - - spe_lqd(p, shuf_hi, shuf_ptr, 3); - spe_lqd(p, shuf_lo, shuf_ptr, 4); - spe_shufb(p, t1, row0, row2, shuf_hi); - spe_shufb(p, t2, row0, row2, shuf_lo); - - - /* row0 and row2 are now no longer needed. Re-use those registers as - * temporaries. - */ - t3 = row0; - t4 = row2; - - spe_shufb(p, t3, row1, row3, shuf_hi); - spe_shufb(p, t4, row1, row3, shuf_lo); - - - /* row1 and row3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col0 = row1; - col1 = row3; - - spe_shufb(p, col0, t1, t3, shuf_hi); - if (count > 1) { - spe_shufb(p, col1, t1, t3, shuf_lo); - } - - /* t1 and t3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col2 = t1; - col3 = t3; - - if (count > 2) { - spe_shufb(p, col2, t2, t4, shuf_hi); - } - - if (count > 3) { - spe_shufb(p, col3, t2, t4, shuf_lo); - } - - - /* Store the results. Remember that the stqd instruction is encoded using - * the qword offset (stand-alone assemblers to the byte-offset to - * qword-offset conversion for you), so the byte-offset needs be divided by - * 16. - */ - switch (count) { - case 4: - spe_stqd(p, col3, dest_ptr, 3); - case 3: - spe_stqd(p, col2, dest_ptr, 2); - case 2: - spe_stqd(p, col1, dest_ptr, 1); - case 1: - spe_stqd(p, col0, dest_ptr, 0); - } - - - /* Release all of the temporary registers used. - */ - release_register(m, col0); - release_register(m, col1); - release_register(m, col2); - release_register(m, col3); - release_register(m, shuf_hi); - release_register(m, shuf_lo); - release_register(m, t2); - release_register(m, t4); -} - - -static void -emit_fetch(struct spe_function *p, register_mask *m, - unsigned in_ptr, unsigned *offset, - unsigned out_ptr, unsigned shuf_ptr, - enum pipe_format format) -{ - const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) - + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); - const unsigned type = pf_type(format); - const unsigned bytes = pf_size_x(format); - - int v0 = allocate_available_register(m); - int v1 = allocate_available_register(m); - int v2 = allocate_available_register(m); - int v3 = allocate_available_register(m); - int tmp = allocate_available_register(m); - int float_zero = -1; - int float_one = -1; - float scale_signed = 0.0; - float scale_unsigned = 0.0; - - spe_lqd(p, v0, in_ptr, 0 + offset[0]); - spe_lqd(p, v1, in_ptr, 1 + offset[0]); - spe_lqd(p, v2, in_ptr, 2 + offset[0]); - spe_lqd(p, v3, in_ptr, 3 + offset[0]); - offset[0] += 4; - - switch (bytes) { - case 1: - scale_signed = 1.0f / 127.0f; - scale_unsigned = 1.0f / 255.0f; - spe_lqd(p, tmp, shuf_ptr, 1); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 2: - scale_signed = 1.0f / 32767.0f; - scale_unsigned = 1.0f / 65535.0f; - spe_lqd(p, tmp, shuf_ptr, 2); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 4: - scale_signed = 1.0f / 2147483647.0f; - scale_unsigned = 1.0f / 4294967295.0f; - break; - default: - assert(0); - break; - } - - switch (type) { - case PIPE_FORMAT_TYPE_FLOAT: - break; - case PIPE_FORMAT_TYPE_UNORM: - spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); - spe_cuflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_SNORM: - spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); - spe_csflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_USCALED: - spe_cuflt(p, v0, v0, 0); - break; - case PIPE_FORMAT_TYPE_SSCALED: - spe_csflt(p, v0, v0, 0); - break; - } - - - if (count < 4) { - float_one = allocate_available_register(m); - spe_il(p, float_one, 1); - spe_cuflt(p, float_one, float_one, 0); - - if (count < 3) { - float_zero = allocate_available_register(m); - spe_il(p, float_zero, 0); - } - } - - release_register(m, tmp); - - emit_matrix_transpose(p, m, v0, v1, v2, v3, out_ptr, shuf_ptr, count); - - switch (count) { - case 1: - spe_stqd(p, float_zero, out_ptr, 1); - case 2: - spe_stqd(p, float_zero, out_ptr, 2); - case 3: - spe_stqd(p, float_one, out_ptr, 3); - } - - if (float_zero != -1) { - release_register(m, float_zero); - } - - if (float_one != -1) { - release_register(m, float_one); - } -} - - -void cell_update_vertex_fetch(struct draw_context *draw) -{ - struct cell_context *const cell = - (struct cell_context *) draw->driver_private; - register_mask m = ~0; - struct spe_function *p = &cell->attrib_fetch; - unsigned function_index[PIPE_ATTRIB_MAX]; - unsigned unique_attr_formats; - int out_ptr; - int in_ptr; - int shuf_ptr; - unsigned i; - unsigned j; - - - /* Determine how many unique input attribute formats there are. At the - * same time, store the index of the lowest numbered attribute that has - * the same format as any non-unique format. - */ - unique_attr_formats = 1; - function_index[0] = 0; - for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { - const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; - - for (j = 0; j < i; j++) { - if (curr_fmt == draw->vertex_element[j].src_format) { - break; - } - } - - if (j == i) { - unique_attr_formats++; - } - - function_index[i] = j; - } - - - /* Each fetch function can be a maximum of 34 instructions (note: this is - * actually a slight over-estimate). That means (34 * 4) = 136 bytes - * each maximum. - */ - spe_init_func(p, 136 * unique_attr_formats); - - - /* Registers 0, 1, and 2 are reserved by the ABI. - */ - allocate_register(&m, 0); - allocate_register(&m, 1); - allocate_register(&m, 2); - - - /* Allocate registers for the function's input parameters. - */ - out_ptr = allocate_register(&m, 3); - in_ptr = allocate_register(&m, 4); - shuf_ptr = allocate_register(&m, 5); - - - /* Generate code for the individual attribute fetch functions. - */ - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - unsigned offset; - - if (function_index[i] == i) { - cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr - - (void *) p->store); - - offset = 0; - emit_fetch(p, & m, in_ptr, &offset, out_ptr, shuf_ptr, - draw->vertex_element[i].src_format); - spe_bi(p, 0, 0, 0); - - /* Round up to the next 16-byte boundary. - */ - if ((((unsigned) p->store) & 0x0f) != 0) { - const unsigned align = ((unsigned) p->store) & 0x0f; - p->store = (uint32_t *) (((void *) p->store) + align); - } - } else { - /* Use the same function entry-point as a previously seen attribute - * with the same format. - */ - cell->attrib_fetch_offsets[i] = - cell->attrib_fetch_offsets[function_index[i]]; - } - } - - static first_time = 1; - if (first_time) { - first_time = 0; - const unsigned instructions = p->csr - p->store; - for (i = 0; i < instructions; i++) { - printf("\t.long\t0x%08x\n", p->store[i]); - } - } -} -- cgit v1.2.3 From 3320b1874e810583f95b93a89697b2955987b84f Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Fri, 15 Feb 2008 11:03:54 -0800 Subject: Cell: Enable code gen for SPE attribute fetch Doubles are still unsupported. --- src/gallium/drivers/cell/common.h | 15 +- src/gallium/drivers/cell/ppu/Makefile | 1 + src/gallium/drivers/cell/ppu/cell_context.h | 4 + src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 15 +- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 21 +- src/gallium/drivers/cell/spu/spu_main.c | 22 +- src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 477 +--------------------- src/gallium/drivers/cell/spu/spu_vertex_shader.h | 6 +- 8 files changed, 71 insertions(+), 490 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 4de514c3586..74b131fbefc 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -90,6 +90,7 @@ #define CELL_CMD_STATE_VS_ARRAY_INFO 16 #define CELL_CMD_STATE_BLEND 17 #define CELL_CMD_VS_EXECUTE 18 +#define CELL_CMD_STATE_ATTRIB_FETCH 19 #define CELL_NUM_BUFFERS 4 @@ -128,13 +129,19 @@ struct cell_command_clear_surface */ struct cell_array_info { - uint64_t base; /**< Base address of the 0th element. */ - uint attr; /**< Attribute that this state is for. */ - uint pitch; /**< Byte pitch from one entry to the next. */ - uint format; /**< Pipe format of each entry. */ + uint64_t base; /**< Base address of the 0th element. */ + uint attr; /**< Attribute that this state is for. */ + uint pitch; /**< Byte pitch from one entry to the next. */ + uint size; + uint function_offset; } ALIGN16_ATTRIB; +struct cell_attribute_fetch_code { + uint64_t base; + uint size; +}; + struct cell_shader_info { unsigned num_outputs; diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index a4c3f29e8a4..196ab777f54 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -34,6 +34,7 @@ SOURCES = \ cell_surface.c \ cell_texture.c \ cell_vbuf.c \ + cell_vertex_fetch.c \ cell_vertex_shader.c \ cell_winsys.c diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 6196c0c72f9..91f8e542a25 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -36,6 +36,7 @@ #include "draw/draw_vbuf.h" #include "cell_winsys.h" #include "cell/common.h" +#include "ppc/rtasm/spe_asm.h" struct cell_vbuf_render; @@ -111,6 +112,9 @@ struct cell_context /** [4] to ensure 16-byte alignment for each status word */ uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + + struct spe_function attrib_fetch; + unsigned attrib_fetch_offsets[PIPE_ATTRIB_MAX]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index f2432f4ff52..f10689a959e 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -27,10 +27,10 @@ #include "pipe/p_context.h" #include "pipe/p_format.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "../auxiliary/draw/draw_context.h" +#include "../auxiliary/draw/draw_private.h" -#include "pipe/cell/ppu/cell_context.h" +#include "cell_context.h" #include "ppc/rtasm/spe_asm.h" typedef uint64_t register_mask; @@ -380,13 +380,4 @@ void cell_update_vertex_fetch(struct draw_context *draw) cell->attrib_fetch_offsets[function_index[i]]; } } - - static first_time = 1; - if (first_time) { - first_time = 0; - const unsigned instructions = p->csr - p->store; - for (i = 0; i < instructions; i++) { - printf("\t.long\t0x%08x\n", p->store[i]); - } - } } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 0ba4506505e..6a1d3bc20a1 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -55,14 +55,32 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) uint64_t *batch; struct cell_array_info *array_info; unsigned i, j; + struct cell_attribute_fetch_code *cf; assert(draw->vs.queue_nr != 0); /* XXX: do this on statechange: */ draw_update_vertex_fetch(draw); + cell_update_vertex_fetch(draw); + + + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf)); + batch[0] = CELL_CMD_STATE_ATTRIB_FETCH; + cf = (struct cell_attribute_fetch_code *) (&batch[1]); + cf->base = cell->attrib_fetch.store; + cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr + - (void *) cell->attrib_fetch.store)); + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + const enum pipe_format format = draw->vertex_element[i].src_format; + const unsigned count = ((pf_size_x(format) != 0) + + (pf_size_y(format) != 0) + + (pf_size_z(format) != 0) + + (pf_size_w(format) != 0)); + const unsigned size = pf_size_x(format) * count; + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; @@ -72,7 +90,8 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; array_info->attr = i; array_info->pitch = draw->vertex_fetch.pitch[i]; - array_info->format = draw->vertex_element[i].src_format; + array_info->size = size; + array_info->function_offset = cell->attrib_fetch_offsets[i]; } batch = cell_batch_alloc(cell, sizeof(batch[0]) diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 1e7243b8639..fcbf0f841e6 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -54,6 +54,9 @@ struct spu_global spu; struct spu_vs_context draw; +static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX] + ALIGN16_ATTRIB; + /** * Tell the PPU that this SPU has finished copying a buffer to * local store and that it may be reused by the PPU. @@ -306,7 +309,8 @@ cmd_state_vs_array_info(const struct cell_array_info *vs_info) ASSERT(attr < PIPE_ATTRIB_MAX); draw.vertex_fetch.src_ptr[attr] = vs_info->base; draw.vertex_fetch.pitch[attr] = vs_info->pitch; - draw.vertex_fetch.format[attr] = vs_info->format; + draw.vertex_fetch.size[attr] = vs_info->size; + draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; draw.vertex_fetch.dirty = 1; } @@ -433,6 +437,22 @@ cmd_batch(uint opcode) cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; + case CELL_CMD_STATE_ATTRIB_FETCH: { + struct cell_attribute_fetch_code *code = + (struct cell_attribute_fetch_code *) &buffer[pos+1]; + + mfc_get(attribute_fetch_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + draw.vertex_fetch.code = attribute_fetch_code_buffer; + pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); + break; + } default: printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); ASSERT(0); diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 45e3c26c001..55c6c287175 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * (C) Copyright IBM Corporation 2008 * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -28,10 +29,10 @@ /* * Authors: * Keith Whitwell <keith@tungstengraphics.com> + * Ian Romanick <idr@us.ibm.com> */ #include <spu_mfcio.h> -#include <transpose_matrix4x4.h> #include "pipe/p_util.h" #include "pipe/p_state.h" @@ -59,6 +60,10 @@ #define DRAW_DBG 0 +typedef void (*spu_fetch_func)(qword *out, const qword *in, + const qword *shuffle_data); + + static const qword fetch_shuffle_data[] = { /* Shuffle used by CVT_64_FLOAT */ @@ -97,22 +102,6 @@ static const qword fetch_shuffle_data[] = { }; -static INLINE void -trans4x4(qword row0, qword row1, qword row2, qword row3, qword *out, - const qword *shuffle) -{ - qword t1 = si_shufb(row0, row2, shuffle[3]); - qword t2 = si_shufb(row0, row2, shuffle[4]); - qword t3 = si_shufb(row1, row3, shuffle[3]); - qword t4 = si_shufb(row1, row3, shuffle[4]); - - out[0] = si_shufb(t1, t3, shuffle[3]); - out[1] = si_shufb(t1, t3, shuffle[4]); - out[2] = si_shufb(t2, t4, shuffle[3]); - out[3] = si_shufb(t2, t4, shuffle[4]); -} - - /** * Fetch between 1 and 32 bytes from an unaligned address */ @@ -151,446 +140,6 @@ fetch_unaligned(qword *dst, unsigned ea, unsigned size) } -#define CVT_32_FLOAT(q, s) (*(q)) - -static INLINE qword -CVT_64_FLOAT(const qword *qw, const qword *shuffle) -{ - qword a = si_frds(qw[0]); - qword b = si_frds(si_rotqbyi(qw[0], 8)); - qword c = si_frds(qw[1]); - qword d = si_frds(si_rotqbyi(qw[1], 8)); - - qword ab = si_shufb(a, b, shuffle[0]); - qword cd = si_shufb(c, d, si_rotqbyi(shuffle[0], 8)); - - return si_or(ab, cd); -} - - -static INLINE qword -CVT_8_USCALED(const qword *qw, const qword *shuffle) -{ - return si_cuflt(si_shufb(*qw, *qw, shuffle[1]), 0); -} - - -static INLINE qword -CVT_16_USCALED(const qword *qw, const qword *shuffle) -{ - return si_cuflt(si_shufb(*qw, *qw, shuffle[2]), 0); -} - - -static INLINE qword -CVT_32_USCALED(const qword *qw, const qword *shuffle) -{ - (void) shuffle; - return si_cuflt(*qw, 0); -} - -static INLINE qword -CVT_8_SSCALED(const qword *qw, const qword *shuffle) -{ - return si_csflt(si_shufb(*qw, *qw, shuffle[1]), 0); -} - - -static INLINE qword -CVT_16_SSCALED(const qword *qw, const qword *shuffle) -{ - return si_csflt(si_shufb(*qw, *qw, shuffle[2]), 0); -} - - -static INLINE qword -CVT_32_SSCALED(const qword *qw, const qword *shuffle) -{ - (void) shuffle; - return si_csflt(*qw, 0); -} - - -static INLINE qword -CVT_8_UNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 255.0f); - return si_fm(CVT_8_USCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_16_UNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 65535.0f); - return si_fm(CVT_16_USCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_32_UNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 4294967295.0f); - return si_fm(CVT_32_USCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_8_SNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 127.0f); - return si_fm(CVT_8_SSCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_16_SNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 32767.0f); - return si_fm(CVT_16_SSCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_32_SNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 2147483647.0f); - return si_fm(CVT_32_SSCALED(qw, shuffle), scale); -} - -#define SZ_4 si_il(0U) -#define SZ_3 si_fsmbi(0x000f) -#define SZ_2 si_fsmbi(0x00ff) -#define SZ_1 si_fsmbi(0x0fff) - -/** - * Fetch a float[4] vertex attribute from memory, doing format/type - * conversion as needed. - * - * This is probably needed/dupliocated elsewhere, eg format - * conversion, texture sampling etc. - */ -#define FETCH_ATTRIB( NAME, SZ, CVT, N ) \ -static void \ -fetch_##NAME(qword *out, const qword *in, qword defaults, \ - const qword *shuffle) \ -{ \ - qword tmp[4]; \ - \ - tmp[0] = si_selb(CVT(in + (0 * N), shuffle), defaults, SZ); \ - tmp[1] = si_selb(CVT(in + (1 * N), shuffle), defaults, SZ); \ - tmp[2] = si_selb(CVT(in + (2 * N), shuffle), defaults, SZ); \ - tmp[3] = si_selb(CVT(in + (3 * N), shuffle), defaults, SZ); \ - trans4x4(tmp[0], tmp[1], tmp[2], tmp[3], out, shuffle); \ -} - - -FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT, 2 ) -FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT, 2 ) -FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT, 2 ) -FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT, 2 ) - -FETCH_ATTRIB( R32G32B32A32_FLOAT, SZ_4, CVT_32_FLOAT, 1 ) -FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT, 1 ) -FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT, 1 ) -FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT, 1 ) - -FETCH_ATTRIB( R32G32B32A32_USCALED, SZ_4, CVT_32_USCALED, 1 ) -FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED, 1 ) -FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED, 1 ) -FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED, 1 ) - -FETCH_ATTRIB( R32G32B32A32_SSCALED, SZ_4, CVT_32_SSCALED, 1 ) -FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED, 1 ) -FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED, 1 ) -FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED, 1 ) - -FETCH_ATTRIB( R32G32B32A32_UNORM, SZ_4, CVT_32_UNORM, 1 ) -FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM, 1 ) -FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM, 1 ) -FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM, 1 ) - -FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM, 1 ) -FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM, 1 ) -FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM, 1 ) -FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM, 1 ) - -FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED, 1 ) -FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED, 1 ) -FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED, 1 ) -FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED, 1 ) - -FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED, 1 ) -FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED, 1 ) -FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED, 1 ) -FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED, 1 ) - -FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM, 1 ) -FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM, 1 ) -FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM, 1 ) -FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM, 1 ) - -FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM, 1 ) -FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM, 1 ) -FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM, 1 ) -FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM, 1 ) - -FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED, 1 ) -FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED, 1 ) -FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED, 1 ) -FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED, 1 ) - -FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED, 1 ) -FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED, 1 ) -FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED, 1 ) -FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED, 1 ) - -FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM, 1 ) -FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM, 1 ) -FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM, 1 ) -FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM, 1 ) - -FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM, 1 ) -FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM, 1 ) -FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM, 1 ) -FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM, 1 ) - -FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM, 1 ) - - - -static spu_fetch_func get_fetch_func( enum pipe_format format ) -{ - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return fetch_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return fetch_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return fetch_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return fetch_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return fetch_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return fetch_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return fetch_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return fetch_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return fetch_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return fetch_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return fetch_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return fetch_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return fetch_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return fetch_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return fetch_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return fetch_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return fetch_R32_SNORM; - case PIPE_FORMAT_R32G32_SNORM: - return fetch_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return fetch_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return fetch_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return fetch_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return fetch_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return fetch_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return fetch_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return fetch_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return fetch_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return fetch_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return fetch_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return fetch_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return fetch_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return fetch_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return fetch_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return fetch_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return fetch_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return fetch_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return fetch_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return fetch_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return fetch_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return fetch_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return fetch_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return fetch_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return fetch_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return fetch_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return fetch_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return fetch_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return fetch_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return fetch_R8G8B8_USCALED; - case PIPE_FORMAT_R8G8B8A8_USCALED: - return fetch_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return fetch_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return fetch_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return fetch_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return fetch_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return fetch_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return fetch_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return fetch_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return fetch_R8G8B8A8_SSCALED; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - return fetch_A8R8G8B8_UNORM; - - case 0: - return NULL; /* not sure why this is needed */ - - default: - assert(0); - return NULL; - } -} - - -static unsigned get_vertex_size( enum pipe_format format ) -{ - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return 8; - case PIPE_FORMAT_R64G64_FLOAT: - return 2 * 8; - case PIPE_FORMAT_R64G64B64_FLOAT: - return 3 * 8; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return 4 * 8; - - case PIPE_FORMAT_R32_SSCALED: - case PIPE_FORMAT_R32_SNORM: - case PIPE_FORMAT_R32_USCALED: - case PIPE_FORMAT_R32_UNORM: - case PIPE_FORMAT_R32_FLOAT: - return 4; - case PIPE_FORMAT_R32G32_SSCALED: - case PIPE_FORMAT_R32G32_SNORM: - case PIPE_FORMAT_R32G32_USCALED: - case PIPE_FORMAT_R32G32_UNORM: - case PIPE_FORMAT_R32G32_FLOAT: - return 2 * 4; - case PIPE_FORMAT_R32G32B32_SSCALED: - case PIPE_FORMAT_R32G32B32_SNORM: - case PIPE_FORMAT_R32G32B32_USCALED: - case PIPE_FORMAT_R32G32B32_UNORM: - case PIPE_FORMAT_R32G32B32_FLOAT: - return 3 * 4; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_USCALED: - case PIPE_FORMAT_R32G32B32A32_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return 4 * 4; - - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_R16_UNORM: - case PIPE_FORMAT_R16_USCALED: - return 2; - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16_SNORM: - case PIPE_FORMAT_R16G16_USCALED: - case PIPE_FORMAT_R16G16_UNORM: - return 2 * 2; - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16_SNORM: - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16B16_UNORM: - return 3 * 2; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SNORM: - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16G16B16A16_UNORM: - return 4 * 2; - - case PIPE_FORMAT_R8_SSCALED: - case PIPE_FORMAT_R8_SNORM: - case PIPE_FORMAT_R8_USCALED: - case PIPE_FORMAT_R8_UNORM: - return 1; - case PIPE_FORMAT_R8G8_SSCALED: - case PIPE_FORMAT_R8G8_SNORM: - case PIPE_FORMAT_R8G8_USCALED: - case PIPE_FORMAT_R8G8_UNORM: - return 2 * 1; - case PIPE_FORMAT_R8G8B8_SSCALED: - case PIPE_FORMAT_R8G8B8_SNORM: - case PIPE_FORMAT_R8G8B8_USCALED: - case PIPE_FORMAT_R8G8B8_UNORM: - return 3 * 1; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_SSCALED: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_USCALED: - case PIPE_FORMAT_R8G8B8A8_UNORM: - return 4 * 1; - - case 0: - return 0; /* not sure why this is needed */ - - default: - assert(0); - return 0; - } -} - - /** * Fetch vertex attributes for 'count' vertices. */ @@ -612,10 +161,10 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, /* loop over vertex attributes (vertex shader inputs) */ for (attr = 0; attr < nr_attrs; attr++) { - const qword default_values = (qword)(vec_float4){ 0.0, 0.0, 0.0, 1.0 }; const unsigned pitch = draw->vertex_fetch.pitch[attr]; const uint64_t src = draw->vertex_fetch.src_ptr[attr]; - const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; + const spu_fetch_func fetch = (spu_fetch_func) + (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]); unsigned i; unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; @@ -644,8 +193,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, /* Convert all 4 vertices to vectors of float. */ - (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, default_values, - fetch_shuffle_data); + (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data); } } @@ -662,12 +210,5 @@ void spu_update_vertex_fetch( struct spu_vs_context *draw ) } - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - draw->vertex_fetch.fetch[i] = - get_fetch_func(draw->vertex_fetch.format[i]); - draw->vertex_fetch.size[i] = - get_vertex_size(draw->vertex_fetch.format[i]); - } - draw->vertex_fetch.fetch_func = generic_vertex_fetch; } diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h index b5bf31e67db..0fb0bc28d03 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -6,8 +6,6 @@ struct spu_vs_context; -typedef void (*spu_fetch_func)(qword *out, const qword *in, qword defaults, - const qword *shuffle_data); typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, struct spu_exec_machine *machine, const unsigned *elts, @@ -20,12 +18,12 @@ struct spu_vs_context { uint64_t src_ptr[PIPE_ATTRIB_MAX]; unsigned pitch[PIPE_ATTRIB_MAX]; unsigned size[PIPE_ATTRIB_MAX]; - enum pipe_format format[PIPE_ATTRIB_MAX]; + unsigned code_offset[PIPE_ATTRIB_MAX]; unsigned nr_attrs; boolean dirty; - spu_fetch_func fetch[PIPE_ATTRIB_MAX]; spu_full_fetch_func fetch_func; + void *code; } vertex_fetch; /* Clip derived state: -- cgit v1.2.3 From 26add9288c88108e3485ffc57c51ea9bdc0ee719 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 16 Feb 2008 17:23:12 +1100 Subject: nouveau: match gallium code reorginisation. That was... fun.. --- src/gallium/drivers/Makefile | 3 +- src/gallium/drivers/nouveau/nouveau_bo.h | 51 + src/gallium/drivers/nouveau/nouveau_channel.h | 39 + src/gallium/drivers/nouveau/nouveau_class.h | 6134 ++++++++++++++++++++ src/gallium/drivers/nouveau/nouveau_gldefs.h | 196 + src/gallium/drivers/nouveau/nouveau_grobj.h | 35 + src/gallium/drivers/nouveau/nouveau_notifier.h | 43 + src/gallium/drivers/nouveau/nouveau_push.h | 83 + src/gallium/drivers/nouveau/nouveau_pushbuf.h | 32 + src/gallium/drivers/nouveau/nouveau_resource.h | 37 + src/gallium/drivers/nouveau/nouveau_stateobj.h | 141 + src/gallium/drivers/nouveau/nouveau_winsys.h | 61 + src/gallium/drivers/nv30/Makefile | 29 + src/gallium/drivers/nv30/nv30_clear.c | 12 + src/gallium/drivers/nv30/nv30_context.c | 431 ++ src/gallium/drivers/nv30/nv30_context.h | 136 + src/gallium/drivers/nv30/nv30_draw.c | 62 + src/gallium/drivers/nv30/nv30_fragprog.c | 835 +++ src/gallium/drivers/nv30/nv30_fragtex.c | 160 + src/gallium/drivers/nv30/nv30_miptree.c | 105 + src/gallium/drivers/nv30/nv30_query.c | 113 + src/gallium/drivers/nv30/nv30_shader.h | 490 ++ src/gallium/drivers/nv30/nv30_state.c | 739 +++ src/gallium/drivers/nv30/nv30_state.h | 147 + src/gallium/drivers/nv30/nv30_state_emit.c | 83 + src/gallium/drivers/nv30/nv30_surface.c | 137 + src/gallium/drivers/nv30/nv30_vbo.c | 425 ++ src/gallium/drivers/nv30/nv30_vertprog.c | 777 +++ src/gallium/drivers/nv40/Makefile | 29 + src/gallium/drivers/nv40/nv40_clear.c | 12 + src/gallium/drivers/nv40/nv40_context.c | 312 + src/gallium/drivers/nv40/nv40_context.h | 153 + src/gallium/drivers/nv40/nv40_dma.h | 66 + src/gallium/drivers/nv40/nv40_draw.c | 62 + src/gallium/drivers/nv40/nv40_fragprog.c | 842 +++ src/gallium/drivers/nv40/nv40_fragtex.c | 151 + src/gallium/drivers/nv40/nv40_miptree.c | 104 + src/gallium/drivers/nv40/nv40_query.c | 113 + src/gallium/drivers/nv40/nv40_shader.h | 554 ++ src/gallium/drivers/nv40/nv40_state.c | 823 +++ src/gallium/drivers/nv40/nv40_state.h | 80 + src/gallium/drivers/nv40/nv40_state_emit.c | 77 + src/gallium/drivers/nv40/nv40_surface.c | 137 + src/gallium/drivers/nv40/nv40_vbo.c | 424 ++ src/gallium/drivers/nv40/nv40_vertprog.c | 790 +++ src/gallium/drivers/nv50/Makefile | 25 + src/gallium/drivers/nv50/nv50_clear.c | 12 + src/gallium/drivers/nv50/nv50_context.c | 202 + src/gallium/drivers/nv50/nv50_context.h | 57 + src/gallium/drivers/nv50/nv50_draw.c | 55 + src/gallium/drivers/nv50/nv50_miptree.c | 25 + src/gallium/drivers/nv50/nv50_query.c | 47 + src/gallium/drivers/nv50/nv50_state.c | 213 + src/gallium/drivers/nv50/nv50_state.h | 7 + src/gallium/drivers/nv50/nv50_surface.c | 75 + src/gallium/drivers/nv50/nv50_vbo.c | 24 + src/gallium/winsys/dri/nouveau/Makefile | 43 + src/gallium/winsys/dri/nouveau/nouveau_bo.c | 402 ++ src/gallium/winsys/dri/nouveau/nouveau_channel.c | 118 + src/gallium/winsys/dri/nouveau/nouveau_context.c | 206 + src/gallium/winsys/dri/nouveau/nouveau_context.h | 89 + src/gallium/winsys/dri/nouveau/nouveau_device.c | 146 + src/gallium/winsys/dri/nouveau/nouveau_device.h | 29 + src/gallium/winsys/dri/nouveau/nouveau_dma.c | 219 + src/gallium/winsys/dri/nouveau/nouveau_dma.h | 143 + src/gallium/winsys/dri/nouveau/nouveau_dri.h | 28 + src/gallium/winsys/dri/nouveau/nouveau_drmif.h | 304 + src/gallium/winsys/dri/nouveau/nouveau_fence.c | 215 + src/gallium/winsys/dri/nouveau/nouveau_grobj.c | 107 + src/gallium/winsys/dri/nouveau/nouveau_local.h | 89 + src/gallium/winsys/dri/nouveau/nouveau_lock.c | 94 + src/gallium/winsys/dri/nouveau/nouveau_notifier.c | 137 + src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c | 261 + src/gallium/winsys/dri/nouveau/nouveau_resource.c | 111 + src/gallium/winsys/dri/nouveau/nouveau_screen.c | 308 + src/gallium/winsys/dri/nouveau/nouveau_screen.h | 19 + .../winsys/dri/nouveau/nouveau_swapbuffers.c | 86 + .../winsys/dri/nouveau/nouveau_swapbuffers.h | 10 + src/gallium/winsys/dri/nouveau/nouveau_winsys.c | 124 + .../winsys/dri/nouveau/nouveau_winsys_pipe.c | 196 + .../winsys/dri/nouveau/nouveau_winsys_pipe.h | 34 + .../winsys/dri/nouveau/nouveau_winsys_softpipe.c | 83 + src/gallium/winsys/dri/nouveau/nv04_surface.c | 226 + src/gallium/winsys/dri/nouveau/nv50_surface.c | 160 + src/mesa/drivers/dri/nouveau_winsys/Makefile | 43 - src/mesa/drivers/dri/nouveau_winsys/nouveau_bo.c | 402 -- .../drivers/dri/nouveau_winsys/nouveau_channel.c | 118 - .../drivers/dri/nouveau_winsys/nouveau_context.c | 206 - .../drivers/dri/nouveau_winsys/nouveau_context.h | 89 - .../drivers/dri/nouveau_winsys/nouveau_device.c | 146 - .../drivers/dri/nouveau_winsys/nouveau_device.h | 29 - src/mesa/drivers/dri/nouveau_winsys/nouveau_dma.c | 219 - src/mesa/drivers/dri/nouveau_winsys/nouveau_dma.h | 143 - src/mesa/drivers/dri/nouveau_winsys/nouveau_dri.h | 28 - .../drivers/dri/nouveau_winsys/nouveau_drmif.h | 304 - .../drivers/dri/nouveau_winsys/nouveau_fence.c | 215 - .../drivers/dri/nouveau_winsys/nouveau_grobj.c | 107 - .../drivers/dri/nouveau_winsys/nouveau_local.h | 89 - src/mesa/drivers/dri/nouveau_winsys/nouveau_lock.c | 94 - .../drivers/dri/nouveau_winsys/nouveau_notifier.c | 137 - .../drivers/dri/nouveau_winsys/nouveau_pushbuf.c | 261 - .../drivers/dri/nouveau_winsys/nouveau_resource.c | 111 - .../drivers/dri/nouveau_winsys/nouveau_screen.c | 308 - .../drivers/dri/nouveau_winsys/nouveau_screen.h | 19 - .../dri/nouveau_winsys/nouveau_swapbuffers.c | 86 - .../dri/nouveau_winsys/nouveau_swapbuffers.h | 10 - .../drivers/dri/nouveau_winsys/nouveau_winsys.c | 124 - .../dri/nouveau_winsys/nouveau_winsys_pipe.c | 196 - .../dri/nouveau_winsys/nouveau_winsys_pipe.h | 34 - .../dri/nouveau_winsys/nouveau_winsys_softpipe.c | 83 - src/mesa/drivers/dri/nouveau_winsys/nv04_surface.c | 226 - src/mesa/drivers/dri/nouveau_winsys/nv50_surface.c | 160 - src/mesa/pipe/nouveau/nouveau_bo.h | 51 - src/mesa/pipe/nouveau/nouveau_channel.h | 39 - src/mesa/pipe/nouveau/nouveau_class.h | 6134 -------------------- src/mesa/pipe/nouveau/nouveau_gldefs.h | 196 - src/mesa/pipe/nouveau/nouveau_grobj.h | 35 - src/mesa/pipe/nouveau/nouveau_notifier.h | 43 - src/mesa/pipe/nouveau/nouveau_push.h | 83 - src/mesa/pipe/nouveau/nouveau_pushbuf.h | 32 - src/mesa/pipe/nouveau/nouveau_resource.h | 37 - src/mesa/pipe/nouveau/nouveau_stateobj.h | 141 - src/mesa/pipe/nouveau/nouveau_winsys.h | 61 - src/mesa/pipe/nv30/Makefile | 29 - src/mesa/pipe/nv30/nv30_clear.c | 12 - src/mesa/pipe/nv30/nv30_context.c | 431 -- src/mesa/pipe/nv30/nv30_context.h | 136 - src/mesa/pipe/nv30/nv30_draw.c | 62 - src/mesa/pipe/nv30/nv30_fragprog.c | 835 --- src/mesa/pipe/nv30/nv30_fragtex.c | 160 - src/mesa/pipe/nv30/nv30_miptree.c | 105 - src/mesa/pipe/nv30/nv30_query.c | 113 - src/mesa/pipe/nv30/nv30_shader.h | 490 -- src/mesa/pipe/nv30/nv30_state.c | 739 --- src/mesa/pipe/nv30/nv30_state.h | 147 - src/mesa/pipe/nv30/nv30_state_emit.c | 83 - src/mesa/pipe/nv30/nv30_surface.c | 136 - src/mesa/pipe/nv30/nv30_vbo.c | 425 -- src/mesa/pipe/nv30/nv30_vertprog.c | 777 --- src/mesa/pipe/nv40/Makefile | 29 - src/mesa/pipe/nv40/nv40_clear.c | 12 - src/mesa/pipe/nv40/nv40_context.c | 312 - src/mesa/pipe/nv40/nv40_context.h | 153 - src/mesa/pipe/nv40/nv40_dma.h | 66 - src/mesa/pipe/nv40/nv40_draw.c | 62 - src/mesa/pipe/nv40/nv40_fragprog.c | 842 --- src/mesa/pipe/nv40/nv40_fragtex.c | 151 - src/mesa/pipe/nv40/nv40_miptree.c | 104 - src/mesa/pipe/nv40/nv40_query.c | 113 - src/mesa/pipe/nv40/nv40_shader.h | 554 -- src/mesa/pipe/nv40/nv40_state.c | 823 --- src/mesa/pipe/nv40/nv40_state.h | 80 - src/mesa/pipe/nv40/nv40_state_emit.c | 77 - src/mesa/pipe/nv40/nv40_surface.c | 136 - src/mesa/pipe/nv40/nv40_vbo.c | 424 -- src/mesa/pipe/nv40/nv40_vertprog.c | 790 --- src/mesa/pipe/nv50/Makefile | 25 - src/mesa/pipe/nv50/nv50_clear.c | 12 - src/mesa/pipe/nv50/nv50_context.c | 202 - src/mesa/pipe/nv50/nv50_context.h | 57 - src/mesa/pipe/nv50/nv50_draw.c | 55 - src/mesa/pipe/nv50/nv50_miptree.c | 25 - src/mesa/pipe/nv50/nv50_query.c | 47 - src/mesa/pipe/nv50/nv50_state.c | 213 - src/mesa/pipe/nv50/nv50_state.h | 7 - src/mesa/pipe/nv50/nv50_surface.c | 74 - src/mesa/pipe/nv50/nv50_vbo.c | 24 - 167 files changed, 20993 insertions(+), 20989 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nouveau_bo.h create mode 100644 src/gallium/drivers/nouveau/nouveau_channel.h create mode 100644 src/gallium/drivers/nouveau/nouveau_class.h create mode 100644 src/gallium/drivers/nouveau/nouveau_gldefs.h create mode 100644 src/gallium/drivers/nouveau/nouveau_grobj.h create mode 100644 src/gallium/drivers/nouveau/nouveau_notifier.h create mode 100644 src/gallium/drivers/nouveau/nouveau_push.h create mode 100644 src/gallium/drivers/nouveau/nouveau_pushbuf.h create mode 100644 src/gallium/drivers/nouveau/nouveau_resource.h create mode 100644 src/gallium/drivers/nouveau/nouveau_stateobj.h create mode 100644 src/gallium/drivers/nouveau/nouveau_winsys.h create mode 100644 src/gallium/drivers/nv30/Makefile create mode 100644 src/gallium/drivers/nv30/nv30_clear.c create mode 100644 src/gallium/drivers/nv30/nv30_context.c create mode 100644 src/gallium/drivers/nv30/nv30_context.h create mode 100644 src/gallium/drivers/nv30/nv30_draw.c create mode 100644 src/gallium/drivers/nv30/nv30_fragprog.c create mode 100644 src/gallium/drivers/nv30/nv30_fragtex.c create mode 100644 src/gallium/drivers/nv30/nv30_miptree.c create mode 100644 src/gallium/drivers/nv30/nv30_query.c create mode 100644 src/gallium/drivers/nv30/nv30_shader.h create mode 100644 src/gallium/drivers/nv30/nv30_state.c create mode 100644 src/gallium/drivers/nv30/nv30_state.h create mode 100644 src/gallium/drivers/nv30/nv30_state_emit.c create mode 100644 src/gallium/drivers/nv30/nv30_surface.c create mode 100644 src/gallium/drivers/nv30/nv30_vbo.c create mode 100644 src/gallium/drivers/nv30/nv30_vertprog.c create mode 100644 src/gallium/drivers/nv40/Makefile create mode 100644 src/gallium/drivers/nv40/nv40_clear.c create mode 100644 src/gallium/drivers/nv40/nv40_context.c create mode 100644 src/gallium/drivers/nv40/nv40_context.h create mode 100644 src/gallium/drivers/nv40/nv40_dma.h create mode 100644 src/gallium/drivers/nv40/nv40_draw.c create mode 100644 src/gallium/drivers/nv40/nv40_fragprog.c create mode 100644 src/gallium/drivers/nv40/nv40_fragtex.c create mode 100644 src/gallium/drivers/nv40/nv40_miptree.c create mode 100644 src/gallium/drivers/nv40/nv40_query.c create mode 100644 src/gallium/drivers/nv40/nv40_shader.h create mode 100644 src/gallium/drivers/nv40/nv40_state.c create mode 100644 src/gallium/drivers/nv40/nv40_state.h create mode 100644 src/gallium/drivers/nv40/nv40_state_emit.c create mode 100644 src/gallium/drivers/nv40/nv40_surface.c create mode 100644 src/gallium/drivers/nv40/nv40_vbo.c create mode 100644 src/gallium/drivers/nv40/nv40_vertprog.c create mode 100644 src/gallium/drivers/nv50/Makefile create mode 100644 src/gallium/drivers/nv50/nv50_clear.c create mode 100644 src/gallium/drivers/nv50/nv50_context.c create mode 100644 src/gallium/drivers/nv50/nv50_context.h create mode 100644 src/gallium/drivers/nv50/nv50_draw.c create mode 100644 src/gallium/drivers/nv50/nv50_miptree.c create mode 100644 src/gallium/drivers/nv50/nv50_query.c create mode 100644 src/gallium/drivers/nv50/nv50_state.c create mode 100644 src/gallium/drivers/nv50/nv50_state.h create mode 100644 src/gallium/drivers/nv50/nv50_surface.c create mode 100644 src/gallium/drivers/nv50/nv50_vbo.c create mode 100644 src/gallium/winsys/dri/nouveau/Makefile create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_bo.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_channel.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_context.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_context.h create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_device.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_device.h create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_dma.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_dma.h create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_dri.h create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_drmif.h create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_fence.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_grobj.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_local.h create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_lock.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_notifier.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_resource.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_screen.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_screen.h create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.h create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_winsys.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.h create mode 100644 src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c create mode 100644 src/gallium/winsys/dri/nouveau/nv04_surface.c create mode 100644 src/gallium/winsys/dri/nouveau/nv50_surface.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/Makefile delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_bo.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_channel.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_context.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_context.h delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_device.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_device.h delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_dma.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_dma.h delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_dri.h delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_drmif.h delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_fence.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_grobj.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_local.h delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_lock.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_notifier.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_pushbuf.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_resource.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_screen.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_screen.h delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_swapbuffers.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_swapbuffers.h delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_pipe.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_pipe.h delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_softpipe.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nv04_surface.c delete mode 100644 src/mesa/drivers/dri/nouveau_winsys/nv50_surface.c delete mode 100644 src/mesa/pipe/nouveau/nouveau_bo.h delete mode 100644 src/mesa/pipe/nouveau/nouveau_channel.h delete mode 100644 src/mesa/pipe/nouveau/nouveau_class.h delete mode 100644 src/mesa/pipe/nouveau/nouveau_gldefs.h delete mode 100644 src/mesa/pipe/nouveau/nouveau_grobj.h delete mode 100644 src/mesa/pipe/nouveau/nouveau_notifier.h delete mode 100644 src/mesa/pipe/nouveau/nouveau_push.h delete mode 100644 src/mesa/pipe/nouveau/nouveau_pushbuf.h delete mode 100644 src/mesa/pipe/nouveau/nouveau_resource.h delete mode 100644 src/mesa/pipe/nouveau/nouveau_stateobj.h delete mode 100644 src/mesa/pipe/nouveau/nouveau_winsys.h delete mode 100644 src/mesa/pipe/nv30/Makefile delete mode 100644 src/mesa/pipe/nv30/nv30_clear.c delete mode 100644 src/mesa/pipe/nv30/nv30_context.c delete mode 100644 src/mesa/pipe/nv30/nv30_context.h delete mode 100644 src/mesa/pipe/nv30/nv30_draw.c delete mode 100644 src/mesa/pipe/nv30/nv30_fragprog.c delete mode 100644 src/mesa/pipe/nv30/nv30_fragtex.c delete mode 100644 src/mesa/pipe/nv30/nv30_miptree.c delete mode 100644 src/mesa/pipe/nv30/nv30_query.c delete mode 100644 src/mesa/pipe/nv30/nv30_shader.h delete mode 100644 src/mesa/pipe/nv30/nv30_state.c delete mode 100644 src/mesa/pipe/nv30/nv30_state.h delete mode 100644 src/mesa/pipe/nv30/nv30_state_emit.c delete mode 100644 src/mesa/pipe/nv30/nv30_surface.c delete mode 100644 src/mesa/pipe/nv30/nv30_vbo.c delete mode 100644 src/mesa/pipe/nv30/nv30_vertprog.c delete mode 100644 src/mesa/pipe/nv40/Makefile delete mode 100644 src/mesa/pipe/nv40/nv40_clear.c delete mode 100644 src/mesa/pipe/nv40/nv40_context.c delete mode 100644 src/mesa/pipe/nv40/nv40_context.h delete mode 100644 src/mesa/pipe/nv40/nv40_dma.h delete mode 100644 src/mesa/pipe/nv40/nv40_draw.c delete mode 100644 src/mesa/pipe/nv40/nv40_fragprog.c delete mode 100644 src/mesa/pipe/nv40/nv40_fragtex.c delete mode 100644 src/mesa/pipe/nv40/nv40_miptree.c delete mode 100644 src/mesa/pipe/nv40/nv40_query.c delete mode 100644 src/mesa/pipe/nv40/nv40_shader.h delete mode 100644 src/mesa/pipe/nv40/nv40_state.c delete mode 100644 src/mesa/pipe/nv40/nv40_state.h delete mode 100644 src/mesa/pipe/nv40/nv40_state_emit.c delete mode 100644 src/mesa/pipe/nv40/nv40_surface.c delete mode 100644 src/mesa/pipe/nv40/nv40_vbo.c delete mode 100644 src/mesa/pipe/nv40/nv40_vertprog.c delete mode 100644 src/mesa/pipe/nv50/Makefile delete mode 100644 src/mesa/pipe/nv50/nv50_clear.c delete mode 100644 src/mesa/pipe/nv50/nv50_context.c delete mode 100644 src/mesa/pipe/nv50/nv50_context.h delete mode 100644 src/mesa/pipe/nv50/nv50_draw.c delete mode 100644 src/mesa/pipe/nv50/nv50_miptree.c delete mode 100644 src/mesa/pipe/nv50/nv50_query.c delete mode 100644 src/mesa/pipe/nv50/nv50_state.c delete mode 100644 src/mesa/pipe/nv50/nv50_state.h delete mode 100644 src/mesa/pipe/nv50/nv50_surface.c delete mode 100644 src/mesa/pipe/nv50/nv50_vbo.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile index c0345a9cb54..58df6c50093 100644 --- a/src/gallium/drivers/Makefile +++ b/src/gallium/drivers/Makefile @@ -6,7 +6,8 @@ ifeq ($(CONFIG_NAME), linux-cell) CELL_DIR = cell endif -SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) +SUBDIRS = softpipe i915simple i965simple nv30 nv40 nv50 \ + failover pipebuffer $(CELL_DIR) default: subdirs diff --git a/src/gallium/drivers/nouveau/nouveau_bo.h b/src/gallium/drivers/nouveau/nouveau_bo.h new file mode 100644 index 00000000000..18020e9c652 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_bo.h @@ -0,0 +1,51 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_BO_H__ +#define __NOUVEAU_BO_H__ + +/* Relocation/Buffer type flags */ +#define NOUVEAU_BO_VRAM (1 << 0) +#define NOUVEAU_BO_GART (1 << 1) +#define NOUVEAU_BO_RD (1 << 2) +#define NOUVEAU_BO_WR (1 << 3) +#define NOUVEAU_BO_RDWR (NOUVEAU_BO_RD | NOUVEAU_BO_WR) +#define NOUVEAU_BO_MAP (1 << 4) +#define NOUVEAU_BO_PIN (1 << 5) +#define NOUVEAU_BO_LOW (1 << 6) +#define NOUVEAU_BO_HIGH (1 << 7) +#define NOUVEAU_BO_OR (1 << 8) +#define NOUVEAU_BO_LOCAL (1 << 9) +#define NOUVEAU_BO_DUMMY (1 << 31) + +struct nouveau_bo { + struct nouveau_device *device; + uint64_t handle; + + uint64_t size; + void *map; + + uint32_t flags; + uint64_t offset; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_channel.h b/src/gallium/drivers/nouveau/nouveau_channel.h new file mode 100644 index 00000000000..b99de9add86 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_channel.h @@ -0,0 +1,39 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_CHANNEL_H__ +#define __NOUVEAU_CHANNEL_H__ + +struct nouveau_channel { + struct nouveau_device *device; + int id; + + struct nouveau_pushbuf *pushbuf; + + struct nouveau_grobj *vram; + struct nouveau_grobj *gart; + + void *user_private; + void (*hang_notify)(struct nouveau_channel *); +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h new file mode 100644 index 00000000000..5998945677b --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -0,0 +1,6134 @@ +/************************************************************************* + + Autogenerated file, do not edit ! + +************************************************************************** + + Copyright (C) 2006-2007 : + Dmitry Baryshkov, + Laurent Carlier, + Matthieu Castet, + Dawid Gajownik, + Jeremy Kolb, + Stephane Loeuillet, + Patrice Mandin, + Stephane Marchesin, + Serge Martin, + Sylvain Munaut, + Simon Raffeiner, + Ben Skeggs, + Erik Waling, + koala_br, + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +*************************************************************************/ + + +#ifndef NOUVEAU_REG_H +#define NOUVEAU_REG_H 1 + + +#define NV01_ROOT 0x00000001 + + + +#define NV01_CONTEXT_DMA 0x00000002 + + + +#define NV01_DEVICE 0x00000003 + + + +#define NV01_TIMER 0x00000004 + +#define NV01_TIMER_SYNCHRONIZE 0x00000100 +#define NV01_TIMER_STOP_ALARM 0x00000104 +#define NV01_TIMER_DMA_NOTIFY 0x00000180 +#define NV01_TIMER_TIME(x) (0x00000300+((x)*4)) +#define NV01_TIMER_TIME__SIZE 0x00000002 +#define NV01_TIMER_ALARM_NOTIFY 0x00000308 + + +#define NV_IMAGE_STENCIL 0x00000010 + +#define NV_IMAGE_STENCIL_NOTIFY 0x00000104 +#define NV_IMAGE_STENCIL_DMA_NOTIFY 0x00000180 +#define NV_IMAGE_STENCIL_IMAGE_OUTPUT 0x00000200 +#define NV_IMAGE_STENCIL_IMAGE_INPUT(x) (0x00000204+((x)*4)) +#define NV_IMAGE_STENCIL_IMAGE_INPUT__SIZE 0x00000002 + + +#define NV_IMAGE_BLEND_AND 0x00000011 + +#define NV_IMAGE_BLEND_AND_NOP 0x00000100 +#define NV_IMAGE_BLEND_AND_NOTIFY 0x00000104 +#define NV_IMAGE_BLEND_AND_DMA_NOTIFY 0x00000180 +#define NV_IMAGE_BLEND_AND_IMAGE_OUTPUT 0x00000200 +#define NV_IMAGE_BLEND_AND_BETA_INPUT 0x00000204 +#define NV_IMAGE_BLEND_AND_IMAGE_INPUT 0x00000208 + + +#define NV01_CONTEXT_BETA1 0x00000012 + +#define NV01_CONTEXT_BETA1_NOP 0x00000100 +#define NV01_CONTEXT_BETA1_NOTIFY 0x00000104 +#define NV01_CONTEXT_BETA1_DMA_NOTIFY 0x00000180 +#define NV01_CONTEXT_BETA1_BETA_1D31 0x00000300 + + +#define NV_IMAGE_ROP_AND 0x00000013 + +#define NV_IMAGE_ROP_AND_NOTIFY 0x00000104 +#define NV_IMAGE_ROP_AND_DMA_NOTIFY 0x00000180 +#define NV_IMAGE_ROP_AND_IMAGE_OUTPUT 0x00000200 +#define NV_IMAGE_ROP_AND_ROP_INPUT 0x00000204 +#define NV_IMAGE_ROP_AND_IMAGE_INPUT(x) (0x00000208+((x)*4)) +#define NV_IMAGE_ROP_AND_IMAGE_INPUT__SIZE 0x00000002 + + +#define NV_IMAGE_COLOR_KEY 0x00000015 + + + +#define NV01_CONTEXT_COLOR_KEY 0x00000017 + +#define NV01_CONTEXT_COLOR_KEY_NOP 0x00000100 +#define NV01_CONTEXT_COLOR_KEY_NOTIFY 0x00000104 +#define NV01_CONTEXT_COLOR_KEY_DMA_NOTIFY 0x00000180 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT 0x00000300 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A8Y8 0x00000001 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X24Y8 0x00000002 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A1R5G5B5 0x00000003 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X17R5G5B5 0x00000004 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A8R8G8B8 0x00000005 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X8R8G8B8 0x00000006 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A16Y16 0x00000007 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16Y16 0x00000008 +#define NV01_CONTEXT_COLOR_KEY_COLOR 0x00000304 + + +#define NV01_CONTEXT_PATTERN 0x00000018 + +#define NV01_CONTEXT_PATTERN_NOP 0x00000100 +#define NV01_CONTEXT_PATTERN_NOTIFY 0x00000104 +#define NV01_CONTEXT_PATTERN_DMA_NOTIFY 0x00000180 +#define NV01_CONTEXT_PATTERN_COLOR_FORMAT 0x00000300 +#define NV01_CONTEXT_PATTERN_MONOCHROME_FORMAT 0x00000304 +#define NV01_CONTEXT_PATTERN_SHAPE 0x00000308 +#define NV01_CONTEXT_PATTERN_COLOR(x) (0x00000310+((x)*4)) +#define NV01_CONTEXT_PATTERN_COLOR__SIZE 0x00000002 +#define NV01_CONTEXT_PATTERN_PATTERN(x) (0x00000318+((x)*4)) +#define NV01_CONTEXT_PATTERN_PATTERN__SIZE 0x00000002 + + +#define NV01_CONTEXT_CLIP_RECTANGLE 0x00000019 + +#define NV01_CONTEXT_CLIP_RECTANGLE_NOP 0x00000100 +#define NV01_CONTEXT_CLIP_RECTANGLE_NOTIFY 0x00000104 +#define NV01_CONTEXT_CLIP_RECTANGLE_DMA_NOTIFY 0x00000180 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT 0x00000300 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X_SHIFT 0 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X_MASK 0x0000ffff +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y_SHIFT 16 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y_MASK 0xffff0000 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE 0x00000304 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W_SHIFT 0 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W_MASK 0x0000ffff +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H_SHIFT 16 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H_MASK 0xffff0000 + + +#define NV01_RENDER_SOLID_LINE 0x0000001c + +#define NV01_RENDER_SOLID_LINE_NOP 0x00000100 +#define NV01_RENDER_SOLID_LINE_NOTIFY 0x00000104 +#define NV01_RENDER_SOLID_LINE_PATCH 0x0000010c +#define NV01_RENDER_SOLID_LINE_DMA_NOTIFY 0x00000180 +#define NV01_RENDER_SOLID_LINE_CLIP_RECTANGLE 0x00000184 +#define NV01_RENDER_SOLID_LINE_PATTERN 0x00000188 +#define NV01_RENDER_SOLID_LINE_ROP 0x0000018c +#define NV01_RENDER_SOLID_LINE_BETA1 0x00000190 +#define NV01_RENDER_SOLID_LINE_SURFACE 0x00000194 +#define NV01_RENDER_SOLID_LINE_OPERATION 0x000002fc +#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_RENDER_SOLID_LINE_OPERATION_ROP_AND 0x00000001 +#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_AND 0x00000002 +#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY 0x00000003 +#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_PREMULT 0x00000005 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT 0x00000300 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A8Y8 0x00000001 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X24Y8 0x00000002 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A1R5G5B5 0x00000003 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X17R5G5B5 0x00000004 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A8R8G8B8 0x00000005 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X8R8G8B8 0x00000006 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A16Y16 0x00000007 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16Y16 0x00000008 +#define NV01_RENDER_SOLID_LINE_COLOR 0x00000304 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0(x) (0x00000400+((x)*8)) +#define NV01_RENDER_SOLID_LINE_LINE_POINT0__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X_SHIFT 0 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y_SHIFT 16 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1(x) (0x00000404+((x)*8)) +#define NV01_RENDER_SOLID_LINE_LINE_POINT1__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X_SHIFT 0 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y_SHIFT 16 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X(x) (0x00000480+((x)*16)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y(x) (0x00000484+((x)*16)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X(x) (0x00000488+((x)*16)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y(x) (0x0000048c+((x)*16)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_POLYLINE(x) (0x00000500+((x)*4)) +#define NV01_RENDER_SOLID_LINE_POLYLINE__SIZE 0x00000020 +#define NV01_RENDER_SOLID_LINE_POLYLINE_X_SHIFT 0 +#define NV01_RENDER_SOLID_LINE_POLYLINE_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_POLYLINE_Y_SHIFT 16 +#define NV01_RENDER_SOLID_LINE_POLYLINE_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X(x) (0x00000580+((x)*8)) +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y(x) (0x00000584+((x)*8)) +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR(x) (0x00000600+((x)*8)) +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT(x) (0x00000604+((x)*8)) +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X_SHIFT 0 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y_SHIFT 16 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y_MASK 0xffff0000 + + +#define NV01_RENDER_SOLID_TRIANGLE 0x0000001d + +#define NV01_RENDER_SOLID_TRIANGLE_NOP 0x00000100 +#define NV01_RENDER_SOLID_TRIANGLE_NOTIFY 0x00000104 +#define NV01_RENDER_SOLID_TRIANGLE_PATCH 0x0000010c +#define NV01_RENDER_SOLID_TRIANGLE_DMA_NOTIFY 0x00000180 +#define NV01_RENDER_SOLID_TRIANGLE_CLIP_RECTANGLE 0x00000184 +#define NV01_RENDER_SOLID_TRIANGLE_PATTERN 0x00000188 +#define NV01_RENDER_SOLID_TRIANGLE_ROP 0x0000018c +#define NV01_RENDER_SOLID_TRIANGLE_BETA1 0x00000190 +#define NV01_RENDER_SOLID_TRIANGLE_SURFACE 0x00000194 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION 0x000002fc +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_ROP_AND 0x00000001 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_AND 0x00000002 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY 0x00000003 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_PREMULT 0x00000005 +#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT 0x00000300 +#define NV01_RENDER_SOLID_TRIANGLE_COLOR 0x00000304 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0 0x00000310 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1 0x00000314 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2 0x00000318 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_X 0x00000320 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_Y 0x00000324 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_X 0x00000328 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_Y 0x0000032c +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_X 0x00000330 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_Y 0x00000334 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH(x) (0x00000400+((x)*4)) +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH__SIZE 0x00000020 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X(x) (0x00000480+((x)*8)) +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X__SIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y(x) (0x00000484+((x)*8)) +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y__SIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR(x) (0x00000500+((x)*16)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR__SIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0(x) (0x00000504+((x)*16)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0__SIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1(x) (0x00000508+((x)*16)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1__SIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2(x) (0x0000050c+((x)*16)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2__SIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR(x) (0x00000580+((x)*8)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR__SIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT(x) (0x00000584+((x)*8)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT__SIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y_MASK 0xffff0000 + + +#define NV01_RENDER_SOLID_RECTANGLE 0x0000001e + +#define NV01_RENDER_SOLID_RECTANGLE_NOP 0x00000100 +#define NV01_RENDER_SOLID_RECTANGLE_NOTIFY 0x00000104 +#define NV01_RENDER_SOLID_RECTANGLE_PATCH 0x0000010c +#define NV01_RENDER_SOLID_RECTANGLE_DMA_NOTIFY 0x00000180 +#define NV01_RENDER_SOLID_RECTANGLE_CLIP_RECTANGLE 0x00000184 +#define NV01_RENDER_SOLID_RECTANGLE_PATTERN 0x00000188 +#define NV01_RENDER_SOLID_RECTANGLE_ROP 0x0000018c +#define NV01_RENDER_SOLID_RECTANGLE_BETA1 0x00000190 +#define NV01_RENDER_SOLID_RECTANGLE_SURFACE 0x00000194 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION 0x000002fc +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_ROP_AND 0x00000001 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_AND 0x00000002 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY 0x00000003 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_PREMULT 0x00000005 +#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT 0x00000300 +#define NV01_RENDER_SOLID_RECTANGLE_COLOR 0x00000304 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT(x) (0x00000400+((x)*8)) +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT__SIZE 0x00000010 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X_SHIFT 0 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y_SHIFT 16 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE(x) (0x00000404+((x)*8)) +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE__SIZE 0x00000010 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W_SHIFT 0 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W_MASK 0x0000ffff +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H_SHIFT 16 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H_MASK 0xffff0000 + + +#define NV01_IMAGE_BLIT 0x0000001f + +#define NV01_IMAGE_BLIT_NOP 0x00000100 +#define NV01_IMAGE_BLIT_NOTIFY 0x00000104 +#define NV01_IMAGE_BLIT_PATCH 0x0000010c +#define NV01_IMAGE_BLIT_DMA_NOTIFY 0x00000180 +#define NV01_IMAGE_BLIT_COLOR_KEY 0x00000184 +#define NV01_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188 +#define NV01_IMAGE_BLIT_PATTERN 0x0000018c +#define NV01_IMAGE_BLIT_ROP 0x00000190 +#define NV01_IMAGE_BLIT_BETA1 0x00000194 +#define NV01_IMAGE_BLIT_SURFACE 0x0000019c +#define NV01_IMAGE_BLIT_OPERATION 0x000002fc +#define NV01_IMAGE_BLIT_IMAGE_INPUT 0x00000204 +#define NV01_IMAGE_BLIT_POINT_IN 0x00000300 +#define NV01_IMAGE_BLIT_POINT_IN_X_SHIFT 0 +#define NV01_IMAGE_BLIT_POINT_IN_X_MASK 0x0000ffff +#define NV01_IMAGE_BLIT_POINT_IN_Y_SHIFT 16 +#define NV01_IMAGE_BLIT_POINT_IN_Y_MASK 0xffff0000 +#define NV01_IMAGE_BLIT_POINT_OUT 0x00000304 +#define NV01_IMAGE_BLIT_POINT_OUT_X_SHIFT 0 +#define NV01_IMAGE_BLIT_POINT_OUT_X_MASK 0x0000ffff +#define NV01_IMAGE_BLIT_POINT_OUT_Y_SHIFT 16 +#define NV01_IMAGE_BLIT_POINT_OUT_Y_MASK 0xffff0000 +#define NV01_IMAGE_BLIT_SIZE 0x00000308 +#define NV01_IMAGE_BLIT_SIZE_W_SHIFT 0 +#define NV01_IMAGE_BLIT_SIZE_W_MASK 0x0000ffff +#define NV01_IMAGE_BLIT_SIZE_H_SHIFT 16 +#define NV01_IMAGE_BLIT_SIZE_H_MASK 0xffff0000 + + +#define NV01_IMAGE_FROM_CPU 0x00000021 + +#define NV01_IMAGE_FROM_CPU_NOP 0x00000100 +#define NV01_IMAGE_FROM_CPU_NOTIFY 0x00000104 +#define NV01_IMAGE_FROM_CPU_PATCH 0x0000010c +#define NV01_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 +#define NV01_IMAGE_FROM_CPU_COLOR_KEY 0x00000184 +#define NV01_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x00000188 +#define NV01_IMAGE_FROM_CPU_PATTERN 0x0000018c +#define NV01_IMAGE_FROM_CPU_ROP 0x00000190 +#define NV01_IMAGE_FROM_CPU_BETA1 0x00000194 +#define NV01_IMAGE_FROM_CPU_SURFACE 0x00000198 +#define NV01_IMAGE_FROM_CPU_OPERATION 0x000002fc +#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_IMAGE_FROM_CPU_OPERATION_ROP_AND 0x00000001 +#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_AND 0x00000002 +#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY 0x00000003 +#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_PREMULT 0x00000005 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_Y8 0x00000001 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A1R5G5B5 0x00000002 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X1R5G5B5 0x00000003 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A8R8G8B8 0x00000004 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X8R8G8B8 0x00000005 +#define NV01_IMAGE_FROM_CPU_POINT 0x00000304 +#define NV01_IMAGE_FROM_CPU_POINT_X_SHIFT 0 +#define NV01_IMAGE_FROM_CPU_POINT_X_MASK 0x0000ffff +#define NV01_IMAGE_FROM_CPU_POINT_Y_SHIFT 16 +#define NV01_IMAGE_FROM_CPU_POINT_Y_MASK 0xffff0000 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT 0x00000308 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W_SHIFT 0 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W_MASK 0x0000ffff +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H_SHIFT 16 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H_MASK 0xffff0000 +#define NV01_IMAGE_FROM_CPU_SIZE_IN 0x0000030c +#define NV01_IMAGE_FROM_CPU_SIZE_IN_W_SHIFT 0 +#define NV01_IMAGE_FROM_CPU_SIZE_IN_W_MASK 0x0000ffff +#define NV01_IMAGE_FROM_CPU_SIZE_IN_H_SHIFT 16 +#define NV01_IMAGE_FROM_CPU_SIZE_IN_H_MASK 0xffff0000 +#define NV01_IMAGE_FROM_CPU_COLOR(x) (0x00000400+((x)*4)) +#define NV01_IMAGE_FROM_CPU_COLOR__SIZE 0x00000020 + + +#define NV01_NULL 0x00000030 + + + +#define NV03_STRETCHED_IMAGE_FROM_CPU 0x00000036 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_NOP 0x00000100 +#define NV03_STRETCHED_IMAGE_FROM_CPU_NOTIFY 0x00000104 +#define NV03_STRETCHED_IMAGE_FROM_CPU_PATCH 0x0000010c +#define NV03_STRETCHED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_KEY 0x00000184 +#define NV03_STRETCHED_IMAGE_FROM_CPU_PATTERN 0x00000188 +#define NV03_STRETCHED_IMAGE_FROM_CPU_ROP 0x0000018c +#define NV03_STRETCHED_IMAGE_FROM_CPU_BETA1 0x00000190 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000194 +#define NV03_STRETCHED_IMAGE_FROM_CPU_OPERATION 0x000002fc +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN 0x00000304 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W_SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W_MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H_SHIFT 16 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H_MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_DX_DU 0x00000308 +#define NV03_STRETCHED_IMAGE_FROM_CPU_DY_DV 0x0000030c +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT 0x00000310 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X_SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X_MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y_SHIFT 16 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y_MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE 0x00000314 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W_SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W_MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H_SHIFT 16 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H_MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4 0x00000318 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X_SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X_MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y_SHIFT 16 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y_MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR(x) (0x00000400+((x)*4)) +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR__SIZE 0x00000020 + + +#define NV03_SCALED_IMAGE_FROM_MEMORY 0x00000037 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_NOP 0x00000100 +#define NV03_SCALED_IMAGE_FROM_MEMORY_NOTIFY 0x00000104 +#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180 +#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184 +#define NV03_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188 +#define NV03_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c +#define NV03_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190 +#define NV03_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000194 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT 0x00000310 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_X_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_X_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_Y_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_Y_MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE 0x00000314 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_W_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_W_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_H_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_H_MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_DELTA_DU_DX 0x00000318 +#define NV03_SCALED_IMAGE_FROM_MEMORY_DELTA_DV_DY 0x0000031c +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE 0x00000400 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_W_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_W_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_H_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_H_MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT 0x00000404 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_PITCH_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_PITCH_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_MASK 0x00ff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_CENTER 0x00010000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_CORNER 0x00020000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_INTERPOLATOR_SHIFT 24 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_INTERPOLATOR_MASK 0xff000000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_OFFSET 0x00000408 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT 0x0000040c +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_U_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_U_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_V_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_V_MASK 0xffff0000 + + +#define NV04_DVD_SUBPICTURE 0x00000038 + +#define NV04_DVD_SUBPICTURE_NOP 0x00000100 +#define NV04_DVD_SUBPICTURE_NOTIFY 0x00000104 +#define NV04_DVD_SUBPICTURE_WAIT_FOR_IDLE 0x00000108 +#define NV04_DVD_SUBPICTURE_DMA_NOTIFY 0x00000180 +#define NV04_DVD_SUBPICTURE_DMA_OVERLAY 0x00000184 +#define NV04_DVD_SUBPICTURE_DMA_IMAGEIN 0x00000188 +#define NV04_DVD_SUBPICTURE_DMA_IMAGEOUT 0x0000018c +#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT 0x00000300 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_X_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_X_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_Y_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_Y_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE 0x00000304 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_W_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_W_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_H_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_H_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT 0x00000308 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_PITCH_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_PITCH_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_COLOR_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_COLOR_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_OFFSET 0x0000030c +#define NV04_DVD_SUBPICTURE_IMAGEIN_DELTA_DU_DX 0x00000310 +#define NV04_DVD_SUBPICTURE_IMAGEIN_DELTA_DV_DY 0x00000314 +#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE 0x00000318 +#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_W_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_W_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_H_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_H_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT 0x0000031c +#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_PITCH_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_PITCH_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_COLOR_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_COLOR_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_IMAGEIN_OFFSET 0x00000320 +#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT 0x00000324 +#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_U_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_U_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_V_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_V_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_OVERLAY_DELTA_DU_DX 0x00000328 +#define NV04_DVD_SUBPICTURE_OVERLAY_DELTA_DV_DY 0x0000032c +#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE 0x00000330 +#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_W_SHIFT 0 +#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_W_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_H_SHIFT 16 +#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_H_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT 0x00000334 +#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_PITCH_SHIFT 0 +#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_PITCH_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_COLOR_SHIFT 16 +#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_COLOR_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_OVERLAY_OFFSET 0x00000338 +#define NV04_DVD_SUBPICTURE_OVERLAY_POINT 0x0000033c +#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_U_SHIFT 0 +#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_U_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_V_SHIFT 16 +#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_V_MASK 0xffff0000 + + +#define NV04_MEMORY_TO_MEMORY_FORMAT 0x00000039 + +#define NV04_MEMORY_TO_MEMORY_FORMAT_NOP 0x00000100 +#define NV04_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104 +#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY 0x00000180 +#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN 0x00000184 +#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_OUT 0x00000188 +#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c +#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT 0x00000310 +#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN 0x00000314 +#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT 0x00000318 +#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN 0x0000031c +#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT 0x00000320 +#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT 0x00000324 +#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_SHIFT 0 +#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_MASK 0x0000000f +#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_SHIFT 8 +#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_MASK 0x00000f00 +#define NV04_MEMORY_TO_MEMORY_FORMAT_BUF_NOTIFY 0x00000328 + + +#define NV01_MEMORY_LOCAL_BANKED 0x0000003d + + + +#define NV01_MAPPING_SYSTEM 0x0000003e + + + +#define NV03_MEMORY_LOCAL_CURSOR 0x0000003f + + + +#define NV01_MEMORY_LOCAL_LINEAR 0x00000040 + + + +#define NV01_MAPPING_LOCAL 0x00000041 + + + +#define NV04_CONTEXT_SURFACES_2D 0x00000042 + +#define NV04_CONTEXT_SURFACES_2D_NOP 0x00000100 +#define NV04_CONTEXT_SURFACES_2D_NOTIFY 0x00000104 +#define NV04_CONTEXT_SURFACES_2D_PM_TRIGGER 0x00000140 +#define NV04_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180 +#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE 0x00000184 +#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_DESTIN 0x00000188 +#define NV04_CONTEXT_SURFACES_2D_FORMAT 0x00000300 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y8 0x00000001 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_Z1R5G5B5 0x00000002 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_X1R5G5B5 0x00000003 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5 0x00000004 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y16 0x00000005 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_Z8R8G8B8 0x00000006 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_X8R8G8B8 0x00000007 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_Z1A7R8G8B8 0x00000008 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_X1A7R8G8B8 0x00000009 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8 0x0000000a +#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y32 0x0000000b +#define NV04_CONTEXT_SURFACES_2D_PITCH 0x00000304 +#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE_SHIFT 0 +#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE_MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN_SHIFT 16 +#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN_MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308 +#define NV04_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c + + +#define NV03_CONTEXT_ROP 0x00000043 + +#define NV03_CONTEXT_ROP_NOP 0x00000100 +#define NV03_CONTEXT_ROP_NOTIFY 0x00000104 +#define NV03_CONTEXT_ROP_DMA_NOTIFY 0x00000180 +#define NV03_CONTEXT_ROP_ROP 0x00000300 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_SHIFT 0 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_MASK 0x0000000f +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_CLEAR 0x00000000 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NOR 0x00000001 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND_INVERTED 0x00000002 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_COPY_INVERTED 0x00000003 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND_REVERSE 0x00000004 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_INVERT 0x00000005 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_XOR 0x00000006 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NAND 0x00000007 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND 0x00000008 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_EQUI 0x00000009 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NOOP 0x0000000a +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR_INVERTED 0x0000000b +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_COPY 0x0000000c +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR_REVERSE 0x0000000d +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR 0x0000000e +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_SET 0x0000000f +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_SHIFT 4 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_MASK 0x000000f0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_CLEAR 0x00000000 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NOR 0x00000010 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND_INVERTED 0x00000020 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_COPY_INVERTED 0x00000030 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND_REVERSE 0x00000040 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_INVERT 0x00000050 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_XOR 0x00000060 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NAND 0x00000070 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND 0x00000080 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_EQUI 0x00000090 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NOOP 0x000000a0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR_INVERTED 0x000000b0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_COPY 0x000000c0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR_REVERSE 0x000000d0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR 0x000000e0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_SET 0x000000f0 + + +#define NV04_IMAGE_PATTERN 0x00000044 + +#define NV04_IMAGE_PATTERN_NOP 0x00000100 +#define NV04_IMAGE_PATTERN_NOTIFY 0x00000104 +#define NV04_IMAGE_PATTERN_DMA_NOTIFY 0x00000180 +#define NV04_IMAGE_PATTERN_COLOR_FORMAT 0x00000300 +#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV04_IMAGE_PATTERN_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A8R8G8B8 0x00000003 +#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT 0x00000304 +#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_CGA6 0x00000001 +#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_LE 0x00000002 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE 0x00000308 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_8X8 0x00000000 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_64X1 0x00000001 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_1X64 0x00000002 +#define NV04_IMAGE_PATTERN_PATTERN_SELECT 0x0000030c +#define NV04_IMAGE_PATTERN_PATTERN_SELECT_MONO 0x00000001 +#define NV04_IMAGE_PATTERN_PATTERN_SELECT_COLOR 0x00000002 +#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR0 0x00000310 +#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR1 0x00000314 +#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN0 0x00000318 +#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN1 0x0000031c +#define NV04_IMAGE_PATTERN_PATTERN_Y8(x) (0x00000400+((x)*4)) +#define NV04_IMAGE_PATTERN_PATTERN_Y8__SIZE 0x00000010 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0_SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0_MASK 0x000000ff +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1_SHIFT 8 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1_MASK 0x0000ff00 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2_SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2_MASK 0x00ff0000 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3_SHIFT 24 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3_MASK 0xff000000 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5(x) (0x00000500+((x)*4)) +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5__SIZE 0x00000020 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0_SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0_MASK 0x0000001f +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0_SHIFT 5 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0_MASK 0x000007e0 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0_SHIFT 11 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0_MASK 0x0000f800 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1_SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1_MASK 0x001f0000 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1_SHIFT 21 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1_MASK 0x07e00000 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1_SHIFT 27 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1_MASK 0xf8000000 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5(x) (0x00000600+((x)*4)) +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5__SIZE 0x00000020 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0_SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0_MASK 0x0000001f +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0_SHIFT 5 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0_MASK 0x000003e0 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0_SHIFT 10 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0_MASK 0x00007c00 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1_SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1_MASK 0x001f0000 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1_SHIFT 21 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1_MASK 0x03e00000 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1_SHIFT 26 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1_MASK 0x7c000000 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8(x) (0x00000700+((x)*4)) +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8__SIZE 0x00000040 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B_SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B_MASK 0x000000ff +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G_SHIFT 8 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G_MASK 0x0000ff00 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R_SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R_MASK 0x00ff0000 + + +#define NV03_VIDEO_LUT_CURSOR_DAC 0x00000046 + +#define NV03_VIDEO_LUT_CURSOR_DAC_SYNCHRONIZE 0x00000100 +#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_IMAGE 0x00000104 +#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_CURSOR 0x00000108 +#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_DAC 0x0000010c +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_NOTIFY 0x00000180 +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_IMAGE(x) (0x00000184+((x)*4)) +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_IMAGE__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_LUT(x) (0x0000018c+((x)*4)) +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_LUT__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_CURSOR(x) (0x00000194+((x)*4)) +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_CURSOR__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_GET 0x000002fc +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_OFFSET(x) (0x00000300+((x)*8)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_OFFSET__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT(x) (0x00000304+((x)*8)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_PITCH_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_PITCH_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_COLOR_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_COLOR_MASK 0x0fff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_NOTIFY_SHIFT 28 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_NOTIFY_MASK 0xf0000000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_OFFSET(x) (0x00000340+((x)*12)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_OFFSET__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT(x) (0x00000344+((x)*12)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_X_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_X_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_Y_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_Y_MASK 0xffff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_FORMAT(x) (0x00000348+((x)*12)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_FORMAT__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A 0x00000358 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_X_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_X_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_Y_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_Y_MASK 0xffff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE(x) (0x00000380+((x)*16)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_W_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_W_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_H_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_H_MASK 0xffff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC(x) (0x00000384+((x)*16)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_START_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_START_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_WIDTH_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_WIDTH_MASK 0x0fff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_POLARITY_SHIFT 28 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_POLARITY_MASK 0xf0000000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC(x) (0x00000388+((x)*16)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_START_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_START_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_WIDTH_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_WIDTH_MASK 0x0fff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_POLARITY_SHIFT 28 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_POLARITY_MASK 0xf0000000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE(x) (0x0000038c+((x)*16)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_WIDTH_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_WIDTH_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_HEIGHT_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_HEIGHT_MASK 0x0fff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_NOTIFY_SHIFT 28 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_NOTIFY_MASK 0xf0000000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_PIXEL_CLOCK 0x000003a0 + + +#define NV03_DX3_TEXTURED_TRIANGLE 0x00000048 + +#define NV03_DX3_TEXTURED_TRIANGLE_NOP 0x00000100 +#define NV03_DX3_TEXTURED_TRIANGLE_NOTIFY 0x00000104 +#define NV03_DX3_TEXTURED_TRIANGLE_PATCH 0x0000010c +#define NV03_DX3_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180 +#define NV03_DX3_TEXTURED_TRIANGLE_DMA_TEXTURE 0x00000184 +#define NV03_DX3_TEXTURED_TRIANGLE_CLIP_RECTANGLE 0x00000188 +#define NV03_DX3_TEXTURED_TRIANGLE_SURFACE 0x0000018c +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_OFFSET 0x00000304 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT 0x00000308 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_MASK_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_MASK_MASK 0x0000ffff +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_ENABLE_SHIFT 16 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_ENABLE_MASK 0x000f0000 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_SHIFT 20 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_MASK 0x00f00000 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MIN_SHIFT 24 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MIN_MASK 0x0f000000 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MAX_SHIFT 28 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MAX_MASK 0xf0000000 +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER 0x0000030c +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_X_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_X_MASK 0x0000001f +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_Y_SHIFT 8 +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_Y_MASK 0x00001f00 +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SIZE_ADJUST_SHIFT 16 +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SIZE_ADJUST_MASK 0x00ff0000 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR 0x00000310 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_B_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_B_MASK 0x000000ff +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_G_SHIFT 8 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_G_MASK 0x0000ff00 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_R_SHIFT 16 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_R_MASK 0x00ff0000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT 0x00000314 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_INTERPOLATOR_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_INTERPOLATOR_MASK 0x0000000f +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_U_SHIFT 4 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_U_MASK 0x00000030 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_V_SHIFT 6 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_V_MASK 0x000000c0 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SOURCE_COLOR_SHIFT 8 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SOURCE_COLOR_MASK 0x00000f00 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_CULLING_SHIFT 12 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_CULLING_MASK 0x00007000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_PERSPECTIVE_ENABLE (1 << 15) +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_FUNC_SHIFT 16 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_FUNC_MASK 0x000f0000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_WRITE_ENABLE_SHIFT 20 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_WRITE_ENABLE_MASK 0x00f00000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_COLOR_WRITE_ENABLE_SHIFT 24 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_COLOR_WRITE_ENABLE_MASK 0x07000000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_ROP_SHIFT 27 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_ROP_MASK 0x18000000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_BETA (1 << 29) +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_DST_BLEND (1 << 30) +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SRC_BLEND (1 << 31) +#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL 0x00000318 +#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_REF_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_REF_MASK 0x000000ff +#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_FUNC_SHIFT 8 +#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_FUNC_MASK 0xffffff00 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR(x) (0x00001000+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I0_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I0_MASK 0x0000000f +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I1_SHIFT 4 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I1_MASK 0x000000f0 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I2_SHIFT 8 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I2_MASK 0x00000f00 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I3_SHIFT 12 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I3_MASK 0x0000f000 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I4_SHIFT 16 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I4_MASK 0x000f0000 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I5_SHIFT 20 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I5_MASK 0x00f00000 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_FOG_SHIFT 24 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_FOG_MASK 0xff000000 +#define NV03_DX3_TEXTURED_TRIANGLE_COLOR(x) (0x00001004+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_COLOR__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_X(x) (0x00001008+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_X__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_Y(x) (0x0000100c+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_Y__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_Z(x) (0x00001010+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_Z__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_M(x) (0x00001014+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_M__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_U(x) (0x00001018+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_U__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_V(x) (0x0000101c+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_V__SIZE 0x00000040 + + +#define NV04_GDI_RECTANGLE_TEXT 0x0000004a + +#define NV04_GDI_RECTANGLE_TEXT_NOP 0x00000100 +#define NV04_GDI_RECTANGLE_TEXT_NOTIFY 0x00000104 +#define NV04_GDI_RECTANGLE_TEXT_PATCH 0x0000010c +#define NV04_GDI_RECTANGLE_TEXT_PM_TRIGGER 0x00000140 +#define NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180 +#define NV04_GDI_RECTANGLE_TEXT_DMA_FONTS 0x00000184 +#define NV04_GDI_RECTANGLE_TEXT_PATTERN 0x00000188 +#define NV04_GDI_RECTANGLE_TEXT_ROP 0x0000018c +#define NV04_GDI_RECTANGLE_TEXT_BETA1 0x00000190 +#define NV04_GDI_RECTANGLE_TEXT_BETA4 0x00000194 +#define NV04_GDI_RECTANGLE_TEXT_SURFACE 0x00000198 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_AND 0x00000000 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_ROP_AND 0x00000001 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_AND 0x00000002 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY 0x00000003 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_PREMULT 0x00000005 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8 0x00000003 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_CGA6 0x00000001 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE 0x00000002 +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(x) (0x00000400+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT__SIZE 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE(x) (0x00000404+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE__SIZE 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0 0x000005f4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1 0x000005f8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_B 0x000005fc +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0(x) (0x00000600+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0__SIZE 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1(x) (0x00000604+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1__SIZE 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x000007ec +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x000007f0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_C 0x000007f4 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C 0x000007f8 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_POINT_C 0x000007fc +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(x) (0x00000800+((x)*4)) +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__SIZE 0x00000080 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x00000be4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x00000be8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_COLOR0_E 0x00000bec +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_E 0x00000bf0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x00000bf4 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x00000bf8 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_POINT_E 0x00000bfc +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(x) (0x00000c00+((x)*4)) +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__SIZE 0x00000080 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F 0x00000ff0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET_MASK 0x0fffffff +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH_SHIFT 28 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH_MASK 0xf0000000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0 0x00000ff4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1 0x00000ff8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_F 0x00000ffc +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F(x) (0x00001000+((x)*4)) +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F__SIZE 0x00000100 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX_MASK 0x000000ff +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X_SHIFT 8 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X_MASK 0x000fff00 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y_SHIFT 20 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y_MASK 0xfff00000 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G 0x000017f0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET_MASK 0x0fffffff +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH_SHIFT 28 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH_MASK 0xf0000000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0 0x000017f4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1 0x000017f8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_G 0x000017fc +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT(x) (0x00001800+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT__SIZE 0x00000100 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX(x) (0x00001804+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX__SIZE 0x00000100 + + +#define NV03_GDI_RECTANGLE_TEXT 0x0000004b + +#define NV03_GDI_RECTANGLE_TEXT_NOP 0x00000100 +#define NV03_GDI_RECTANGLE_TEXT_NOTIFY 0x00000104 +#define NV03_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180 +#define NV03_GDI_RECTANGLE_TEXT_PATTERN 0x00000184 +#define NV03_GDI_RECTANGLE_TEXT_ROP 0x00000188 +#define NV03_GDI_RECTANGLE_TEXT_BETA1 0x0000018c +#define NV03_GDI_RECTANGLE_TEXT_SURFACE 0x00000190 +#define NV03_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc +#define NV03_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300 +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304 +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT 0x00000400 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE 0x00000404 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B 0x000007f4 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B 0x000007f8 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_B 0x000007fc +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0 0x00000800 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1 0x00000804 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x00000bec +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x00000bf0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_C 0x00000bf4 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C 0x00000bf8 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_POINT_C 0x00000bfc +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(x) (0x00000c00+((x)*4)) +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__SIZE 0x00000020 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0 0x00000fe8 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1 0x00000fec +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_D 0x00000ff0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D 0x00000ff4 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D 0x00000ff8 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_POINT_D 0x00000ffc +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D(x) (0x00001000+((x)*4)) +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D__SIZE 0x00000020 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x000013e4 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x000013e8 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_COLOR0_E 0x000013ec +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_E 0x000013f0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x000013f4 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x000013f8 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_POINT_E 0x000013fc +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(x) (0x00001400+((x)*4)) +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__SIZE 0x00000020 + + +#define NV04_SWIZZLED_SURFACE 0x00000052 + +#define NV04_SWIZZLED_SURFACE_NOP 0x00000100 +#define NV04_SWIZZLED_SURFACE_NOTIFY 0x00000104 +#define NV04_SWIZZLED_SURFACE_DMA_NOTIFY 0x00000180 +#define NV04_SWIZZLED_SURFACE_DMA_IMAGE 0x00000184 +#define NV04_SWIZZLED_SURFACE_FORMAT 0x00000300 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_SHIFT 0 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_MASK 0x000000ff +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y8 0x00000001 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000002 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000003 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_R5G6B5 0x00000004 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y16 0x00000005 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000006 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000007 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000008 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000009 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_A8R8G8B8 0x0000000a +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y32 0x0000000b +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT 16 +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_MASK 0x00ff0000 +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT 24 +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_MASK 0xff000000 +#define NV04_SWIZZLED_SURFACE_OFFSET 0x00000304 + + +#define NV04_CONTEXT_SURFACES_3D 0x00000053 + +#define NV04_CONTEXT_SURFACES_3D_NOP 0x00000100 +#define NV04_CONTEXT_SURFACES_3D_NOTIFY 0x00000104 +#define NV04_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180 +#define NV04_CONTEXT_SURFACES_3D_DMA_COLOR 0x00000184 +#define NV04_CONTEXT_SURFACES_3D_DMA_ZETA 0x00000188 +#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL 0x000002f8 +#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_SHIFT 0 +#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_SHIFT 16 +#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL 0x000002fc +#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_SHIFT 0 +#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_SHIFT 16 +#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_3D_FORMAT 0x00000300 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_SHIFT 0 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_MASK 0x000000ff +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000001 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000002 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000004 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000005 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000006 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000007 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SHIFT 8 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_MASK 0x0000ff00 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_PITCH 0x00000100 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SWIZZLE 0x00000200 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U_SHIFT 16 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U_MASK 0x00ff0000 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V_SHIFT 24 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V_MASK 0xff000000 +#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE 0x00000304 +#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W_SHIFT 0 +#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W_MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H_SHIFT 16 +#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H_MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_3D_PITCH 0x00000308 +#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR_SHIFT 0 +#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR_MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA_SHIFT 16 +#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA_MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x0000030c +#define NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000310 + + +#define NV04_DX5_TEXTURED_TRIANGLE 0x00000054 + +#define NV04_DX5_TEXTURED_TRIANGLE_NOP 0x00000100 +#define NV04_DX5_TEXTURED_TRIANGLE_NOTIFY 0x00000104 +#define NV04_DX5_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180 +#define NV04_DX5_TEXTURED_TRIANGLE_DMA_A 0x00000184 +#define NV04_DX5_TEXTURED_TRIANGLE_DMA_B 0x00000188 +#define NV04_DX5_TEXTURED_TRIANGLE_SURFACE 0x0000018c +#define NV04_DX5_TEXTURED_TRIANGLE_COLORKEY 0x00000300 +#define NV04_DX5_TEXTURED_TRIANGLE_OFFSET 0x00000304 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT 0x00000308 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_DMA_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_DMA_MASK 0x00000003 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_MATCH_SHIFT 2 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_MATCH_MASK 0x0000000c +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_SHIFT 4 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_MASK 0x00000030 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CENTER 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER 0x00000020 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_SHIFT 6 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_MASK 0x000000c0 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CENTER 0x00000040 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER 0x00000080 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_MASK 0x00000f00 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_Y8 0x00000100 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A1R5G5B5 0x00000200 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_X1R5G5B5 0x00000300 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A4R4G4B4 0x00000400 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_R5G6B5 0x00000500 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A8R8G8B8 0x00000600 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_X8R8G8B8 0x00000700 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT 12 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_MASK 0x000f0000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT 20 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_MASK 0x00f00000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MASK 0x07000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT 0x01000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT 0x02000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE 0x03000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER 0x04000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP 0x05000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_WRAPU (1 << 27) +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT 28 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MASK 0x70000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_REPEAT 0x10000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MIRRORED_REPEAT 0x20000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE 0x30000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_BORDER 0x40000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP 0x50000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_WRAPV (1 << 31) +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER 0x0000030c +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X_MASK 0x000000ff +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y_MASK 0x00007f00 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE (1 << 15) +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS_MASK 0x00ff0000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_MASK 0x07000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST 0x01000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR 0x02000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE (1 << 27) +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_SHIFT 28 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_MASK 0x70000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST 0x10000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR 0x20000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE (1 << 31) +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND 0x00000310 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_MASK 0x0000000f +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_MASK_BIT_SHIFT 4 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_MASK_BIT_MASK 0x00000030 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_SHIFT 6 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_MASK 0x000000c0 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT 0x00000040 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD 0x00000080 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_PHONG 0x000000c0 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_MASK 0x00000f00 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE_SHIFT 12 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE_MASK 0x0000f000 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE_MASK 0x000f0000 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_ALPHA_ENABLE_SHIFT 20 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_ALPHA_ENABLE_MASK 0x00f00000 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SRC_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SRC_MASK 0x0f000000 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_DST_SHIFT 28 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_DST_MASK 0xf0000000 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL 0x00000314 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF_MASK 0x000000ff +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_MASK 0x00000f00 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE (1 << 12) +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN (1 << 13) +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT 14 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_MASK 0x0000c000 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_MASK 0x000f0000 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT 20 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_MASK 0x00300000 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE (1 << 22) +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE (1 << 23) +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_MASK 0x3f000000 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT 30 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_MASK 0xc0000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR 0x00000318 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_B_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_B_MASK 0x000000ff +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_G_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_G_MASK 0x0000ff00 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_R_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_R_MASK 0x00ff0000 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_A_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_A_MASK 0xff000000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(x) (0x00000400+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SY(x) (0x00000404+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SY__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SZ(x) (0x00000408+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SZ__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_RHW(x) (0x0000040c+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_RHW__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR(x) (0x00000410+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B_MASK 0x000000ff +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G_MASK 0x0000ff00 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R_MASK 0x00ff0000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A_MASK 0xff000000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR(x) (0x00000414+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B_MASK 0x000000ff +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G_MASK 0x0000ff00 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R_MASK 0x00ff0000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG_MASK 0xff000000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TU(x) (0x00000418+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TU__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TV(x) (0x0000041c+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TV__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(x) (0x00000600+((x)*4)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE__SIZE 0x00000040 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I0_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I0_MASK 0x0000000f +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I1_SHIFT 4 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I1_MASK 0x000000f0 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I2_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I2_MASK 0x00000f00 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I3_SHIFT 12 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I3_MASK 0x0000f000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I4_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I4_MASK 0x000f0000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I5_SHIFT 20 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000 + + +#define NV04_DX6_MULTITEX_TRIANGLE 0x00000055 + +#define NV04_DX6_MULTITEX_TRIANGLE_NOP 0x00000100 +#define NV04_DX6_MULTITEX_TRIANGLE_NOTIFY 0x00000104 +#define NV04_DX6_MULTITEX_TRIANGLE_DMA_NOTIFY 0x00000180 +#define NV04_DX6_MULTITEX_TRIANGLE_DMA_A 0x00000184 +#define NV04_DX6_MULTITEX_TRIANGLE_DMA_B 0x00000188 +#define NV04_DX6_MULTITEX_TRIANGLE_SURFACE 0x0000018c +#define NV04_DX6_MULTITEX_TRIANGLE_OFFSET(x) (0x00000308+((x)*4)) +#define NV04_DX6_MULTITEX_TRIANGLE_OFFSET__SIZE 0x00000002 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT(x) (0x00000310+((x)*4)) +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT__SIZE 0x00000002 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_DMA_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_DMA_MASK 0x0000000f +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_SHIFT 4 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_MASK 0x00000030 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_SHIFT 6 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_MASK 0x000000c0 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_COLOR_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_COLOR_MASK 0x00000f00 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT 12 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U_MASK 0x000f0000 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT 20 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V_MASK 0x00f00000 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_MASK 0x07000000 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_WRAPU (1 << 27) +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_SHIFT 28 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_MASK 0x70000000 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_WRAPV (1 << 31) +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER(x) (0x00000318+((x)*4)) +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER__SIZE 0x00000002 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y_MASK 0x00007f00 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE (1 << 15) +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MINIFY_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MINIFY_MASK 0x07000000 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE (1 << 27) +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MAGNIFY_SHIFT 28 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MAGNIFY_MASK 0x70000000 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE (1 << 31) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA 0x00000320 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE0 (1 << 0) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA0 (1 << 1) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_SHIFT 2 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_MASK 0x000000fc +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE1 (1 << 8) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA1 (1 << 9) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_SHIFT 10 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_MASK 0x0000fc00 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE2 (1 << 16) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA2 (1 << 17) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT2_SHIFT 18 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT2_MASK 0x00fc0000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE3 (1 << 24) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA3 (1 << 25) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT3_SHIFT 26 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT3_MASK 0x1c000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_SHIFT 29 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_MASK 0xe0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR 0x00000324 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE0 (1 << 0) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA0 (1 << 1) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT0_SHIFT 2 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT0_MASK 0x000000fc +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE1 (1 << 8) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA1 (1 << 9) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT1_SHIFT 10 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT1_MASK 0x0000fc00 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE2 (1 << 16) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA2 (1 << 17) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT2_SHIFT 18 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT2_MASK 0x00fc0000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE3 (1 << 24) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA3 (1 << 25) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT3_SHIFT 26 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT3_MASK 0x1c000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_OPERATION_SHIFT 29 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_OPERATION_MASK 0xe0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA 0x0000032c +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE0 (1 << 0) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA0 (1 << 1) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT0_SHIFT 2 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT0_MASK 0x000000fc +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE1 (1 << 8) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA1 (1 << 9) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT1_SHIFT 10 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT1_MASK 0x0000fc00 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE2 (1 << 16) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA2 (1 << 17) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT2_SHIFT 18 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT2_MASK 0x00fc0000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE3 (1 << 24) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA3 (1 << 25) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT3_SHIFT 26 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT3_MASK 0x1c000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_OPERATION_SHIFT 29 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_OPERATION_MASK 0xe0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR 0x00000330 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE0 (1 << 0) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA0 (1 << 1) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT0_SHIFT 2 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT0_MASK 0x000000fc +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE1 (1 << 8) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA1 (1 << 9) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT1_SHIFT 10 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT1_MASK 0x0000fc00 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE2 (1 << 16) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA2 (1 << 17) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT2_SHIFT 18 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT2_MASK 0x00fc0000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE3 (1 << 24) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA3 (1 << 25) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT3_SHIFT 26 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT3_MASK 0x1c000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_OPERATION_SHIFT 29 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_OPERATION_MASK 0xe0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR 0x00000334 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_B_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_B_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_G_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_G_MASK 0x0000ff00 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_R_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_R_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_A_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_A_MASK 0xff000000 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND 0x00000338 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_MASK_BIT_SHIFT 4 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_MASK_BIT_MASK 0x00000030 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_SHIFT 6 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_MASK 0x000000c0 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_MASK 0x00000f00 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE_SHIFT 12 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE_MASK 0x0000f000 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE_MASK 0x000f0000 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_ALPHA_ENABLE_SHIFT 20 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_ALPHA_ENABLE_MASK 0x00f00000 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SRC_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SRC_MASK 0x0f000000 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_DST_SHIFT 28 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_DST_MASK 0xf0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0 0x0000033c +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_MASK 0x00000f00 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_TEST_ENABLE (1 << 12) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ORIGIN (1 << 13) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE_SHIFT 14 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE_MASK 0x0000c000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_MASK 0x000f0000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_SHIFT 20 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_MASK 0x00300000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_DITHER_ENABLE (1 << 22) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_PERSPECTIVE_ENABLE (1 << 23) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_WRITE_ENABLE (1 << 24) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_STENCIL_WRITE_ENABLE (1 << 25) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_WRITE_ENABLE (1 << 26) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_RED_WRITE_ENABLE (1 << 27) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_GREEN_WRITE_ENABLE (1 << 28) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_BLUE_WRITE_ENABLE (1 << 29) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_SHIFT 30 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_MASK 0xc0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1 0x00000340 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_TEST_ENABLE_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_TEST_ENABLE_MASK 0x0000000f +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC_SHIFT 4 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC_MASK 0x000000f0 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF_MASK 0x0000ff00 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE_MASK 0xff000000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2 0x00000344 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL_MASK 0x0000000f +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL_SHIFT 4 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL_MASK 0x000000f0 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS_MASK 0x00000f00 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR 0x00000348 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_B_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_B_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_G_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_G_MASK 0x0000ff00 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_R_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_R_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_A_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_A_MASK 0xff000000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SX(x) (0x00000400+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SX__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SY(x) (0x00000404+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SY__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SZ(x) (0x00000408+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SZ__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_RHW(x) (0x0000040c+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_RHW__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR(x) (0x00000410+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G_MASK 0x0000ff00 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A_MASK 0xff000000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR(x) (0x00000414+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G_MASK 0x0000ff00 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG_MASK 0xff000000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU0(x) (0x00000418+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU0__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV0(x) (0x0000041c+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV0__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU1(x) (0x00000420+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU1__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV1(x) (0x00000424+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV1__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE(x) (0x00000540+((x)*4)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE__SIZE 0x00000030 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I0_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I0_MASK 0x0000000f +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I1_SHIFT 4 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I1_MASK 0x000000f0 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I2_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I2_MASK 0x00000f00 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I3_SHIFT 12 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I3_MASK 0x0000f000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I4_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I4_MASK 0x000f0000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_SHIFT 20 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000 + + +#define NV10TCL 0x00000056 + +#define NV10TCL_NOP 0x00000100 +#define NV10TCL_NOTIFY 0x00000104 +#define NV10TCL_DMA_NOTIFY 0x00000180 +#define NV10TCL_DMA_IN_MEMORY0 0x00000184 +#define NV10TCL_DMA_IN_MEMORY1 0x00000188 +#define NV10TCL_DISPLAY_LIST 0x0000018c +#define NV10TCL_DMA_IN_MEMORY2 0x00000194 +#define NV10TCL_DMA_IN_MEMORY3 0x00000198 +#define NV10TCL_VIEWPORT_HORIZ 0x00000200 +#define NV10TCL_VIEWPORT_HORIZ_X_SHIFT 0 +#define NV10TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff +#define NV10TCL_VIEWPORT_HORIZ_W_SHIFT 16 +#define NV10TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 +#define NV10TCL_VIEWPORT_VERT 0x00000204 +#define NV10TCL_VIEWPORT_VERT_Y_SHIFT 0 +#define NV10TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff +#define NV10TCL_VIEWPORT_VERT_H_SHIFT 16 +#define NV10TCL_VIEWPORT_VERT_H_MASK 0xffff0000 +#define NV10TCL_BUFFER_FORMAT 0x00000208 +#define NV10TCL_BUFFER_PITCH 0x0000020c +#define NV10TCL_BUFFER_PITCH_COLOR_PITCH_SHIFT 0 +#define NV10TCL_BUFFER_PITCH_COLOR_PITCH_MASK 0x0000ffff +#define NV10TCL_BUFFER_PITCH_ZETA_PITCH_SHIFT 16 +#define NV10TCL_BUFFER_PITCH_ZETA_PITCH_MASK 0xffff0000 +#define NV10TCL_COLOR_OFFSET 0x00000210 +#define NV10TCL_ZETA_OFFSET 0x00000214 +#define NV10TCL_TX_OFFSET(x) (0x00000218+((x)*4)) +#define NV10TCL_TX_OFFSET__SIZE 0x00000002 +#define NV10TCL_TX_FORMAT(x) (0x00000220+((x)*4)) +#define NV10TCL_TX_FORMAT__SIZE 0x00000002 +#define NV10TCL_TX_FORMAT_CUBE_MAP (1 << 2) +#define NV10TCL_TX_FORMAT_FORMAT_SHIFT 7 +#define NV10TCL_TX_FORMAT_FORMAT_MASK 0x00000780 +#define NV10TCL_TX_FORMAT_FORMAT_L8 0x00000000 +#define NV10TCL_TX_FORMAT_FORMAT_A8 0x00000080 +#define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000100 +#define NV10TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000180 +#define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000200 +#define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000300 +#define NV10TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000380 +#define NV10TCL_TX_FORMAT_FORMAT_INDEX8 0x00000580 +#define NV10TCL_TX_FORMAT_FORMAT_DXT1 0x00000600 +#define NV10TCL_TX_FORMAT_FORMAT_DXT3 0x00000700 +#define NV10TCL_TX_FORMAT_FORMAT_DXT5 0x00000780 +#define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00000800 +#define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00000900 +#define NV10TCL_TX_FORMAT_FORMAT_L8_RECT 0x00000980 +#define NV10TCL_TX_FORMAT_FORMAT_A8L8 0x00000d00 +#define NV10TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00000d80 +#define NV10TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00000f00 +#define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00000e80 +#define NV10TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00001000 +#define NV10TCL_TX_FORMAT_FORMAT_A16 0x00001900 +#define NV10TCL_TX_FORMAT_FORMAT_A16_RECT 0x00001a80 +#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00002500 +#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00002580 +#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00002600 +#define NV10TCL_TX_FORMAT_NPOT (1 << 11) +#define NV10TCL_TX_FORMAT_MIPMAP_LEVELS_SHIFT 12 +#define NV10TCL_TX_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000 +#define NV10TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 16 +#define NV10TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x000f0000 +#define NV10TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 20 +#define NV10TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x00f00000 +#define NV10TCL_TX_FORMAT_WRAP_S_SHIFT 24 +#define NV10TCL_TX_FORMAT_WRAP_S_MASK 0x0f000000 +#define NV10TCL_TX_FORMAT_WRAP_S_REPEAT 0x01000000 +#define NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT 0x02000000 +#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE 0x03000000 +#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER 0x04000000 +#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP 0x05000000 +#define NV10TCL_TX_FORMAT_WRAP_T_SHIFT 28 +#define NV10TCL_TX_FORMAT_WRAP_T_MASK 0xf0000000 +#define NV10TCL_TX_FORMAT_WRAP_T_REPEAT 0x10000000 +#define NV10TCL_TX_FORMAT_WRAP_T_MIRRORED_REPEAT 0x20000000 +#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE 0x30000000 +#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_BORDER 0x40000000 +#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP 0x50000000 +#define NV10TCL_TX_ENABLE(x) (0x00000228+((x)*4)) +#define NV10TCL_TX_ENABLE__SIZE 0x00000002 +#define NV10TCL_TX_ENABLE_ANISOTROPY_SHIFT 4 +#define NV10TCL_TX_ENABLE_ANISOTROPY_MASK 0x00000030 +#define NV10TCL_TX_ENABLE_ENABLE (1 << 30) +#define NV10TCL_TX_NPOT_PITCH(x) (0x00000230+((x)*4)) +#define NV10TCL_TX_NPOT_PITCH__SIZE 0x00000002 +#define NV10TCL_TX_NPOT_PITCH_PITCH_SHIFT 16 +#define NV10TCL_TX_NPOT_PITCH_PITCH_MASK 0xffff0000 +#define NV10TCL_TX_NPOT_SIZE(x) (0x00000240+((x)*4)) +#define NV10TCL_TX_NPOT_SIZE__SIZE 0x00000002 +#define NV10TCL_TX_NPOT_SIZE_H_SHIFT 0 +#define NV10TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff +#define NV10TCL_TX_NPOT_SIZE_W_SHIFT 16 +#define NV10TCL_TX_NPOT_SIZE_W_MASK 0xffff0000 +#define NV10TCL_TX_FILTER(x) (0x00000248+((x)*4)) +#define NV10TCL_TX_FILTER__SIZE 0x00000002 +#define NV10TCL_TX_FILTER_MINIFY_SHIFT 24 +#define NV10TCL_TX_FILTER_MINIFY_MASK 0x0f000000 +#define NV10TCL_TX_FILTER_MINIFY_NEAREST 0x01000000 +#define NV10TCL_TX_FILTER_MINIFY_LINEAR 0x02000000 +#define NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000 +#define NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000 +#define NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000 +#define NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000 +#define NV10TCL_TX_FILTER_MAGNIFY_SHIFT 28 +#define NV10TCL_TX_FILTER_MAGNIFY_MASK 0xf0000000 +#define NV10TCL_TX_FILTER_MAGNIFY_NEAREST 0x10000000 +#define NV10TCL_TX_FILTER_MAGNIFY_LINEAR 0x20000000 +#define NV10TCL_TX_PALETTE_OFFSET(x) (0x00000250+((x)*4)) +#define NV10TCL_TX_PALETTE_OFFSET__SIZE 0x00000002 +#define NV10TCL_RC_IN_ALPHA(x) (0x00000260+((x)*4)) +#define NV10TCL_RC_IN_ALPHA__SIZE 0x00000002 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f +#define NV10TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c +#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d +#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV10TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4) +#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000 +#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12) +#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000 +#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20) +#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000 +#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28) +#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000 +#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV10TCL_RC_IN_RGB(x) (0x00000268+((x)*4)) +#define NV10TCL_RC_IN_RGB__SIZE 0x00000002 +#define NV10TCL_RC_IN_RGB_D_INPUT_SHIFT 0 +#define NV10TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f +#define NV10TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV10TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV10TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003 +#define NV10TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV10TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV10TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV10TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c +#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d +#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV10TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4) +#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV10TCL_RC_IN_RGB_D_MAPPING_SHIFT 5 +#define NV10TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0 +#define NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV10TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV10TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV10TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV10TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV10TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV10TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV10TCL_RC_IN_RGB_C_INPUT_SHIFT 8 +#define NV10TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00 +#define NV10TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300 +#define NV10TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12) +#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_SHIFT 13 +#define NV10TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV10TCL_RC_IN_RGB_B_INPUT_SHIFT 16 +#define NV10TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000 +#define NV10TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV10TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV10TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000 +#define NV10TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV10TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV10TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV10TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000 +#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000 +#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV10TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20) +#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_SHIFT 21 +#define NV10TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV10TCL_RC_IN_RGB_A_INPUT_SHIFT 24 +#define NV10TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28) +#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_SHIFT 29 +#define NV10TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV10TCL_RC_COLOR(x) (0x00000270+((x)*4)) +#define NV10TCL_RC_COLOR__SIZE 0x00000002 +#define NV10TCL_RC_COLOR_B_SHIFT 0 +#define NV10TCL_RC_COLOR_B_MASK 0x000000ff +#define NV10TCL_RC_COLOR_G_SHIFT 8 +#define NV10TCL_RC_COLOR_G_MASK 0x0000ff00 +#define NV10TCL_RC_COLOR_R_SHIFT 16 +#define NV10TCL_RC_COLOR_R_MASK 0x00ff0000 +#define NV10TCL_RC_COLOR_A_SHIFT 24 +#define NV10TCL_RC_COLOR_A_MASK 0xff000000 +#define NV10TCL_RC_OUT_ALPHA(x) (0x00000278+((x)*4)) +#define NV10TCL_RC_OUT_ALPHA__SIZE 0x00000002 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12) +#define NV10TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13) +#define NV10TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14) +#define NV10TCL_RC_OUT_ALPHA_BIAS (1 << 15) +#define NV10TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000 +#define NV10TCL_RC_OUT_ALPHA_SCALE_SHIFT 17 +#define NV10TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000 +#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000 +#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000 +#define NV10TCL_RC_OUT_RGB(x) (0x00000280+((x)*4)) +#define NV10TCL_RC_OUT_RGB__SIZE 0x00000002 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 12) +#define NV10TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13) +#define NV10TCL_RC_OUT_RGB_MUX_SUM (1 << 14) +#define NV10TCL_RC_OUT_RGB_BIAS (1 << 15) +#define NV10TCL_RC_OUT_RGB_BIAS_NONE 0x00000000 +#define NV10TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000 +#define NV10TCL_RC_OUT_RGB_SCALE_SHIFT 17 +#define NV10TCL_RC_OUT_RGB_SCALE_MASK 0x00000000 +#define NV10TCL_RC_OUT_RGB_SCALE_NONE 0x00000000 +#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000 +#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000 +#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000 +#define NV10TCL_RC_OUT_RGB_OPERATION_SHIFT 27 +#define NV10TCL_RC_OUT_RGB_OPERATION_MASK 0x38000000 +#define NV10TCL_RC_FINAL0 0x00000288 +#define NV10TCL_RC_FINAL0_D_INPUT_SHIFT 0 +#define NV10TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f +#define NV10TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV10TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV10TCL_RC_FINAL0_D_INPUT_FOG 0x00000003 +#define NV10TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV10TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV10TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV10TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV10TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c +#define NV10TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d +#define NV10TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV10TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE (1 << 4) +#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV10TCL_RC_FINAL0_D_MAPPING_SHIFT 5 +#define NV10TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0 +#define NV10TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV10TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV10TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV10TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV10TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV10TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV10TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV10TCL_RC_FINAL0_C_INPUT_SHIFT 8 +#define NV10TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00 +#define NV10TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_FINAL0_C_INPUT_FOG 0x00000300 +#define NV10TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12) +#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV10TCL_RC_FINAL0_C_MAPPING_SHIFT 13 +#define NV10TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000 +#define NV10TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV10TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV10TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV10TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV10TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV10TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV10TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV10TCL_RC_FINAL0_B_INPUT_SHIFT 16 +#define NV10TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000 +#define NV10TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV10TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV10TCL_RC_FINAL0_B_INPUT_FOG 0x00030000 +#define NV10TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV10TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV10TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV10TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV10TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000 +#define NV10TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000 +#define NV10TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV10TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20) +#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV10TCL_RC_FINAL0_B_MAPPING_SHIFT 21 +#define NV10TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000 +#define NV10TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV10TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV10TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV10TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV10TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV10TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV10TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV10TCL_RC_FINAL0_A_INPUT_SHIFT 24 +#define NV10TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000 +#define NV10TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV10TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV10TCL_RC_FINAL0_A_INPUT_FOG 0x03000000 +#define NV10TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV10TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV10TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV10TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV10TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000 +#define NV10TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000 +#define NV10TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV10TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28) +#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_SHIFT 29 +#define NV10TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV10TCL_RC_FINAL1 0x0000028c +#define NV10TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7) +#define NV10TCL_RC_FINAL1_G_INPUT_SHIFT 8 +#define NV10TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00 +#define NV10TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_FINAL1_G_INPUT_FOG 0x00000300 +#define NV10TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12) +#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV10TCL_RC_FINAL1_G_MAPPING_SHIFT 13 +#define NV10TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000 +#define NV10TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV10TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV10TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV10TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV10TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV10TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV10TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV10TCL_RC_FINAL1_F_INPUT_SHIFT 16 +#define NV10TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000 +#define NV10TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV10TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV10TCL_RC_FINAL1_F_INPUT_FOG 0x00030000 +#define NV10TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV10TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV10TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000 +#define NV10TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000 +#define NV10TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000 +#define NV10TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000 +#define NV10TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV10TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20) +#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV10TCL_RC_FINAL1_F_MAPPING_SHIFT 21 +#define NV10TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000 +#define NV10TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV10TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV10TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV10TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV10TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV10TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV10TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV10TCL_RC_FINAL1_E_INPUT_SHIFT 24 +#define NV10TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000 +#define NV10TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV10TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV10TCL_RC_FINAL1_E_INPUT_FOG 0x03000000 +#define NV10TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV10TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV10TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000 +#define NV10TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000 +#define NV10TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000 +#define NV10TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000 +#define NV10TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV10TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28) +#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_SHIFT 29 +#define NV10TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV10TCL_LIGHT_MODEL 0x00000294 +#define NV10TCL_LIGHT_MODEL_COLOR_CONTROL (1 << 1) +#define NV10TCL_LIGHT_MODEL_LOCAL_VIEWER (1 << 16) +#define NV10TCL_COLOR_MATERIAL_ENABLE 0x00000298 +#define NV10TCL_COLOR_MATERIAL_ENABLE_SPECULAR (1 << 0) +#define NV10TCL_COLOR_MATERIAL_ENABLE_DIFFUSE (1 << 1) +#define NV10TCL_COLOR_MATERIAL_ENABLE_AMBIENT (1 << 2) +#define NV10TCL_COLOR_MATERIAL_ENABLE_EMISSION (1 << 3) +#define NV10TCL_FOG_MODE 0x0000029c +#define NV10TCL_FOG_MODE_EXP 0x00000800 +#define NV10TCL_FOG_MODE_EXP_2 0x00000802 +#define NV10TCL_FOG_MODE_EXP2 0x00000803 +#define NV10TCL_FOG_MODE_LINEAR 0x00000804 +#define NV10TCL_FOG_MODE_LINEAR_2 0x00002601 +#define NV10TCL_FOG_COORD_DIST 0x000002a0 +#define NV10TCL_FOG_ENABLE 0x000002a4 +#define NV10TCL_FOG_COLOR 0x000002a8 +#define NV10TCL_FOG_COLOR_R_SHIFT 0 +#define NV10TCL_FOG_COLOR_R_MASK 0x000000ff +#define NV10TCL_FOG_COLOR_G_SHIFT 8 +#define NV10TCL_FOG_COLOR_G_MASK 0x0000ff00 +#define NV10TCL_FOG_COLOR_B_SHIFT 16 +#define NV10TCL_FOG_COLOR_B_MASK 0x00ff0000 +#define NV10TCL_FOG_COLOR_A_SHIFT 24 +#define NV10TCL_FOG_COLOR_A_MASK 0xff000000 +#define NV10TCL_VIEWPORT_CLIP_MODE 0x000002b4 +#define NV10TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*4)) +#define NV10TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_SHIFT 0 +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_MASK 0x000007ff +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_LEFT_ENABLE (1 << 11) +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_SHIFT 16 +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_MASK 0x07ff0000 +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_RIGHT_ENABLE (1 << 27) +#define NV10TCL_VIEWPORT_CLIP_VERT(x) (0x000002e0+((x)*4)) +#define NV10TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_T_SHIFT 0 +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_T_MASK 0x000007ff +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_TOP_ENABLE (1 << 11) +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_B_SHIFT 16 +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_B_MASK 0x07ff0000 +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_BOTTOM_ENABLE (1 << 27) +#define NV10TCL_ALPHA_FUNC_ENABLE 0x00000300 +#define NV10TCL_BLEND_FUNC_ENABLE 0x00000304 +#define NV10TCL_CULL_FACE_ENABLE 0x00000308 +#define NV10TCL_DEPTH_TEST_ENABLE 0x0000030c +#define NV10TCL_DITHER_ENABLE 0x00000310 +#define NV10TCL_LIGHTING_ENABLE 0x00000314 +#define NV10TCL_POINT_PARAMETERS_ENABLE 0x00000318 +#define NV10TCL_POINT_SMOOTH_ENABLE 0x0000031c +#define NV10TCL_LINE_SMOOTH_ENABLE 0x00000320 +#define NV10TCL_POLYGON_SMOOTH_ENABLE 0x00000324 +#define NV10TCL_VERTEX_WEIGHT_ENABLE 0x00000328 +#define NV10TCL_STENCIL_ENABLE 0x0000032c +#define NV10TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000330 +#define NV10TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000334 +#define NV10TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000338 +#define NV10TCL_ALPHA_FUNC_FUNC 0x0000033c +#define NV10TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200 +#define NV10TCL_ALPHA_FUNC_FUNC_LESS 0x00000201 +#define NV10TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202 +#define NV10TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 +#define NV10TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV10TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV10TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV10TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 +#define NV10TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 +#define NV10TCL_ALPHA_FUNC_REF 0x00000340 +#define NV10TCL_BLEND_FUNC_SRC 0x00000344 +#define NV10TCL_BLEND_FUNC_SRC_ZERO 0x00000000 +#define NV10TCL_BLEND_FUNC_SRC_ONE 0x00000001 +#define NV10TCL_BLEND_FUNC_SRC_SRC_COLOR 0x00000300 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV10TCL_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV10TCL_BLEND_FUNC_SRC_DST_ALPHA 0x00000304 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV10TCL_BLEND_FUNC_SRC_DST_COLOR 0x00000306 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307 +#define NV10TCL_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308 +#define NV10TCL_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV10TCL_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV10TCL_BLEND_FUNC_DST 0x00000348 +#define NV10TCL_BLEND_FUNC_DST_ZERO 0x00000000 +#define NV10TCL_BLEND_FUNC_DST_ONE 0x00000001 +#define NV10TCL_BLEND_FUNC_DST_SRC_COLOR 0x00000300 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV10TCL_BLEND_FUNC_DST_SRC_ALPHA 0x00000302 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV10TCL_BLEND_FUNC_DST_DST_ALPHA 0x00000304 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV10TCL_BLEND_FUNC_DST_DST_COLOR 0x00000306 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307 +#define NV10TCL_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308 +#define NV10TCL_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV10TCL_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV10TCL_BLEND_COLOR 0x0000034c +#define NV10TCL_BLEND_COLOR_B_SHIFT 0 +#define NV10TCL_BLEND_COLOR_B_MASK 0x000000ff +#define NV10TCL_BLEND_COLOR_G_SHIFT 8 +#define NV10TCL_BLEND_COLOR_G_MASK 0x0000ff00 +#define NV10TCL_BLEND_COLOR_R_SHIFT 16 +#define NV10TCL_BLEND_COLOR_R_MASK 0x00ff0000 +#define NV10TCL_BLEND_COLOR_A_SHIFT 24 +#define NV10TCL_BLEND_COLOR_A_MASK 0xff000000 +#define NV10TCL_BLEND_EQUATION 0x00000350 +#define NV10TCL_BLEND_EQUATION_FUNC_ADD 0x00008006 +#define NV10TCL_BLEND_EQUATION_MIN 0x00008007 +#define NV10TCL_BLEND_EQUATION_MAX 0x00008008 +#define NV10TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a +#define NV10TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV10TCL_DEPTH_FUNC 0x00000354 +#define NV10TCL_DEPTH_FUNC_NEVER 0x00000200 +#define NV10TCL_DEPTH_FUNC_LESS 0x00000201 +#define NV10TCL_DEPTH_FUNC_EQUAL 0x00000202 +#define NV10TCL_DEPTH_FUNC_LEQUAL 0x00000203 +#define NV10TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV10TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV10TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 +#define NV10TCL_DEPTH_FUNC_GEQUAL 0x00000206 +#define NV10TCL_DEPTH_FUNC_ALWAYS 0x00000207 +#define NV10TCL_COLOR_MASK 0x00000358 +#define NV10TCL_COLOR_MASK_B (1 << 0) +#define NV10TCL_COLOR_MASK_G (1 << 8) +#define NV10TCL_COLOR_MASK_R (1 << 16) +#define NV10TCL_COLOR_MASK_A (1 << 24) +#define NV10TCL_DEPTH_WRITE_ENABLE 0x0000035c +#define NV10TCL_STENCIL_MASK 0x00000360 +#define NV10TCL_STENCIL_FUNC_FUNC 0x00000364 +#define NV10TCL_STENCIL_FUNC_FUNC_NEVER 0x00000200 +#define NV10TCL_STENCIL_FUNC_FUNC_LESS 0x00000201 +#define NV10TCL_STENCIL_FUNC_FUNC_EQUAL 0x00000202 +#define NV10TCL_STENCIL_FUNC_FUNC_LEQUAL 0x00000203 +#define NV10TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 +#define NV10TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 +#define NV10TCL_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV10TCL_STENCIL_FUNC_FUNC_GEQUAL 0x00000206 +#define NV10TCL_STENCIL_FUNC_FUNC_ALWAYS 0x00000207 +#define NV10TCL_STENCIL_FUNC_REF 0x00000368 +#define NV10TCL_STENCIL_FUNC_MASK 0x0000036c +#define NV10TCL_STENCIL_OP_FAIL 0x00000370 +#define NV10TCL_STENCIL_OP_FAIL_ZERO 0x00000000 +#define NV10TCL_STENCIL_OP_FAIL_INVERT 0x0000150a +#define NV10TCL_STENCIL_OP_FAIL_KEEP 0x00001e00 +#define NV10TCL_STENCIL_OP_FAIL_REPLACE 0x00001e01 +#define NV10TCL_STENCIL_OP_FAIL_INCR 0x00001e02 +#define NV10TCL_STENCIL_OP_FAIL_DECR 0x00001e03 +#define NV10TCL_STENCIL_OP_FAIL_INCR_WRAP 0x00008507 +#define NV10TCL_STENCIL_OP_FAIL_DECR_WRAP 0x00008508 +#define NV10TCL_STENCIL_OP_ZFAIL 0x00000374 +#define NV10TCL_STENCIL_OP_ZFAIL_ZERO 0x00000000 +#define NV10TCL_STENCIL_OP_ZFAIL_INVERT 0x0000150a +#define NV10TCL_STENCIL_OP_ZFAIL_KEEP 0x00001e00 +#define NV10TCL_STENCIL_OP_ZFAIL_REPLACE 0x00001e01 +#define NV10TCL_STENCIL_OP_ZFAIL_INCR 0x00001e02 +#define NV10TCL_STENCIL_OP_ZFAIL_DECR 0x00001e03 +#define NV10TCL_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV10TCL_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV10TCL_STENCIL_OP_ZPASS 0x00000378 +#define NV10TCL_STENCIL_OP_ZPASS_ZERO 0x00000000 +#define NV10TCL_STENCIL_OP_ZPASS_INVERT 0x0000150a +#define NV10TCL_STENCIL_OP_ZPASS_KEEP 0x00001e00 +#define NV10TCL_STENCIL_OP_ZPASS_REPLACE 0x00001e01 +#define NV10TCL_STENCIL_OP_ZPASS_INCR 0x00001e02 +#define NV10TCL_STENCIL_OP_ZPASS_DECR 0x00001e03 +#define NV10TCL_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV10TCL_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV10TCL_SHADE_MODEL 0x0000037c +#define NV10TCL_SHADE_MODEL_FLAT 0x00001d00 +#define NV10TCL_SHADE_MODEL_SMOOTH 0x00001d01 +#define NV10TCL_LINE_WIDTH 0x00000380 +#define NV10TCL_POLYGON_OFFSET_FACTOR 0x00000384 +#define NV10TCL_POLYGON_OFFSET_UNITS 0x00000388 +#define NV10TCL_POLYGON_MODE_FRONT 0x0000038c +#define NV10TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV10TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV10TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 +#define NV10TCL_POLYGON_MODE_BACK 0x00000390 +#define NV10TCL_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV10TCL_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV10TCL_POLYGON_MODE_BACK_FILL 0x00001b02 +#define NV10TCL_DEPTH_RANGE_NEAR 0x00000394 +#define NV10TCL_DEPTH_RANGE_FAR 0x00000398 +#define NV10TCL_CULL_FACE 0x0000039c +#define NV10TCL_CULL_FACE_FRONT 0x00000404 +#define NV10TCL_CULL_FACE_BACK 0x00000405 +#define NV10TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NV10TCL_FRONT_FACE 0x000003a0 +#define NV10TCL_FRONT_FACE_CW 0x00000900 +#define NV10TCL_FRONT_FACE_CCW 0x00000901 +#define NV10TCL_NORMALIZE_ENABLE 0x000003a4 +#define NV10TCL_COLOR_MATERIAL_R 0x000003a8 +#define NV10TCL_COLOR_MATERIAL_G 0x000003ac +#define NV10TCL_COLOR_MATERIAL_B 0x000003b0 +#define NV10TCL_COLOR_MATERIAL_A 0x000003b4 +#define NV10TCL_COLOR_CONTROL 0x000003b8 +#define NV10TCL_ENABLED_LIGHTS 0x000003bc +#define NV10TCL_ENABLED_LIGHTS_LIGHT0 (1 << 0) +#define NV10TCL_ENABLED_LIGHTS_LIGHT1 (1 << 2) +#define NV10TCL_ENABLED_LIGHTS_LIGHT2 (1 << 4) +#define NV10TCL_ENABLED_LIGHTS_LIGHT3 (1 << 6) +#define NV10TCL_ENABLED_LIGHTS_LIGHT4 (1 << 8) +#define NV10TCL_ENABLED_LIGHTS_LIGHT5 (1 << 10) +#define NV10TCL_ENABLED_LIGHTS_LIGHT6 (1 << 12) +#define NV10TCL_ENABLED_LIGHTS_LIGHT7 (1 << 14) +#define NV10TCL_CLIP_PLANE_ENABLE(x) (0x000003c0+((x)*4)) +#define NV10TCL_CLIP_PLANE_ENABLE__SIZE 0x00000008 +#define NV10TCL_CLIP_PLANE_ENABLE_FALSE 0x00000000 +#define NV10TCL_CLIP_PLANE_ENABLE_EYE_LINEAR 0x00002400 +#define NV10TCL_CLIP_PLANE_ENABLE_OBJECT_LINEAR 0x00002401 +#define NV10TCL_TX_MATRIX_ENABLE(x) (0x000003e0+((x)*4)) +#define NV10TCL_TX_MATRIX_ENABLE__SIZE 0x00000002 +#define NV10TCL_VIEW_MATRIX_ENABLE 0x000003e8 +#define NV10TCL_VIEW_MATRIX_ENABLE_MODELVIEW1 (1 << 0) +#define NV10TCL_VIEW_MATRIX_ENABLE_MODELVIEW0 (1 << 1) +#define NV10TCL_VIEW_MATRIX_ENABLE_PROJECTION (1 << 2) +#define NV10TCL_POINT_SIZE 0x000003ec +#define NV10TCL_MODELVIEW0_MATRIX(x) (0x00000400+((x)*4)) +#define NV10TCL_MODELVIEW0_MATRIX__SIZE 0x00000010 +#define NV10TCL_MODELVIEW1_MATRIX(x) (0x00000440+((x)*4)) +#define NV10TCL_MODELVIEW1_MATRIX__SIZE 0x00000010 +#define NV10TCL_INVERSE_MODELVIEW0_MATRIX(x) (0x00000480+((x)*4)) +#define NV10TCL_INVERSE_MODELVIEW0_MATRIX__SIZE 0x00000010 +#define NV10TCL_INVERSE_MODELVIEW1_MATRIX(x) (0x000004c0+((x)*4)) +#define NV10TCL_INVERSE_MODELVIEW1_MATRIX__SIZE 0x00000010 +#define NV10TCL_PROJECTION_MATRIX(x) (0x00000500+((x)*4)) +#define NV10TCL_PROJECTION_MATRIX__SIZE 0x00000010 +#define NV10TCL_TX0_MATRIX(x) (0x00000540+((x)*4)) +#define NV10TCL_TX0_MATRIX__SIZE 0x00000010 +#define NV10TCL_TX1_MATRIX(x) (0x00000580+((x)*4)) +#define NV10TCL_TX1_MATRIX__SIZE 0x00000010 +#define NV10TCL_CLIP_PLANE_A(x) (0x00000600+((x)*16)) +#define NV10TCL_CLIP_PLANE_A__SIZE 0x00000008 +#define NV10TCL_CLIP_PLANE_B(x) (0x00000604+((x)*16)) +#define NV10TCL_CLIP_PLANE_B__SIZE 0x00000008 +#define NV10TCL_CLIP_PLANE_C(x) (0x00000608+((x)*16)) +#define NV10TCL_CLIP_PLANE_C__SIZE 0x00000008 +#define NV10TCL_CLIP_PLANE_D(x) (0x0000060c+((x)*16)) +#define NV10TCL_CLIP_PLANE_D__SIZE 0x00000008 +#define NV10TCL_FOG_EQUATION_CONSTANT 0x00000680 +#define NV10TCL_FOG_EQUATION_LINEAR 0x00000684 +#define NV10TCL_FOG_EQUATION_QUADRATIC 0x00000688 +#define NV10TCL_FRONT_MATERIAL_SHININESS(x) (0x000006a0+((x)*4)) +#define NV10TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006 +#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000006c4 +#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000006c8 +#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000006cc +#define NV10TCL_VIEWPORT_SCALE_X 0x000006e8 +#define NV10TCL_VIEWPORT_SCALE_Y 0x000006ec +#define NV10TCL_VIEWPORT_SCALE_Z 0x000006f0 +#define NV10TCL_VIEWPORT_SCALE_W 0x000006f4 +#define NV10TCL_POINT_PARAMETER(x) (0x000006f8+((x)*4)) +#define NV10TCL_POINT_PARAMETER__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00000800+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00000804+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00000808+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000080c+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00000810+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00000814+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00000818+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000081c+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00000820+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008 +#define NV10TCL_LIGHT_HALF_VECTOR_X(x) (0x00000828+((x)*128)) +#define NV10TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008 +#define NV10TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000082c+((x)*128)) +#define NV10TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008 +#define NV10TCL_LIGHT_HALF_VECTOR_Z(x) (0x00000830+((x)*128)) +#define NV10TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008 +#define NV10TCL_LIGHT_DIRECTION_X(x) (0x00000834+((x)*128)) +#define NV10TCL_LIGHT_DIRECTION_X__SIZE 0x00000008 +#define NV10TCL_LIGHT_DIRECTION_Y(x) (0x00000838+((x)*128)) +#define NV10TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008 +#define NV10TCL_LIGHT_DIRECTION_Z(x) (0x0000083c+((x)*128)) +#define NV10TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_CUTOFF_A(x) (0x00000840+((x)*128)) +#define NV10TCL_LIGHT_SPOT_CUTOFF_A__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_CUTOFF_B(x) (0x00000844+((x)*128)) +#define NV10TCL_LIGHT_SPOT_CUTOFF_B__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_CUTOFF_C(x) (0x00000848+((x)*128)) +#define NV10TCL_LIGHT_SPOT_CUTOFF_C__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_DIR_X(x) (0x0000084c+((x)*128)) +#define NV10TCL_LIGHT_SPOT_DIR_X__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_DIR_Y(x) (0x00000850+((x)*128)) +#define NV10TCL_LIGHT_SPOT_DIR_Y__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_DIR_Z(x) (0x00000854+((x)*128)) +#define NV10TCL_LIGHT_SPOT_DIR_Z__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_CUTOFF_D(x) (0x00000858+((x)*128)) +#define NV10TCL_LIGHT_SPOT_CUTOFF_D__SIZE 0x00000008 +#define NV10TCL_LIGHT_POSITION_X(x) (0x0000085c+((x)*128)) +#define NV10TCL_LIGHT_POSITION_X__SIZE 0x00000008 +#define NV10TCL_LIGHT_POSITION_Y(x) (0x00000860+((x)*128)) +#define NV10TCL_LIGHT_POSITION_Y__SIZE 0x00000008 +#define NV10TCL_LIGHT_POSITION_Z(x) (0x00000864+((x)*128)) +#define NV10TCL_LIGHT_POSITION_Z__SIZE 0x00000008 +#define NV10TCL_LIGHT_ATTENUATION_CONSTANT(x) (0x00000868+((x)*128)) +#define NV10TCL_LIGHT_ATTENUATION_CONSTANT__SIZE 0x00000008 +#define NV10TCL_LIGHT_ATTENUATION_LINEAR(x) (0x0000086c+((x)*128)) +#define NV10TCL_LIGHT_ATTENUATION_LINEAR__SIZE 0x00000008 +#define NV10TCL_LIGHT_ATTENUATION_QUADRATIC(x) (0x00000870+((x)*128)) +#define NV10TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008 +#define NV10TCL_VERTEX_POS_3F_X 0x00000c00 +#define NV10TCL_VERTEX_POS_3F_Y 0x00000c04 +#define NV10TCL_VERTEX_POS_3F_Z 0x00000c08 +#define NV10TCL_VERTEX_POS_4F_X 0x00000c18 +#define NV10TCL_VERTEX_POS_4F_Y 0x00000c1c +#define NV10TCL_VERTEX_POS_4F_Z 0x00000c20 +#define NV10TCL_VERTEX_POS_4F_W 0x00000c24 +#define NV10TCL_VERTEX_NOR_3F_X 0x00000c30 +#define NV10TCL_VERTEX_NOR_3F_Y 0x00000c34 +#define NV10TCL_VERTEX_NOR_3F_Z 0x00000c38 +#define NV10TCL_VERTEX_NOR_3I_XY 0x00000c40 +#define NV10TCL_VERTEX_NOR_3I_XY_X_SHIFT 0 +#define NV10TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff +#define NV10TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16 +#define NV10TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000 +#define NV10TCL_VERTEX_NOR_3I_Z 0x00000c44 +#define NV10TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0 +#define NV10TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff +#define NV10TCL_VERTEX_COL_4F_R 0x00000c50 +#define NV10TCL_VERTEX_COL_4F_G 0x00000c54 +#define NV10TCL_VERTEX_COL_4F_B 0x00000c58 +#define NV10TCL_VERTEX_COL_4F_A 0x00000c5c +#define NV10TCL_VERTEX_COL_3F_R 0x00000c60 +#define NV10TCL_VERTEX_COL_3F_G 0x00000c64 +#define NV10TCL_VERTEX_COL_3F_B 0x00000c68 +#define NV10TCL_VERTEX_COL_4I 0x00000c6c +#define NV10TCL_VERTEX_COL_4I_R_SHIFT 0 +#define NV10TCL_VERTEX_COL_4I_R_MASK 0x000000ff +#define NV10TCL_VERTEX_COL_4I_G_SHIFT 8 +#define NV10TCL_VERTEX_COL_4I_G_MASK 0x0000ff00 +#define NV10TCL_VERTEX_COL_4I_B_SHIFT 16 +#define NV10TCL_VERTEX_COL_4I_B_MASK 0x00ff0000 +#define NV10TCL_VERTEX_COL_4I_A_SHIFT 24 +#define NV10TCL_VERTEX_COL_4I_A_MASK 0xff000000 +#define NV10TCL_VERTEX_COL2_3F_R 0x00000c80 +#define NV10TCL_VERTEX_COL2_3F_G 0x00000c84 +#define NV10TCL_VERTEX_COL2_3F_B 0x00000c88 +#define NV10TCL_VERTEX_COL2_3I 0x00000c8c +#define NV10TCL_VERTEX_COL2_3I_R_SHIFT 0 +#define NV10TCL_VERTEX_COL2_3I_R_MASK 0x000000ff +#define NV10TCL_VERTEX_COL2_3I_G_SHIFT 8 +#define NV10TCL_VERTEX_COL2_3I_G_MASK 0x0000ff00 +#define NV10TCL_VERTEX_COL2_3I_B_SHIFT 16 +#define NV10TCL_VERTEX_COL2_3I_B_MASK 0x00ff0000 +#define NV10TCL_VERTEX_TX0_2F_S 0x00000c90 +#define NV10TCL_VERTEX_TX0_2F_T 0x00000c94 +#define NV10TCL_VERTEX_TX0_2I 0x00000c98 +#define NV10TCL_VERTEX_TX0_2I_S_SHIFT 0 +#define NV10TCL_VERTEX_TX0_2I_S_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX0_2I_T_SHIFT 16 +#define NV10TCL_VERTEX_TX0_2I_T_MASK 0xffff0000 +#define NV10TCL_VERTEX_TX0_4F_S 0x00000ca0 +#define NV10TCL_VERTEX_TX0_4F_T 0x00000ca4 +#define NV10TCL_VERTEX_TX0_4F_R 0x00000ca8 +#define NV10TCL_VERTEX_TX0_4F_Q 0x00000cac +#define NV10TCL_VERTEX_TX0_4I_ST 0x00000cb0 +#define NV10TCL_VERTEX_TX0_4I_ST_S_SHIFT 0 +#define NV10TCL_VERTEX_TX0_4I_ST_S_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX0_4I_ST_T_SHIFT 16 +#define NV10TCL_VERTEX_TX0_4I_ST_T_MASK 0xffff0000 +#define NV10TCL_VERTEX_TX0_4I_RQ 0x00000cb4 +#define NV10TCL_VERTEX_TX0_4I_RQ_R_SHIFT 0 +#define NV10TCL_VERTEX_TX0_4I_RQ_R_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX0_4I_RQ_Q_SHIFT 16 +#define NV10TCL_VERTEX_TX0_4I_RQ_Q_MASK 0xffff0000 +#define NV10TCL_VERTEX_TX1_2F_S 0x00000cb8 +#define NV10TCL_VERTEX_TX1_2F_T 0x00000cbc +#define NV10TCL_VERTEX_TX1_2I 0x00000cc0 +#define NV10TCL_VERTEX_TX1_2I_S_SHIFT 0 +#define NV10TCL_VERTEX_TX1_2I_S_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX1_2I_T_SHIFT 16 +#define NV10TCL_VERTEX_TX1_2I_T_MASK 0xffff0000 +#define NV10TCL_VERTEX_TX1_4F_S 0x00000cc8 +#define NV10TCL_VERTEX_TX1_4F_T 0x00000ccc +#define NV10TCL_VERTEX_TX1_4F_R 0x00000cd0 +#define NV10TCL_VERTEX_TX1_4F_Q 0x00000cd4 +#define NV10TCL_VERTEX_TX1_4I_ST 0x00000cd8 +#define NV10TCL_VERTEX_TX1_4I_ST_S_SHIFT 0 +#define NV10TCL_VERTEX_TX1_4I_ST_S_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX1_4I_ST_T_SHIFT 16 +#define NV10TCL_VERTEX_TX1_4I_ST_T_MASK 0xffff0000 +#define NV10TCL_VERTEX_TX1_4I_RQ 0x00000cdc +#define NV10TCL_VERTEX_TX1_4I_RQ_R_SHIFT 0 +#define NV10TCL_VERTEX_TX1_4I_RQ_R_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX1_4I_RQ_Q_SHIFT 16 +#define NV10TCL_VERTEX_TX1_4I_RQ_Q_MASK 0xffff0000 +#define NV10TCL_VERTEX_FOG_1F 0x00000ce0 +#define NV10TCL_VERTEX_WGH_1F 0x00000ce4 +#define NV10TCL_EDGEFLAG_ENABLE 0x00000cec +#define NV10TCL_VERTEX_ARRAY_VALIDATE 0x00000cf0 +#define NV10TCL_VERTEX_ARRAY_OFFSET_POS 0x00000d00 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS 0x00000d04 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_COL 0x00000d08 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL 0x00000d0c +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_COL2 0x00000d10 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2 0x00000d14 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_TX0 0x00000d18 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0 0x00000d1c +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_TX1 0x00000d20 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1 0x00000d24 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_NOR 0x00000d28 +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR 0x00000d2c +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_WGH 0x00000d30 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH 0x00000d34 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_FOG 0x00000d38 +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG 0x00000d3c +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_BEGIN_END 0x00000dfc +#define NV10TCL_VERTEX_BEGIN_END_STOP 0x00000000 +#define NV10TCL_VERTEX_BEGIN_END_POINTS 0x00000001 +#define NV10TCL_VERTEX_BEGIN_END_LINES 0x00000002 +#define NV10TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003 +#define NV10TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004 +#define NV10TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005 +#define NV10TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV10TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV10TCL_VERTEX_BEGIN_END_QUADS 0x00000008 +#define NV10TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV10TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a +#define NV10TCL_DRAW_INDEX 0x00000e00 +#define NV10TCL_DRAW_INDEX_I0_SHIFT 0 +#define NV10TCL_DRAW_INDEX_I0_MASK 0x0000ffff +#define NV10TCL_DRAW_INDEX_I1_SHIFT 24 +#define NV10TCL_DRAW_INDEX_I1_MASK 0xff000000 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END 0x000013fc +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_STOP 0x00000000 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_POINTS 0x00000001 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINES 0x00000002 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINE_LOOP 0x00000003 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINE_STRIP 0x00000004 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLES 0x00000005 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_QUADS 0x00000008 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_POLYGON 0x0000000a +#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS 0x00001400 +#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_FIRST_SHIFT 0 +#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_FIRST_MASK 0x0000ffff +#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_LAST_SHIFT 24 +#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_LAST_MASK 0xff000000 +#define NV10TCL_VERTEX_ARRAY_DATA 0x00001800 + + +#define NV04_CONTEXT_COLOR_KEY 0x00000057 + + + +#define NV03_CONTEXT_SURFACES_2D 0x00000058 + +#define NV03_CONTEXT_SURFACES_2D_SYNCHRONIZE 0x00000100 +#define NV03_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180 +#define NV03_CONTEXT_SURFACES_2D_DMA_SOURCE 0x00000184 +#define NV03_CONTEXT_SURFACES_2D_DMA_DESTIN 0x00000188 +#define NV03_CONTEXT_SURFACES_2D_COLOR_FORMAT 0x00000300 +#define NV03_CONTEXT_SURFACES_2D_PITCH 0x00000304 +#define NV03_CONTEXT_SURFACES_2D_PITCH_SOURCE_SHIFT 0 +#define NV03_CONTEXT_SURFACES_2D_PITCH_SOURCE_MASK 0x0000ffff +#define NV03_CONTEXT_SURFACES_2D_PITCH_DESTIN_SHIFT 16 +#define NV03_CONTEXT_SURFACES_2D_PITCH_DESTIN_MASK 0xffff0000 +#define NV03_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308 +#define NV03_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c + + +#define NV03_CONTEXT_SURFACES_3D 0x0000005a + +#define NV03_CONTEXT_SURFACES_3D_SYNCHRONIZE 0x00000100 +#define NV03_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180 +#define NV03_CONTEXT_SURFACES_3D_DMA_SURFACE 0x00000184 +#define NV03_CONTEXT_SURFACES_3D_PITCH 0x00000300 +#define NV03_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x00000304 +#define NV03_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000308 + + +#define NV04_RENDER_SOLID_LINE 0x0000005c + +#define NV04_RENDER_SOLID_LINE_SURFACE 0x00000198 + + +#define NV04_RENDER_SOLID_TRIANGLE 0x0000005d + + + +#define NV04_RENDER_SOLID_RECTANGLE 0x0000005e + +#define NV04_RENDER_SOLID_RECTANGLE_SURFACE 0x00000198 + + +#define NV04_IMAGE_BLIT 0x0000005f + +#define NV04_IMAGE_BLIT_NOP 0x00000100 +#define NV04_IMAGE_BLIT_NOTIFY 0x00000104 +#define NV04_IMAGE_BLIT_DMA_NOTIFY 0x00000180 +#define NV04_IMAGE_BLIT_COLOR_KEY 0x00000184 +#define NV04_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188 +#define NV04_IMAGE_BLIT_PATTERN 0x0000018c +#define NV04_IMAGE_BLIT_ROP 0x00000190 +#define NV04_IMAGE_BLIT_BETA4 0x00000198 +#define NV04_IMAGE_BLIT_SURFACE 0x0000019c +#define NV04_IMAGE_BLIT_OPERATION 0x000002fc +#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY_AND 0x00000000 +#define NV04_IMAGE_BLIT_OPERATION_ROP_AND 0x00000001 +#define NV04_IMAGE_BLIT_OPERATION_BLEND_AND 0x00000002 +#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY 0x00000003 +#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV04_IMAGE_BLIT_OPERATION_BLEND_PREMULT 0x00000005 + + +#define NV04_INDEXED_IMAGE_FROM_CPU 0x00000060 + +#define NV04_INDEXED_IMAGE_FROM_CPU_NOP 0x00000100 +#define NV04_INDEXED_IMAGE_FROM_CPU_NOTIFY 0x00000104 +#define NV04_INDEXED_IMAGE_FROM_CPU_PATCH 0x0000010c +#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 +#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_LUT 0x00000184 +#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR_FORMAT 0x000003e8 +#define NV04_INDEXED_IMAGE_FROM_CPU_INDEX_FORMAT 0x000003ec +#define NV04_INDEXED_IMAGE_FROM_CPU_LUT_OFFSET 0x000003f0 +#define NV04_INDEXED_IMAGE_FROM_CPU_POINT 0x000003f4 +#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_OUT 0x000003f8 +#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_IN 0x000003fc +#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR 0x00000400 + + +#define NV04_IMAGE_FROM_CPU 0x00000061 + +#define NV04_IMAGE_FROM_CPU_BETA4 0x00000198 +#define NV04_IMAGE_FROM_CPU_SURFACE 0x0000019c + + +#define NV10_CONTEXT_SURFACES_2D 0x00000062 + + + +#define NV05_SCALED_IMAGE_FROM_MEMORY 0x00000063 + +#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc +#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000 +#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001 +#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002 + + +#define NV01_IMAGE_SRCCOPY_AND 0x00000064 + +#define NV01_IMAGE_SRCCOPY_AND_NOTIFY 0x00000104 +#define NV01_IMAGE_SRCCOPY_AND_DMA_NOTIFY 0x00000180 +#define NV01_IMAGE_SRCCOPY_AND_IMAGE_OUTPUT 0x00000200 +#define NV01_IMAGE_SRCCOPY_AND_IMAGE_INPUT 0x00000204 + + +#define NV05_INDEXED_IMAGE_FROM_CPU 0x00000064 + +#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_KEY 0x00000188 +#define NV05_INDEXED_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x0000018c +#define NV05_INDEXED_IMAGE_FROM_CPU_PATTERN 0x00000190 +#define NV05_INDEXED_IMAGE_FROM_CPU_ROP 0x00000194 +#define NV05_INDEXED_IMAGE_FROM_CPU_BETA1 0x00000198 +#define NV05_INDEXED_IMAGE_FROM_CPU_BETA4 0x0000019c +#define NV05_INDEXED_IMAGE_FROM_CPU_SURFACE 0x000001a0 +#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000003e0 +#define NV05_INDEXED_IMAGE_FROM_CPU_OPERATION 0x000003e4 +#define NV05_INDEXED_IMAGE_FROM_CPU_INDICES 0x00000400 + + +#define NV05_IMAGE_FROM_CPU 0x00000065 + +#define NV05_IMAGE_FROM_CPU_BETA4 0x00000198 +#define NV05_IMAGE_FROM_CPU_SURFACE 0x0000019c + + +#define NV05_STRETCHED_IMAGE_FROM_CPU 0x00000066 + +#define NV05_STRETCHED_IMAGE_FROM_CPU_BETA4 0x00000194 +#define NV05_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000198 +#define NV05_STRETCHED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8 + + +#define NV04_IMAGE_BLEND_PREMULT 0x00000067 + +#define NV04_IMAGE_BLEND_PREMULT_NOP 0x00000100 +#define NV04_IMAGE_BLEND_PREMULT_NOTIFY 0x00000104 +#define NV04_IMAGE_BLEND_PREMULT_DMA_NOTIFY 0x00000180 +#define NV04_IMAGE_BLEND_PREMULT_IMAGE_OUTPUT 0x00000200 +#define NV04_IMAGE_BLEND_PREMULT_BETA_INPUT 0x00000204 +#define NV04_IMAGE_BLEND_PREMULT_IMAGE_INPUT 0x00000208 + + +#define NV03_CHANNEL_PIO 0x0000006a + + + +#define NV03_CHANNEL_DMA 0x0000006b + + + +#define NV04_BETA_SOLID 0x00000072 + +#define NV04_BETA_SOLID_NOP 0x00000100 +#define NV04_BETA_SOLID_NOTIFY 0x00000104 +#define NV04_BETA_SOLID_DMA_NOTIFY 0x00000180 +#define NV04_BETA_SOLID_BETA_OUTPUT 0x00000200 +#define NV04_BETA_SOLID_BETA_FACTOR 0x00000300 + + +#define NV04_STRETCHED_IMAGE_FROM_CPU 0x00000076 + + + +#define NV04_SCALED_IMAGE_FROM_MEMORY 0x00000077 + +#define NV04_SCALED_IMAGE_FROM_MEMORY_NOP 0x00000100 +#define NV04_SCALED_IMAGE_FROM_MEMORY_NOTIFY 0x00000104 +#define NV04_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180 +#define NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184 +#define NV04_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188 +#define NV04_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c +#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190 +#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA4 0x00000194 +#define NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000198 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_MASK 0xffff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_MASK 0xffff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT 0x00000310 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_MASK 0xffff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE 0x00000314 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_MASK 0xffff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_DU_DX 0x00000318 +#define NV04_SCALED_IMAGE_FROM_MEMORY_DV_DY 0x0000031c +#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE 0x00000400 +#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_W_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_W_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_MASK 0xffff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT 0x00000404 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_MASK 0x00ff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER 0x00010000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CORNER 0x00020000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_SHIFT 24 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_MASK 0xff000000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE 0x00000000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_BILINEAR 0x01000000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_ADDRESS 0x00000408 +#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT 0x0000040c +#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_X_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_X_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_Y_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_Y_MASK 0xffff0000 + + +#define NV10_TEXTURE_FROM_CPU 0x0000007b + +#define NV10_TEXTURE_FROM_CPU_NOP 0x00000100 +#define NV10_TEXTURE_FROM_CPU_NOTIFY 0x00000104 +#define NV10_TEXTURE_FROM_CPU_WAIT_FOR_IDLE 0x00000108 +#define NV10_TEXTURE_FROM_CPU_PM_TRIGGER 0x00000140 +#define NV10_TEXTURE_FROM_CPU_DMA_NOTIFY 0x00000180 +#define NV10_TEXTURE_FROM_CPU_SURFACE 0x00000184 +#define NV10_TEXTURE_FROM_CPU_COLOR_FORMAT 0x00000300 +#define NV10_TEXTURE_FROM_CPU_POINT 0x00000304 +#define NV10_TEXTURE_FROM_CPU_POINT_X_SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_POINT_X_MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_POINT_Y_SHIFT 16 +#define NV10_TEXTURE_FROM_CPU_POINT_Y_MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_SIZE 0x00000308 +#define NV10_TEXTURE_FROM_CPU_SIZE_W_SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_SIZE_W_MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_SIZE_H_SHIFT 16 +#define NV10_TEXTURE_FROM_CPU_SIZE_H_MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL 0x0000030c +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X_SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X_MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W_SHIFT 16 +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W_MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL 0x00000310 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y_SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y_MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H_SHIFT 16 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H_MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_COLOR(x) (0x00000400+((x)*4)) +#define NV10_TEXTURE_FROM_CPU_COLOR__SIZE 0x00000700 + + +#define NV10_VIDEO_DISPLAY 0x0000007c + + + +#define NV10_DVD_SUBPICTURE 0x00000088 + + + +#define NV10_SCALED_IMAGE_FROM_MEMORY 0x00000089 + +#define NV10_SCALED_IMAGE_FROM_MEMORY_WAIT_FOR_IDLE 0x00000108 + + +#define NV10_IMAGE_FROM_CPU 0x0000008a + +#define NV10_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8 + + +#define NV10_CONTEXT_SURFACES_3D 0x00000093 + + + +#define NV10_DX5_TEXTURE_TRIANGLE 0x00000094 + + + +#define NV10_DX6_MULTI_TEXTURE_TRIANGLE 0x00000095 + + + +#define NV11TCL 0x00000096 + +#define NV11TCL_COLOR_LOGIC_OP_ENABLE 0x00000d40 +#define NV11TCL_COLOR_LOGIC_OP_OP 0x00000d44 +#define NV11TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500 +#define NV11TCL_COLOR_LOGIC_OP_OP_AND 0x00001501 +#define NV11TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502 +#define NV11TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503 +#define NV11TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504 +#define NV11TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505 +#define NV11TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506 +#define NV11TCL_COLOR_LOGIC_OP_OP_OR 0x00001507 +#define NV11TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508 +#define NV11TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509 +#define NV11TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a +#define NV11TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b +#define NV11TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c +#define NV11TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d +#define NV11TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e +#define NV11TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f + + +#define NV20TCL 0x00000097 + +#define NV20TCL_NOP 0x00000100 +#define NV20TCL_NOTIFY 0x00000104 +#define NV20TCL_DMA_NOTIFY 0x00000180 +#define NV20TCL_DMA_IN_MEMORY0 0x00000184 +#define NV20TCL_DMA_IN_MEMORY1 0x00000188 +#define NV20TCL_DMA_IN_MEMORY2 0x00000194 +#define NV20TCL_DMA_IN_MEMORY3 0x00000198 +#define NV20TCL_DMA_IN_MEMORY6 0x000001a4 +#define NV20TCL_DMA_IN_MEMORY7 0x000001a8 +#define NV20TCL_VIEWPORT_HORIZ 0x00000200 +#define NV20TCL_VIEWPORT_VERT 0x00000204 +#define NV20TCL_BUFFER_FORMAT 0x00000208 +#define NV20TCL_BUFFER_PITCH 0x0000020c +#define NV20TCL_COLOR_OFFSET 0x00000210 +#define NV20TCL_ZETA_OFFSET 0x00000214 +#define NV20TCL_RC_IN_ALPHA(x) (0x00000260+((x)*4)) +#define NV20TCL_RC_IN_ALPHA__SIZE 0x00000008 +#define NV20TCL_RC_FINAL0 0x00000288 +#define NV20TCL_RC_FINAL1 0x0000028c +#define NV20TCL_LIGHT_CONTROL 0x00000294 +#define NV20TCL_FOG_MODE 0x0000029c +#define NV20TCL_FOG_COORD_DIST 0x000002a0 +#define NV20TCL_FOG_ENABLE 0x000002a4 +#define NV20TCL_FOG_COLOR 0x000002a8 +#define NV20TCL_VIEWPORT_CLIP_MODE 0x000002b4 +#define NV20TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*4)) +#define NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 +#define NV20TCL_VIEWPORT_CLIP_VERT(x) (0x000002e0+((x)*4)) +#define NV20TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 +#define NV20TCL_ALPHA_FUNC_ENABLE 0x00000300 +#define NV20TCL_BLEND_FUNC_ENABLE 0x00000304 +#define NV20TCL_CULL_FACE_ENABLE 0x00000308 +#define NV20TCL_DEPTH_TEST_ENABLE 0x0000030c +#define NV20TCL_DITHER_ENABLE 0x00000310 +#define NV20TCL_LIGHTING_ENABLE 0x00000314 +#define NV20TCL_POINT_PARAMETERS_ENABLE 0x00000318 +#define NV20TCL_LINE_SMOOTH_ENABLE 0x00000320 +#define NV20TCL_POLYGON_SMOOTH_ENABLE 0x00000324 +#define NV20TCL_STENCIL_ENABLE 0x0000032c +#define NV20TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000330 +#define NV20TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000334 +#define NV20TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000338 +#define NV20TCL_ALPHA_FUNC_FUNC 0x0000033c +#define NV20TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200 +#define NV20TCL_ALPHA_FUNC_FUNC_LESS 0x00000201 +#define NV20TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202 +#define NV20TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 +#define NV20TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV20TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV20TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV20TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 +#define NV20TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 +#define NV20TCL_ALPHA_FUNC_REF 0x00000340 +#define NV20TCL_BLEND_FUNC_SRC 0x00000344 +#define NV20TCL_BLEND_FUNC_SRC_ZERO 0x00000000 +#define NV20TCL_BLEND_FUNC_SRC_ONE 0x00000001 +#define NV20TCL_BLEND_FUNC_SRC_SRC_COLOR 0x00000300 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV20TCL_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV20TCL_BLEND_FUNC_SRC_DST_ALPHA 0x00000304 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV20TCL_BLEND_FUNC_SRC_DST_COLOR 0x00000306 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307 +#define NV20TCL_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308 +#define NV20TCL_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV20TCL_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV20TCL_BLEND_FUNC_DST 0x00000348 +#define NV20TCL_BLEND_FUNC_DST_ZERO 0x00000000 +#define NV20TCL_BLEND_FUNC_DST_ONE 0x00000001 +#define NV20TCL_BLEND_FUNC_DST_SRC_COLOR 0x00000300 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV20TCL_BLEND_FUNC_DST_SRC_ALPHA 0x00000302 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV20TCL_BLEND_FUNC_DST_DST_ALPHA 0x00000304 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV20TCL_BLEND_FUNC_DST_DST_COLOR 0x00000306 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307 +#define NV20TCL_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308 +#define NV20TCL_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV20TCL_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV20TCL_BLEND_COLOR 0x0000034c +#define NV20TCL_BLEND_EQUATION 0x00000350 +#define NV20TCL_BLEND_EQUATION_FUNC_ADD 0x00008006 +#define NV20TCL_BLEND_EQUATION_MIN 0x00008007 +#define NV20TCL_BLEND_EQUATION_MAX 0x00008008 +#define NV20TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a +#define NV20TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV20TCL_DEPTH_FUNC 0x00000354 +#define NV20TCL_DEPTH_FUNC_NEVER 0x00000200 +#define NV20TCL_DEPTH_FUNC_LESS 0x00000201 +#define NV20TCL_DEPTH_FUNC_EQUAL 0x00000202 +#define NV20TCL_DEPTH_FUNC_LEQUAL 0x00000203 +#define NV20TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV20TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV20TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 +#define NV20TCL_DEPTH_FUNC_GEQUAL 0x00000206 +#define NV20TCL_DEPTH_FUNC_ALWAYS 0x00000207 +#define NV20TCL_COLOR_MASK 0x00000358 +#define NV20TCL_DEPTH_WRITE_ENABLE 0x0000035c +#define NV20TCL_STENCIL_MASK 0x00000360 +#define NV20TCL_STENCIL_FUNC_FUNC 0x00000364 +#define NV20TCL_STENCIL_FUNC_FUNC_NEVER 0x00000200 +#define NV20TCL_STENCIL_FUNC_FUNC_LESS 0x00000201 +#define NV20TCL_STENCIL_FUNC_FUNC_EQUAL 0x00000202 +#define NV20TCL_STENCIL_FUNC_FUNC_LEQUAL 0x00000203 +#define NV20TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 +#define NV20TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 +#define NV20TCL_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV20TCL_STENCIL_FUNC_FUNC_GEQUAL 0x00000206 +#define NV20TCL_STENCIL_FUNC_FUNC_ALWAYS 0x00000207 +#define NV20TCL_STENCIL_FUNC_REF 0x00000368 +#define NV20TCL_STENCIL_FUNC_MASK 0x0000036c +#define NV20TCL_STENCIL_OP_FAIL 0x00000370 +#define NV20TCL_STENCIL_OP_FAIL_ZERO 0x00000000 +#define NV20TCL_STENCIL_OP_FAIL_INVERT 0x0000150a +#define NV20TCL_STENCIL_OP_FAIL_KEEP 0x00001e00 +#define NV20TCL_STENCIL_OP_FAIL_REPLACE 0x00001e01 +#define NV20TCL_STENCIL_OP_FAIL_INCR 0x00001e02 +#define NV20TCL_STENCIL_OP_FAIL_DECR 0x00001e03 +#define NV20TCL_STENCIL_OP_FAIL_INCR_WRAP 0x00008507 +#define NV20TCL_STENCIL_OP_FAIL_DECR_WRAP 0x00008508 +#define NV20TCL_STENCIL_OP_ZFAIL 0x00000374 +#define NV20TCL_STENCIL_OP_ZFAIL_ZERO 0x00000000 +#define NV20TCL_STENCIL_OP_ZFAIL_INVERT 0x0000150a +#define NV20TCL_STENCIL_OP_ZFAIL_KEEP 0x00001e00 +#define NV20TCL_STENCIL_OP_ZFAIL_REPLACE 0x00001e01 +#define NV20TCL_STENCIL_OP_ZFAIL_INCR 0x00001e02 +#define NV20TCL_STENCIL_OP_ZFAIL_DECR 0x00001e03 +#define NV20TCL_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV20TCL_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV20TCL_STENCIL_OP_ZPASS 0x00000378 +#define NV20TCL_STENCIL_OP_ZPASS_ZERO 0x00000000 +#define NV20TCL_STENCIL_OP_ZPASS_INVERT 0x0000150a +#define NV20TCL_STENCIL_OP_ZPASS_KEEP 0x00001e00 +#define NV20TCL_STENCIL_OP_ZPASS_REPLACE 0x00001e01 +#define NV20TCL_STENCIL_OP_ZPASS_INCR 0x00001e02 +#define NV20TCL_STENCIL_OP_ZPASS_DECR 0x00001e03 +#define NV20TCL_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV20TCL_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV20TCL_SHADE_MODEL 0x0000037c +#define NV20TCL_SHADE_MODEL_FLAT 0x00001d00 +#define NV20TCL_SHADE_MODEL_SMOOTH 0x00001d01 +#define NV20TCL_LINE_WIDTH 0x00000380 +#define NV20TCL_POLYGON_OFFSET_FACTOR 0x00000384 +#define NV20TCL_POLYGON_OFFSET_UNITS 0x00000388 +#define NV20TCL_POLYGON_MODE_FRONT 0x0000038c +#define NV20TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV20TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV20TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 +#define NV20TCL_POLYGON_MODE_BACK 0x00000390 +#define NV20TCL_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV20TCL_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV20TCL_POLYGON_MODE_BACK_FILL 0x00001b02 +#define NV20TCL_DEPTH_RANGE_NEAR 0x00000394 +#define NV20TCL_DEPTH_RANGE_FAR 0x00000398 +#define NV20TCL_CULL_FACE 0x0000039c +#define NV20TCL_CULL_FACE_FRONT 0x00000404 +#define NV20TCL_CULL_FACE_BACK 0x00000405 +#define NV20TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NV20TCL_FRONT_FACE 0x000003a0 +#define NV20TCL_FRONT_FACE_CW 0x00000900 +#define NV20TCL_FRONT_FACE_CCW 0x00000901 +#define NV20TCL_NORMALIZE_ENABLE 0x000003a4 +#define NV20TCL_SEPARATE_SPECULAR_ENABLE 0x000003b8 +#define NV20TCL_ENABLED_LIGHTS 0x000003bc +#define NV20TCL_CLIP_PLANE_ENABLE(x) (0x000003c0+((x)*4)) +#define NV20TCL_CLIP_PLANE_ENABLE__SIZE 0x00000010 +#define NV20TCL_TX_MATRIX_ENABLE(x) (0x00000420+((x)*4)) +#define NV20TCL_TX_MATRIX_ENABLE__SIZE 0x00000004 +#define NV20TCL_POINT_SIZE 0x0000043c +#define NV20TCL_MODELVIEW_MATRIX(x) (0x00000480+((x)*4)) +#define NV20TCL_MODELVIEW_MATRIX__SIZE 0x00000010 +#define NV20TCL_PROJECTION_MATRIX(x) (0x00000680+((x)*4)) +#define NV20TCL_PROJECTION_MATRIX__SIZE 0x00000010 +#define NV20TCL_TX0_MATRIX(x) (0x000006c0+((x)*4)) +#define NV20TCL_TX0_MATRIX__SIZE 0x00000010 +#define NV20TCL_TX1_MATRIX(x) (0x00000700+((x)*4)) +#define NV20TCL_TX1_MATRIX__SIZE 0x00000010 +#define NV20TCL_TX2_MATRIX(x) (0x00000740+((x)*4)) +#define NV20TCL_TX2_MATRIX__SIZE 0x00000010 +#define NV20TCL_TX3_MATRIX(x) (0x00000780+((x)*4)) +#define NV20TCL_TX3_MATRIX__SIZE 0x00000010 +#define NV20TCL_FOG_EQUATION_CONSTANT 0x000009c0 +#define NV20TCL_FOG_EQUATION_LINEAR 0x000009c4 +#define NV20TCL_FOG_EQUATION_QUADRATIC 0x000009c8 +#define NV20TCL_VIEWPORT_SCALE0_X 0x00000a20 +#define NV20TCL_VIEWPORT_SCALE0_Y 0x00000a24 +#define NV20TCL_VIEWPORT_SCALE0_Z 0x00000a28 +#define NV20TCL_VIEWPORT_SCALE0_W 0x00000a2c +#define NV20TCL_POINT_PARAMETER(x) (0x00000a30+((x)*4)) +#define NV20TCL_POINT_PARAMETER__SIZE 0x00000007 +#define NV20TCL_RC_CONSTANT_COLOR0(x) (0x00000a60+((x)*4)) +#define NV20TCL_RC_CONSTANT_COLOR0__SIZE 0x00000008 +#define NV20TCL_RC_CONSTANT_COLOR1(x) (0x00000a80+((x)*4)) +#define NV20TCL_RC_CONSTANT_COLOR1__SIZE 0x00000008 +#define NV20TCL_RC_OUT_ALPHA(x) (0x00000aa0+((x)*4)) +#define NV20TCL_RC_OUT_ALPHA__SIZE 0x00000008 +#define NV20TCL_RC_IN_RGB(x) (0x00000ac0+((x)*4)) +#define NV20TCL_RC_IN_RGB__SIZE 0x00000008 +#define NV20TCL_VIEWPORT_SCALE1_X 0x00000af0 +#define NV20TCL_VIEWPORT_SCALE1_Y 0x00000af4 +#define NV20TCL_VIEWPORT_SCALE1_Z 0x00000af8 +#define NV20TCL_VIEWPORT_SCALE1_W 0x00000afc +#define NV20TCL_VP_UPLOAD_INST(x) (0x00000b00+((x)*4)) +#define NV20TCL_VP_UPLOAD_INST__SIZE 0x00000004 +#define NV20TCL_VP_UPLOAD_CONST(x) (0x00000b80+((x)*4)) +#define NV20TCL_VP_UPLOAD_CONST__SIZE 0x00000004 +#define NV20TCL_POLYGON_STIPPLE_ENABLE 0x0000147c +#define NV20TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) +#define NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 +#define NV20TCL_VERTEX_POS_3F_X 0x00001500 +#define NV20TCL_VERTEX_POS_3F_Y 0x00001504 +#define NV20TCL_VERTEX_POS_3F_Z 0x00001508 +#define NV20TCL_VERTEX_POS_4F_X 0x00001518 +#define NV20TCL_VERTEX_POS_4F_Y 0x0000151c +#define NV20TCL_VERTEX_POS_4F_Z 0x00001520 +#define NV20TCL_VERTEX_POS_3I_XY 0x00001528 +#define NV20TCL_VERTEX_POS_3I_XY_X_SHIFT 0 +#define NV20TCL_VERTEX_POS_3I_XY_X_MASK 0x0000ffff +#define NV20TCL_VERTEX_POS_3I_XY_Y_SHIFT 16 +#define NV20TCL_VERTEX_POS_3I_XY_Y_MASK 0xffff0000 +#define NV20TCL_VERTEX_POS_3I_Z 0x0000152c +#define NV20TCL_VERTEX_POS_3I_Z_Z_SHIFT 0 +#define NV20TCL_VERTEX_POS_3I_Z_Z_MASK 0x0000ffff +#define NV20TCL_VERTEX_NOR_3F_X 0x00001530 +#define NV20TCL_VERTEX_NOR_3F_Y 0x00001534 +#define NV20TCL_VERTEX_NOR_3F_Z 0x00001538 +#define NV20TCL_VERTEX_NOR_3I_XY 0x00001540 +#define NV20TCL_VERTEX_NOR_3I_XY_X_SHIFT 0 +#define NV20TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff +#define NV20TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16 +#define NV20TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000 +#define NV20TCL_VERTEX_NOR_3I_Z 0x00001544 +#define NV20TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0 +#define NV20TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff +#define NV20TCL_VERTEX_COL_4F_X 0x00001550 +#define NV20TCL_VERTEX_COL_4F_Y 0x00001554 +#define NV20TCL_VERTEX_COL_4F_Z 0x00001558 +#define NV20TCL_VERTEX_COL_4F_W 0x0000155c +#define NV20TCL_VERTEX_COL_3F_X 0x00001560 +#define NV20TCL_VERTEX_COL_3F_Y 0x00001564 +#define NV20TCL_VERTEX_COL_3F_Z 0x00001568 +#define NV20TCL_VERTEX_COL_4I 0x0000156c +#define NV20TCL_VERTEX_COL_4I_R_SHIFT 0 +#define NV20TCL_VERTEX_COL_4I_R_MASK 0x000000ff +#define NV20TCL_VERTEX_COL_4I_G_SHIFT 8 +#define NV20TCL_VERTEX_COL_4I_G_MASK 0x0000ff00 +#define NV20TCL_VERTEX_COL_4I_B_SHIFT 16 +#define NV20TCL_VERTEX_COL_4I_B_MASK 0x00ff0000 +#define NV20TCL_VERTEX_COL_4I_A_SHIFT 24 +#define NV20TCL_VERTEX_COL_4I_A_MASK 0xff000000 +#define NV20TCL_VERTEX_COL2_3F_X 0x00001580 +#define NV20TCL_VERTEX_COL2_3F_Y 0x00001584 +#define NV20TCL_VERTEX_COL2_3F_Z 0x00001588 +#define NV20TCL_VERTEX_COL2_4I 0x0000158c +#define NV20TCL_VERTEX_COL2_4I_R_SHIFT 0 +#define NV20TCL_VERTEX_COL2_4I_R_MASK 0x000000ff +#define NV20TCL_VERTEX_COL2_4I_G_SHIFT 8 +#define NV20TCL_VERTEX_COL2_4I_G_MASK 0x0000ff00 +#define NV20TCL_VERTEX_COL2_4I_B_SHIFT 16 +#define NV20TCL_VERTEX_COL2_4I_B_MASK 0x00ff0000 +#define NV20TCL_VERTEX_COL2_4I_A_SHIFT 24 +#define NV20TCL_VERTEX_COL2_4I_A_MASK 0xff000000 +#define NV20TCL_VERTEX_TX0_2F_S 0x00001590 +#define NV20TCL_VERTEX_TX0_2F_T 0x00001594 +#define NV20TCL_VERTEX_TX0_2I 0x00001598 +#define NV20TCL_VERTEX_TX0_2I_S_SHIFT 0 +#define NV20TCL_VERTEX_TX0_2I_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX0_2I_T_SHIFT 16 +#define NV20TCL_VERTEX_TX0_2I_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX0_4F_S 0x000015a0 +#define NV20TCL_VERTEX_TX0_4F_T 0x000015a4 +#define NV20TCL_VERTEX_TX0_4F_R 0x000015a8 +#define NV20TCL_VERTEX_TX0_4F_Q 0x000015ac +#define NV20TCL_VERTEX_TX0_4I_ST 0x000015b0 +#define NV20TCL_VERTEX_TX0_4I_ST_S_SHIFT 0 +#define NV20TCL_VERTEX_TX0_4I_ST_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX0_4I_ST_T_SHIFT 16 +#define NV20TCL_VERTEX_TX0_4I_ST_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX0_4I_RQ 0x000015b4 +#define NV20TCL_VERTEX_TX0_4I_RQ_R_SHIFT 0 +#define NV20TCL_VERTEX_TX0_4I_RQ_R_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX0_4I_RQ_Q_SHIFT 16 +#define NV20TCL_VERTEX_TX0_4I_RQ_Q_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX1_2F_S 0x000015b8 +#define NV20TCL_VERTEX_TX1_2F_T 0x000015bc +#define NV20TCL_VERTEX_TX1_2I 0x000015c0 +#define NV20TCL_VERTEX_TX1_2I_S_SHIFT 0 +#define NV20TCL_VERTEX_TX1_2I_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX1_2I_T_SHIFT 16 +#define NV20TCL_VERTEX_TX1_2I_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX1_4F_S 0x000015c8 +#define NV20TCL_VERTEX_TX1_4F_T 0x000015cc +#define NV20TCL_VERTEX_TX1_4F_R 0x000015d0 +#define NV20TCL_VERTEX_TX1_4F_Q 0x000015d4 +#define NV20TCL_VERTEX_TX1_4I_ST 0x000015d8 +#define NV20TCL_VERTEX_TX1_4I_ST_S_SHIFT 0 +#define NV20TCL_VERTEX_TX1_4I_ST_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX1_4I_ST_T_SHIFT 16 +#define NV20TCL_VERTEX_TX1_4I_ST_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX1_4I_RQ 0x000015dc +#define NV20TCL_VERTEX_TX1_4I_RQ_R_SHIFT 0 +#define NV20TCL_VERTEX_TX1_4I_RQ_R_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX1_4I_RQ_Q_SHIFT 16 +#define NV20TCL_VERTEX_TX1_4I_RQ_Q_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX2_2F_S 0x000015e0 +#define NV20TCL_VERTEX_TX2_2F_T 0x000015e4 +#define NV20TCL_VERTEX_TX2_2I 0x000015e8 +#define NV20TCL_VERTEX_TX2_2I_S_SHIFT 0 +#define NV20TCL_VERTEX_TX2_2I_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX2_2I_T_SHIFT 16 +#define NV20TCL_VERTEX_TX2_2I_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX2_4F_S 0x000015f0 +#define NV20TCL_VERTEX_TX2_4F_T 0x000015f4 +#define NV20TCL_VERTEX_TX2_4F_R 0x000015f8 +#define NV20TCL_VERTEX_TX2_4F_Q 0x000015fc +#define NV20TCL_VERTEX_TX2_4I_ST 0x00001600 +#define NV20TCL_VERTEX_TX2_4I_ST_S_SHIFT 0 +#define NV20TCL_VERTEX_TX2_4I_ST_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX2_4I_ST_T_SHIFT 16 +#define NV20TCL_VERTEX_TX2_4I_ST_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX2_4I_RQ 0x00001604 +#define NV20TCL_VERTEX_TX2_4I_RQ_R_SHIFT 0 +#define NV20TCL_VERTEX_TX2_4I_RQ_R_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX2_4I_RQ_Q_SHIFT 16 +#define NV20TCL_VERTEX_TX2_4I_RQ_Q_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX3_2F_S 0x00001608 +#define NV20TCL_VERTEX_TX3_2F_T 0x0000160c +#define NV20TCL_VERTEX_TX3_2I 0x00001610 +#define NV20TCL_VERTEX_TX3_2I_S_SHIFT 0 +#define NV20TCL_VERTEX_TX3_2I_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX3_2I_T_SHIFT 16 +#define NV20TCL_VERTEX_TX3_2I_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX3_4F_S 0x00001620 +#define NV20TCL_VERTEX_TX3_4F_T 0x00001624 +#define NV20TCL_VERTEX_TX3_4F_R 0x00001628 +#define NV20TCL_VERTEX_TX3_4F_Q 0x0000162c +#define NV20TCL_VERTEX_TX3_4I_ST 0x00001630 +#define NV20TCL_VERTEX_TX3_4I_ST_S_SHIFT 0 +#define NV20TCL_VERTEX_TX3_4I_ST_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX3_4I_ST_T_SHIFT 16 +#define NV20TCL_VERTEX_TX3_4I_ST_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX3_4I_RQ 0x00001634 +#define NV20TCL_VERTEX_TX3_4I_RQ_R_SHIFT 0 +#define NV20TCL_VERTEX_TX3_4I_RQ_R_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX3_4I_RQ_Q_SHIFT 16 +#define NV20TCL_VERTEX_TX3_4I_RQ_Q_MASK 0xffff0000 +#define NV20TCL_EDGEFLAG_ENABLE 0x000016bc +#define NV20TCL_VERTEX_ATTR_OFFSET(x) (0x00001720+((x)*4)) +#define NV20TCL_VERTEX_ATTR_OFFSET__SIZE 0x00000010 +#define NV20TCL_VERTEX_ARRAY_FORMAT(x) (0x00001760+((x)*4)) +#define NV20TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010 +#define NV20TCL_VERTEX_ARRAY_FORMAT_TYPE_SHIFT 0 +#define NV20TCL_VERTEX_ARRAY_FORMAT_TYPE_MASK 0x0000000f +#define NV20TCL_VERTEX_ARRAY_FORMAT_FIELDS_SHIFT 4 +#define NV20TCL_VERTEX_ARRAY_FORMAT_FIELDS_MASK 0x000000f0 +#define NV20TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT 8 +#define NV20TCL_VERTEX_ARRAY_FORMAT_STRIDE_MASK 0x0000ff00 +#define NV20TCL_COLOR_LOGIC_OP_ENABLE 0x000017bc +#define NV20TCL_COLOR_LOGIC_OP_OP 0x000017c0 +#define NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE 0x000017c4 +#define NV20TCL_TX_SHADER_CULL_MODE 0x000017f8 +#define NV20TCL_VERTEX_BEGIN_END 0x000017fc +#define NV20TCL_VERTEX_BUFFER_DRAW_ARRAYS 0x00001810 +#define NV20TCL_VERTEX_ARRAY_DATA 0x00001818 +#define NV20TCL_TX_OFFSET(x) (0x00001b00+((x)*64)) +#define NV20TCL_TX_OFFSET__SIZE 0x00000004 +#define NV20TCL_TX_FORMAT(x) (0x00001b04+((x)*64)) +#define NV20TCL_TX_FORMAT__SIZE 0x00000004 +#define NV20TCL_TX_WRAP(x) (0x00001b08+((x)*64)) +#define NV20TCL_TX_WRAP__SIZE 0x00000004 +#define NV20TCL_TX_ENABLE(x) (0x00001b0c+((x)*64)) +#define NV20TCL_TX_ENABLE__SIZE 0x00000004 +#define NV20TCL_TX_FILTER(x) (0x00001b14+((x)*64)) +#define NV20TCL_TX_FILTER__SIZE 0x00000004 +#define NV20TCL_TX_BORDER_COLOR(x) (0x00001b24+((x)*64)) +#define NV20TCL_TX_BORDER_COLOR__SIZE 0x00000004 +#define NV20TCL_SCISSOR_HORIZ 0x00001c30 +#define NV20TCL_SCISSOR_VERT 0x00001c50 +#define NV20TCL_MULTISAMPLE_CONTROL 0x00001d7c +#define NV20TCL_CLEAR_VALUE 0x00001d90 +#define NV20TCL_CLEAR_BUFFERS 0x00001d94 +#define NV20TCL_RC_COLOR0 0x00001e20 +#define NV20TCL_RC_COLOR1 0x00001e24 +#define NV20TCL_RC_OUT_RGB(x) (0x00001e40+((x)*4)) +#define NV20TCL_RC_OUT_RGB__SIZE 0x00000008 +#define NV20TCL_RC_ENABLE 0x00001e60 +#define NV20TCL_TX_SHADER_OP 0x00001e70 +#define NV20TCL_VP_UPLOAD_CONST_ID 0x00001ea4 + + +#define NV17TCL 0x00000099 + +#define NV17TCL_DMA_IN_MEMORY4 0x000001ac +#define NV17TCL_DMA_IN_MEMORY5 0x000001b0 +#define NV17TCL_COLOR_MASK_ENABLE 0x000002bc +#define NV17TCL_LMA_DEPTH_BUFFER_PITCH 0x00000d5c +#define NV17TCL_LMA_DEPTH_BUFFER_OFFSET 0x00000d60 +#define NV17TCL_LMA_DEPTH_FILL_VALUE 0x00000d68 +#define NV17TCL_LMA_DEPTH_BUFFER_CLEAR 0x00000d6c +#define NV17TCL_LMA_DEPTH_ENABLE 0x00001658 + + +#define NV20_SWIZZLED_SURFACE 0x0000009e + + + +#define NV12_IMAGE_BLIT 0x0000009f + + + +#define NV30_CONTEXT_SURFACES_2D 0x00000362 + + + +#define NV30_STRETCHED_IMAGE_FROM_CPU 0x00000366 + + + +#define NV30_TEXTURE_FROM_CPU 0x0000037b + + + +#define NV30_SCALED_IMAGE_FROM_MEMORY 0x00000389 + + + +#define NV30_IMAGE_FROM_CPU 0x0000038a + + + +#define NV30TCL 0x00000397 + + + +#define NV30_SWIZZLED_SURFACE 0x0000039e + + + +#define NV35TCL 0x00000497 + + + +#define NV25TCL 0x00000597 + +#define NV25TCL_DMA_IN_MEMORY4 0x0000019c +#define NV25TCL_DMA_IN_MEMORY5 0x000001a0 +#define NV25TCL_DMA_IN_MEMORY8 0x000001ac +#define NV25TCL_DMA_IN_MEMORY9 0x000001b0 + + +#define NV34TCL 0x00000697 + +#define NV34TCL_NOP 0x00000100 +#define NV34TCL_NOTIFY 0x00000104 +#define NV34TCL_DMA_NOTIFY 0x00000180 +#define NV34TCL_DMA_TEXTURE0 0x00000184 +#define NV34TCL_DMA_TEXTURE1 0x00000188 +#define NV34TCL_DMA_COLOR1 0x0000018c +#define NV34TCL_DMA_COLOR0 0x00000194 +#define NV34TCL_DMA_ZETA 0x00000198 +#define NV34TCL_DMA_VTXBUF0 0x0000019c +#define NV34TCL_DMA_VTXBUF1 0x000001a0 +#define NV34TCL_DMA_FENCE 0x000001a4 +#define NV34TCL_DMA_QUERY 0x000001a8 +#define NV34TCL_DMA_IN_MEMORY7 0x000001ac +#define NV34TCL_DMA_IN_MEMORY8 0x000001b0 +#define NV34TCL_RT_HORIZ 0x00000200 +#define NV34TCL_RT_HORIZ_X_SHIFT 0 +#define NV34TCL_RT_HORIZ_X_MASK 0x0000ffff +#define NV34TCL_RT_HORIZ_W_SHIFT 16 +#define NV34TCL_RT_HORIZ_W_MASK 0xffff0000 +#define NV34TCL_RT_VERT 0x00000204 +#define NV34TCL_RT_VERT_Y_SHIFT 0 +#define NV34TCL_RT_VERT_Y_MASK 0x0000ffff +#define NV34TCL_RT_VERT_H_SHIFT 16 +#define NV34TCL_RT_VERT_H_MASK 0xffff0000 +#define NV34TCL_RT_FORMAT 0x00000208 +#define NV34TCL_RT_FORMAT_TYPE_SHIFT 8 +#define NV34TCL_RT_FORMAT_TYPE_MASK 0x00000f00 +#define NV34TCL_RT_FORMAT_TYPE_LINEAR 0x00000100 +#define NV34TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200 +#define NV34TCL_RT_FORMAT_ZETA_SHIFT 5 +#define NV34TCL_RT_FORMAT_ZETA_MASK 0x000000e0 +#define NV34TCL_RT_FORMAT_ZETA_Z16 0x00000020 +#define NV34TCL_RT_FORMAT_ZETA_Z24S8 0x00000040 +#define NV34TCL_RT_FORMAT_COLOR_SHIFT 0 +#define NV34TCL_RT_FORMAT_COLOR_MASK 0x0000001f +#define NV34TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV34TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 +#define NV34TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV34TCL_RT_FORMAT_COLOR_B8 0x00000009 +#define NV34TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f +#define NV34TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 +#define NV34TCL_COLOR0_PITCH 0x0000020c +#define NV34TCL_COLOR0_PITCH_COLOR0_SHIFT 0 +#define NV34TCL_COLOR0_PITCH_COLOR0_MASK 0x0000ffff +#define NV34TCL_COLOR0_PITCH_ZETA_SHIFT 16 +#define NV34TCL_COLOR0_PITCH_ZETA_MASK 0xffff0000 +#define NV34TCL_COLOR0_OFFSET 0x00000210 +#define NV34TCL_ZETA_OFFSET 0x00000214 +#define NV34TCL_COLOR1_OFFSET 0x00000218 +#define NV34TCL_COLOR1_PITCH 0x0000021c +#define NV34TCL_RT_ENABLE 0x00000220 +#define NV34TCL_RT_ENABLE_MRT (1 << 4) +#define NV34TCL_RT_ENABLE_COLOR3 (1 << 3) +#define NV34TCL_RT_ENABLE_COLOR2 (1 << 2) +#define NV34TCL_RT_ENABLE_COLOR1 (1 << 1) +#define NV34TCL_RT_ENABLE_COLOR0 (1 << 0) +#define NV34TCL_ZETA_PITCH 0x0000022c +#define NV34TCL_LMA_DEPTH_OFFSET 0x00000230 +#define NV34TCL_TX_UNITS_ENABLE 0x0000023c +#define NV34TCL_TX_UNITS_ENABLE_TX0 (1 << 0) +#define NV34TCL_TX_UNITS_ENABLE_TX1 (1 << 1) +#define NV34TCL_TX_UNITS_ENABLE_TX2 (1 << 2) +#define NV34TCL_TX_UNITS_ENABLE_TX3 (1 << 3) +#define NV34TCL_TX_MATRIX_ENABLE(x) (0x00000240+((x)*4)) +#define NV34TCL_TX_MATRIX_ENABLE__SIZE 0x00000004 +#define NV34TCL_UNK0250(x) (0x00000250+((x)*4)) +#define NV34TCL_UNK0250__SIZE 0x00000004 +#define NV34TCL_VIEWPORT_TX_ORIGIN 0x000002b8 +#define NV34TCL_VIEWPORT_TX_ORIGIN_X_SHIFT 0 +#define NV34TCL_VIEWPORT_TX_ORIGIN_X_MASK 0x0000ffff +#define NV34TCL_VIEWPORT_TX_ORIGIN_Y_SHIFT 16 +#define NV34TCL_VIEWPORT_TX_ORIGIN_Y_MASK 0xffff0000 +#define NV34TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*8)) +#define NV34TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 +#define NV34TCL_VIEWPORT_CLIP_HORIZ_L_SHIFT 0 +#define NV34TCL_VIEWPORT_CLIP_HORIZ_L_MASK 0x0000ffff +#define NV34TCL_VIEWPORT_CLIP_HORIZ_R_SHIFT 16 +#define NV34TCL_VIEWPORT_CLIP_HORIZ_R_MASK 0xffff0000 +#define NV34TCL_VIEWPORT_CLIP_VERT(x) (0x000002c4+((x)*8)) +#define NV34TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 +#define NV34TCL_VIEWPORT_CLIP_VERT_T_SHIFT 0 +#define NV34TCL_VIEWPORT_CLIP_VERT_T_MASK 0x0000ffff +#define NV34TCL_VIEWPORT_CLIP_VERT_D_SHIFT 16 +#define NV34TCL_VIEWPORT_CLIP_VERT_D_MASK 0xffff0000 +#define NV34TCL_DITHER_ENABLE 0x00000300 +#define NV34TCL_ALPHA_FUNC_ENABLE 0x00000304 +#define NV34TCL_ALPHA_FUNC_FUNC 0x00000308 +#define NV34TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200 +#define NV34TCL_ALPHA_FUNC_FUNC_LESS 0x00000201 +#define NV34TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202 +#define NV34TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 +#define NV34TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV34TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 +#define NV34TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 +#define NV34TCL_ALPHA_FUNC_REF 0x0000030c +#define NV34TCL_BLEND_FUNC_ENABLE 0x00000310 +#define NV34TCL_BLEND_FUNC_SRC 0x00000314 +#define NV34TCL_BLEND_FUNC_SRC_RGB_SHIFT 0 +#define NV34TCL_BLEND_FUNC_SRC_RGB_MASK 0x0000ffff +#define NV34TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001 +#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV34TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV34TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV34TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV34TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SHIFT 16 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_MASK 0xffff0000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 +#define NV34TCL_BLEND_FUNC_DST 0x00000318 +#define NV34TCL_BLEND_FUNC_DST_RGB_SHIFT 0 +#define NV34TCL_BLEND_FUNC_DST_RGB_MASK 0x0000ffff +#define NV34TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001 +#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV34TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV34TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV34TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV34TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_SHIFT 16 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_MASK 0xffff0000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00010000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 +#define NV34TCL_BLEND_FUNC_COLOR 0x0000031c +#define NV34TCL_BLEND_FUNC_EQUATION 0x00000320 +#define NV34TCL_BLEND_FUNC_EQUATION_FUNC_ADD 0x00008006 +#define NV34TCL_BLEND_FUNC_EQUATION_MIN 0x00008007 +#define NV34TCL_BLEND_FUNC_EQUATION_MAX 0x00008008 +#define NV34TCL_BLEND_FUNC_EQUATION_FUNC_SUBTRACT 0x0000800a +#define NV34TCL_BLEND_FUNC_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV34TCL_COLOR_MASK 0x00000324 +#define NV34TCL_COLOR_MASK_B_SHIFT 0 +#define NV34TCL_COLOR_MASK_B_MASK 0x000000ff +#define NV34TCL_COLOR_MASK_G_SHIFT 8 +#define NV34TCL_COLOR_MASK_G_MASK 0x0000ff00 +#define NV34TCL_COLOR_MASK_R_SHIFT 16 +#define NV34TCL_COLOR_MASK_R_MASK 0x00ff0000 +#define NV34TCL_COLOR_MASK_A_SHIFT 24 +#define NV34TCL_COLOR_MASK_A_MASK 0xff000000 +#define NV34TCL_STENCIL_BACK_ENABLE 0x00000328 +#define NV34TCL_STENCIL_BACK_MASK 0x0000032c +#define NV34TCL_STENCIL_BACK_FUNC_FUNC 0x00000330 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 +#define NV34TCL_STENCIL_BACK_FUNC_REF 0x00000334 +#define NV34TCL_STENCIL_BACK_FUNC_MASK 0x00000338 +#define NV34TCL_STENCIL_BACK_OP_FAIL 0x0000033c +#define NV34TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NV34TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NV34TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NV34TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NV34TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NV34TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL 0x00000340 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV34TCL_STENCIL_BACK_OP_ZPASS 0x00000344 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NV34TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV34TCL_STENCIL_FRONT_ENABLE 0x00000348 +#define NV34TCL_STENCIL_FRONT_MASK 0x0000034c +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC 0x00000350 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 +#define NV34TCL_STENCIL_FRONT_FUNC_REF 0x00000354 +#define NV34TCL_STENCIL_FRONT_FUNC_MASK 0x00000358 +#define NV34TCL_STENCIL_FRONT_OP_FAIL 0x0000035c +#define NV34TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NV34TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL 0x00000360 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS 0x00000364 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV34TCL_SHADE_MODEL 0x00000368 +#define NV34TCL_SHADE_MODEL_FLAT 0x00001d00 +#define NV34TCL_SHADE_MODEL_SMOOTH 0x00001d01 +#define NV34TCL_FOG_ENABLE 0x0000036c +#define NV34TCL_FOG_COLOR 0x00000370 +#define NV34TCL_COLOR_LOGIC_OP_ENABLE 0x00000374 +#define NV34TCL_COLOR_LOGIC_OP_OP 0x00000378 +#define NV34TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500 +#define NV34TCL_COLOR_LOGIC_OP_OP_AND 0x00001501 +#define NV34TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502 +#define NV34TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503 +#define NV34TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504 +#define NV34TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505 +#define NV34TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506 +#define NV34TCL_COLOR_LOGIC_OP_OP_OR 0x00001507 +#define NV34TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508 +#define NV34TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509 +#define NV34TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a +#define NV34TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b +#define NV34TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c +#define NV34TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d +#define NV34TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e +#define NV34TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f +#define NV34TCL_NORMALIZE_ENABLE 0x0000037c +#define NV34TCL_COLOR_MATERIAL 0x00000390 +#define NV34TCL_COLOR_MATERIAL_FRONT_EMISSION_ENABLE (1 << 0) +#define NV34TCL_COLOR_MATERIAL_FRONT_AMBIENT_ENABLE (1 << 2) +#define NV34TCL_COLOR_MATERIAL_FRONT_DIFFUSE_ENABLE (1 << 4) +#define NV34TCL_COLOR_MATERIAL_FRONT_SPECULAR_ENABLE (1 << 6) +#define NV34TCL_COLOR_MATERIAL_BACK_EMISSION_ENABLE (1 << 8) +#define NV34TCL_COLOR_MATERIAL_BACK_AMBIENT_ENABLE (1 << 10) +#define NV34TCL_COLOR_MATERIAL_BACK_DIFFUSE_ENABLE (1 << 12) +#define NV34TCL_COLOR_MATERIAL_BACK_SPECULAR_ENABLE (1 << 14) +#define NV34TCL_DEPTH_RANGE_NEAR 0x00000394 +#define NV34TCL_DEPTH_RANGE_FAR 0x00000398 +#define NV34TCL_COLOR_MATERIAL_FRONT_R 0x000003a0 +#define NV34TCL_COLOR_MATERIAL_FRONT_G 0x000003a4 +#define NV34TCL_COLOR_MATERIAL_FRONT_B 0x000003a8 +#define NV34TCL_COLOR_MATERIAL_FRONT_A 0x000003b4 +#define NV34TCL_LINE_WIDTH 0x000003b8 +#define NV34TCL_LINE_SMOOTH_ENABLE 0x000003bc +#define NV34TCL_CLIP_PLANE_ENABLE(x) (0x00000400+((x)*4)) +#define NV34TCL_CLIP_PLANE_ENABLE__SIZE 0x00000020 +#define NV34TCL_CLIP_PLANE_ENABLE_FALSE 0x00000000 +#define NV34TCL_CLIP_PLANE_ENABLE_EYE_LINEAR 0x00002400 +#define NV34TCL_CLIP_PLANE_ENABLE_OBJECT_LINEAR 0x00002401 +#define NV34TCL_MODELVIEW_MATRIX(x) (0x00000480+((x)*4)) +#define NV34TCL_MODELVIEW_MATRIX__SIZE 0x00000010 +#define NV34TCL_INVERSE_MODELVIEW_MATRIX(x) (0x00000580+((x)*4)) +#define NV34TCL_INVERSE_MODELVIEW_MATRIX__SIZE 0x0000000c +#define NV34TCL_PROJECTION_MATRIX(x) (0x00000680+((x)*4)) +#define NV34TCL_PROJECTION_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX0_MATRIX(x) (0x000006c0+((x)*4)) +#define NV34TCL_TX0_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX1_MATRIX(x) (0x00000700+((x)*4)) +#define NV34TCL_TX1_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX2_MATRIX(x) (0x00000740+((x)*4)) +#define NV34TCL_TX2_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX3_MATRIX(x) (0x00000780+((x)*4)) +#define NV34TCL_TX3_MATRIX__SIZE 0x00000010 +#define NV34TCL_SCISSOR_HORIZ 0x000008c0 +#define NV34TCL_SCISSOR_HORIZ_X_SHIFT 0 +#define NV34TCL_SCISSOR_HORIZ_X_MASK 0x0000ffff +#define NV34TCL_SCISSOR_HORIZ_W_SHIFT 16 +#define NV34TCL_SCISSOR_HORIZ_W_MASK 0xffff0000 +#define NV34TCL_SCISSOR_VERT 0x000008c4 +#define NV34TCL_SCISSOR_VERT_Y_SHIFT 0 +#define NV34TCL_SCISSOR_VERT_Y_MASK 0x0000ffff +#define NV34TCL_SCISSOR_VERT_H_SHIFT 16 +#define NV34TCL_SCISSOR_VERT_H_MASK 0xffff0000 +#define NV34TCL_FOG_COORD_DIST 0x000008c8 +#define NV34TCL_FOG_MODE 0x000008cc +#define NV34TCL_FOG_MODE_EXP 0x00000800 +#define NV34TCL_FOG_MODE_EXP_2 0x00000802 +#define NV34TCL_FOG_MODE_EXP2 0x00000803 +#define NV34TCL_FOG_MODE_LINEAR 0x00000804 +#define NV34TCL_FOG_MODE_LINEAR_2 0x00002601 +#define NV34TCL_FOG_EQUATION_CONSTANT 0x000008d0 +#define NV34TCL_FOG_EQUATION_LINEAR 0x000008d4 +#define NV34TCL_FOG_EQUATION_QUADRATIC 0x000008d8 +#define NV34TCL_FP_ACTIVE_PROGRAM 0x000008e4 +#define NV34TCL_FP_ACTIVE_PROGRAM_DMA0 (1 << 0) +#define NV34TCL_FP_ACTIVE_PROGRAM_DMA1 (1 << 1) +#define NV34TCL_FP_ACTIVE_PROGRAM_OFFSET_SHIFT 2 +#define NV34TCL_FP_ACTIVE_PROGRAM_OFFSET_MASK 0xfffffffc +#define NV34TCL_RC_COLOR0 0x000008ec +#define NV34TCL_RC_COLOR0_B_SHIFT 0 +#define NV34TCL_RC_COLOR0_B_MASK 0x000000ff +#define NV34TCL_RC_COLOR0_G_SHIFT 8 +#define NV34TCL_RC_COLOR0_G_MASK 0x0000ff00 +#define NV34TCL_RC_COLOR0_R_SHIFT 16 +#define NV34TCL_RC_COLOR0_R_MASK 0x00ff0000 +#define NV34TCL_RC_COLOR0_A_SHIFT 24 +#define NV34TCL_RC_COLOR0_A_MASK 0xff000000 +#define NV34TCL_RC_COLOR1 0x000008f0 +#define NV34TCL_RC_COLOR1_B_SHIFT 0 +#define NV34TCL_RC_COLOR1_B_MASK 0x000000ff +#define NV34TCL_RC_COLOR1_G_SHIFT 8 +#define NV34TCL_RC_COLOR1_G_MASK 0x0000ff00 +#define NV34TCL_RC_COLOR1_R_SHIFT 16 +#define NV34TCL_RC_COLOR1_R_MASK 0x00ff0000 +#define NV34TCL_RC_COLOR1_A_SHIFT 24 +#define NV34TCL_RC_COLOR1_A_MASK 0xff000000 +#define NV34TCL_RC_FINAL0 0x000008f4 +#define NV34TCL_RC_FINAL1 0x000008f8 +#define NV34TCL_RC_ENABLE 0x000008fc +#define NV34TCL_RC_IN_ALPHA(x) (0x00000900+((x)*32)) +#define NV34TCL_RC_IN_ALPHA__SIZE 0x00000008 +#define NV34TCL_RC_IN_RGB(x) (0x00000904+((x)*32)) +#define NV34TCL_RC_IN_RGB__SIZE 0x00000008 +#define NV34TCL_RC_CONSTANT_COLOR0(x) (0x00000908+((x)*32)) +#define NV34TCL_RC_CONSTANT_COLOR0__SIZE 0x00000008 +#define NV34TCL_RC_CONSTANT_COLOR0_B_SHIFT 0 +#define NV34TCL_RC_CONSTANT_COLOR0_B_MASK 0x000000ff +#define NV34TCL_RC_CONSTANT_COLOR0_G_SHIFT 8 +#define NV34TCL_RC_CONSTANT_COLOR0_G_MASK 0x0000ff00 +#define NV34TCL_RC_CONSTANT_COLOR0_R_SHIFT 16 +#define NV34TCL_RC_CONSTANT_COLOR0_R_MASK 0x00ff0000 +#define NV34TCL_RC_CONSTANT_COLOR0_A_SHIFT 24 +#define NV34TCL_RC_CONSTANT_COLOR0_A_MASK 0xff000000 +#define NV34TCL_RC_CONSTANT_COLOR1(x) (0x0000090c+((x)*32)) +#define NV34TCL_RC_CONSTANT_COLOR1__SIZE 0x00000008 +#define NV34TCL_RC_CONSTANT_COLOR1_B_SHIFT 0 +#define NV34TCL_RC_CONSTANT_COLOR1_B_MASK 0x000000ff +#define NV34TCL_RC_CONSTANT_COLOR1_G_SHIFT 8 +#define NV34TCL_RC_CONSTANT_COLOR1_G_MASK 0x0000ff00 +#define NV34TCL_RC_CONSTANT_COLOR1_R_SHIFT 16 +#define NV34TCL_RC_CONSTANT_COLOR1_R_MASK 0x00ff0000 +#define NV34TCL_RC_CONSTANT_COLOR1_A_SHIFT 24 +#define NV34TCL_RC_CONSTANT_COLOR1_A_MASK 0xff000000 +#define NV34TCL_RC_OUT_ALPHA(x) (0x00000910+((x)*32)) +#define NV34TCL_RC_OUT_ALPHA__SIZE 0x00000008 +#define NV34TCL_RC_OUT_RGB(x) (0x00000914+((x)*32)) +#define NV34TCL_RC_OUT_RGB__SIZE 0x00000008 +#define NV34TCL_VIEWPORT_HORIZ 0x00000a00 +#define NV34TCL_VIEWPORT_HORIZ_X_SHIFT 0 +#define NV34TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff +#define NV34TCL_VIEWPORT_HORIZ_W_SHIFT 16 +#define NV34TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 +#define NV34TCL_VIEWPORT_VERT 0x00000a04 +#define NV34TCL_VIEWPORT_VERT_Y_SHIFT 0 +#define NV34TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff +#define NV34TCL_VIEWPORT_VERT_H_SHIFT 16 +#define NV34TCL_VIEWPORT_VERT_H_MASK 0xffff0000 +#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x00000a10 +#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x00000a14 +#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x00000a18 +#define NV34TCL_VIEWPORT_TRANSLATE_X 0x00000a20 +#define NV34TCL_VIEWPORT_TRANSLATE_Y 0x00000a24 +#define NV34TCL_VIEWPORT_TRANSLATE_Z 0x00000a28 +#define NV34TCL_VIEWPORT_TRANSLATE_W 0x00000a2c +#define NV34TCL_VIEWPORT_SCALE_X 0x00000a30 +#define NV34TCL_VIEWPORT_SCALE_Y 0x00000a34 +#define NV34TCL_VIEWPORT_SCALE_Z 0x00000a38 +#define NV34TCL_VIEWPORT_SCALE_W 0x00000a3c +#define NV34TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a60 +#define NV34TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 +#define NV34TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a68 +#define NV34TCL_DEPTH_FUNC 0x00000a6c +#define NV34TCL_DEPTH_FUNC_NEVER 0x00000200 +#define NV34TCL_DEPTH_FUNC_LESS 0x00000201 +#define NV34TCL_DEPTH_FUNC_EQUAL 0x00000202 +#define NV34TCL_DEPTH_FUNC_LEQUAL 0x00000203 +#define NV34TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV34TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV34TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 +#define NV34TCL_DEPTH_FUNC_GEQUAL 0x00000206 +#define NV34TCL_DEPTH_FUNC_ALWAYS 0x00000207 +#define NV34TCL_DEPTH_WRITE_ENABLE 0x00000a70 +#define NV34TCL_DEPTH_TEST_ENABLE 0x00000a74 +#define NV34TCL_POLYGON_OFFSET_FACTOR 0x00000a78 +#define NV34TCL_POLYGON_OFFSET_UNITS 0x00000a7c +#define NV34TCL_VERTEX_NOR_3I_XY 0x00000a90 +#define NV34TCL_VERTEX_NOR_3I_XY_X_SHIFT 0 +#define NV34TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff +#define NV34TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16 +#define NV34TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000 +#define NV34TCL_VERTEX_NOR_3I_Z 0x00000a94 +#define NV34TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0 +#define NV34TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff +#define NV34TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4)) +#define NV34TCL_VP_UPLOAD_INST__SIZE 0x00000004 +#define NV34TCL_CLIP_PLANE_A(x) (0x00000e00+((x)*16)) +#define NV34TCL_CLIP_PLANE_A__SIZE 0x00000020 +#define NV34TCL_CLIP_PLANE_B(x) (0x00000e04+((x)*16)) +#define NV34TCL_CLIP_PLANE_B__SIZE 0x00000020 +#define NV34TCL_CLIP_PLANE_C(x) (0x00000e08+((x)*16)) +#define NV34TCL_CLIP_PLANE_C__SIZE 0x00000020 +#define NV34TCL_CLIP_PLANE_D(x) (0x00000e0c+((x)*16)) +#define NV34TCL_CLIP_PLANE_D__SIZE 0x00000020 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00001000+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00001004+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00001008+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000100c+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00001010+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00001014+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00001018+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000101c+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00001020+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008 +#define NV34TCL_LIGHT_HALF_VECTOR_X(x) (0x00001028+((x)*64)) +#define NV34TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008 +#define NV34TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000102c+((x)*64)) +#define NV34TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008 +#define NV34TCL_LIGHT_HALF_VECTOR_Z(x) (0x00001030+((x)*64)) +#define NV34TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008 +#define NV34TCL_LIGHT_DIRECTION_X(x) (0x00001034+((x)*64)) +#define NV34TCL_LIGHT_DIRECTION_X__SIZE 0x00000008 +#define NV34TCL_LIGHT_DIRECTION_Y(x) (0x00001038+((x)*64)) +#define NV34TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008 +#define NV34TCL_LIGHT_DIRECTION_Z(x) (0x0000103c+((x)*64)) +#define NV34TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_CUTOFF_A(x) (0x00001200+((x)*64)) +#define NV34TCL_LIGHT_SPOT_CUTOFF_A__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_CUTOFF_B(x) (0x00001204+((x)*64)) +#define NV34TCL_LIGHT_SPOT_CUTOFF_B__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_CUTOFF_C(x) (0x00001208+((x)*64)) +#define NV34TCL_LIGHT_SPOT_CUTOFF_C__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_DIR_X(x) (0x0000120c+((x)*64)) +#define NV34TCL_LIGHT_SPOT_DIR_X__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_DIR_Y(x) (0x00001210+((x)*64)) +#define NV34TCL_LIGHT_SPOT_DIR_Y__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_DIR_Z(x) (0x00001214+((x)*64)) +#define NV34TCL_LIGHT_SPOT_DIR_Z__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_CUTOFF_D(x) (0x00001218+((x)*64)) +#define NV34TCL_LIGHT_SPOT_CUTOFF_D__SIZE 0x00000008 +#define NV34TCL_LIGHT_POSITION_X(x) (0x0000121c+((x)*64)) +#define NV34TCL_LIGHT_POSITION_X__SIZE 0x00000008 +#define NV34TCL_LIGHT_POSITION_Y(x) (0x00001220+((x)*64)) +#define NV34TCL_LIGHT_POSITION_Y__SIZE 0x00000008 +#define NV34TCL_LIGHT_POSITION_Z(x) (0x00001224+((x)*64)) +#define NV34TCL_LIGHT_POSITION_Z__SIZE 0x00000008 +#define NV34TCL_LIGHT_ATTENUATION_CONSTANT(x) (0x00001228+((x)*64)) +#define NV34TCL_LIGHT_ATTENUATION_CONSTANT__SIZE 0x00000008 +#define NV34TCL_LIGHT_ATTENUATION_LINEAR(x) (0x0000122c+((x)*64)) +#define NV34TCL_LIGHT_ATTENUATION_LINEAR__SIZE 0x00000008 +#define NV34TCL_LIGHT_ATTENUATION_QUADRATIC(x) (0x00001230+((x)*64)) +#define NV34TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008 +#define NV34TCL_FRONT_MATERIAL_SHININESS(x) (0x00001400+((x)*4)) +#define NV34TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006 +#define NV34TCL_FP_REG_CONTROL 0x00001450 +#define NV34TCL_FP_REG_CONTROL_UNK1_SHIFT 16 +#define NV34TCL_FP_REG_CONTROL_UNK1_MASK 0xffff0000 +#define NV34TCL_FP_REG_CONTROL_UNK0_SHIFT 0 +#define NV34TCL_FP_REG_CONTROL_UNK0_MASK 0x0000ffff +#define NV34TCL_VP_CLIP_PLANES_ENABLE 0x00001478 +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 (1 << 1) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 (1 << 5) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 (1 << 9) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 (1 << 13) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4 (1 << 17) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5 (1 << 21) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE6 (1 << 25) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE7 (1 << 29) +#define NV34TCL_POLYGON_STIPPLE_ENABLE 0x0000147c +#define NV34TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) +#define NV34TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 +#define NV34TCL_VERTEX_ATTR_3F_X(x) (0x00001500+((x)*16)) +#define NV34TCL_VERTEX_ATTR_3F_X__SIZE 0x00000010 +#define NV34TCL_VERTEX_ATTR_3F_Y(x) (0x00001504+((x)*16)) +#define NV34TCL_VERTEX_ATTR_3F_Y__SIZE 0x00000010 +#define NV34TCL_VERTEX_ATTR_3F_Z(x) (0x00001508+((x)*16)) +#define NV34TCL_VERTEX_ATTR_3F_Z__SIZE 0x00000010 +#define NV34TCL_VP_CLIP_PLANE_A(x) (0x00001600+((x)*16)) +#define NV34TCL_VP_CLIP_PLANE_A__SIZE 0x00000006 +#define NV34TCL_VP_CLIP_PLANE_B(x) (0x00001604+((x)*16)) +#define NV34TCL_VP_CLIP_PLANE_B__SIZE 0x00000006 +#define NV34TCL_VP_CLIP_PLANE_C(x) (0x00001608+((x)*16)) +#define NV34TCL_VP_CLIP_PLANE_C__SIZE 0x00000006 +#define NV34TCL_VP_CLIP_PLANE_D(x) (0x0000160c+((x)*16)) +#define NV34TCL_VP_CLIP_PLANE_D__SIZE 0x00000006 +#define NV34TCL_VERTEX_BUFFER_ADDRESS(x) (0x00001680+((x)*4)) +#define NV34TCL_VERTEX_BUFFER_ADDRESS__SIZE 0x00000010 +#define NV34TCL_VERTEX_BUFFER_ADDRESS_DMA1 (1 << 31) +#define NV34TCL_VERTEX_BUFFER_ADDRESS_OFFSET_SHIFT 0 +#define NV34TCL_VERTEX_BUFFER_ADDRESS_OFFSET_MASK 0x0fffffff +#define NV34TCL_VERTEX_ARRAY_FORMAT(x) (0x00001740+((x)*4)) +#define NV34TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010 +#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_SHIFT 0 +#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_MASK 0x0000000f +#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT 0x00000002 +#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_UBYTE 0x00000004 +#define NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_SHIFT 4 +#define NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_MASK 0x000000f0 +#define NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT 8 +#define NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_MASK 0x0000ff00 +#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0 +#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4 +#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8 +#define NV34TCL_COLOR_MATERIAL_BACK_R 0x000017b0 +#define NV34TCL_COLOR_MATERIAL_BACK_G 0x000017b4 +#define NV34TCL_COLOR_MATERIAL_BACK_B 0x000017b8 +#define NV34TCL_COLOR_MATERIAL_BACK_A 0x000017c0 +#define NV34TCL_QUERY_RESET 0x000017c8 +#define NV34TCL_QUERY_UNK17CC 0x000017cc +#define NV34TCL_QUERY_GET 0x00001800 +#define NV34TCL_QUERY_GET_UNK24_SHIFT 24 +#define NV34TCL_QUERY_GET_UNK24_MASK 0xff000000 +#define NV34TCL_QUERY_GET_OFFSET_SHIFT 0 +#define NV34TCL_QUERY_GET_OFFSET_MASK 0x00ffffff +#define NV34TCL_VERTEX_BEGIN_END 0x00001808 +#define NV34TCL_VERTEX_BEGIN_END_STOP 0x00000000 +#define NV34TCL_VERTEX_BEGIN_END_POINTS 0x00000001 +#define NV34TCL_VERTEX_BEGIN_END_LINES 0x00000002 +#define NV34TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003 +#define NV34TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004 +#define NV34TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005 +#define NV34TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV34TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV34TCL_VERTEX_BEGIN_END_QUADS 0x00000008 +#define NV34TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV34TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a +#define NV34TCL_VB_ELEMENT_U16 0x0000180c +#define NV34TCL_VB_ELEMENT_U16_I0_SHIFT 0 +#define NV34TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff +#define NV34TCL_VB_ELEMENT_U16_I1_SHIFT 16 +#define NV34TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000 +#define NV34TCL_VB_ELEMENT_U32 0x00001810 +#define NV34TCL_VB_VERTEX_BATCH 0x00001814 +#define NV34TCL_VB_VERTEX_BATCH_OFFSET_SHIFT 0 +#define NV34TCL_VB_VERTEX_BATCH_OFFSET_MASK 0x00ffffff +#define NV34TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24 +#define NV34TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000 +#define NV34TCL_VERTEX_DATA 0x00001818 +#define NV34TCL_POLYGON_MODE_FRONT 0x00001828 +#define NV34TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV34TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV34TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 +#define NV34TCL_POLYGON_MODE_BACK 0x0000182c +#define NV34TCL_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV34TCL_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV34TCL_POLYGON_MODE_BACK_FILL 0x00001b02 +#define NV34TCL_CULL_FACE 0x00001830 +#define NV34TCL_CULL_FACE_FRONT 0x00000404 +#define NV34TCL_CULL_FACE_BACK 0x00000405 +#define NV34TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NV34TCL_FRONT_FACE 0x00001834 +#define NV34TCL_FRONT_FACE_CW 0x00000900 +#define NV34TCL_FRONT_FACE_CCW 0x00000901 +#define NV34TCL_POLYGON_SMOOTH_ENABLE 0x00001838 +#define NV34TCL_CULL_FACE_ENABLE 0x0000183c +#define NV34TCL_VERTEX_ATTR_2F_X(x) (0x00001880+((x)*8)) +#define NV34TCL_VERTEX_ATTR_2F_X__SIZE 0x00000010 +#define NV34TCL_VERTEX_ATTR_2F_Y(x) (0x00001884+((x)*8)) +#define NV34TCL_VERTEX_ATTR_2F_Y__SIZE 0x00000010 +#define NV34TCL_VERTEX_ATTR_2I(x) (0x00001900+((x)*4)) +#define NV34TCL_VERTEX_ATTR_2I__SIZE 0x00000010 +#define NV34TCL_VERTEX_ATTR_2I_Y_SHIFT 16 +#define NV34TCL_VERTEX_ATTR_2I_Y_MASK 0xffff0000 +#define NV34TCL_VERTEX_ATTR_2I_X_SHIFT 0 +#define NV34TCL_VERTEX_ATTR_2I_X_MASK 0x0000ffff +#define NV34TCL_VERTEX_COL_4I(x) (0x0000194c+((x)*4)) +#define NV34TCL_VERTEX_COL_4I__SIZE 0x00000002 +#define NV34TCL_VERTEX_COL_4I_R_SHIFT 0 +#define NV34TCL_VERTEX_COL_4I_R_MASK 0x000000ff +#define NV34TCL_VERTEX_COL_4I_G_SHIFT 8 +#define NV34TCL_VERTEX_COL_4I_G_MASK 0x0000ff00 +#define NV34TCL_VERTEX_COL_4I_B_SHIFT 16 +#define NV34TCL_VERTEX_COL_4I_B_MASK 0x00ff0000 +#define NV34TCL_VERTEX_COL_4I_A_SHIFT 24 +#define NV34TCL_VERTEX_COL_4I_A_MASK 0xff000000 +#define NV34TCL_VERTEX_POS_4I_XY 0x00001980 +#define NV34TCL_VERTEX_POS_4I_XY_X_SHIFT 0 +#define NV34TCL_VERTEX_POS_4I_XY_X_MASK 0x0000ffff +#define NV34TCL_VERTEX_POS_4I_XY_Y_SHIFT 16 +#define NV34TCL_VERTEX_POS_4I_XY_Y_MASK 0xffff0000 +#define NV34TCL_VERTEX_POS_4I_ZW 0x00001984 +#define NV34TCL_VERTEX_POS_4I_ZW_Z_SHIFT 0 +#define NV34TCL_VERTEX_POS_4I_ZW_Z_MASK 0x0000ffff +#define NV34TCL_VERTEX_POS_4I_ZW_W_SHIFT 16 +#define NV34TCL_VERTEX_POS_4I_ZW_W_MASK 0xffff0000 +#define NV34TCL_VERTEX_TX_4I_ST(x) (0x000019c0+((x)*8)) +#define NV34TCL_VERTEX_TX_4I_ST__SIZE 0x00000004 +#define NV34TCL_VERTEX_TX_4I_ST_S_SHIFT 0 +#define NV34TCL_VERTEX_TX_4I_ST_S_MASK 0x0000ffff +#define NV34TCL_VERTEX_TX_4I_ST_T_SHIFT 16 +#define NV34TCL_VERTEX_TX_4I_ST_T_MASK 0xffff0000 +#define NV34TCL_VERTEX_TX_4I_RQ(x) (0x000019c4+((x)*8)) +#define NV34TCL_VERTEX_TX_4I_RQ__SIZE 0x00000004 +#define NV34TCL_VERTEX_TX_4I_RQ_R_SHIFT 0 +#define NV34TCL_VERTEX_TX_4I_RQ_R_MASK 0x0000ffff +#define NV34TCL_VERTEX_TX_4I_RQ_Q_SHIFT 16 +#define NV34TCL_VERTEX_TX_4I_RQ_Q_MASK 0xffff0000 +#define NV34TCL_TX_OFFSET(x) (0x00001a00+((x)*32)) +#define NV34TCL_TX_OFFSET__SIZE 0x00000004 +#define NV34TCL_TX_FORMAT(x) (0x00001a04+((x)*32)) +#define NV34TCL_TX_FORMAT__SIZE 0x00000004 +#define NV34TCL_TX_FORMAT_DMA0 (1 << 0) +#define NV34TCL_TX_FORMAT_DMA1 (1 << 1) +#define NV34TCL_TX_FORMAT_CUBE_MAP (1 << 2) +#define NV34TCL_TX_FORMAT_DIMS_SHIFT 4 +#define NV34TCL_TX_FORMAT_DIMS_MASK 0x000000f0 +#define NV34TCL_TX_FORMAT_DIMS_1D 0x00000010 +#define NV34TCL_TX_FORMAT_DIMS_2D 0x00000020 +#define NV34TCL_TX_FORMAT_DIMS_3D 0x00000030 +#define NV34TCL_TX_FORMAT_FORMAT_SHIFT 8 +#define NV34TCL_TX_FORMAT_FORMAT_MASK 0x0000ff00 +#define NV34TCL_TX_FORMAT_FORMAT_L8 0x00000000 +#define NV34TCL_TX_FORMAT_FORMAT_A8 0x00000100 +#define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000200 +#define NV34TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000300 +#define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000400 +#define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000600 +#define NV34TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000700 +#define NV34TCL_TX_FORMAT_FORMAT_INDEX8 0x00000b00 +#define NV34TCL_TX_FORMAT_FORMAT_DXT1 0x00000c00 +#define NV34TCL_TX_FORMAT_FORMAT_DXT3 0x00000e00 +#define NV34TCL_TX_FORMAT_FORMAT_DXT5 0x00000f00 +#define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000 +#define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200 +#define NV34TCL_TX_FORMAT_FORMAT_L8_RECT 0x00001300 +#define NV34TCL_TX_FORMAT_FORMAT_A8L8 0x00001a00 +#define NV34TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00001b00 +#define NV34TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00 +#define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00 +#define NV34TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00002000 +#define NV34TCL_TX_FORMAT_FORMAT_A16 0x00003200 +#define NV34TCL_TX_FORMAT_FORMAT_A16_RECT 0x00003500 +#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00 +#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00 +#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00 +#define NV34TCL_TX_FORMAT_NPOT (1 << 12) +#define NV34TCL_TX_FORMAT_RECT (1 << 14) +#define NV34TCL_TX_FORMAT_MIPMAP_LEVELS_SHIFT 16 +#define NV34TCL_TX_FORMAT_MIPMAP_LEVELS_MASK 0x000f0000 +#define NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 20 +#define NV34TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x00f00000 +#define NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 24 +#define NV34TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x0f000000 +#define NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT 28 +#define NV34TCL_TX_FORMAT_BASE_SIZE_W_MASK 0xf0000000 +#define NV34TCL_TX_WRAP(x) (0x00001a08+((x)*32)) +#define NV34TCL_TX_WRAP__SIZE 0x00000004 +#define NV34TCL_TX_WRAP_S_SHIFT 0 +#define NV34TCL_TX_WRAP_S_MASK 0x000000ff +#define NV34TCL_TX_WRAP_S_REPEAT 0x00000001 +#define NV34TCL_TX_WRAP_S_MIRRORED_REPEAT 0x00000002 +#define NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE 0x00000003 +#define NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER 0x00000004 +#define NV34TCL_TX_WRAP_S_CLAMP 0x00000005 +#define NV34TCL_TX_WRAP_T_SHIFT 8 +#define NV34TCL_TX_WRAP_T_MASK 0x0000ff00 +#define NV34TCL_TX_WRAP_T_REPEAT 0x00000100 +#define NV34TCL_TX_WRAP_T_MIRRORED_REPEAT 0x00000200 +#define NV34TCL_TX_WRAP_T_CLAMP_TO_EDGE 0x00000300 +#define NV34TCL_TX_WRAP_T_CLAMP_TO_BORDER 0x00000400 +#define NV34TCL_TX_WRAP_T_CLAMP 0x00000500 +#define NV34TCL_TX_WRAP_R_SHIFT 16 +#define NV34TCL_TX_WRAP_R_MASK 0x00ff0000 +#define NV34TCL_TX_WRAP_R_REPEAT 0x00010000 +#define NV34TCL_TX_WRAP_R_MIRRORED_REPEAT 0x00020000 +#define NV34TCL_TX_WRAP_R_CLAMP_TO_EDGE 0x00030000 +#define NV34TCL_TX_WRAP_R_CLAMP_TO_BORDER 0x00040000 +#define NV34TCL_TX_WRAP_R_CLAMP 0x00050000 +#define NV34TCL_TX_ENABLE(x) (0x00001a0c+((x)*32)) +#define NV34TCL_TX_ENABLE__SIZE 0x00000004 +#define NV34TCL_TX_ENABLE_ENABLE (1 << 30) +#define NV34TCL_TX_SWIZZLE(x) (0x00001a10+((x)*32)) +#define NV34TCL_TX_SWIZZLE__SIZE 0x00000004 +#define NV34TCL_TX_SWIZZLE_S0_X_SHIFT 14 +#define NV34TCL_TX_SWIZZLE_S0_X_MASK 0x0000c000 +#define NV34TCL_TX_SWIZZLE_S0_X_ZERO 0x00000000 +#define NV34TCL_TX_SWIZZLE_S0_X_ONE 0x00004000 +#define NV34TCL_TX_SWIZZLE_S0_X_S1 0x00008000 +#define NV34TCL_TX_SWIZZLE_S0_Y_SHIFT 12 +#define NV34TCL_TX_SWIZZLE_S0_Y_MASK 0x00003000 +#define NV34TCL_TX_SWIZZLE_S0_Y_ZERO 0x00000000 +#define NV34TCL_TX_SWIZZLE_S0_Y_ONE 0x00001000 +#define NV34TCL_TX_SWIZZLE_S0_Y_S1 0x00002000 +#define NV34TCL_TX_SWIZZLE_S0_Z_SHIFT 10 +#define NV34TCL_TX_SWIZZLE_S0_Z_MASK 0x00000c00 +#define NV34TCL_TX_SWIZZLE_S0_Z_ZERO 0x00000000 +#define NV34TCL_TX_SWIZZLE_S0_Z_ONE 0x00000400 +#define NV34TCL_TX_SWIZZLE_S0_Z_S1 0x00000800 +#define NV34TCL_TX_SWIZZLE_S0_W_SHIFT 8 +#define NV34TCL_TX_SWIZZLE_S0_W_MASK 0x00000300 +#define NV34TCL_TX_SWIZZLE_S0_W_ZERO 0x00000000 +#define NV34TCL_TX_SWIZZLE_S0_W_ONE 0x00000100 +#define NV34TCL_TX_SWIZZLE_S0_W_S1 0x00000200 +#define NV34TCL_TX_SWIZZLE_S1_X_SHIFT 6 +#define NV34TCL_TX_SWIZZLE_S1_X_MASK 0x000000c0 +#define NV34TCL_TX_SWIZZLE_S1_X_W 0x00000000 +#define NV34TCL_TX_SWIZZLE_S1_X_Z 0x00000040 +#define NV34TCL_TX_SWIZZLE_S1_X_Y 0x00000080 +#define NV34TCL_TX_SWIZZLE_S1_X_X 0x000000c0 +#define NV34TCL_TX_SWIZZLE_S1_Y_SHIFT 4 +#define NV34TCL_TX_SWIZZLE_S1_Y_MASK 0x00000030 +#define NV34TCL_TX_SWIZZLE_S1_Y_W 0x00000000 +#define NV34TCL_TX_SWIZZLE_S1_Y_Z 0x00000010 +#define NV34TCL_TX_SWIZZLE_S1_Y_Y 0x00000020 +#define NV34TCL_TX_SWIZZLE_S1_Y_X 0x00000030 +#define NV34TCL_TX_SWIZZLE_S1_Z_SHIFT 2 +#define NV34TCL_TX_SWIZZLE_S1_Z_MASK 0x0000000c +#define NV34TCL_TX_SWIZZLE_S1_Z_W 0x00000000 +#define NV34TCL_TX_SWIZZLE_S1_Z_Z 0x00000004 +#define NV34TCL_TX_SWIZZLE_S1_Z_Y 0x00000008 +#define NV34TCL_TX_SWIZZLE_S1_Z_X 0x0000000c +#define NV34TCL_TX_SWIZZLE_S1_W_SHIFT 0 +#define NV34TCL_TX_SWIZZLE_S1_W_MASK 0x00000003 +#define NV34TCL_TX_SWIZZLE_S1_W_W 0x00000000 +#define NV34TCL_TX_SWIZZLE_S1_W_Z 0x00000001 +#define NV34TCL_TX_SWIZZLE_S1_W_Y 0x00000002 +#define NV34TCL_TX_SWIZZLE_S1_W_X 0x00000003 +#define NV34TCL_TX_SWIZZLE_RECT_PITCH_SHIFT 16 +#define NV34TCL_TX_SWIZZLE_RECT_PITCH_MASK 0xffff0000 +#define NV34TCL_TX_FILTER(x) (0x00001a14+((x)*32)) +#define NV34TCL_TX_FILTER__SIZE 0x00000004 +#define NV34TCL_TX_FILTER_MINIFY_SHIFT 16 +#define NV34TCL_TX_FILTER_MINIFY_MASK 0x000f0000 +#define NV34TCL_TX_FILTER_MINIFY_NEAREST 0x00010000 +#define NV34TCL_TX_FILTER_MINIFY_LINEAR 0x00020000 +#define NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x00030000 +#define NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x00040000 +#define NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x00050000 +#define NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x00060000 +#define NV34TCL_TX_FILTER_MAGNIFY_SHIFT 24 +#define NV34TCL_TX_FILTER_MAGNIFY_MASK 0x0f000000 +#define NV34TCL_TX_FILTER_MAGNIFY_NEAREST 0x01000000 +#define NV34TCL_TX_FILTER_MAGNIFY_LINEAR 0x02000000 +#define NV34TCL_TX_FILTER_SIGNED_BLUE (1 << 28) +#define NV34TCL_TX_FILTER_SIGNED_GREEN (1 << 29) +#define NV34TCL_TX_FILTER_SIGNED_RED (1 << 30) +#define NV34TCL_TX_FILTER_SIGNED_ALPHA (1 << 31) +#define NV34TCL_TX_NPOT_SIZE(x) (0x00001a18+((x)*32)) +#define NV34TCL_TX_NPOT_SIZE__SIZE 0x00000004 +#define NV34TCL_TX_NPOT_SIZE_H_SHIFT 0 +#define NV34TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff +#define NV34TCL_TX_NPOT_SIZE_W_SHIFT 16 +#define NV34TCL_TX_NPOT_SIZE_W_MASK 0xffff0000 +#define NV34TCL_TX_BORDER_COLOR(x) (0x00001a1c+((x)*32)) +#define NV34TCL_TX_BORDER_COLOR__SIZE 0x00000004 +#define NV34TCL_TX_BORDER_COLOR_B_SHIFT 0 +#define NV34TCL_TX_BORDER_COLOR_B_MASK 0x000000ff +#define NV34TCL_TX_BORDER_COLOR_G_SHIFT 8 +#define NV34TCL_TX_BORDER_COLOR_G_MASK 0x0000ff00 +#define NV34TCL_TX_BORDER_COLOR_R_SHIFT 16 +#define NV34TCL_TX_BORDER_COLOR_R_MASK 0x00ff0000 +#define NV34TCL_TX_BORDER_COLOR_A_SHIFT 24 +#define NV34TCL_TX_BORDER_COLOR_A_MASK 0xff000000 +#define NV34TCL_VERTEX_ATTR_4F_X(x) (0x00001c00+((x)*16)) +#define NV34TCL_VERTEX_ATTR_4F_X__SIZE 0x00000010 +#define NV34TCL_VERTEX_ATTR_4F_Y(x) (0x00001c04+((x)*16)) +#define NV34TCL_VERTEX_ATTR_4F_Y__SIZE 0x00000010 +#define NV34TCL_VERTEX_ATTR_4F_Z(x) (0x00001c08+((x)*16)) +#define NV34TCL_VERTEX_ATTR_4F_Z__SIZE 0x00000010 +#define NV34TCL_VERTEX_ATTR_4F_W(x) (0x00001c0c+((x)*16)) +#define NV34TCL_VERTEX_ATTR_4F_W__SIZE 0x00000010 +#define NV34TCL_FP_CONTROL 0x00001d60 +#define NV34TCL_FP_CONTROL_USES_KIL (1 << 7) +#define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_SHIFT 0 +#define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_MASK 0x0000000f +#define NV34TCL_MULTISAMPLE_CONTROL 0x00001d7c +#define NV34TCL_CLEAR_DEPTH_VALUE 0x00001d8c +#define NV34TCL_CLEAR_COLOR_VALUE 0x00001d90 +#define NV34TCL_CLEAR_COLOR_VALUE_B_SHIFT 0 +#define NV34TCL_CLEAR_COLOR_VALUE_B_MASK 0x000000ff +#define NV34TCL_CLEAR_COLOR_VALUE_G_SHIFT 8 +#define NV34TCL_CLEAR_COLOR_VALUE_G_MASK 0x0000ff00 +#define NV34TCL_CLEAR_COLOR_VALUE_R_SHIFT 16 +#define NV34TCL_CLEAR_COLOR_VALUE_R_MASK 0x00ff0000 +#define NV34TCL_CLEAR_COLOR_VALUE_A_SHIFT 24 +#define NV34TCL_CLEAR_COLOR_VALUE_A_MASK 0xff000000 +#define NV34TCL_CLEAR_BUFFERS 0x00001d94 +#define NV34TCL_CLEAR_BUFFERS_COLOR_A (1 << 7) +#define NV34TCL_CLEAR_BUFFERS_COLOR_B (1 << 6) +#define NV34TCL_CLEAR_BUFFERS_COLOR_G (1 << 5) +#define NV34TCL_CLEAR_BUFFERS_COLOR_R (1 << 4) +#define NV34TCL_CLEAR_BUFFERS_STENCIL (1 << 1) +#define NV34TCL_CLEAR_BUFFERS_DEPTH (1 << 0) +#define NV34TCL_DO_VERTICES 0x00001dac +#define NV34TCL_LINE_STIPPLE_ENABLE 0x00001db4 +#define NV34TCL_LINE_STIPPLE_PATTERN 0x00001db8 +#define NV34TCL_LINE_STIPPLE_PATTERN_FACTOR_SHIFT 0 +#define NV34TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff +#define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16 +#define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000 +#define NV34TCL_BACK_MATERIAL_SHININESS(x) (0x00001e20+((x)*4)) +#define NV34TCL_BACK_MATERIAL_SHININESS__SIZE 0x00000006 +#define NV34TCL_VERTEX_FOG_1F 0x00001e54 +#define NV34TCL_VP_UPLOAD_FROM_ID 0x00001e9c +#define NV34TCL_VP_START_FROM_ID 0x00001ea0 +#define NV34TCL_POINT_PARAMETERS(x) (0x00001ec0+((x)*4)) +#define NV34TCL_POINT_PARAMETERS__SIZE 0x00000008 +#define NV34TCL_POINT_SIZE 0x00001ee0 +#define NV34TCL_POINT_PARAMETERS_ENABLE 0x00001ee4 +#define NV34TCL_POINT_SPRITE 0x00001ee8 +#define NV34TCL_POINT_SPRITE_ENABLE (1 << 0) +#define NV34TCL_POINT_SPRITE_R_MODE_SHIFT 1 +#define NV34TCL_POINT_SPRITE_R_MODE_MASK 0x00000006 +#define NV34TCL_POINT_SPRITE_R_MODE_GL_ZERO 0x00000000 +#define NV34TCL_POINT_SPRITE_R_MODE_GL_R 0x00000002 +#define NV34TCL_POINT_SPRITE_R_MODE_GL_S 0x00000004 +#define NV34TCL_POINT_SPRITE_COORD_REPLACE (1 << 11) +#define NV34TCL_VP_UPLOAD_CONST_ID 0x00001efc +#define NV34TCL_VP_UPLOAD_CONST_X(x) (0x00001f00+((x)*16)) +#define NV34TCL_VP_UPLOAD_CONST_X__SIZE 0x00000004 +#define NV34TCL_VP_UPLOAD_CONST_Y(x) (0x00001f04+((x)*16)) +#define NV34TCL_VP_UPLOAD_CONST_Y__SIZE 0x00000004 +#define NV34TCL_VP_UPLOAD_CONST_Z(x) (0x00001f08+((x)*16)) +#define NV34TCL_VP_UPLOAD_CONST_Z__SIZE 0x00000004 +#define NV34TCL_VP_UPLOAD_CONST_W(x) (0x00001f0c+((x)*16)) +#define NV34TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004 +#define NV34TCL_UNK1f80(x) (0x00001f80+((x)*4)) +#define NV34TCL_UNK1f80__SIZE 0x00000010 +#define NV34TCL_VP_ATTRIB_EN 0x00001ff0 +#define NV34TCL_VP_RESULT_EN 0x00001ff4 + + +#define NV40_CONTEXT_SURFACES_2D 0x00003062 + + + +#define NV40_STRETCHED_IMAGE_FROM_CPU 0x00003066 + + + +#define NV40_TEXTURE_FROM_CPU 0x0000307b + + + +#define NV40_SCALED_IMAGE_FROM_MEMORY 0x00003089 + + + +#define NV40_IMAGE_FROM_CPU 0x0000308a + + + +#define NV40_SWIZZLED_SURFACE 0x0000309e + + + +#define NV40TCL 0x00004097 + +#define NV40TCL_REF_CNT 0x00000050 +#define NV40TCL_NOP 0x00000100 +#define NV40TCL_NOTIFY 0x00000104 +#define NV40TCL_DMA_NOTIFY 0x00000180 +#define NV40TCL_DMA_TEXTURE0 0x00000184 +#define NV40TCL_DMA_TEXTURE1 0x00000188 +#define NV40TCL_DMA_COLOR1 0x0000018c +#define NV40TCL_DMA_COLOR0 0x00000194 +#define NV40TCL_DMA_ZETA 0x00000198 +#define NV40TCL_DMA_VTXBUF0 0x0000019c +#define NV40TCL_DMA_VTXBUF1 0x000001a0 +#define NV40TCL_DMA_FENCE 0x000001a4 +#define NV40TCL_DMA_QUERY 0x000001a8 +#define NV40TCL_DMA_UNK01AC 0x000001ac +#define NV40TCL_DMA_UNK01B0 0x000001b0 +#define NV40TCL_DMA_COLOR2 0x000001b4 +#define NV40TCL_DMA_COLOR3 0x000001b8 +#define NV40TCL_RT_HORIZ 0x00000200 +#define NV40TCL_RT_HORIZ_W_SHIFT 16 +#define NV40TCL_RT_HORIZ_W_MASK 0xffff0000 +#define NV40TCL_RT_HORIZ_X_SHIFT 0 +#define NV40TCL_RT_HORIZ_X_MASK 0x0000ffff +#define NV40TCL_RT_VERT 0x00000204 +#define NV40TCL_RT_VERT_H_SHIFT 16 +#define NV40TCL_RT_VERT_H_MASK 0xffff0000 +#define NV40TCL_RT_VERT_Y_SHIFT 0 +#define NV40TCL_RT_VERT_Y_MASK 0x0000ffff +#define NV40TCL_RT_FORMAT 0x00000208 +#define NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT 24 +#define NV40TCL_RT_FORMAT_LOG2_HEIGHT_MASK 0xff000000 +#define NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT 16 +#define NV40TCL_RT_FORMAT_LOG2_WIDTH_MASK 0x00ff0000 +#define NV40TCL_RT_FORMAT_TYPE_SHIFT 8 +#define NV40TCL_RT_FORMAT_TYPE_MASK 0x00000f00 +#define NV40TCL_RT_FORMAT_TYPE_LINEAR 0x00000100 +#define NV40TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200 +#define NV40TCL_RT_FORMAT_ZETA_SHIFT 5 +#define NV40TCL_RT_FORMAT_ZETA_MASK 0x000000e0 +#define NV40TCL_RT_FORMAT_ZETA_Z16 0x00000020 +#define NV40TCL_RT_FORMAT_ZETA_Z24S8 0x00000040 +#define NV40TCL_RT_FORMAT_COLOR_SHIFT 0 +#define NV40TCL_RT_FORMAT_COLOR_MASK 0x0000001f +#define NV40TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV40TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 +#define NV40TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV40TCL_RT_FORMAT_COLOR_B8 0x00000009 +#define NV40TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f +#define NV40TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 +#define NV40TCL_COLOR0_PITCH 0x0000020c +#define NV40TCL_COLOR0_OFFSET 0x00000210 +#define NV40TCL_ZETA_OFFSET 0x00000214 +#define NV40TCL_COLOR1_OFFSET 0x00000218 +#define NV40TCL_COLOR1_PITCH 0x0000021c +#define NV40TCL_RT_ENABLE 0x00000220 +#define NV40TCL_RT_ENABLE_MRT (1 << 4) +#define NV40TCL_RT_ENABLE_COLOR3 (1 << 3) +#define NV40TCL_RT_ENABLE_COLOR2 (1 << 2) +#define NV40TCL_RT_ENABLE_COLOR1 (1 << 1) +#define NV40TCL_RT_ENABLE_COLOR0 (1 << 0) +#define NV40TCL_ZETA_PITCH 0x0000022c +#define NV40TCL_COLOR2_PITCH 0x00000280 +#define NV40TCL_COLOR3_PITCH 0x00000284 +#define NV40TCL_COLOR2_OFFSET 0x00000288 +#define NV40TCL_COLOR3_OFFSET 0x0000028c +#define NV40TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*8)) +#define NV40TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 +#define NV40TCL_VIEWPORT_CLIP_VERT(x) (0x000002c4+((x)*8)) +#define NV40TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 +#define NV40TCL_DITHER_ENABLE 0x00000300 +#define NV40TCL_ALPHA_TEST_ENABLE 0x00000304 +#define NV40TCL_ALPHA_TEST_FUNC 0x00000308 +#define NV40TCL_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NV40TCL_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NV40TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NV40TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define NV40TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV40TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV40TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV40TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NV40TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207 +#define NV40TCL_ALPHA_TEST_REF 0x0000030c +#define NV40TCL_BLEND_ENABLE 0x00000310 +#define NV40TCL_BLEND_FUNC_SRC 0x00000314 +#define NV40TCL_BLEND_FUNC_SRC_RGB_SHIFT 0 +#define NV40TCL_BLEND_FUNC_SRC_RGB_MASK 0x0000ffff +#define NV40TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001 +#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV40TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV40TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV40TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV40TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SHIFT 16 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_MASK 0xffff0000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 +#define NV40TCL_BLEND_FUNC_DST 0x00000318 +#define NV40TCL_BLEND_FUNC_DST_RGB_SHIFT 0 +#define NV40TCL_BLEND_FUNC_DST_RGB_MASK 0x0000ffff +#define NV40TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001 +#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV40TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV40TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV40TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV40TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_SHIFT 16 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_MASK 0xffff0000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00010000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 +#define NV40TCL_BLEND_COLOR 0x0000031c +#define NV40TCL_BLEND_EQUATION 0x00000320 +#define NV40TCL_BLEND_EQUATION_RGB_SHIFT 0 +#define NV40TCL_BLEND_EQUATION_RGB_MASK 0x0000ffff +#define NV40TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NV40TCL_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NV40TCL_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NV40TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NV40TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV40TCL_BLEND_EQUATION_ALPHA_SHIFT 16 +#define NV40TCL_BLEND_EQUATION_ALPHA_MASK 0xffff0000 +#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x80060000 +#define NV40TCL_BLEND_EQUATION_ALPHA_MIN 0x80070000 +#define NV40TCL_BLEND_EQUATION_ALPHA_MAX 0x80080000 +#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x800a0000 +#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x800b0000 +#define NV40TCL_COLOR_MASK 0x00000324 +#define NV40TCL_COLOR_MASK_BUFFER0_B_SHIFT 0 +#define NV40TCL_COLOR_MASK_BUFFER0_B_MASK 0x000000ff +#define NV40TCL_COLOR_MASK_BUFFER0_G_SHIFT 8 +#define NV40TCL_COLOR_MASK_BUFFER0_G_MASK 0x0000ff00 +#define NV40TCL_COLOR_MASK_BUFFER0_R_SHIFT 16 +#define NV40TCL_COLOR_MASK_BUFFER0_R_MASK 0x00ff0000 +#define NV40TCL_COLOR_MASK_BUFFER0_A_SHIFT 24 +#define NV40TCL_COLOR_MASK_BUFFER0_A_MASK 0xff000000 +#define NV40TCL_STENCIL_FRONT_ENABLE 0x00000328 +#define NV40TCL_STENCIL_FRONT_MASK 0x0000032c +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC 0x00000330 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 +#define NV40TCL_STENCIL_FRONT_FUNC_REF 0x00000334 +#define NV40TCL_STENCIL_FRONT_FUNC_MASK 0x00000338 +#define NV40TCL_STENCIL_FRONT_OP_FAIL 0x0000033c +#define NV40TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NV40TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL 0x00000340 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS 0x00000344 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV40TCL_STENCIL_BACK_ENABLE 0x00000348 +#define NV40TCL_STENCIL_BACK_MASK 0x0000034c +#define NV40TCL_STENCIL_BACK_FUNC_FUNC 0x00000350 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 +#define NV40TCL_STENCIL_BACK_FUNC_REF 0x00000354 +#define NV40TCL_STENCIL_BACK_FUNC_MASK 0x00000358 +#define NV40TCL_STENCIL_BACK_OP_FAIL 0x0000035c +#define NV40TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NV40TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NV40TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NV40TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NV40TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NV40TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL 0x00000360 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV40TCL_STENCIL_BACK_OP_ZPASS 0x00000364 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NV40TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV40TCL_SHADE_MODEL 0x00000368 +#define NV40TCL_SHADE_MODEL_FLAT 0x00001d00 +#define NV40TCL_SHADE_MODEL_SMOOTH 0x00001d01 +#define NV40TCL_MRT_COLOR_MASK 0x00000370 +#define NV40TCL_MRT_COLOR_MASK_BUFFER1_A (1 << 4) +#define NV40TCL_MRT_COLOR_MASK_BUFFER1_R (1 << 5) +#define NV40TCL_MRT_COLOR_MASK_BUFFER1_G (1 << 6) +#define NV40TCL_MRT_COLOR_MASK_BUFFER1_B (1 << 7) +#define NV40TCL_MRT_COLOR_MASK_BUFFER2_A (1 << 8) +#define NV40TCL_MRT_COLOR_MASK_BUFFER2_R (1 << 9) +#define NV40TCL_MRT_COLOR_MASK_BUFFER2_G (1 << 10) +#define NV40TCL_MRT_COLOR_MASK_BUFFER2_B (1 << 11) +#define NV40TCL_MRT_COLOR_MASK_BUFFER3_A (1 << 12) +#define NV40TCL_MRT_COLOR_MASK_BUFFER3_R (1 << 13) +#define NV40TCL_MRT_COLOR_MASK_BUFFER3_G (1 << 14) +#define NV40TCL_MRT_COLOR_MASK_BUFFER3_B (1 << 15) +#define NV40TCL_COLOR_LOGIC_OP_ENABLE 0x00000374 +#define NV40TCL_COLOR_LOGIC_OP 0x00000378 +#define NV40TCL_COLOR_LOGIC_OP_CLEAR 0x00001500 +#define NV40TCL_COLOR_LOGIC_OP_AND 0x00001501 +#define NV40TCL_COLOR_LOGIC_OP_AND_REVERSE 0x00001502 +#define NV40TCL_COLOR_LOGIC_OP_COPY 0x00001503 +#define NV40TCL_COLOR_LOGIC_OP_AND_INVERTED 0x00001504 +#define NV40TCL_COLOR_LOGIC_OP_NOOP 0x00001505 +#define NV40TCL_COLOR_LOGIC_OP_XOR 0x00001506 +#define NV40TCL_COLOR_LOGIC_OP_OR 0x00001507 +#define NV40TCL_COLOR_LOGIC_OP_NOR 0x00001508 +#define NV40TCL_COLOR_LOGIC_OP_EQUIV 0x00001509 +#define NV40TCL_COLOR_LOGIC_OP_INVERT 0x0000150a +#define NV40TCL_COLOR_LOGIC_OP_OR_REVERSE 0x0000150b +#define NV40TCL_COLOR_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NV40TCL_COLOR_LOGIC_OP_OR_INVERTED 0x0000150d +#define NV40TCL_COLOR_LOGIC_OP_NAND 0x0000150e +#define NV40TCL_COLOR_LOGIC_OP_SET 0x0000150f +#define NV40TCL_DEPTH_RANGE_NEAR 0x00000394 +#define NV40TCL_DEPTH_RANGE_FAR 0x00000398 +#define NV40TCL_LINE_WIDTH 0x000003b8 +#define NV40TCL_LINE_SMOOTH_ENABLE 0x000003bc +#define NV40TCL_UNK03C0(x) (0x000003c0+((x)*4)) +#define NV40TCL_UNK03C0__SIZE 0x00000010 +#define NV40TCL_UNK0400(x) (0x00000400+((x)*4)) +#define NV40TCL_UNK0400__SIZE 0x00000010 +#define NV40TCL_UNK0440(x) (0x00000440+((x)*4)) +#define NV40TCL_UNK0440__SIZE 0x00000020 +#define NV40TCL_SCISSOR_HORIZ 0x000008c0 +#define NV40TCL_SCISSOR_HORIZ_X_SHIFT 0 +#define NV40TCL_SCISSOR_HORIZ_X_MASK 0x0000ffff +#define NV40TCL_SCISSOR_HORIZ_W_SHIFT 16 +#define NV40TCL_SCISSOR_HORIZ_W_MASK 0xffff0000 +#define NV40TCL_SCISSOR_VERT 0x000008c4 +#define NV40TCL_SCISSOR_VERT_Y_SHIFT 0 +#define NV40TCL_SCISSOR_VERT_Y_MASK 0x0000ffff +#define NV40TCL_SCISSOR_VERT_H_SHIFT 16 +#define NV40TCL_SCISSOR_VERT_H_MASK 0xffff0000 +#define NV40TCL_FOG_MODE 0x000008cc +#define NV40TCL_FOG_EQUATION_CONSTANT 0x000008d0 +#define NV40TCL_FOG_EQUATION_LINEAR 0x000008d4 +#define NV40TCL_FOG_EQUATION_QUADRATIC 0x000008d8 +#define NV40TCL_FP_ADDRESS 0x000008e4 +#define NV40TCL_FP_ADDRESS_OFFSET_SHIFT 8 +#define NV40TCL_FP_ADDRESS_OFFSET_MASK 0xffffff00 +#define NV40TCL_FP_ADDRESS_DMA1 (1 << 1) +#define NV40TCL_FP_ADDRESS_DMA0 (1 << 0) +#define NV40TCL_VIEWPORT_HORIZ 0x00000a00 +#define NV40TCL_VIEWPORT_HORIZ_W_SHIFT 16 +#define NV40TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 +#define NV40TCL_VIEWPORT_HORIZ_X_SHIFT 0 +#define NV40TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff +#define NV40TCL_VIEWPORT_VERT 0x00000a04 +#define NV40TCL_VIEWPORT_VERT_H_SHIFT 16 +#define NV40TCL_VIEWPORT_VERT_H_MASK 0xffff0000 +#define NV40TCL_VIEWPORT_VERT_Y_SHIFT 0 +#define NV40TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff +#define NV40TCL_VIEWPORT_TRANSLATE_X 0x00000a20 +#define NV40TCL_VIEWPORT_TRANSLATE_Y 0x00000a24 +#define NV40TCL_VIEWPORT_TRANSLATE_Z 0x00000a28 +#define NV40TCL_VIEWPORT_TRANSLATE_W 0x00000a2c +#define NV40TCL_VIEWPORT_SCALE_X 0x00000a30 +#define NV40TCL_VIEWPORT_SCALE_Y 0x00000a34 +#define NV40TCL_VIEWPORT_SCALE_Z 0x00000a38 +#define NV40TCL_VIEWPORT_SCALE_W 0x00000a3c +#define NV40TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a60 +#define NV40TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 +#define NV40TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a68 +#define NV40TCL_DEPTH_FUNC 0x00000a6c +#define NV40TCL_DEPTH_FUNC_NEVER 0x00000200 +#define NV40TCL_DEPTH_FUNC_LESS 0x00000201 +#define NV40TCL_DEPTH_FUNC_EQUAL 0x00000202 +#define NV40TCL_DEPTH_FUNC_LEQUAL 0x00000203 +#define NV40TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV40TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV40TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 +#define NV40TCL_DEPTH_FUNC_GEQUAL 0x00000206 +#define NV40TCL_DEPTH_FUNC_ALWAYS 0x00000207 +#define NV40TCL_DEPTH_WRITE_ENABLE 0x00000a70 +#define NV40TCL_DEPTH_TEST_ENABLE 0x00000a74 +#define NV40TCL_POLYGON_OFFSET_FACTOR 0x00000a78 +#define NV40TCL_POLYGON_OFFSET_UNITS 0x00000a7c +#define NV40TCL_UNK0B40(x) (0x00000b40+((x)*4)) +#define NV40TCL_UNK0B40__SIZE 0x00000008 +#define NV40TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4)) +#define NV40TCL_VP_UPLOAD_INST__SIZE 0x00000004 +#define NV40TCL_CLIP_PLANE_ENABLE 0x00001478 +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE0 (1 << 2) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE1 (1 << 6) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE2 (1 << 10) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE3 (1 << 14) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE4 (1 << 18) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE5 (1 << 22) +#define NV40TCL_POLYGON_STIPPLE_ENABLE 0x0000147c +#define NV40TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) +#define NV40TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 +#define NV40TCL_VTX_ATTR_3F_X(x) (0x00001500+((x)*16)) +#define NV40TCL_VTX_ATTR_3F_X__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_3F_Y(x) (0x00001504+((x)*16)) +#define NV40TCL_VTX_ATTR_3F_Y__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_3F_Z(x) (0x00001508+((x)*16)) +#define NV40TCL_VTX_ATTR_3F_Z__SIZE 0x00000010 +#define NV40TCL_VTXBUF_ADDRESS(x) (0x00001680+((x)*4)) +#define NV40TCL_VTXBUF_ADDRESS__SIZE 0x00000010 +#define NV40TCL_VTXBUF_ADDRESS_DMA1 (1 << 31) +#define NV40TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0 +#define NV40TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff +#define NV40TCL_VTX_CACHE_INVALIDATE 0x00001714 +#define NV40TCL_VTXFMT(x) (0x00001740+((x)*4)) +#define NV40TCL_VTXFMT__SIZE 0x00000010 +#define NV40TCL_VTXFMT_TYPE_SHIFT 0 +#define NV40TCL_VTXFMT_TYPE_MASK 0x0000000f +#define NV40TCL_VTXFMT_TYPE_FLOAT 0x00000002 +#define NV40TCL_VTXFMT_TYPE_UBYTE 0x00000004 +#define NV40TCL_VTXFMT_SIZE_SHIFT 4 +#define NV40TCL_VTXFMT_SIZE_MASK 0x000000f0 +#define NV40TCL_VTXFMT_STRIDE_SHIFT 8 +#define NV40TCL_VTXFMT_STRIDE_MASK 0x0000ff00 +#define NV40TCL_QUERY_RESET 0x000017c8 +#define NV40TCL_QUERY_UNK17CC 0x000017cc +#define NV40TCL_QUERY_GET 0x00001800 +#define NV40TCL_QUERY_GET_UNK24_SHIFT 24 +#define NV40TCL_QUERY_GET_UNK24_MASK 0xff000000 +#define NV40TCL_QUERY_GET_OFFSET_SHIFT 0 +#define NV40TCL_QUERY_GET_OFFSET_MASK 0x00ffffff +#define NV40TCL_BEGIN_END 0x00001808 +#define NV40TCL_BEGIN_END_STOP 0x00000000 +#define NV40TCL_BEGIN_END_POINTS 0x00000001 +#define NV40TCL_BEGIN_END_LINES 0x00000002 +#define NV40TCL_BEGIN_END_LINE_LOOP 0x00000003 +#define NV40TCL_BEGIN_END_LINE_STRIP 0x00000004 +#define NV40TCL_BEGIN_END_TRIANGLES 0x00000005 +#define NV40TCL_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV40TCL_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV40TCL_BEGIN_END_QUADS 0x00000008 +#define NV40TCL_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV40TCL_BEGIN_END_POLYGON 0x0000000a +#define NV40TCL_VB_ELEMENT_U16 0x0000180c +#define NV40TCL_VB_ELEMENT_U16_1_SHIFT 16 +#define NV40TCL_VB_ELEMENT_U16_1_MASK 0xffff0000 +#define NV40TCL_VB_ELEMENT_U16_0_SHIFT 0 +#define NV40TCL_VB_ELEMENT_U16_0_MASK 0x0000ffff +#define NV40TCL_VB_ELEMENT_U32 0x00001810 +#define NV40TCL_VB_VERTEX_BATCH 0x00001814 +#define NV40TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24 +#define NV40TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000 +#define NV40TCL_VB_VERTEX_BATCH_START_SHIFT 0 +#define NV40TCL_VB_VERTEX_BATCH_START_MASK 0x00ffffff +#define NV40TCL_VERTEX_DATA 0x00001818 +#define NV40TCL_IDXBUF_ADDRESS 0x0000181c +#define NV40TCL_IDXBUF_FORMAT 0x00001820 +#define NV40TCL_IDXBUF_FORMAT_TYPE_SHIFT 4 +#define NV40TCL_IDXBUF_FORMAT_TYPE_MASK 0x000000f0 +#define NV40TCL_IDXBUF_FORMAT_TYPE_U32 0x00000000 +#define NV40TCL_IDXBUF_FORMAT_TYPE_U16 0x00000010 +#define NV40TCL_IDXBUF_FORMAT_DMA1 (1 << 0) +#define NV40TCL_VB_INDEX_BATCH 0x00001824 +#define NV40TCL_VB_INDEX_BATCH_COUNT_SHIFT 24 +#define NV40TCL_VB_INDEX_BATCH_COUNT_MASK 0xff000000 +#define NV40TCL_VB_INDEX_BATCH_START_SHIFT 0 +#define NV40TCL_VB_INDEX_BATCH_START_MASK 0x00ffffff +#define NV40TCL_POLYGON_MODE_FRONT 0x00001828 +#define NV40TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV40TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV40TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 +#define NV40TCL_POLYGON_MODE_BACK 0x0000182c +#define NV40TCL_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV40TCL_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV40TCL_POLYGON_MODE_BACK_FILL 0x00001b02 +#define NV40TCL_CULL_FACE 0x00001830 +#define NV40TCL_CULL_FACE_FRONT 0x00000404 +#define NV40TCL_CULL_FACE_BACK 0x00000405 +#define NV40TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NV40TCL_FRONT_FACE 0x00001834 +#define NV40TCL_FRONT_FACE_CW 0x00000900 +#define NV40TCL_FRONT_FACE_CCW 0x00000901 +#define NV40TCL_POLYGON_SMOOTH_ENABLE 0x00001838 +#define NV40TCL_CULL_FACE_ENABLE 0x0000183c +#define NV40TCL_TEX_SIZE1(x) (0x00001840+((x)*4)) +#define NV40TCL_TEX_SIZE1__SIZE 0x00000008 +#define NV40TCL_TEX_SIZE1_DEPTH_SHIFT 20 +#define NV40TCL_TEX_SIZE1_DEPTH_MASK 0xfff00000 +#define NV40TCL_TEX_SIZE1_PITCH_SHIFT 0 +#define NV40TCL_TEX_SIZE1_PITCH_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_2F_X(x) (0x00001880+((x)*8)) +#define NV40TCL_VTX_ATTR_2F_X__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_2F_Y(x) (0x00001884+((x)*8)) +#define NV40TCL_VTX_ATTR_2F_Y__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_2I(x) (0x00001900+((x)*4)) +#define NV40TCL_VTX_ATTR_2I__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_2I_Y_SHIFT 16 +#define NV40TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_2I_X_SHIFT 0 +#define NV40TCL_VTX_ATTR_2I_X_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_4I_0(x) (0x00001900+((x)*8)) +#define NV40TCL_VTX_ATTR_4I_0__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4I_0_Y_SHIFT 16 +#define NV40TCL_VTX_ATTR_4I_0_Y_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_4I_0_X_SHIFT 0 +#define NV40TCL_VTX_ATTR_4I_0_X_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_4I_1(x) (0x00001904+((x)*8)) +#define NV40TCL_VTX_ATTR_4I_1__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4I_1_W_SHIFT 16 +#define NV40TCL_VTX_ATTR_4I_1_W_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_4I_1_Z_SHIFT 0 +#define NV40TCL_VTX_ATTR_4I_1_Z_MASK 0x0000ffff +#define NV40TCL_TEX_OFFSET(x) (0x00001a00+((x)*32)) +#define NV40TCL_TEX_OFFSET__SIZE 0x00000010 +#define NV40TCL_TEX_FORMAT(x) (0x00001a04+((x)*32)) +#define NV40TCL_TEX_FORMAT__SIZE 0x00000010 +#define NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT 16 +#define NV40TCL_TEX_FORMAT_MIPMAP_COUNT_MASK 0x000f0000 +#define NV40TCL_TEX_FORMAT_RECT (1 << 14) +#define NV40TCL_TEX_FORMAT_LINEAR (1 << 13) +#define NV40TCL_TEX_FORMAT_FORMAT_SHIFT 8 +#define NV40TCL_TEX_FORMAT_FORMAT_MASK 0x00001f00 +#define NV40TCL_TEX_FORMAT_FORMAT_L8 0x00000100 +#define NV40TCL_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000200 +#define NV40TCL_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000300 +#define NV40TCL_TEX_FORMAT_FORMAT_R5G6B5 0x00000400 +#define NV40TCL_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000500 +#define NV40TCL_TEX_FORMAT_FORMAT_DXT1 0x00000600 +#define NV40TCL_TEX_FORMAT_FORMAT_DXT3 0x00000700 +#define NV40TCL_TEX_FORMAT_FORMAT_DXT5 0x00000800 +#define NV40TCL_TEX_FORMAT_FORMAT_A8L8 0x00000b00 +#define NV40TCL_TEX_FORMAT_FORMAT_Z24 0x00001000 +#define NV40TCL_TEX_FORMAT_FORMAT_Z16 0x00001200 +#define NV40TCL_TEX_FORMAT_FORMAT_HILO8 0x00001800 +#define NV40TCL_TEX_FORMAT_FORMAT_RGBA16F 0x00001a00 +#define NV40TCL_TEX_FORMAT_FORMAT_RGBA32F 0x00001b00 +#define NV40TCL_TEX_FORMAT_DIMS_SHIFT 4 +#define NV40TCL_TEX_FORMAT_DIMS_MASK 0x000000f0 +#define NV40TCL_TEX_FORMAT_DIMS_1D 0x00000010 +#define NV40TCL_TEX_FORMAT_DIMS_2D 0x00000020 +#define NV40TCL_TEX_FORMAT_DIMS_3D 0x00000030 +#define NV40TCL_TEX_FORMAT_NO_BORDER (1 << 3) +#define NV40TCL_TEX_FORMAT_CUBIC (1 << 2) +#define NV40TCL_TEX_FORMAT_DMA1 (1 << 1) +#define NV40TCL_TEX_FORMAT_DMA0 (1 << 0) +#define NV40TCL_TEX_WRAP(x) (0x00001a08+((x)*32)) +#define NV40TCL_TEX_WRAP__SIZE 0x00000010 +#define NV40TCL_TEX_WRAP_S_SHIFT 0 +#define NV40TCL_TEX_WRAP_S_MASK 0x000000ff +#define NV40TCL_TEX_WRAP_S_REPEAT 0x00000001 +#define NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT 0x00000002 +#define NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE 0x00000003 +#define NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER 0x00000004 +#define NV40TCL_TEX_WRAP_S_CLAMP 0x00000005 +#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE 0x00000006 +#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER 0x00000007 +#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP 0x00000008 +#define NV40TCL_TEX_WRAP_T_SHIFT 8 +#define NV40TCL_TEX_WRAP_T_MASK 0x0000ff00 +#define NV40TCL_TEX_WRAP_T_REPEAT 0x00000100 +#define NV40TCL_TEX_WRAP_T_MIRRORED_REPEAT 0x00000200 +#define NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE 0x00000300 +#define NV40TCL_TEX_WRAP_T_CLAMP_TO_BORDER 0x00000400 +#define NV40TCL_TEX_WRAP_T_CLAMP 0x00000500 +#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP_TO_EDGE 0x00000600 +#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP_TO_BORDER 0x00000700 +#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP 0x00000800 +#define NV40TCL_TEX_WRAP_R_SHIFT 16 +#define NV40TCL_TEX_WRAP_R_MASK 0x00ff0000 +#define NV40TCL_TEX_WRAP_R_REPEAT 0x00010000 +#define NV40TCL_TEX_WRAP_R_MIRRORED_REPEAT 0x00020000 +#define NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE 0x00030000 +#define NV40TCL_TEX_WRAP_R_CLAMP_TO_BORDER 0x00040000 +#define NV40TCL_TEX_WRAP_R_CLAMP 0x00050000 +#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP_TO_EDGE 0x00060000 +#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP_TO_BORDER 0x00070000 +#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP 0x00080000 +#define NV40TCL_TEX_WRAP_RCOMP_SHIFT 28 +#define NV40TCL_TEX_WRAP_RCOMP_MASK 0xf0000000 +#define NV40TCL_TEX_WRAP_RCOMP_NEVER 0x00000000 +#define NV40TCL_TEX_WRAP_RCOMP_GREATER 0x10000000 +#define NV40TCL_TEX_WRAP_RCOMP_EQUAL 0x20000000 +#define NV40TCL_TEX_WRAP_RCOMP_GEQUAL 0x30000000 +#define NV40TCL_TEX_WRAP_RCOMP_LESS 0x40000000 +#define NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL 0x50000000 +#define NV40TCL_TEX_WRAP_RCOMP_LEQUAL 0x60000000 +#define NV40TCL_TEX_WRAP_RCOMP_ALWAYS 0x70000000 +#define NV40TCL_TEX_ENABLE(x) (0x00001a0c+((x)*32)) +#define NV40TCL_TEX_ENABLE__SIZE 0x00000010 +#define NV40TCL_TEX_ENABLE_ENABLE (1 << 31) +#define NV40TCL_TEX_ENABLE_ANISO_SHIFT 4 +#define NV40TCL_TEX_ENABLE_ANISO_MASK 0x000000f0 +#define NV40TCL_TEX_ENABLE_ANISO_NONE 0x00000000 +#define NV40TCL_TEX_ENABLE_ANISO_2X 0x00000010 +#define NV40TCL_TEX_ENABLE_ANISO_4X 0x00000020 +#define NV40TCL_TEX_ENABLE_ANISO_6X 0x00000030 +#define NV40TCL_TEX_ENABLE_ANISO_8X 0x00000040 +#define NV40TCL_TEX_ENABLE_ANISO_10X 0x00000050 +#define NV40TCL_TEX_ENABLE_ANISO_12X 0x00000060 +#define NV40TCL_TEX_ENABLE_ANISO_16X 0x00000070 +#define NV40TCL_TEX_SWIZZLE(x) (0x00001a10+((x)*32)) +#define NV40TCL_TEX_SWIZZLE__SIZE 0x00000010 +#define NV40TCL_TEX_SWIZZLE_S0_X_SHIFT 14 +#define NV40TCL_TEX_SWIZZLE_S0_X_MASK 0x0000c000 +#define NV40TCL_TEX_SWIZZLE_S0_X_ZERO 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S0_X_ONE 0x00004000 +#define NV40TCL_TEX_SWIZZLE_S0_X_S1 0x00008000 +#define NV40TCL_TEX_SWIZZLE_S0_Y_SHIFT 12 +#define NV40TCL_TEX_SWIZZLE_S0_Y_MASK 0x00003000 +#define NV40TCL_TEX_SWIZZLE_S0_Y_ZERO 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S0_Y_ONE 0x00001000 +#define NV40TCL_TEX_SWIZZLE_S0_Y_S1 0x00002000 +#define NV40TCL_TEX_SWIZZLE_S0_Z_SHIFT 10 +#define NV40TCL_TEX_SWIZZLE_S0_Z_MASK 0x00000c00 +#define NV40TCL_TEX_SWIZZLE_S0_Z_ZERO 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S0_Z_ONE 0x00000400 +#define NV40TCL_TEX_SWIZZLE_S0_Z_S1 0x00000800 +#define NV40TCL_TEX_SWIZZLE_S0_W_SHIFT 8 +#define NV40TCL_TEX_SWIZZLE_S0_W_MASK 0x00000300 +#define NV40TCL_TEX_SWIZZLE_S0_W_ZERO 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S0_W_ONE 0x00000100 +#define NV40TCL_TEX_SWIZZLE_S0_W_S1 0x00000200 +#define NV40TCL_TEX_SWIZZLE_S1_X_SHIFT 6 +#define NV40TCL_TEX_SWIZZLE_S1_X_MASK 0x000000c0 +#define NV40TCL_TEX_SWIZZLE_S1_X_W 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S1_X_Z 0x00000040 +#define NV40TCL_TEX_SWIZZLE_S1_X_Y 0x00000080 +#define NV40TCL_TEX_SWIZZLE_S1_X_X 0x000000c0 +#define NV40TCL_TEX_SWIZZLE_S1_Y_SHIFT 4 +#define NV40TCL_TEX_SWIZZLE_S1_Y_MASK 0x00000030 +#define NV40TCL_TEX_SWIZZLE_S1_Y_W 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S1_Y_Z 0x00000010 +#define NV40TCL_TEX_SWIZZLE_S1_Y_Y 0x00000020 +#define NV40TCL_TEX_SWIZZLE_S1_Y_X 0x00000030 +#define NV40TCL_TEX_SWIZZLE_S1_Z_SHIFT 2 +#define NV40TCL_TEX_SWIZZLE_S1_Z_MASK 0x0000000c +#define NV40TCL_TEX_SWIZZLE_S1_Z_W 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S1_Z_Z 0x00000004 +#define NV40TCL_TEX_SWIZZLE_S1_Z_Y 0x00000008 +#define NV40TCL_TEX_SWIZZLE_S1_Z_X 0x0000000c +#define NV40TCL_TEX_SWIZZLE_S1_W_SHIFT 0 +#define NV40TCL_TEX_SWIZZLE_S1_W_MASK 0x00000003 +#define NV40TCL_TEX_SWIZZLE_S1_W_W 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S1_W_Z 0x00000001 +#define NV40TCL_TEX_SWIZZLE_S1_W_Y 0x00000002 +#define NV40TCL_TEX_SWIZZLE_S1_W_X 0x00000003 +#define NV40TCL_TEX_FILTER(x) (0x00001a14+((x)*32)) +#define NV40TCL_TEX_FILTER__SIZE 0x00000010 +#define NV40TCL_TEX_FILTER_SIGNED_ALPHA (1 << 31) +#define NV40TCL_TEX_FILTER_SIGNED_RED (1 << 30) +#define NV40TCL_TEX_FILTER_SIGNED_GREEN (1 << 29) +#define NV40TCL_TEX_FILTER_SIGNED_BLUE (1 << 28) +#define NV40TCL_TEX_FILTER_MIN_SHIFT 16 +#define NV40TCL_TEX_FILTER_MIN_MASK 0x000f0000 +#define NV40TCL_TEX_FILTER_MIN_NEAREST 0x00010000 +#define NV40TCL_TEX_FILTER_MIN_LINEAR 0x00020000 +#define NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST 0x00030000 +#define NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST 0x00040000 +#define NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR 0x00050000 +#define NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR 0x00060000 +#define NV40TCL_TEX_FILTER_MAG_SHIFT 24 +#define NV40TCL_TEX_FILTER_MAG_MASK 0x0f000000 +#define NV40TCL_TEX_FILTER_MAG_NEAREST 0x01000000 +#define NV40TCL_TEX_FILTER_MAG_LINEAR 0x02000000 +#define NV40TCL_TEX_SIZE0(x) (0x00001a18+((x)*32)) +#define NV40TCL_TEX_SIZE0__SIZE 0x00000010 +#define NV40TCL_TEX_SIZE0_H_SHIFT 0 +#define NV40TCL_TEX_SIZE0_H_MASK 0x0000ffff +#define NV40TCL_TEX_SIZE0_W_SHIFT 16 +#define NV40TCL_TEX_SIZE0_W_MASK 0xffff0000 +#define NV40TCL_TEX_BORDER_COLOR(x) (0x00001a1c+((x)*32)) +#define NV40TCL_TEX_BORDER_COLOR__SIZE 0x00000010 +#define NV40TCL_TEX_BORDER_COLOR_B_SHIFT 0 +#define NV40TCL_TEX_BORDER_COLOR_B_MASK 0x000000ff +#define NV40TCL_TEX_BORDER_COLOR_G_SHIFT 8 +#define NV40TCL_TEX_BORDER_COLOR_G_MASK 0x0000ff00 +#define NV40TCL_TEX_BORDER_COLOR_R_SHIFT 16 +#define NV40TCL_TEX_BORDER_COLOR_R_MASK 0x00ff0000 +#define NV40TCL_TEX_BORDER_COLOR_A_SHIFT 24 +#define NV40TCL_TEX_BORDER_COLOR_A_MASK 0xff000000 +#define NV40TCL_VTX_ATTR_4F_X(x) (0x00001c00+((x)*16)) +#define NV40TCL_VTX_ATTR_4F_X__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4F_Y(x) (0x00001c04+((x)*16)) +#define NV40TCL_VTX_ATTR_4F_Y__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4F_Z(x) (0x00001c08+((x)*16)) +#define NV40TCL_VTX_ATTR_4F_Z__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4F_W(x) (0x00001c0c+((x)*16)) +#define NV40TCL_VTX_ATTR_4F_W__SIZE 0x00000010 +#define NV40TCL_FP_CONTROL 0x00001d60 +#define NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT 24 +#define NV40TCL_FP_CONTROL_TEMP_COUNT_MASK 0xff000000 +#define NV40TCL_FP_CONTROL_KIL (1 << 7) +#define NV40TCL_MULTISAMPLE_CONTROL 0x00001d7c +#define NV40TCL_CLEAR_VALUE_DEPTH 0x00001d8c +#define NV40TCL_CLEAR_VALUE_COLOR 0x00001d90 +#define NV40TCL_CLEAR_BUFFERS 0x00001d94 +#define NV40TCL_CLEAR_BUFFERS_COLOR_A (1 << 7) +#define NV40TCL_CLEAR_BUFFERS_COLOR_B (1 << 6) +#define NV40TCL_CLEAR_BUFFERS_COLOR_G (1 << 5) +#define NV40TCL_CLEAR_BUFFERS_COLOR_R (1 << 4) +#define NV40TCL_CLEAR_BUFFERS_STENCIL (1 << 1) +#define NV40TCL_CLEAR_BUFFERS_DEPTH (1 << 0) +#define NV40TCL_LINE_STIPPLE_ENABLE 0x00001db4 +#define NV40TCL_LINE_STIPPLE_PATTERN 0x00001db8 +#define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_SHIFT 0 +#define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff +#define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16 +#define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000 +#define NV40TCL_VP_UPLOAD_FROM_ID 0x00001e9c +#define NV40TCL_VP_START_FROM_ID 0x00001ea0 +#define NV40TCL_POINT_SIZE 0x00001ee0 +#define NV40TCL_POINT_SPRITE 0x00001ee8 +#define NV40TCL_VP_UPLOAD_CONST_ID 0x00001efc +#define NV40TCL_VP_UPLOAD_CONST_X(x) (0x00001f00+((x)*16)) +#define NV40TCL_VP_UPLOAD_CONST_X__SIZE 0x00000004 +#define NV40TCL_VP_UPLOAD_CONST_Y(x) (0x00001f04+((x)*16)) +#define NV40TCL_VP_UPLOAD_CONST_Y__SIZE 0x00000004 +#define NV40TCL_VP_UPLOAD_CONST_Z(x) (0x00001f08+((x)*16)) +#define NV40TCL_VP_UPLOAD_CONST_Z__SIZE 0x00000004 +#define NV40TCL_VP_UPLOAD_CONST_W(x) (0x00001f0c+((x)*16)) +#define NV40TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004 +#define NV40TCL_TEX_CACHE_CTL 0x00001fd8 +#define NV40TCL_VP_ATTRIB_EN 0x00001ff0 +#define NV40TCL_VP_RESULT_EN 0x00001ff4 + + +#define NV44TCL 0x00004497 + + + +#define NV50_2D 0x0000502d + +#define NV50_2D_NOP 0x00000100 +#define NV50_2D_NOTIFY 0x00000104 +#define NV50_2D_DMA_NOTIFY 0x00000180 +#define NV50_2D_DMA_IN_MEMORY0 0x00000184 +#define NV50_2D_DMA_IN_MEMORY1 0x00000188 +#define NV50_2D_DMA_IN_MEMORY2 0x0000018c +#define NV50_2D_DST_FORMAT 0x00000200 +#define NV50_2D_DST_FORMAT_32BPP 0x000000cf +#define NV50_2D_DST_FORMAT_24BPP 0x000000e6 +#define NV50_2D_DST_FORMAT_16BPP 0x000000e8 +#define NV50_2D_DST_FORMAT_8BPP 0x000000f3 +#define NV50_2D_DST_FORMAT_15BPP 0x000000f8 +#define NV50_2D_DST_PITCH 0x00000214 +#define NV50_2D_DST_WIDTH 0x00000218 +#define NV50_2D_DST_HEIGHT 0x0000021c +#define NV50_2D_DST_ADDRESS_HIGH 0x00000220 +#define NV50_2D_DST_ADDRESS_LOW 0x00000224 +#define NV50_2D_SRC_FORMAT 0x00000230 +#define NV50_2D_SRC_FORMAT_32BPP 0x000000cf +#define NV50_2D_SRC_FORMAT_24BPP 0x000000e6 +#define NV50_2D_SRC_FORMAT_16BPP 0x000000e8 +#define NV50_2D_SRC_FORMAT_8BPP 0x000000f3 +#define NV50_2D_SRC_FORMAT_15BPP 0x000000f8 +#define NV50_2D_SRC_PITCH 0x00000244 +#define NV50_2D_SRC_WIDTH 0x00000248 +#define NV50_2D_SRC_HEIGHT 0x0000024c +#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250 +#define NV50_2D_SRC_ADDRESS_LOW 0x00000254 +#define NV50_2D_CLIP_X 0x00000280 +#define NV50_2D_CLIP_Y 0x00000284 +#define NV50_2D_CLIP_Z 0x00000288 +#define NV50_2D_CLIP_W 0x0000028c +#define NV50_2D_ROP 0x000002a0 +#define NV50_2D_OPERATION 0x000002ac +#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000 +#define NV50_2D_OPERATION_ROP_AND 0x00000001 +#define NV50_2D_OPERATION_BLEND_AND 0x00000002 +#define NV50_2D_OPERATION_SRCCOPY 0x00000003 +#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000005 +#define NV50_2D_PATTERN_FORMAT 0x000002e8 +#define NV50_2D_PATTERN_FORMAT_16BPP 0x00000000 +#define NV50_2D_PATTERN_FORMAT_15BPP 0x00000001 +#define NV50_2D_PATTERN_FORMAT_32BPP 0x00000002 +#define NV50_2D_PATTERN_FORMAT_8BPP 0x00000003 +#define NV50_2D_PATTERN_COLOR(x) (0x000002f0+((x)*4)) +#define NV50_2D_PATTERN_COLOR__SIZE 0x00000002 +#define NV50_2D_PATTERN_BITMAP(x) (0x000002f8+((x)*4)) +#define NV50_2D_PATTERN_BITMAP__SIZE 0x00000002 +#define NV50_2D_RECT_FORMAT 0x00000584 +#define NV50_2D_RECT_FORMAT_32BPP 0x000000cf +#define NV50_2D_RECT_FORMAT_24BPP 0x000000e6 +#define NV50_2D_RECT_FORMAT_16BPP 0x000000e8 +#define NV50_2D_RECT_FORMAT_8BPP 0x000000f3 +#define NV50_2D_RECT_FORMAT_15BPP 0x000000f8 +#define NV50_2D_RECT_COLOR 0x00000588 +#define NV50_2D_RECT_X1 0x00000600 +#define NV50_2D_RECT_Y1 0x00000604 +#define NV50_2D_RECT_X2 0x00000608 +#define NV50_2D_RECT_Y2 0x0000060c +#define NV50_2D_BLIT_DST_X 0x000008b0 +#define NV50_2D_BLIT_DST_Y 0x000008b4 +#define NV50_2D_BLIT_DST_W 0x000008b8 +#define NV50_2D_BLIT_DST_H 0x000008bc +#define NV50_2D_BLIT_SRC_X 0x000008d4 +#define NV50_2D_BLIT_SRC_Y 0x000008dc +#define NV50_2D_SIFC_UNK0800 0x00000800 +#define NV50_2D_SIFC_FORMAT 0x00000804 +#define NV50_2D_SIFC_FORMAT_32BPP 0x000000cf +#define NV50_2D_SIFC_FORMAT_24BPP 0x000000e6 +#define NV50_2D_SIFC_FORMAT_16BPP 0x000000e8 +#define NV50_2D_SIFC_FORMAT_8BPP 0x000000f3 +#define NV50_2D_SIFC_FORMAT_15BPP 0x000000f8 +#define NV50_2D_SIFC_WIDTH 0x00000838 +#define NV50_2D_SIFC_HEIGHT 0x0000083c +#define NV50_2D_SIFC_SCALE_UNK0840 0x00000840 +#define NV50_2D_SIFC_SCALE_UNK0844 0x00000844 +#define NV50_2D_SIFC_SCALE_UNK0848 0x00000848 +#define NV50_2D_SIFC_SCALE_UNK084C 0x0000084c +#define NV50_2D_SIFC_UNK0850 0x00000850 +#define NV50_2D_SIFC_DST_X 0x00000854 +#define NV50_2D_SIFC_UNK0858 0x00000858 +#define NV50_2D_SIFC_DST_Y 0x0000085c +#define NV50_2D_SIFC_DATA 0x00000860 + + +#define NV50_MEMORY_TO_MEMORY_FORMAT 0x00005039 + +#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH 0x00000238 +#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x0000023c + + +#define NV50TCL 0x00005097 + +#define NV50TCL_NOP 0x00000100 +#define NV50TCL_NOTIFY 0x00000104 +#define NV50TCL_DMA_NOTIFY 0x00000180 +#define NV50TCL_DMA_IN_MEMORY0(x) (0x00000184+((x)*4)) +#define NV50TCL_DMA_IN_MEMORY0__SIZE 0x0000000b +#define NV50TCL_DMA_IN_MEMORY1(x) (0x000001c0+((x)*4)) +#define NV50TCL_DMA_IN_MEMORY1__SIZE 0x00000008 +#define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32)) +#define NV50TCL_RT_ADDRESS_HIGH__SIZE 0x00000008 +#define NV50TCL_RT_ADDRESS_LOW(x) (0x00000204+((x)*32)) +#define NV50TCL_RT_ADDRESS_LOW__SIZE 0x00000008 +#define NV50TCL_RT_FORMAT(x) (0x00000208+((x)*32)) +#define NV50TCL_RT_FORMAT__SIZE 0x00000008 +#define NV50TCL_RT_UNK3(x) (0x0000020c+((x)*32)) +#define NV50TCL_RT_UNK3__SIZE 0x00000008 +#define NV50TCL_RT_UNK4(x) (0x00000210+((x)*32)) +#define NV50TCL_RT_UNK4__SIZE 0x00000008 +#define NV50TCL_VTX_ATTR_1F(x) (0x00000300+((x)*4)) +#define NV50TCL_VTX_ATTR_1F__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_2F_X(x) (0x00000380+((x)*8)) +#define NV50TCL_VTX_ATTR_2F_X__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_2F_Y(x) (0x00000384+((x)*8)) +#define NV50TCL_VTX_ATTR_2F_Y__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_3F_X(x) (0x00000400+((x)*16)) +#define NV50TCL_VTX_ATTR_3F_X__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_3F_Y(x) (0x00000404+((x)*16)) +#define NV50TCL_VTX_ATTR_3F_Y__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_3F_Z(x) (0x00000408+((x)*16)) +#define NV50TCL_VTX_ATTR_3F_Z__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_3F_W(x) (0x0000040c+((x)*16)) +#define NV50TCL_VTX_ATTR_3F_W__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4F_X(x) (0x00000500+((x)*16)) +#define NV50TCL_VTX_ATTR_4F_X__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4F_Y(x) (0x00000504+((x)*16)) +#define NV50TCL_VTX_ATTR_4F_Y__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4F_Z(x) (0x00000508+((x)*16)) +#define NV50TCL_VTX_ATTR_4F_Z__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4F_W(x) (0x0000050c+((x)*16)) +#define NV50TCL_VTX_ATTR_4F_W__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_2I(x) (0x00000680+((x)*4)) +#define NV50TCL_VTX_ATTR_2I__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_2I_X_SHIFT 0 +#define NV50TCL_VTX_ATTR_2I_X_MASK 0x0000ffff +#define NV50TCL_VTX_ATTR_2I_Y_SHIFT 16 +#define NV50TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 +#define NV50TCL_VTX_ATTR_4I_0(x) (0x00000700+((x)*8)) +#define NV50TCL_VTX_ATTR_4I_0__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4I_0_X_SHIFT 0 +#define NV50TCL_VTX_ATTR_4I_0_X_MASK 0x0000ffff +#define NV50TCL_VTX_ATTR_4I_0_Y_SHIFT 16 +#define NV50TCL_VTX_ATTR_4I_0_Y_MASK 0xffff0000 +#define NV50TCL_VTX_ATTR_4I_1(x) (0x00000704+((x)*8)) +#define NV50TCL_VTX_ATTR_4I_1__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4I_1_Z_SHIFT 0 +#define NV50TCL_VTX_ATTR_4I_1_Z_MASK 0x0000ffff +#define NV50TCL_VTX_ATTR_4I_1_W_SHIFT 16 +#define NV50TCL_VTX_ATTR_4I_1_W_MASK 0xffff0000 +#define NV50TCL_VTX_ATTR_4NI_0(x) (0x00000780+((x)*8)) +#define NV50TCL_VTX_ATTR_4NI_0__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4NI_0_X_SHIFT 0 +#define NV50TCL_VTX_ATTR_4NI_0_X_MASK 0x0000ffff +#define NV50TCL_VTX_ATTR_4NI_0_Y_SHIFT 16 +#define NV50TCL_VTX_ATTR_4NI_0_Y_MASK 0xffff0000 +#define NV50TCL_VTX_ATTR_4NI_1(x) (0x00000784+((x)*8)) +#define NV50TCL_VTX_ATTR_4NI_1__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4NI_1_Z_SHIFT 0 +#define NV50TCL_VTX_ATTR_4NI_1_Z_MASK 0x0000ffff +#define NV50TCL_VTX_ATTR_4NI_1_W_SHIFT 16 +#define NV50TCL_VTX_ATTR_4NI_1_W_MASK 0xffff0000 +#define NV50TCL_VERTEX_ARRAY_FORMAT(x) (0x00000900+((x)*16)) +#define NV50TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010 +#define NV50TCL_VIEWPORT_UNK0(x) (0x00000a00+((x)*4)) +#define NV50TCL_VIEWPORT_UNK0__SIZE 0x00000003 +#define NV50TCL_VIEWPORT_UNK1(x) (0x00000a0c+((x)*4)) +#define NV50TCL_VIEWPORT_UNK1__SIZE 0x00000003 +#define NV50TCL_VIEWPORT_HORIZ 0x00000c00 +#define NV50TCL_VIEWPORT_HORIZ_X_SHIFT 0 +#define NV50TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff +#define NV50TCL_VIEWPORT_HORIZ_W_SHIFT 16 +#define NV50TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 +#define NV50TCL_VIEWPORT_VERT 0x00000c04 +#define NV50TCL_VIEWPORT_VERT_Y_SHIFT 0 +#define NV50TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff +#define NV50TCL_VIEWPORT_VERT_H_SHIFT 16 +#define NV50TCL_VIEWPORT_VERT_H_MASK 0xffff0000 +#define NV50TCL_DEPTH_RANGE_NEAR 0x00000c08 +#define NV50TCL_DEPTH_RANGE_FAR 0x00000c0c +#define NV50TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8)) +#define NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 +#define NV50TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8)) +#define NV50TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 +#define NV50TCL_VERTEX_BUFFER_FIRST 0x00000d74 +#define NV50TCL_VERTEX_BUFFER_COUNT 0x00000d78 +#define NV50TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4)) +#define NV50TCL_CLEAR_COLOR__SIZE 0x00000004 +#define NV50TCL_CLEAR_DEPTH 0x00000d90 +#define NV50TCL_CLEAR_STENCIL 0x00000da0 +#define NV50TCL_POLYGON_MODE_FRONT 0x00000dac +#define NV50TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV50TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV50TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 +#define NV50TCL_POLYGON_MODE_BACK 0x00000db0 +#define NV50TCL_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV50TCL_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV50TCL_POLYGON_MODE_BACK_FILL 0x00001b02 +#define NV50TCL_POLYGON_SMOOTH_ENABLE 0x00000db4 +#define NV50TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 +#define NV50TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 +#define NV50TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 +#define NV50TCL_SCISSOR_HORIZ 0x00000e04 +#define NV50TCL_SCISSOR_HORIZ_L_SHIFT 0 +#define NV50TCL_SCISSOR_HORIZ_L_MASK 0x0000ffff +#define NV50TCL_SCISSOR_HORIZ_R_SHIFT 16 +#define NV50TCL_SCISSOR_HORIZ_R_MASK 0xffff0000 +#define NV50TCL_SCISSOR_VERT 0x00000e08 +#define NV50TCL_SCISSOR_VERT_T_SHIFT 0 +#define NV50TCL_SCISSOR_VERT_T_MASK 0x0000ffff +#define NV50TCL_SCISSOR_VERT_B_SHIFT 16 +#define NV50TCL_SCISSOR_VERT_B_MASK 0xffff0000 +#define NV50TCL_VP_UPLOAD_CONST_ID 0x00000f00 +#define NV50TCL_VP_UPLOAD_CONST(x) (0x00000f04+((x)*4)) +#define NV50TCL_VP_UPLOAD_CONST__SIZE 0x00000010 +#define NV50TCL_STENCIL_FRONT_FUNC_REF 0x00000f54 +#define NV50TCL_STENCIL_FRONT_MASK 0x00000f58 +#define NV50TCL_STENCIL_FRONT_FUNC_MASK 0x00000f5c +#define NV50TCL_GP_ADDRESS_HIGH 0x00000f70 +#define NV50TCL_GP_ADDRESS_LOW 0x00000f74 +#define NV50TCL_VP_ADDRESS_HIGH 0x00000f7c +#define NV50TCL_VP_ADDRESS_LOW 0x00000f80 +#define NV50TCL_FP_ADDRESS_HIGH 0x00000fa4 +#define NV50TCL_FP_ADDRESS_LOW 0x00000fa8 +#define NV50TCL_ZETA_ADDRESS_HIGH 0x00000fe0 +#define NV50TCL_ZETA_ADDRESS_LOW 0x00000fe4 +#define NV50TCL_UNKFF4 0x00000ff4 +#define NV50TCL_UNKFF4_W_SHIFT 16 +#define NV50TCL_UNKFF4_W_MASK 0xffff0000 +#define NV50TCL_UNKFF8 0x00000ff8 +#define NV50TCL_UNKFF8_H_SHIFT 16 +#define NV50TCL_UNKFF8_H_MASK 0xffff0000 +#define NV50TCL_RT_HORIZ(x) (0x00001240+((x)*8)) +#define NV50TCL_RT_HORIZ__SIZE 0x00000008 +#define NV50TCL_RT_VERT(x) (0x00001244+((x)*8)) +#define NV50TCL_RT_VERT__SIZE 0x00000008 +#define NV50TCL_DEPTH_TEST_ENABLE 0x000012cc +#define NV50TCL_SHADE_MODEL 0x000012d4 +#define NV50TCL_SHADE_MODEL_FLAT 0x00001d00 +#define NV50TCL_SHADE_MODEL_SMOOTH 0x00001d01 +#define NV50TCL_DEPTH_WRITE_ENABLE 0x000012e8 +#define NV50TCL_ALPHA_TEST_ENABLE 0x000012ec +#define NV50TCL_DEPTH_TEST_FUNC 0x0000130c +#define NV50TCL_DEPTH_TEST_FUNC_NEVER 0x00000200 +#define NV50TCL_DEPTH_TEST_FUNC_LESS 0x00000201 +#define NV50TCL_DEPTH_TEST_FUNC_EQUAL 0x00000202 +#define NV50TCL_DEPTH_TEST_FUNC_LEQUAL 0x00000203 +#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NV50TCL_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV50TCL_DEPTH_TEST_FUNC_GEQUAL 0x00000206 +#define NV50TCL_DEPTH_TEST_FUNC_ALWAYS 0x00000207 +#define NV50TCL_ALPHA_TEST_REF 0x00001310 +#define NV50TCL_ALPHA_TEST_FUNC 0x00001314 +#define NV50TCL_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NV50TCL_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NV50TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NV50TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV50TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV50TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NV50TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207 +#define NV50TCL_BLEND_COLOR(x) (0x0000131c+((x)*4)) +#define NV50TCL_BLEND_COLOR__SIZE 0x00000004 +#define NV50TCL_BLEND_EQUATION_RGB 0x00001340 +#define NV50TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NV50TCL_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NV50TCL_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV50TCL_BLEND_FUNC_SRC_RGB 0x00001344 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001 +#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV50TCL_BLEND_FUNC_DST_RGB 0x00001348 +#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001 +#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV50TCL_BLEND_EQUATION_ALPHA 0x0000134c +#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NV50TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NV50TCL_BLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV50TCL_BLEND_FUNC_SRC_ALPHA 0x00001350 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00000001 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00000300 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00000302 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00000304 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00000306 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00000307 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00000308 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x00008001 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x00008003 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV50TCL_BLEND_FUNC_DST_ALPHA 0x00001358 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00000001 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00000300 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00000302 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00000304 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00000306 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00000307 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00000308 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x00008001 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x00008003 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV50TCL_BLEND_ENABLE(x) (0x00001360+((x)*4)) +#define NV50TCL_BLEND_ENABLE__SIZE 0x00000008 +#define NV50TCL_STENCIL_BACK_ENABLE 0x00001380 +#define NV50TCL_STENCIL_BACK_OP_FAIL 0x00001384 +#define NV50TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NV50TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NV50TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NV50TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL 0x00001388 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_BACK_OP_ZPASS 0x0000138c +#define NV50TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NV50TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC 0x00001390 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 +#define NV50TCL_STENCIL_BACK_FUNC_REF 0x00001394 +#define NV50TCL_STENCIL_BACK_MASK 0x00001398 +#define NV50TCL_STENCIL_BACK_FUNC_MASK 0x0000139c +#define NV50TCL_LINE_WIDTH 0x000013b0 +#define NV50TCL_VP_START_ID 0x0000140c +#define NV50TCL_GP_START_ID 0x00001410 +#define NV50TCL_FP_START_ID 0x00001414 +#define NV50TCL_POINT_SIZE 0x00001518 +#define NV50TCL_TEX_CB0_ADDRESS_HIGH 0x0000155c +#define NV50TCL_TEX_CB0_ADDRESS_LOW 0x00001560 +#define NV50TCL_POLYGON_OFFSET_FACTOR 0x0000156c +#define NV50TCL_LINE_SMOOTH_ENABLE 0x00001570 +#define NV50TCL_TEX_CB1_ADDRESS_HIGH 0x00001574 +#define NV50TCL_TEX_CB1_ADDRESS_LOW 0x00001578 +#define NV50TCL_STENCIL_FRONT_ENABLE 0x00001594 +#define NV50TCL_STENCIL_FRONT_OP_FAIL 0x00001598 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NV50TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL 0x0000159c +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS 0x000015a0 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC 0x000015a4 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 +#define NV50TCL_POLYGON_OFFSET_UNITS 0x000015bc +#define NV50TCL_VERTEX_BEGIN 0x000015dc +#define NV50TCL_VERTEX_BEGIN_POINTS 0x00000000 +#define NV50TCL_VERTEX_BEGIN_LINES 0x00000001 +#define NV50TCL_VERTEX_BEGIN_LINE_LOOP 0x00000002 +#define NV50TCL_VERTEX_BEGIN_LINE_STRIP 0x00000003 +#define NV50TCL_VERTEX_BEGIN_TRIANGLES 0x00000004 +#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP 0x00000005 +#define NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN 0x00000006 +#define NV50TCL_VERTEX_BEGIN_QUADS 0x00000007 +#define NV50TCL_VERTEX_BEGIN_QUAD_STRIP 0x00000008 +#define NV50TCL_VERTEX_BEGIN_POLYGON 0x00000009 +#define NV50TCL_VERTEX_END 0x000015e0 +#define NV50TCL_VERTEX_DATA 0x00001640 +#define NV50TCL_VP_ATTR_EN_0 0x00001650 +#define NV50TCL_VP_ATTR_EN_0_7_SHIFT 28 +#define NV50TCL_VP_ATTR_EN_0_7_MASK 0xf0000000 +#define NV50TCL_VP_ATTR_EN_0_7_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_7_XNNN 0x10000000 +#define NV50TCL_VP_ATTR_EN_0_7_NYNN 0x20000000 +#define NV50TCL_VP_ATTR_EN_0_7_XYNN 0x30000000 +#define NV50TCL_VP_ATTR_EN_0_7_NNZN 0x40000000 +#define NV50TCL_VP_ATTR_EN_0_7_XNZN 0x50000000 +#define NV50TCL_VP_ATTR_EN_0_7_NYZN 0x60000000 +#define NV50TCL_VP_ATTR_EN_0_7_XYZN 0x70000000 +#define NV50TCL_VP_ATTR_EN_0_7_NNNW 0x80000000 +#define NV50TCL_VP_ATTR_EN_0_7_XNNW 0x90000000 +#define NV50TCL_VP_ATTR_EN_0_7_NYNW 0xa0000000 +#define NV50TCL_VP_ATTR_EN_0_7_XYNW 0xb0000000 +#define NV50TCL_VP_ATTR_EN_0_7_NNZW 0xc0000000 +#define NV50TCL_VP_ATTR_EN_0_7_XNZW 0xd0000000 +#define NV50TCL_VP_ATTR_EN_0_7_NYZW 0xe0000000 +#define NV50TCL_VP_ATTR_EN_0_7_XYZW 0xf0000000 +#define NV50TCL_VP_ATTR_EN_0_6_SHIFT 24 +#define NV50TCL_VP_ATTR_EN_0_6_MASK 0x0f000000 +#define NV50TCL_VP_ATTR_EN_0_6_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_6_XNNN 0x01000000 +#define NV50TCL_VP_ATTR_EN_0_6_NYNN 0x02000000 +#define NV50TCL_VP_ATTR_EN_0_6_XYNN 0x03000000 +#define NV50TCL_VP_ATTR_EN_0_6_NNZN 0x04000000 +#define NV50TCL_VP_ATTR_EN_0_6_XNZN 0x05000000 +#define NV50TCL_VP_ATTR_EN_0_6_NYZN 0x06000000 +#define NV50TCL_VP_ATTR_EN_0_6_XYZN 0x07000000 +#define NV50TCL_VP_ATTR_EN_0_6_NNNW 0x08000000 +#define NV50TCL_VP_ATTR_EN_0_6_XNNW 0x09000000 +#define NV50TCL_VP_ATTR_EN_0_6_NYNW 0x0a000000 +#define NV50TCL_VP_ATTR_EN_0_6_XYNW 0x0b000000 +#define NV50TCL_VP_ATTR_EN_0_6_NNZW 0x0c000000 +#define NV50TCL_VP_ATTR_EN_0_6_XNZW 0x0d000000 +#define NV50TCL_VP_ATTR_EN_0_6_NYZW 0x0e000000 +#define NV50TCL_VP_ATTR_EN_0_6_XYZW 0x0f000000 +#define NV50TCL_VP_ATTR_EN_0_5_SHIFT 20 +#define NV50TCL_VP_ATTR_EN_0_5_MASK 0x00f00000 +#define NV50TCL_VP_ATTR_EN_0_5_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_5_XNNN 0x00100000 +#define NV50TCL_VP_ATTR_EN_0_5_NYNN 0x00200000 +#define NV50TCL_VP_ATTR_EN_0_5_XYNN 0x00300000 +#define NV50TCL_VP_ATTR_EN_0_5_NNZN 0x00400000 +#define NV50TCL_VP_ATTR_EN_0_5_XNZN 0x00500000 +#define NV50TCL_VP_ATTR_EN_0_5_NYZN 0x00600000 +#define NV50TCL_VP_ATTR_EN_0_5_XYZN 0x00700000 +#define NV50TCL_VP_ATTR_EN_0_5_NNNW 0x00800000 +#define NV50TCL_VP_ATTR_EN_0_5_XNNW 0x00900000 +#define NV50TCL_VP_ATTR_EN_0_5_NYNW 0x00a00000 +#define NV50TCL_VP_ATTR_EN_0_5_XYNW 0x00b00000 +#define NV50TCL_VP_ATTR_EN_0_5_NNZW 0x00c00000 +#define NV50TCL_VP_ATTR_EN_0_5_XNZW 0x00d00000 +#define NV50TCL_VP_ATTR_EN_0_5_NYZW 0x00e00000 +#define NV50TCL_VP_ATTR_EN_0_5_XYZW 0x00f00000 +#define NV50TCL_VP_ATTR_EN_0_4_SHIFT 16 +#define NV50TCL_VP_ATTR_EN_0_4_MASK 0x000f0000 +#define NV50TCL_VP_ATTR_EN_0_4_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_4_XNNN 0x00010000 +#define NV50TCL_VP_ATTR_EN_0_4_NYNN 0x00020000 +#define NV50TCL_VP_ATTR_EN_0_4_XYNN 0x00030000 +#define NV50TCL_VP_ATTR_EN_0_4_NNZN 0x00040000 +#define NV50TCL_VP_ATTR_EN_0_4_XNZN 0x00050000 +#define NV50TCL_VP_ATTR_EN_0_4_NYZN 0x00060000 +#define NV50TCL_VP_ATTR_EN_0_4_XYZN 0x00070000 +#define NV50TCL_VP_ATTR_EN_0_4_NNNW 0x00080000 +#define NV50TCL_VP_ATTR_EN_0_4_XNNW 0x00090000 +#define NV50TCL_VP_ATTR_EN_0_4_NYNW 0x000a0000 +#define NV50TCL_VP_ATTR_EN_0_4_XYNW 0x000b0000 +#define NV50TCL_VP_ATTR_EN_0_4_NNZW 0x000c0000 +#define NV50TCL_VP_ATTR_EN_0_4_XNZW 0x000d0000 +#define NV50TCL_VP_ATTR_EN_0_4_NYZW 0x000e0000 +#define NV50TCL_VP_ATTR_EN_0_4_XYZW 0x000f0000 +#define NV50TCL_VP_ATTR_EN_0_3_SHIFT 12 +#define NV50TCL_VP_ATTR_EN_0_3_MASK 0x0000f000 +#define NV50TCL_VP_ATTR_EN_0_3_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_3_XNNN 0x00001000 +#define NV50TCL_VP_ATTR_EN_0_3_NYNN 0x00002000 +#define NV50TCL_VP_ATTR_EN_0_3_XYNN 0x00003000 +#define NV50TCL_VP_ATTR_EN_0_3_NNZN 0x00004000 +#define NV50TCL_VP_ATTR_EN_0_3_XNZN 0x00005000 +#define NV50TCL_VP_ATTR_EN_0_3_NYZN 0x00006000 +#define NV50TCL_VP_ATTR_EN_0_3_XYZN 0x00007000 +#define NV50TCL_VP_ATTR_EN_0_3_NNNW 0x00008000 +#define NV50TCL_VP_ATTR_EN_0_3_XNNW 0x00009000 +#define NV50TCL_VP_ATTR_EN_0_3_NYNW 0x0000a000 +#define NV50TCL_VP_ATTR_EN_0_3_XYNW 0x0000b000 +#define NV50TCL_VP_ATTR_EN_0_3_NNZW 0x0000c000 +#define NV50TCL_VP_ATTR_EN_0_3_XNZW 0x0000d000 +#define NV50TCL_VP_ATTR_EN_0_3_NYZW 0x0000e000 +#define NV50TCL_VP_ATTR_EN_0_3_XYZW 0x0000f000 +#define NV50TCL_VP_ATTR_EN_0_2_SHIFT 8 +#define NV50TCL_VP_ATTR_EN_0_2_MASK 0x00000f00 +#define NV50TCL_VP_ATTR_EN_0_2_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_2_XNNN 0x00000100 +#define NV50TCL_VP_ATTR_EN_0_2_NYNN 0x00000200 +#define NV50TCL_VP_ATTR_EN_0_2_XYNN 0x00000300 +#define NV50TCL_VP_ATTR_EN_0_2_NNZN 0x00000400 +#define NV50TCL_VP_ATTR_EN_0_2_XNZN 0x00000500 +#define NV50TCL_VP_ATTR_EN_0_2_NYZN 0x00000600 +#define NV50TCL_VP_ATTR_EN_0_2_XYZN 0x00000700 +#define NV50TCL_VP_ATTR_EN_0_2_NNNW 0x00000800 +#define NV50TCL_VP_ATTR_EN_0_2_XNNW 0x00000900 +#define NV50TCL_VP_ATTR_EN_0_2_NYNW 0x00000a00 +#define NV50TCL_VP_ATTR_EN_0_2_XYNW 0x00000b00 +#define NV50TCL_VP_ATTR_EN_0_2_NNZW 0x00000c00 +#define NV50TCL_VP_ATTR_EN_0_2_XNZW 0x00000d00 +#define NV50TCL_VP_ATTR_EN_0_2_NYZW 0x00000e00 +#define NV50TCL_VP_ATTR_EN_0_2_XYZW 0x00000f00 +#define NV50TCL_VP_ATTR_EN_0_1_SHIFT 4 +#define NV50TCL_VP_ATTR_EN_0_1_MASK 0x000000f0 +#define NV50TCL_VP_ATTR_EN_0_1_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_1_XNNN 0x00000010 +#define NV50TCL_VP_ATTR_EN_0_1_NYNN 0x00000020 +#define NV50TCL_VP_ATTR_EN_0_1_XYNN 0x00000030 +#define NV50TCL_VP_ATTR_EN_0_1_NNZN 0x00000040 +#define NV50TCL_VP_ATTR_EN_0_1_XNZN 0x00000050 +#define NV50TCL_VP_ATTR_EN_0_1_NYZN 0x00000060 +#define NV50TCL_VP_ATTR_EN_0_1_XYZN 0x00000070 +#define NV50TCL_VP_ATTR_EN_0_1_NNNW 0x00000080 +#define NV50TCL_VP_ATTR_EN_0_1_XNNW 0x00000090 +#define NV50TCL_VP_ATTR_EN_0_1_NYNW 0x000000a0 +#define NV50TCL_VP_ATTR_EN_0_1_XYNW 0x000000b0 +#define NV50TCL_VP_ATTR_EN_0_1_NNZW 0x000000c0 +#define NV50TCL_VP_ATTR_EN_0_1_XNZW 0x000000d0 +#define NV50TCL_VP_ATTR_EN_0_1_NYZW 0x000000e0 +#define NV50TCL_VP_ATTR_EN_0_1_XYZW 0x000000f0 +#define NV50TCL_VP_ATTR_EN_0_0_SHIFT 0 +#define NV50TCL_VP_ATTR_EN_0_0_MASK 0x0000000f +#define NV50TCL_VP_ATTR_EN_0_0_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_0_XNNN 0x00000001 +#define NV50TCL_VP_ATTR_EN_0_0_NYNN 0x00000002 +#define NV50TCL_VP_ATTR_EN_0_0_XYNN 0x00000003 +#define NV50TCL_VP_ATTR_EN_0_0_NNZN 0x00000004 +#define NV50TCL_VP_ATTR_EN_0_0_XNZN 0x00000005 +#define NV50TCL_VP_ATTR_EN_0_0_NYZN 0x00000006 +#define NV50TCL_VP_ATTR_EN_0_0_XYZN 0x00000007 +#define NV50TCL_VP_ATTR_EN_0_0_NNNW 0x00000008 +#define NV50TCL_VP_ATTR_EN_0_0_XNNW 0x00000009 +#define NV50TCL_VP_ATTR_EN_0_0_NYNW 0x0000000a +#define NV50TCL_VP_ATTR_EN_0_0_XYNW 0x0000000b +#define NV50TCL_VP_ATTR_EN_0_0_NNZW 0x0000000c +#define NV50TCL_VP_ATTR_EN_0_0_XNZW 0x0000000d +#define NV50TCL_VP_ATTR_EN_0_0_NYZW 0x0000000e +#define NV50TCL_VP_ATTR_EN_0_0_XYZW 0x0000000f +#define NV50TCL_VP_ATTR_EN_1 0x00001654 +#define NV50TCL_VP_ATTR_EN_1_15_SHIFT 28 +#define NV50TCL_VP_ATTR_EN_1_15_MASK 0xf0000000 +#define NV50TCL_VP_ATTR_EN_1_15_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_15_XNNN 0x10000000 +#define NV50TCL_VP_ATTR_EN_1_15_NYNN 0x20000000 +#define NV50TCL_VP_ATTR_EN_1_15_XYNN 0x30000000 +#define NV50TCL_VP_ATTR_EN_1_15_NNZN 0x40000000 +#define NV50TCL_VP_ATTR_EN_1_15_XNZN 0x50000000 +#define NV50TCL_VP_ATTR_EN_1_15_NYZN 0x60000000 +#define NV50TCL_VP_ATTR_EN_1_15_XYZN 0x70000000 +#define NV50TCL_VP_ATTR_EN_1_15_NNNW 0x80000000 +#define NV50TCL_VP_ATTR_EN_1_15_XNNW 0x90000000 +#define NV50TCL_VP_ATTR_EN_1_15_NYNW 0xa0000000 +#define NV50TCL_VP_ATTR_EN_1_15_XYNW 0xb0000000 +#define NV50TCL_VP_ATTR_EN_1_15_NNZW 0xc0000000 +#define NV50TCL_VP_ATTR_EN_1_15_XNZW 0xd0000000 +#define NV50TCL_VP_ATTR_EN_1_15_NYZW 0xe0000000 +#define NV50TCL_VP_ATTR_EN_1_15_XYZW 0xf0000000 +#define NV50TCL_VP_ATTR_EN_1_14_SHIFT 24 +#define NV50TCL_VP_ATTR_EN_1_14_MASK 0x0f000000 +#define NV50TCL_VP_ATTR_EN_1_14_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_14_XNNN 0x01000000 +#define NV50TCL_VP_ATTR_EN_1_14_NYNN 0x02000000 +#define NV50TCL_VP_ATTR_EN_1_14_XYNN 0x03000000 +#define NV50TCL_VP_ATTR_EN_1_14_NNZN 0x04000000 +#define NV50TCL_VP_ATTR_EN_1_14_XNZN 0x05000000 +#define NV50TCL_VP_ATTR_EN_1_14_NYZN 0x06000000 +#define NV50TCL_VP_ATTR_EN_1_14_XYZN 0x07000000 +#define NV50TCL_VP_ATTR_EN_1_14_NNNW 0x08000000 +#define NV50TCL_VP_ATTR_EN_1_14_XNNW 0x09000000 +#define NV50TCL_VP_ATTR_EN_1_14_NYNW 0x0a000000 +#define NV50TCL_VP_ATTR_EN_1_14_XYNW 0x0b000000 +#define NV50TCL_VP_ATTR_EN_1_14_NNZW 0x0c000000 +#define NV50TCL_VP_ATTR_EN_1_14_XNZW 0x0d000000 +#define NV50TCL_VP_ATTR_EN_1_14_NYZW 0x0e000000 +#define NV50TCL_VP_ATTR_EN_1_14_XYZW 0x0f000000 +#define NV50TCL_VP_ATTR_EN_1_13_SHIFT 20 +#define NV50TCL_VP_ATTR_EN_1_13_MASK 0x00f00000 +#define NV50TCL_VP_ATTR_EN_1_13_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_13_XNNN 0x00100000 +#define NV50TCL_VP_ATTR_EN_1_13_NYNN 0x00200000 +#define NV50TCL_VP_ATTR_EN_1_13_XYNN 0x00300000 +#define NV50TCL_VP_ATTR_EN_1_13_NNZN 0x00400000 +#define NV50TCL_VP_ATTR_EN_1_13_XNZN 0x00500000 +#define NV50TCL_VP_ATTR_EN_1_13_NYZN 0x00600000 +#define NV50TCL_VP_ATTR_EN_1_13_XYZN 0x00700000 +#define NV50TCL_VP_ATTR_EN_1_13_NNNW 0x00800000 +#define NV50TCL_VP_ATTR_EN_1_13_XNNW 0x00900000 +#define NV50TCL_VP_ATTR_EN_1_13_NYNW 0x00a00000 +#define NV50TCL_VP_ATTR_EN_1_13_XYNW 0x00b00000 +#define NV50TCL_VP_ATTR_EN_1_13_NNZW 0x00c00000 +#define NV50TCL_VP_ATTR_EN_1_13_XNZW 0x00d00000 +#define NV50TCL_VP_ATTR_EN_1_13_NYZW 0x00e00000 +#define NV50TCL_VP_ATTR_EN_1_13_XYZW 0x00f00000 +#define NV50TCL_VP_ATTR_EN_1_12_SHIFT 16 +#define NV50TCL_VP_ATTR_EN_1_12_MASK 0x000f0000 +#define NV50TCL_VP_ATTR_EN_1_12_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_12_XNNN 0x00010000 +#define NV50TCL_VP_ATTR_EN_1_12_NYNN 0x00020000 +#define NV50TCL_VP_ATTR_EN_1_12_XYNN 0x00030000 +#define NV50TCL_VP_ATTR_EN_1_12_NNZN 0x00040000 +#define NV50TCL_VP_ATTR_EN_1_12_XNZN 0x00050000 +#define NV50TCL_VP_ATTR_EN_1_12_NYZN 0x00060000 +#define NV50TCL_VP_ATTR_EN_1_12_XYZN 0x00070000 +#define NV50TCL_VP_ATTR_EN_1_12_NNNW 0x00080000 +#define NV50TCL_VP_ATTR_EN_1_12_XNNW 0x00090000 +#define NV50TCL_VP_ATTR_EN_1_12_NYNW 0x000a0000 +#define NV50TCL_VP_ATTR_EN_1_12_XYNW 0x000b0000 +#define NV50TCL_VP_ATTR_EN_1_12_NNZW 0x000c0000 +#define NV50TCL_VP_ATTR_EN_1_12_XNZW 0x000d0000 +#define NV50TCL_VP_ATTR_EN_1_12_NYZW 0x000e0000 +#define NV50TCL_VP_ATTR_EN_1_12_XYZW 0x000f0000 +#define NV50TCL_VP_ATTR_EN_1_11_SHIFT 12 +#define NV50TCL_VP_ATTR_EN_1_11_MASK 0x0000f000 +#define NV50TCL_VP_ATTR_EN_1_11_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_11_XNNN 0x00001000 +#define NV50TCL_VP_ATTR_EN_1_11_NYNN 0x00002000 +#define NV50TCL_VP_ATTR_EN_1_11_XYNN 0x00003000 +#define NV50TCL_VP_ATTR_EN_1_11_NNZN 0x00004000 +#define NV50TCL_VP_ATTR_EN_1_11_XNZN 0x00005000 +#define NV50TCL_VP_ATTR_EN_1_11_NYZN 0x00006000 +#define NV50TCL_VP_ATTR_EN_1_11_XYZN 0x00007000 +#define NV50TCL_VP_ATTR_EN_1_11_NNNW 0x00008000 +#define NV50TCL_VP_ATTR_EN_1_11_XNNW 0x00009000 +#define NV50TCL_VP_ATTR_EN_1_11_NYNW 0x0000a000 +#define NV50TCL_VP_ATTR_EN_1_11_XYNW 0x0000b000 +#define NV50TCL_VP_ATTR_EN_1_11_NNZW 0x0000c000 +#define NV50TCL_VP_ATTR_EN_1_11_XNZW 0x0000d000 +#define NV50TCL_VP_ATTR_EN_1_11_NYZW 0x0000e000 +#define NV50TCL_VP_ATTR_EN_1_11_XYZW 0x0000f000 +#define NV50TCL_VP_ATTR_EN_1_10_SHIFT 8 +#define NV50TCL_VP_ATTR_EN_1_10_MASK 0x00000f00 +#define NV50TCL_VP_ATTR_EN_1_10_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_10_XNNN 0x00000100 +#define NV50TCL_VP_ATTR_EN_1_10_NYNN 0x00000200 +#define NV50TCL_VP_ATTR_EN_1_10_XYNN 0x00000300 +#define NV50TCL_VP_ATTR_EN_1_10_NNZN 0x00000400 +#define NV50TCL_VP_ATTR_EN_1_10_XNZN 0x00000500 +#define NV50TCL_VP_ATTR_EN_1_10_NYZN 0x00000600 +#define NV50TCL_VP_ATTR_EN_1_10_XYZN 0x00000700 +#define NV50TCL_VP_ATTR_EN_1_10_NNNW 0x00000800 +#define NV50TCL_VP_ATTR_EN_1_10_XNNW 0x00000900 +#define NV50TCL_VP_ATTR_EN_1_10_NYNW 0x00000a00 +#define NV50TCL_VP_ATTR_EN_1_10_XYNW 0x00000b00 +#define NV50TCL_VP_ATTR_EN_1_10_NNZW 0x00000c00 +#define NV50TCL_VP_ATTR_EN_1_10_XNZW 0x00000d00 +#define NV50TCL_VP_ATTR_EN_1_10_NYZW 0x00000e00 +#define NV50TCL_VP_ATTR_EN_1_10_XYZW 0x00000f00 +#define NV50TCL_VP_ATTR_EN_1_9_SHIFT 4 +#define NV50TCL_VP_ATTR_EN_1_9_MASK 0x000000f0 +#define NV50TCL_VP_ATTR_EN_1_9_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_9_XNNN 0x00000010 +#define NV50TCL_VP_ATTR_EN_1_9_NYNN 0x00000020 +#define NV50TCL_VP_ATTR_EN_1_9_XYNN 0x00000030 +#define NV50TCL_VP_ATTR_EN_1_9_NNZN 0x00000040 +#define NV50TCL_VP_ATTR_EN_1_9_XNZN 0x00000050 +#define NV50TCL_VP_ATTR_EN_1_9_NYZN 0x00000060 +#define NV50TCL_VP_ATTR_EN_1_9_XYZN 0x00000070 +#define NV50TCL_VP_ATTR_EN_1_9_NNNW 0x00000080 +#define NV50TCL_VP_ATTR_EN_1_9_XNNW 0x00000090 +#define NV50TCL_VP_ATTR_EN_1_9_NYNW 0x000000a0 +#define NV50TCL_VP_ATTR_EN_1_9_XYNW 0x000000b0 +#define NV50TCL_VP_ATTR_EN_1_9_NNZW 0x000000c0 +#define NV50TCL_VP_ATTR_EN_1_9_XNZW 0x000000d0 +#define NV50TCL_VP_ATTR_EN_1_9_NYZW 0x000000e0 +#define NV50TCL_VP_ATTR_EN_1_9_XYZW 0x000000f0 +#define NV50TCL_VP_ATTR_EN_1_8_SHIFT 0 +#define NV50TCL_VP_ATTR_EN_1_8_MASK 0x0000000f +#define NV50TCL_VP_ATTR_EN_1_8_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_8_XNNN 0x00000001 +#define NV50TCL_VP_ATTR_EN_1_8_NYNN 0x00000002 +#define NV50TCL_VP_ATTR_EN_1_8_XYNN 0x00000003 +#define NV50TCL_VP_ATTR_EN_1_8_NNZN 0x00000004 +#define NV50TCL_VP_ATTR_EN_1_8_XNZN 0x00000005 +#define NV50TCL_VP_ATTR_EN_1_8_NYZN 0x00000006 +#define NV50TCL_VP_ATTR_EN_1_8_XYZN 0x00000007 +#define NV50TCL_VP_ATTR_EN_1_8_NNNW 0x00000008 +#define NV50TCL_VP_ATTR_EN_1_8_XNNW 0x00000009 +#define NV50TCL_VP_ATTR_EN_1_8_NYNW 0x0000000a +#define NV50TCL_VP_ATTR_EN_1_8_XYNW 0x0000000b +#define NV50TCL_VP_ATTR_EN_1_8_NNZW 0x0000000c +#define NV50TCL_VP_ATTR_EN_1_8_XNZW 0x0000000d +#define NV50TCL_VP_ATTR_EN_1_8_NYZW 0x0000000e +#define NV50TCL_VP_ATTR_EN_1_8_XYZW 0x0000000f +#define NV50TCL_LINE_STIPPLE_ENABLE 0x0000166c +#define NV50TCL_LINE_STIPPLE_PATTERN 0x00001680 +#define NV50TCL_POLYGON_STIPPLE_ENABLE 0x0000168c +#define NV50TCL_VP_REG_HPOS 0x000016bc +#define NV50TCL_VP_REG_HPOS_X_SHIFT 0 +#define NV50TCL_VP_REG_HPOS_X_MASK 0x000000ff +#define NV50TCL_VP_REG_HPOS_Y_SHIFT 8 +#define NV50TCL_VP_REG_HPOS_Y_MASK 0x0000ff00 +#define NV50TCL_VP_REG_HPOS_Z_SHIFT 16 +#define NV50TCL_VP_REG_HPOS_Z_MASK 0x00ff0000 +#define NV50TCL_VP_REG_HPOS_W_SHIFT 24 +#define NV50TCL_VP_REG_HPOS_W_MASK 0xff000000 +#define NV50TCL_VP_REG_COL0 0x000016c0 +#define NV50TCL_VP_REG_COL0_X_SHIFT 0 +#define NV50TCL_VP_REG_COL0_X_MASK 0x000000ff +#define NV50TCL_VP_REG_COL0_Y_SHIFT 8 +#define NV50TCL_VP_REG_COL0_Y_MASK 0x0000ff00 +#define NV50TCL_VP_REG_COL0_Z_SHIFT 16 +#define NV50TCL_VP_REG_COL0_Z_MASK 0x00ff0000 +#define NV50TCL_VP_REG_COL0_W_SHIFT 24 +#define NV50TCL_VP_REG_COL0_W_MASK 0xff000000 +#define NV50TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001700+((x)*4)) +#define NV50TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 +#define NV50TCL_CULL_FACE_ENABLE 0x00001918 +#define NV50TCL_FRONT_FACE 0x0000191c +#define NV50TCL_FRONT_FACE_CW 0x00000900 +#define NV50TCL_FRONT_FACE_CCW 0x00000901 +#define NV50TCL_CULL_FACE 0x00001920 +#define NV50TCL_CULL_FACE_FRONT 0x00000404 +#define NV50TCL_CULL_FACE_BACK 0x00000405 +#define NV50TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NV50TCL_LOGIC_OP_ENABLE 0x000019c4 +#define NV50TCL_LOGIC_OP 0x000019c8 +#define NV50TCL_LOGIC_OP_CLEAR 0x00001500 +#define NV50TCL_LOGIC_OP_AND 0x00001501 +#define NV50TCL_LOGIC_OP_AND_REVERSE 0x00001502 +#define NV50TCL_LOGIC_OP_COPY 0x00001503 +#define NV50TCL_LOGIC_OP_AND_INVERTED 0x00001504 +#define NV50TCL_LOGIC_OP_NOOP 0x00001505 +#define NV50TCL_LOGIC_OP_XOR 0x00001506 +#define NV50TCL_LOGIC_OP_OR 0x00001507 +#define NV50TCL_LOGIC_OP_NOR 0x00001508 +#define NV50TCL_LOGIC_OP_EQUIV 0x00001509 +#define NV50TCL_LOGIC_OP_INVERT 0x0000150a +#define NV50TCL_LOGIC_OP_OR_REVERSE 0x0000150b +#define NV50TCL_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NV50TCL_LOGIC_OP_OR_INVERTED 0x0000150d +#define NV50TCL_LOGIC_OP_NAND 0x0000150e +#define NV50TCL_LOGIC_OP_SET 0x0000150f +#define NV50TCL_CLEAR_BUFFERS 0x000019d0 +#define NV50TCL_COLOR_MASK(x) (0x00001a00+((x)*4)) +#define NV50TCL_COLOR_MASK__SIZE 0x00000008 +#define NV50TCL_COLOR_MASK_R_SHIFT 0 +#define NV50TCL_COLOR_MASK_R_MASK 0x0000000f +#define NV50TCL_COLOR_MASK_G_SHIFT 4 +#define NV50TCL_COLOR_MASK_G_MASK 0x000000f0 +#define NV50TCL_COLOR_MASK_B_SHIFT 8 +#define NV50TCL_COLOR_MASK_B_MASK 0x00000f00 +#define NV50TCL_COLOR_MASK_A_SHIFT 12 +#define NV50TCL_COLOR_MASK_A_MASK 0x0000f000 + + +#define NV50_COMPUTE 0x000050c0 + +#define NV50_COMPUTE_DMA_UNK0 0x000001a0 +#define NV50_COMPUTE_DMA_STATUS 0x000001a4 +#define NV50_COMPUTE_DMA_UNK1 0x000001b8 +#define NV50_COMPUTE_DMA_UNK2 0x000001bc +#define NV50_COMPUTE_DMA_UNK3 0x000001c0 +#define NV50_COMPUTE_UNK4_HIGH 0x00000210 +#define NV50_COMPUTE_UNK4_LOW 0x00000214 +#define NV50_COMPUTE_UNK5_HIGH 0x00000218 +#define NV50_COMPUTE_UNK5_LOW 0x0000021c +#define NV50_COMPUTE_UNK6_HIGH 0x00000294 +#define NV50_COMPUTE_UNK6_LOW 0x00000298 +#define NV50_COMPUTE_CONST_BASE_HIGH 0x000002a4 +#define NV50_COMPUTE_CONST_BASE_LO 0x000002a8 +#define NV50_COMPUTE_CONST_SIZE_SEG 0x000002ac +#define NV50_COMPUTE_REG_COUNT 0x000002c0 +#define NV50_COMPUTE_STATUS_HIGH 0x00000310 +#define NV50_COMPUTE_STATUS_LOW 0x00000314 +#define NV50_COMPUTE_EXECUTE 0x0000031c +#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374 +#define NV50_COMPUTE_GRIDDIM_YX 0x000003a4 +#define NV50_COMPUTE_SHARED_SIZE 0x000003a8 +#define NV50_COMPUTE_BLOCKDIM_YX 0x000003ac +#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0 +#define NV50_COMPUTE_CALL_ADDRESS 0x000003b4 +#define NV50_COMPUTE_GLOBAL_BASE_HIGH(x) (0x00000400+((x)*32)) +#define NV50_COMPUTE_GLOBAL_BASE_HIGH__SIZE 0x00000010 +#define NV50_COMPUTE_GLOBAL_BASE_LOW(x) (0x00000404+((x)*32)) +#define NV50_COMPUTE_GLOBAL_BASE_LOW__SIZE 0x00000010 +#define NV50_COMPUTE_GLOBAL_LIMIT_HIGH(x) (0x00000408+((x)*32)) +#define NV50_COMPUTE_GLOBAL_LIMIT_HIGH__SIZE 0x00000010 +#define NV50_COMPUTE_GLOBAL_LIMIT_LOW(x) (0x0000040c+((x)*32)) +#define NV50_COMPUTE_GLOBAL_LIMIT_LOW__SIZE 0x00000010 +#define NV50_COMPUTE_GLOBAL_UNK(x) (0x00000410+((x)*32)) +#define NV50_COMPUTE_GLOBAL_UNK__SIZE 0x00000010 +#define NV50_COMPUTE_USER_PARAM(x) (0x00000600+((x)*4)) +#define NV50_COMPUTE_USER_PARAM__SIZE 0x00000040 + + +#define NV54TCL 0x00008297 + + + +#endif /* NOUVEAU_REG_H */ diff --git a/src/gallium/drivers/nouveau/nouveau_gldefs.h b/src/gallium/drivers/nouveau/nouveau_gldefs.h new file mode 100644 index 00000000000..e1015c93a27 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_gldefs.h @@ -0,0 +1,196 @@ +#ifndef __NOUVEAU_GLDEFS_H__ +#define __NOUVEAU_GLDEFS_H__ + +static INLINE unsigned +nvgl_blend_func(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return 0x0000; + case PIPE_BLENDFACTOR_ONE: + return 0x0001; + case PIPE_BLENDFACTOR_SRC_COLOR: + return 0x0300; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return 0x0301; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return 0x0302; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return 0x0303; + case PIPE_BLENDFACTOR_DST_ALPHA: + return 0x0304; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return 0x0305; + case PIPE_BLENDFACTOR_DST_COLOR: + return 0x0306; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return 0x0307; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return 0x0308; + case PIPE_BLENDFACTOR_CONST_COLOR: + return 0x8001; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return 0x8002; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return 0x8003; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return 0x8004; + default: + return 0x0000; + } +} + +static INLINE unsigned +nvgl_blend_eqn(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return 0x8006; + case PIPE_BLEND_MIN: + return 0x8007; + case PIPE_BLEND_MAX: + return 0x8008; + case PIPE_BLEND_SUBTRACT: + return 0x800a; + case PIPE_BLEND_REVERSE_SUBTRACT: + return 0x800b; + default: + return 0x8006; + } +} + +static INLINE unsigned +nvgl_logicop_func(unsigned func) +{ + switch (func) { + case PIPE_LOGICOP_CLEAR: + return 0x1500; + case PIPE_LOGICOP_NOR: + return 0x1508; + case PIPE_LOGICOP_AND_INVERTED: + return 0x1504; + case PIPE_LOGICOP_COPY_INVERTED: + return 0x150c; + case PIPE_LOGICOP_AND_REVERSE: + return 0x1502; + case PIPE_LOGICOP_INVERT: + return 0x150a; + case PIPE_LOGICOP_XOR: + return 0x1506; + case PIPE_LOGICOP_NAND: + return 0x150e; + case PIPE_LOGICOP_AND: + return 0x1501; + case PIPE_LOGICOP_EQUIV: + return 0x1509; + case PIPE_LOGICOP_NOOP: + return 0x1505; + case PIPE_LOGICOP_OR_INVERTED: + return 0x150d; + case PIPE_LOGICOP_COPY: + return 0x1503; + case PIPE_LOGICOP_OR_REVERSE: + return 0x150b; + case PIPE_LOGICOP_OR: + return 0x1507; + case PIPE_LOGICOP_SET: + return 0x150f; + default: + return 0x1505; + } +} + +static INLINE unsigned +nvgl_comparison_op(unsigned op) +{ + switch (op) { + case PIPE_FUNC_NEVER: + return 0x0200; + case PIPE_FUNC_LESS: + return 0x0201; + case PIPE_FUNC_EQUAL: + return 0x0202; + case PIPE_FUNC_LEQUAL: + return 0x0203; + case PIPE_FUNC_GREATER: + return 0x0204; + case PIPE_FUNC_NOTEQUAL: + return 0x0205; + case PIPE_FUNC_GEQUAL: + return 0x0206; + case PIPE_FUNC_ALWAYS: + return 0x0207; + default: + return 0x0207; + } +} + +static INLINE unsigned +nvgl_polygon_mode(unsigned mode) +{ + switch (mode) { + case PIPE_POLYGON_MODE_POINT: + return 0x1b00; + case PIPE_POLYGON_MODE_LINE: + return 0x1b01; + case PIPE_POLYGON_MODE_FILL: + return 0x1b02; + default: + return 0x1b02; + } +} + +static INLINE unsigned +nvgl_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_ZERO: + return 0x0000; + case PIPE_STENCIL_OP_INVERT: + return 0x150a; + case PIPE_STENCIL_OP_KEEP: + return 0x1e00; + case PIPE_STENCIL_OP_REPLACE: + return 0x1e01; + case PIPE_STENCIL_OP_INCR: + return 0x1e02; + case PIPE_STENCIL_OP_DECR: + return 0x1e03; + case PIPE_STENCIL_OP_INCR_WRAP: + return 0x8507; + case PIPE_STENCIL_OP_DECR_WRAP: + return 0x8508; + default: + return 0x1e00; + } +} + +static INLINE unsigned +nvgl_primitive(unsigned prim) { + switch (prim) { + case PIPE_PRIM_POINTS: + return 0x0001; + case PIPE_PRIM_LINES: + return 0x0002; + case PIPE_PRIM_LINE_LOOP: + return 0x0003; + case PIPE_PRIM_LINE_STRIP: + return 0x0004; + case PIPE_PRIM_TRIANGLES: + return 0x0005; + case PIPE_PRIM_TRIANGLE_STRIP: + return 0x0006; + case PIPE_PRIM_TRIANGLE_FAN: + return 0x0007; + case PIPE_PRIM_QUADS: + return 0x0008; + case PIPE_PRIM_QUAD_STRIP: + return 0x0009; + case PIPE_PRIM_POLYGON: + return 0x000a; + default: + return 0x0001; + } +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_grobj.h b/src/gallium/drivers/nouveau/nouveau_grobj.h new file mode 100644 index 00000000000..8f5abf90514 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_grobj.h @@ -0,0 +1,35 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_GROBJ_H__ +#define __NOUVEAU_GROBJ_H__ + +#include "nouveau_channel.h" + +struct nouveau_grobj { + struct nouveau_channel *channel; + int grclass; + uint32_t handle; + int subc; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_notifier.h b/src/gallium/drivers/nouveau/nouveau_notifier.h new file mode 100644 index 00000000000..35adde1e324 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_notifier.h @@ -0,0 +1,43 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_NOTIFIER_H__ +#define __NOUVEAU_NOTIFIER_H__ + +#define NV_NOTIFIER_SIZE 32 +#define NV_NOTIFY_TIME_0 0x00000000 +#define NV_NOTIFY_TIME_1 0x00000004 +#define NV_NOTIFY_RETURN_VALUE 0x00000008 +#define NV_NOTIFY_STATE 0x0000000C +#define NV_NOTIFY_STATE_STATUS_MASK 0xFF000000 +#define NV_NOTIFY_STATE_STATUS_SHIFT 24 +#define NV_NOTIFY_STATE_STATUS_COMPLETED 0x00 +#define NV_NOTIFY_STATE_STATUS_IN_PROCESS 0x01 +#define NV_NOTIFY_STATE_ERROR_CODE_MASK 0x0000FFFF +#define NV_NOTIFY_STATE_ERROR_CODE_SHIFT 0 + +struct nouveau_notifier { + struct nouveau_channel *channel; + uint32_t handle; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h new file mode 100644 index 00000000000..c5c5d988d58 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_push.h @@ -0,0 +1,83 @@ +#ifndef __NOUVEAU_PUSH_H__ +#define __NOUVEAU_PUSH_H__ + +#include "nouveau/nouveau_winsys.h" + +#ifndef NOUVEAU_PUSH_CONTEXT +#error undefined push context +#endif + +#define OUT_RING(data) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + (*pc->nvws->channel->pushbuf->cur++) = (data); \ +} while(0) + +#define OUT_RINGp(src,size) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + memcpy(pc->nvws->channel->pushbuf->cur, (src), (size) * 4); \ + pc->nvws->channel->pushbuf->cur += (size); \ +} while(0) + +#define OUT_RINGf(data) do { \ + union { float v; uint32_t u; } c; \ + c.v = (data); \ + OUT_RING(c.u); \ +} while(0) + +#define BEGIN_RING(obj,mthd,size) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \ + pc->nvws->push_flush(pc->nvws->channel, ((size) + 1)); \ + OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ + pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \ +} while(0) + +#define BEGIN_RING_NI(obj,mthd,size) do { \ + BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ +} while(0) + +#define FIRE_RING() do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + pc->nvws->push_flush(pc->nvws->channel, 0); \ +} while(0) + +#define OUT_RELOC(bo,data,flags,vor,tor) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + pc->nvws->push_reloc(pc->nvws->channel, \ + pc->nvws->channel->pushbuf->cur++, \ + (bo), (data), (flags), (vor), (tor)); \ +} while(0) + +/* Raw data + flags depending on FB/TT buffer */ +#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ + OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ +} while(0) + +/* FB/TT object handle */ +#define OUT_RELOCo(bo,flags) do { \ + OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ + pc->nvws->channel->vram->handle, \ + pc->nvws->channel->gart->handle); \ +} while(0) + +/* Low 32-bits of offset */ +#define OUT_RELOCl(bo,delta,flags) do { \ + OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ +} while(0) + +/* High 32-bits of offset */ +#define OUT_RELOCh(bo,delta,flags) do { \ + OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ +} while(0) + +/* A reloc which'll recombine into a NV_DMA_METHOD packet header */ +#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \ + pc->nvws->push_flush(pc->nvws->channel, ((size) + 1)); \ + OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \ + (flags), 0, 0); \ + pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \ +} while(0) + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_pushbuf.h b/src/gallium/drivers/nouveau/nouveau_pushbuf.h new file mode 100644 index 00000000000..19097650982 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_pushbuf.h @@ -0,0 +1,32 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_PUSHBUF_H__ +#define __NOUVEAU_PUSHBUF_H__ + +struct nouveau_pushbuf { + struct nouveau_channel *channel; + unsigned remaining; + uint32_t *cur; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_resource.h b/src/gallium/drivers/nouveau/nouveau_resource.h new file mode 100644 index 00000000000..1af7961d301 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_resource.h @@ -0,0 +1,37 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_RESOURCE_H__ +#define __NOUVEAU_RESOURCE_H__ + +struct nouveau_resource { + struct nouveau_resource *prev; + struct nouveau_resource *next; + + int in_use; + void *priv; + + unsigned int start; + unsigned int size; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h new file mode 100644 index 00000000000..07c31b014a5 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -0,0 +1,141 @@ +#ifndef __NOUVEAU_STATEOBJ_H__ +#define __NOUVEAU_STATEOBJ_H__ + +#include "pipe/p_util.h" + +struct nouveau_stateobj_reloc { + struct pipe_buffer *bo; + + unsigned offset; + unsigned packet; + + unsigned data; + unsigned flags; + unsigned vor; + unsigned tor; +}; + +struct nouveau_stateobj { + int refcount; + + unsigned *push; + struct nouveau_stateobj_reloc *reloc; + + unsigned *cur; + unsigned cur_packet; + unsigned cur_reloc; +}; + +static INLINE struct nouveau_stateobj * +so_new(unsigned push, unsigned reloc) +{ + struct nouveau_stateobj *so; + + so = MALLOC(sizeof(struct nouveau_stateobj)); + so->refcount = 1; + so->push = MALLOC(sizeof(unsigned) * push); + so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); + + so->cur = so->push; + so->cur_reloc = so->cur_packet = 0; + + return so; +} + +static INLINE void +so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) +{ + struct nouveau_stateobj *so; + + so = *pso; + if (so) { + if (--so->refcount <= 0) { + free(so->push); + free(so->reloc); + free(so); + } + *pso = NULL; + } + + if (ref) { + ref->refcount++; + *pso = ref; + } +} + +static INLINE void +so_data(struct nouveau_stateobj *so, unsigned data) +{ + (*so->cur++) = (data); + so->cur_packet += 4; +} + +static INLINE void +so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, + unsigned mthd, unsigned size) +{ + so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4); + so_data(so, (gr->subc << 13) | (size << 18) | mthd); +} + +static INLINE void +so_reloc(struct nouveau_stateobj *so, struct pipe_buffer *bo, + unsigned data, unsigned flags, unsigned vor, unsigned tor) +{ + struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++]; + + r->bo = bo; + r->offset = so->cur - so->push; + r->packet = so->cur_packet; + r->data = data; + r->flags = flags; + r->vor = vor; + r->tor = tor; + so_data(so, data); +} + +static INLINE void +so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) +{ + struct nouveau_pushbuf *pb = nvws->channel->pushbuf; + unsigned nr, i; + + nr = so->cur - so->push; + if (pb->remaining < nr) + nvws->push_flush(nvws->channel, nr); + pb->remaining -= nr; + + memcpy(pb->cur, so->push, nr * 4); + for (i = 0; i < so->cur_reloc; i++) { + struct nouveau_stateobj_reloc *r = &so->reloc[i]; + + nvws->push_reloc(nvws->channel, pb->cur + r->offset, r->bo, + r->data, r->flags, r->vor, r->tor); + } + pb->cur += nr; +} + +static INLINE void +so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) +{ + struct nouveau_pushbuf *pb = nvws->channel->pushbuf; + unsigned i; + + i = so->cur_reloc << 1; + if (nvws->channel->pushbuf->remaining < i) + nvws->push_flush(nvws->channel, i); + nvws->channel->pushbuf->remaining -= i; + + for (i = 0; i < so->cur_reloc; i++) { + struct nouveau_stateobj_reloc *r = &so->reloc[i]; + + nvws->push_reloc(nvws->channel, pb->cur++, r->bo, r->packet, + (r->flags & + (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART)) | + NOUVEAU_BO_DUMMY, 0, 0); + nvws->push_reloc(nvws->channel, pb->cur++, r->bo, r->data, + r->flags | NOUVEAU_BO_DUMMY, r->vor, r->tor); + } +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h new file mode 100644 index 00000000000..b5e470cfaa4 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -0,0 +1,61 @@ +#ifndef NOUVEAU_WINSYS_H +#define NOUVEAU_WINSYS_H + +#include <stdint.h> +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" + +#include "nouveau/nouveau_bo.h" +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_class.h" +#include "nouveau/nouveau_grobj.h" +#include "nouveau/nouveau_notifier.h" +#include "nouveau/nouveau_resource.h" +#include "nouveau/nouveau_pushbuf.h" + +struct nouveau_winsys { + struct nouveau_context *nv; + + struct nouveau_channel *channel; + + int (*res_init)(struct nouveau_resource **heap, unsigned start, + unsigned size); + int (*res_alloc)(struct nouveau_resource *heap, int size, void *priv, + struct nouveau_resource **); + void (*res_free)(struct nouveau_resource **); + + int (*push_reloc)(struct nouveau_channel *, void *ptr, + struct pipe_buffer *, uint32_t data, + uint32_t flags, uint32_t vor, uint32_t tor); + int (*push_flush)(struct nouveau_channel *, unsigned size); + + int (*grobj_alloc)(struct nouveau_winsys *, int grclass, + struct nouveau_grobj **); + void (*grobj_free)(struct nouveau_grobj **); + + int (*notifier_alloc)(struct nouveau_winsys *, int count, + struct nouveau_notifier **); + void (*notifier_free)(struct nouveau_notifier **); + void (*notifier_reset)(struct nouveau_notifier *, int id); + uint32_t (*notifier_status)(struct nouveau_notifier *, int id); + uint32_t (*notifier_retval)(struct nouveau_notifier *, int id); + int (*notifier_wait)(struct nouveau_notifier *, int id, + int status, int timeout); + + int (*surface_copy)(struct nouveau_winsys *, struct pipe_surface *, + unsigned, unsigned, struct pipe_surface *, + unsigned, unsigned, unsigned, unsigned); + int (*surface_fill)(struct nouveau_winsys *, struct pipe_surface *, + unsigned, unsigned, unsigned, unsigned, unsigned); +}; + +extern struct pipe_context * +nv30_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); + +extern struct pipe_context * +nv40_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); + +extern struct pipe_context * +nv50_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); + +#endif diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile new file mode 100644 index 00000000000..b7c252fc986 --- /dev/null +++ b/src/gallium/drivers/nv30/Makefile @@ -0,0 +1,29 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv30 + +DRIVER_SOURCES = \ + nv30_clear.c \ + nv30_context.c \ + nv30_draw.c \ + nv30_fragprog.c \ + nv30_fragtex.c \ + nv30_miptree.c \ + nv30_query.c \ + nv30_state.c \ + nv30_state_emit.c \ + nv30_surface.c \ + nv30_vbo.c \ + nv30_vertprog.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c new file mode 100644 index 00000000000..71f413588ee --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv30_context.h" + +void +nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c new file mode 100644 index 00000000000..e9afeb80176 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -0,0 +1,431 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#include "nv30_context.h" + +static const char * +nv30_get_name(struct pipe_context *pipe) +{ + struct nv30_context *nv30 = nv30_context(pipe); + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", nv30->chipset); + return buffer; +} + +static const char * +nv30_get_vendor(struct pipe_context *pipe) +{ + return "nouveau"; +} + +static int +nv30_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 2; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv30_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + case PIPE_CAP_BITMAP_TEXCOORD_BIAS: + return 0.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static void +nv30_flush(struct pipe_context *pipe, unsigned flags) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_winsys *nvws = nv30->nvws; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(rankine, 0x1fd8, 1); + OUT_RING (2); + BEGIN_RING(rankine, 0x1fd8, 1); + OUT_RING (1); + } + + if (flags & PIPE_FLUSH_WAIT) { + nvws->notifier_reset(nv30->sync, 0); + BEGIN_RING(rankine, 0x104, 1); + OUT_RING (0); + BEGIN_RING(rankine, 0x100, 1); + OUT_RING (0); + } + + FIRE_RING(); + + if (flags & PIPE_FLUSH_WAIT) + nvws->notifier_wait(nv30->sync, 0, 0, 2000); +} + +static void +nv30_destroy(struct pipe_context *pipe) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_winsys *nvws = nv30->nvws; + + if (nv30->draw) + draw_destroy(nv30->draw); + + nvws->res_free(&nv30->vertprog.exec_heap); + nvws->res_free(&nv30->vertprog.data_heap); + + nvws->res_free(&nv30->query_heap); + nvws->notifier_free(&nv30->query); + + nvws->notifier_free(&nv30->sync); + + nvws->grobj_free(&nv30->rankine); + + free(nv30); +} + +static boolean +nv30_init_hwctx(struct nv30_context *nv30, int rankine_class) +{ + struct nouveau_winsys *nvws = nv30->nvws; + int ret; + int i; + + ret = nvws->grobj_alloc(nvws, rankine_class, &nv30->rankine); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + BEGIN_RING(rankine, NV34TCL_DMA_NOTIFY, 1); + OUT_RING (nv30->sync->handle); + BEGIN_RING(rankine, NV34TCL_DMA_TEXTURE0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->gart->handle); + BEGIN_RING(rankine, NV34TCL_DMA_COLOR1, 1); + OUT_RING (nvws->channel->vram->handle); + BEGIN_RING(rankine, NV34TCL_DMA_COLOR0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->vram->handle); + BEGIN_RING(rankine, NV34TCL_DMA_VTXBUF0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->gart->handle); +/* BEGIN_RING(rankine, NV34TCL_DMA_FENCE, 2); + OUT_RING (0); + OUT_RING (nv30->query->handle);*/ + BEGIN_RING(rankine, NV34TCL_DMA_IN_MEMORY7, 1); + OUT_RING (nvws->channel->vram->handle); + BEGIN_RING(rankine, NV34TCL_DMA_IN_MEMORY8, 1); + OUT_RING (nvws->channel->vram->handle); + + for (i=1; i<8; i++) { + BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (0); + BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (0); + } + + BEGIN_RING(rankine, 0x220, 1); + OUT_RING (1); + + BEGIN_RING(rankine, 0x03b0, 1); + OUT_RING (0x00100000); + BEGIN_RING(rankine, 0x1454, 1); + OUT_RING (0); + BEGIN_RING(rankine, 0x1d80, 1); + OUT_RING (3); + BEGIN_RING(rankine, 0x1450, 1); + OUT_RING (0x00030004); + + /* NEW */ + BEGIN_RING(rankine, 0x1e98, 1); + OUT_RING (0); + BEGIN_RING(rankine, 0x17e0, 3); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0x3f800000); + BEGIN_RING(rankine, 0x1f80, 16); + OUT_RING (0); OUT_RING (0); OUT_RING (0); OUT_RING (0); + OUT_RING (0); OUT_RING (0); OUT_RING (0); OUT_RING (0); + OUT_RING (0x0000ffff); + OUT_RING (0); OUT_RING (0); OUT_RING (0); OUT_RING (0); + OUT_RING (0); OUT_RING (0); OUT_RING (0); + + BEGIN_RING(rankine, 0x120, 3); + OUT_RING (0); + OUT_RING (1); + OUT_RING (2); + + BEGIN_RING(rankine, 0x1d88, 1); + OUT_RING (0x00001200); + + BEGIN_RING(rankine, NV34TCL_RC_ENABLE, 1); + OUT_RING (0); + + /* Attempt to setup a known state.. Probably missing a heap of + * stuff here.. + */ + BEGIN_RING(rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(rankine, NV34TCL_STENCIL_BACK_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(rankine, NV34TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(rankine, NV34TCL_DEPTH_WRITE_ENABLE, 2); + OUT_RING (0); /* wr disable */ + OUT_RING (0); /* test disable */ + BEGIN_RING(rankine, NV34TCL_COLOR_MASK, 1); + OUT_RING (0x01010101); /* TR,TR,TR,TR */ + BEGIN_RING(rankine, NV34TCL_CULL_FACE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_ENABLE, 5); + OUT_RING (0); /* Blend enable */ + OUT_RING (0); /* Blend src */ + OUT_RING (0); /* Blend dst */ + OUT_RING (0x00000000); /* Blend colour */ + OUT_RING (0x8006); /* FUNC_ADD */ + BEGIN_RING(rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + OUT_RING (0); + OUT_RING (0x1503 /*GL_COPY*/); + BEGIN_RING(rankine, NV34TCL_DITHER_ENABLE, 1); + OUT_RING (1); + BEGIN_RING(rankine, NV34TCL_SHADE_MODEL, 1); + OUT_RING (0x1d01 /*GL_SMOOTH*/); + BEGIN_RING(rankine, NV34TCL_POLYGON_OFFSET_FACTOR,2); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + BEGIN_RING(rankine, NV34TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (0x1b02 /*GL_FILL*/); + OUT_RING (0x1b02 /*GL_FILL*/); + /* - Disable texture units + * - Set fragprog to MOVR result.color, fragment.color */ + for (i=0;i<16;i++) { + BEGIN_RING(rankine, + NV34TCL_TX_ENABLE(i), 1); + OUT_RING (0); + } + /* Polygon stipple */ + BEGIN_RING(rankine, + NV34TCL_POLYGON_STIPPLE_PATTERN(0), 0x20); + for (i=0;i<0x20;i++) + OUT_RING (0xFFFFFFFF); + + int w=4096; + int h=4096; + int pitch=4096*4; + BEGIN_RING(rankine, NV34TCL_RT_HORIZ, 5); + OUT_RING (w<<16); + OUT_RING (h<<16); + OUT_RING (0x148); /* format */ + OUT_RING (pitch << 16 | pitch); + OUT_RING (0x0); + BEGIN_RING(rankine, 0x0a00, 2); + OUT_RING ((w<<16) | 0); + OUT_RING ((h<<16) | 0); + BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING ((w-1)<<16); + OUT_RING ((h-1)<<16); + BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2); + OUT_RING (w<<16); + OUT_RING (h<<16); + BEGIN_RING(rankine, NV34TCL_VIEWPORT_HORIZ, 2); + OUT_RING (w<<16); + OUT_RING (h<<16); + + BEGIN_RING(rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + OUT_RINGf (0.0); + + BEGIN_RING(rankine, NV34TCL_MODELVIEW_MATRIX(0), 16); + OUT_RINGf (1.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + + BEGIN_RING(rankine, NV34TCL_PROJECTION_MATRIX(0), 16); + OUT_RINGf (1.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + + BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2); + OUT_RING (4096<<16); + OUT_RING (4096<<16); + + BEGIN_RING(rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); + OUT_RING (0xffff0000); + + FIRE_RING (); + return TRUE; +} + +#define NV30TCL_CHIPSET_3X_MASK 0x00000003 +#define NV34TCL_CHIPSET_3X_MASK 0x00000010 +#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 + +struct pipe_context * +nv30_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, + unsigned chipset) +{ + struct nv30_context *nv30; + int rankine_class = 0, ret; + + if ((chipset & 0xf0) != 0x30) { + NOUVEAU_ERR("Not a NV3X chipset\n"); + return NULL; + } + + if (NV30TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) { + rankine_class = 0x0397; + } else if (NV34TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) { + rankine_class = 0x0697; + } else if (NV35TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) { + rankine_class = 0x0497; + } else { + NOUVEAU_ERR("Unknown NV3X chipset: NV%02x\n", chipset); + return NULL; + } + + nv30 = CALLOC_STRUCT(nv30_context); + if (!nv30) + return NULL; + nv30->chipset = chipset; + nv30->nvws = nvws; + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &nv30->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv30_destroy(&nv30->pipe); + return NULL; + } + + /* Query objects */ + ret = nvws->notifier_alloc(nvws, 32, &nv30->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nv30_destroy(&nv30->pipe); + return NULL; + } + + ret = nvws->res_init(&nv30->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nv30_destroy(&nv30->pipe); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&nv30->vertprog.exec_heap, 0, 512) || + nvws->res_init(&nv30->vertprog.data_heap, 0, 256)) { + nv30_destroy(&nv30->pipe); + return NULL; + } + + /* Static rankine initialisation */ + if (!nv30_init_hwctx(nv30, rankine_class)) { + nv30_destroy(&nv30->pipe); + return NULL; + } + + /* Pipe context setup */ + nv30->pipe.winsys = pipe_winsys; + + nv30->pipe.destroy = nv30_destroy; + nv30->pipe.get_name = nv30_get_name; + nv30->pipe.get_vendor = nv30_get_vendor; + nv30->pipe.get_param = nv30_get_param; + nv30->pipe.get_paramf = nv30_get_paramf; + + nv30->pipe.draw_arrays = nv30_draw_arrays; + nv30->pipe.draw_elements = nv30_draw_elements; + nv30->pipe.clear = nv30_clear; + + nv30->pipe.flush = nv30_flush; + + nv30_init_query_functions(nv30); + nv30_init_surface_functions(nv30); + nv30_init_state_functions(nv30); + nv30_init_miptree_functions(nv30); + + nv30->draw = draw_create(); + assert(nv30->draw); + draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30)); + + return &nv30->pipe; +} + diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h new file mode 100644 index 00000000000..d6d16ee8686 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -0,0 +1,136 @@ +#ifndef __NV30_CONTEXT_H__ +#define __NV30_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv30_context *ctx = nv30 +#include "nouveau/nouveau_push.h" + +#include "nv30_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +#define NV30_NEW_VERTPROG (1 << 1) +#define NV30_NEW_FRAGPROG (1 << 2) +#define NV30_NEW_ARRAYS (1 << 3) + +struct nv30_context { + struct pipe_context pipe; + struct nouveau_winsys *nvws; + + struct draw_context *draw; + + int chipset; + struct nouveau_grobj *rankine; + struct nouveau_notifier *sync; + + /* query objects */ + struct nouveau_notifier *query; + struct nouveau_resource *query_heap; + + uint32_t dirty; + + struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned dirty_samplers; + unsigned fp_samplers; + unsigned vp_samplers; + + uint32_t rt_enable; + struct pipe_buffer *rt[4]; + struct pipe_buffer *zeta; + + struct { + struct pipe_buffer *buffer; + uint32_t format; + } tex[16]; + + unsigned vb_enable; + struct { + struct pipe_buffer *buffer; + unsigned delta; + } vb[16]; + + struct { + struct nouveau_resource *exec_heap; + struct nouveau_resource *data_heap; + + struct nv30_vertex_program *active; + + struct nv30_vertex_program *current; + struct pipe_buffer *constant_buf; + } vertprog; + + struct { + struct nv30_fragment_program *active; + + struct nv30_fragment_program *current; + struct pipe_buffer *constant_buf; + } fragprog; + + struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; +}; + +static INLINE struct nv30_context * +nv30_context(struct pipe_context *pipe) +{ + return (struct nv30_context *)pipe; +} + +extern void nv30_init_state_functions(struct nv30_context *nv30); +extern void nv30_init_surface_functions(struct nv30_context *nv30); +extern void nv30_init_miptree_functions(struct nv30_context *nv30); +extern void nv30_init_query_functions(struct nv30_context *nv30); + +/* nv30_draw.c */ +extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30); + +/* nv30_vertprog.c */ +extern void nv30_vertprog_translate(struct nv30_context *, + struct nv30_vertex_program *); +extern void nv30_vertprog_bind(struct nv30_context *, + struct nv30_vertex_program *); +extern void nv30_vertprog_destroy(struct nv30_context *, + struct nv30_vertex_program *); + +/* nv30_fragprog.c */ +extern void nv30_fragprog_translate(struct nv30_context *, + struct nv30_fragment_program *); +extern void nv30_fragprog_bind(struct nv30_context *, + struct nv30_fragment_program *); +extern void nv30_fragprog_destroy(struct nv30_context *, + struct nv30_fragment_program *); + +/* nv30_fragtex.c */ +extern void nv30_fragtex_bind(struct nv30_context *); + +/* nv30_state.c and friends */ +extern void nv30_emit_hw_state(struct nv30_context *nv30); +extern void nv30_state_tex_update(struct nv30_context *nv30); + +/* nv30_vbo.c */ +extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv30_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + +/* nv30_clear.c */ +extern void nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c new file mode 100644 index 00000000000..59a72657996 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_draw.c @@ -0,0 +1,62 @@ +#include "draw/draw_private.h" +#include "pipe/p_util.h" + +#include "nv30_context.h" + +struct nv30_draw_stage { + struct draw_stage draw; + struct nv30_context *nv30; +}; + +static void +nv30_draw_point(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_line(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_flush(struct draw_stage *draw, unsigned flags) +{ +} + +static void +nv30_draw_reset_stipple_counter(struct draw_stage *draw) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_destroy(struct draw_stage *draw) +{ + free(draw); +} + +struct draw_stage * +nv30_draw_render_stage(struct nv30_context *nv30) +{ + struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage); + + nv30draw->nv30 = nv30; + nv30draw->draw.draw = nv30->draw; + nv30draw->draw.point = nv30_draw_point; + nv30draw->draw.line = nv30_draw_line; + nv30draw->draw.tri = nv30_draw_tri; + nv30draw->draw.flush = nv30_draw_flush; + nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter; + nv30draw->draw.destroy = nv30_draw_destroy; + + return &nv30draw->draw; +} + diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c new file mode 100644 index 00000000000..09ad555c324 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -0,0 +1,835 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" + +#include "nv30_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV30_FP_OP_COND_TR +#include "nv30_shader.h" + +#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv30_sr_neg((s)) +#define abs(s) nv30_sr_abs((s)) +#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nv30_fpc { + struct nv30_fragment_program *fp; + + uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + + int high_temp; + int temp_temp_count; + int num_regs; + + uint depth_id; + uint colour_id; + + unsigned inst_offset; + + struct { + int pipe; + float vals[4]; + } consts[MAX_CONSTS]; + int nr_consts; + + struct nv30_sreg imm[MAX_IMM]; + unsigned nr_imm; +}; + +static INLINE struct nv30_sreg +temp(struct nv30_fpc *fpc) +{ + int idx; + + idx = fpc->temp_temp_count++; + idx += fpc->high_temp + 1; + return nv30_sr(NV30SR_TEMP, idx); +} + +static INLINE struct nv30_sreg +constant(struct nv30_fpc *fpc, int pipe, float vals[4]) +{ + int idx; + + if (fpc->nr_consts == MAX_CONSTS) + assert(0); + idx = fpc->nr_consts++; + + fpc->consts[idx].pipe = pipe; + if (pipe == -1) + memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); + return nv30_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ + (d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ + nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) + +static void +grow_insns(struct nv30_fpc *fpc, int size) +{ + struct nv30_fragment_program *fp = fpc->fp; + + fp->insn_len += size; + fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) +{ + struct nv30_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + uint32_t sr = 0; + + switch (src.type) { + case NV30SR_INPUT: + sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); + break; + case NV30SR_OUTPUT: + sr |= NV30_FP_REG_SRC_HALF; + /* fall-through */ + case NV30SR_TEMP: + sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); + sr |= (src.index << NV30_FP_REG_SRC_SHIFT); + break; + case NV30SR_CONST: + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + if (fpc->consts[src.index].pipe >= 0) { + struct nv30_fragment_program_data *fpd; + + fp->consts = realloc(fp->consts, ++fp->nr_consts * + sizeof(*fpd)); + fpd = &fp->consts[fp->nr_consts - 1]; + fpd->offset = fpc->inst_offset + 4; + fpd->index = fpc->consts[src.index].pipe; + memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); + } else { + memcpy(&fp->insn[fpc->inst_offset + 4], + fpc->consts[src.index].vals, + sizeof(uint32_t) * 4); + } + + sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); + break; + case NV30SR_NONE: + sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV30_FP_REG_NEGATE; + + if (src.abs) + hw[1] |= (1 << (29 + pos)); + + sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); + + hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) +{ + struct nv30_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + + switch (dst.type) { + case NV30SR_TEMP: + if (fpc->num_regs < (dst.index + 1)) + fpc->num_regs = dst.index + 1; + break; + case NV30SR_OUTPUT: + if (dst.index == 1) { + fp->fp_control |= 0xe; + } else { + hw[0] |= NV30_FP_OP_OUT_REG_HALF; + } + break; + case NV30SR_NONE: + hw[0] |= (1 << 30); + break; + default: + assert(0); + } + + hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); +} + +static void +nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, + struct nv30_sreg dst, int mask, + struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) +{ + struct nv30_fragment_program *fp = fpc->fp; + uint32_t *hw; + + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + memset(hw, 0, sizeof(uint32_t) * 4); + + if (op == NV30_FP_OP_OPCODE_KIL) + fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; + hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); + + if (sat) + hw[0] |= NV30_FP_OP_OUT_SAT; + + if (dst.cc_update) + hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); + + emit_dst(fpc, dst); + emit_src(fpc, 0, s0); + emit_src(fpc, 1, s1); + emit_src(fpc, 2, s2); +} + +static void +nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, + struct nv30_sreg dst, int mask, + struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) +{ + struct nv30_fragment_program *fp = fpc->fp; + + nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + + fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); + fp->samplers |= (1 << unit); +} + +static INLINE struct nv30_sreg +tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ + struct nv30_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv30_sr(NV30SR_INPUT, + fpc->attrib_map[fsrc->SrcRegister.Index]); + break; + case TGSI_FILE_CONSTANT: + src = constant(fpc, fsrc->SrcRegister.Index, NULL); + break; + case TGSI_FILE_IMMEDIATE: + assert(fsrc->SrcRegister.Index < fpc->nr_imm); + src = fpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1); + if (fpc->high_temp < src.index) + fpc->high_temp = src.index; + break; + /* This is clearly insane, but gallium hands us shaders like this. + * Luckily fragprog results are just temp regs.. + */ + case TGSI_FILE_OUTPUT: + if (fsrc->SrcRegister.Index == fpc->colour_id) + return nv30_sr(NV30SR_OUTPUT, 0); + else + return nv30_sr(NV30SR_OUTPUT, 1); + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv30_sreg +tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { + int idx; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + if (fdst->DstRegister.Index == fpc->colour_id) + return nv30_sr(NV30SR_OUTPUT, 0); + else + return nv30_sr(NV30SR_OUTPUT, 1); + break; + case TGSI_FILE_TEMPORARY: + idx = fdst->DstRegister.Index + 1; + if (fpc->high_temp < idx) + fpc->high_temp = idx; + return nv30_sr(NV30SR_TEMP, idx); + case TGSI_FILE_NULL: + return nv30_sr(NV30SR_NONE, 0); + default: + NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + return nv30_sr(NV30SR_NONE, 0); + } +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, + struct nv30_sreg *src) +{ + const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); + struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, + fsrc->SrcRegisterExtSwz.NegateZ, + fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= (1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= (1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(fpc); + + if (mask) + arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(fpc, 0, STR, *src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv30_sreg one = temp(fpc); + arith(fpc, 0, STR, one, neg_mask, one, none, none); + arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + +static boolean +nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, + const struct tgsi_full_instruction *finst) +{ + const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); + struct nv30_sreg src[3], dst, tmp; + int mask, sat, unit; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + fpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(fpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(fpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + NOUVEAU_MSG("extra src attr %d\n", + fsrc->SrcRegister.Index); + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + case TGSI_FILE_SAMPLER: + unit = fsrc->SrcRegister.Index; + break; + case TGSI_FILE_OUTPUT: + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_CMP: + tmp = temp(fpc); + arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp.cc_update = 1; + arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV30_VP_INST_COND_LT; + arith(fpc, sat, MOV, dst, mask, src[1], none, none); + break; + case TGSI_OPCODE_COS: + arith(fpc, sat, COS, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), + swz(src[1], W, W, W, W), none); + break; + case TGSI_OPCODE_DST: + arith(fpc, sat, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(fpc, sat, EX2, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FLR: + arith(fpc, sat, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(fpc, sat, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_KIL: + arith(fpc, 0, KIL, none, 0, none, none, none); + break; + case TGSI_OPCODE_KILP: + dst = nv30_sr(NV30SR_NONE, 0); + dst.cc_update = 1; + arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; + arith(fpc, 0, KIL, dst, 0, none, none, none); + break; + case TGSI_OPCODE_LG2: + arith(fpc, sat, LG2, dst, mask, src[0], none, none); + break; +// case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LRP: + tmp = temp(fpc); + arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); + arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + break; + case TGSI_OPCODE_MAD: + arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(fpc, sat, MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(fpc); + arith(fpc, 0, LG2, tmp, MASK_X, + swz(src[0], X, X, X, X), none, none); + arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(fpc, sat, EX2, dst, mask, + swz(tmp, X, X, X, X), none, none); + break; + case TGSI_OPCODE_RCP: + arith(fpc, sat, RCP, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_RET: + assert(0); + break; + case TGSI_OPCODE_RFL: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); + arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); + arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, + swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + arith(fpc, sat, MAD, dst, mask, + swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + break; + case TGSI_OPCODE_RSQ: + tmp = temp(fpc); + arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, + abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, EX2, dst, mask, + neg(swz(tmp, X, X, X, X)), none, none); + break; + case TGSI_OPCODE_SCS: + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + break; + case TGSI_OPCODE_SIN: + arith(fpc, sat, SIN, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_SGE: + arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); + break; + case TGSI_OPCODE_TEX: + if (finst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide == + TGSI_EXTSWIZZLE_W) { + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + } else + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXB: + tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_XPD: + tmp = temp(fpc); + arith(fpc, 0, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV30_FP_OP_INPUT_SRC_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_FP_OP_INPUT_SRC_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_FP_OP_INPUT_SRC_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV30_FP_OP_INPUT_SRC_FOGC; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. + SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad input semantic\n"); + return FALSE; + } + + fpc->attrib_map[fdec->u.DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + fpc->depth_id = fdec->u.DeclarationRange.First; + break; + case TGSI_SEMANTIC_COLOR: + fpc->colour_id = fdec->u.DeclarationRange.First; + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + return TRUE; +} + +void +nv30_fragprog_translate(struct nv30_context *nv30, + struct nv30_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nv30_fpc *fpc = NULL; + + fpc = CALLOC(1, sizeof(struct nv30_fpc)); + if (!fpc) + return; + fpc->fp = fp; + fpc->high_temp = -1; + fpc->num_regs = 2; + + tgsi_parse_init(&parse, fp->pipe->tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &parse.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nv30_fragprog_parse_decl_attrib(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_OUTPUT: + if (!nv30_fragprog_parse_decl_output(fpc, fdec)) + goto out_err; + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm; + float vals[4]; + int i; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(fpc->nr_imm < MAX_IMM); + + for (i = 0; i < 4; i++) + vals[i] = imm->u.ImmediateFloat32[i].Float; + fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + finst = &parse.FullToken.FullInstruction; + if (!nv30_fragprog_parse_instruction(fpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + fp->fp_control |= (fpc->num_regs-1)/2; + fp->fp_reg_control = (1<<16)|0x4; + + /* Terminate final instruction */ + fp->insn[fpc->inst_offset] |= 0x00000001; + + /* Append NOP + END instruction, may or may not be necessary. */ + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + fp->insn[fpc->inst_offset + 0] = 0x00000001; + fp->insn[fpc->inst_offset + 1] = 0x00000000; + fp->insn[fpc->inst_offset + 2] = 0x00000000; + fp->insn[fpc->inst_offset + 3] = 0x00000000; + + fp->translated = TRUE; + fp->on_hw = FALSE; +out_err: + tgsi_parse_free(&parse); + free(fpc); +} + +void +nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp) +{ + struct pipe_winsys *ws = nv30->pipe.winsys; + int i; + + if (!fp->translated) { + nv30_fragprog_translate(nv30, fp); + if (!fp->translated) + assert(0); + } + + if (fp->nr_consts) { + float *map = ws->buffer_map(ws, nv30->fragprog.constant_buf, + PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < fp->nr_consts; i++) { + struct nv30_fragment_program_data *fpd = &fp->consts[i]; + uint32_t *p = &fp->insn[fpd->offset]; + uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + + if (!memcmp(p, cb, 4 * sizeof(float))) + continue; + memcpy(p, cb, 4 * sizeof(float)); + fp->on_hw = 0; + } + ws->buffer_unmap(ws, nv30->fragprog.constant_buf); + } + + if (!fp->on_hw) { + const uint32_t le = 1; + uint32_t *map; + + if (!fp->buffer) + fp->buffer = ws->buffer_create(ws, 0x100, 0, + fp->insn_len * 4); + map = ws->buffer_map(ws, fp->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 + for (i = 0; i < fp->insn_len; i++) { + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + } +#endif + + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); + } + } + + ws->buffer_unmap(ws, fp->buffer); + fp->on_hw = TRUE; + } + + BEGIN_RING(rankine, NV34TCL_FP_CONTROL, 1); + OUT_RING (fp->fp_control); + BEGIN_RING(rankine, NV34TCL_FP_REG_CONTROL, 1); + OUT_RING (fp->fp_reg_control); + + nv30->fragprog.active = fp; +} + +void +nv30_fragprog_destroy(struct nv30_context *nv30, + struct nv30_fragment_program *fp) +{ + if (fp->insn_len) + free(fp->insn); +} + diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c new file mode 100644 index 00000000000..45ee6db8d6a --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -0,0 +1,160 @@ +#include "nv30_context.h" + +static INLINE int log2i(int i) +{ + int r = 0; + + if (i & 0xffff0000) { + i >>= 16; + r += 16; + } + if (i & 0x0000ff00) { + i >>= 8; + r += 8; + } + if (i & 0x000000f0) { + i >>= 4; + r += 4; + } + if (i & 0x0000000c) { + i >>= 2; + r += 2; + } + if (i & 0x00000002) { + r += 1; + } + return r; +} + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV34TCL_TX_FORMAT_FORMAT_##tf, \ + (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ + NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ + NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ + NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \ +} + +struct nv30_texture_format { + boolean defined; + uint pipe; + int format; + int swizzle; +}; + +static struct nv30_texture_format +nv30_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), + _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), + _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), +// _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), + _(U_L8 , L8 , S1, S1, S1, ONE, X, X, X, X), + _(U_A8 , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), + _(U_I8 , L8 , S1, S1, S1, S1, X, X, X, X), + _(U_A8_L8 , A8L8 , S1, S1, S1, S1, X, X, X, Y), +// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), +// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), +// _(RGB_DXT1 , 0x86, S1, S1, S1, ONE, X, Y, Z, W, 0x00, 0x00), +// _(RGBA_DXT1 , 0x86, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), +// _(RGBA_DXT3 , 0x87, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), +// _(RGBA_DXT5 , 0x88, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), + {}, +}; + +static struct nv30_texture_format * +nv30_fragtex_format(uint pipe_format) +{ + struct nv30_texture_format *tf = nv30_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + return NULL; +} + + +static void +nv30_fragtex_build(struct nv30_context *nv30, int unit) +{ + struct nv30_sampler_state *ps = nv30->tex_sampler[unit]; + struct nv30_miptree *nv30mt = nv30->tex_miptree[unit]; + struct pipe_texture *pt = &nv30mt->base; + struct nv30_texture_format *tf; + uint32_t txf, txs, txp; + int swizzled = 0; /*XXX: implement in region code? */ + + tf = nv30_fragtex_format(pt->format); + if (!tf || !tf->defined) { + NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); + return; + } + + txf = tf->format << 8; + txf |= (pt->last_level + 1) << 16; + txf |= log2i(pt->width[0]) << 20; + txf |= log2i(pt->height[0]) << 24; + txf |= log2i(pt->depth[0]) << 28; + txf |= 8; + + switch (pt->target) { +/* case PIPE_TEXTURE_CUBE: + txf |= NV34TCL_TEX_FORMAT_CUBIC;*/ + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= (2<<4); + break; + case PIPE_TEXTURE_3D: + txf |= (3<<4); + break; + case PIPE_TEXTURE_1D: + txf |= (1<<4); + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return; + } + + txs = tf->swizzle; + + BEGIN_RING(rankine, NV34TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(nv30mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv30mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (ps->wrap); + OUT_RING (0x40000000); /* enable */ + OUT_RING (txs); + OUT_RING (ps->filt | 0x2000 /* magic */); + OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING (ps->bcol); +} + +void +nv30_fragtex_bind(struct nv30_context *nv30) +{ + struct nv30_fragment_program *fp = nv30->fragprog.active; + unsigned samplers, unit; + + samplers = nv30->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + BEGIN_RING(rankine, NV34TCL_TX_ENABLE(unit), 1); + OUT_RING (0); + } + + samplers = nv30->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + nv30_fragtex_build(nv30, unit); + } + + nv30->fp_samplers = fp->samplers; +} + diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c new file mode 100644 index 00000000000..5fb89f4cfdc --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -0,0 +1,105 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" + +#include "nv30_context.h" + +static void +nv30_miptree_layout(struct nv30_miptree *nv30mt) +{ + struct pipe_texture *pt = &nv30mt->base; + boolean swizzled = FALSE; + uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint offset = 0; + int nr_faces, l, f; + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else + if (pt->target == PIPE_TEXTURE_3D) { + nr_faces = pt->depth[0]; + } else { + nr_faces = 1; + } + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->depth[l] = depth; + + if (swizzled) + nv30mt->level[l].pitch = pt->width[l] * pt->cpp; + else + nv30mt->level[l].pitch = pt->width[0] * pt->cpp; + nv30mt->level[l].pitch = (nv30mt->level[l].pitch + 63) & ~63; + + nv30mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + nv30mt->level[l].image_offset[f] = offset; + offset += nv30mt->level[l].pitch * pt->height[l]; + } + } + + nv30mt->total_size = offset; +} + +static void +nv30_miptree_create(struct pipe_context *pipe, struct pipe_texture **pt) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv30_miptree *nv30mt; + + nv30mt = realloc(*pt, sizeof(struct nv30_miptree)); + if (!nv30mt) + return; + *pt = NULL; + + nv30_miptree_layout(nv30mt); + + nv30mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + nv30mt->total_size); + if (!nv30mt->buffer) { + free(nv30mt); + return; + } + + *pt = &nv30mt->base; +} + +static void +nv30_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + struct pipe_winsys *ws = pipe->winsys; + struct pipe_texture *mt = *pt; + + *pt = NULL; + if (--mt->refcount <= 0) { + struct nv30_miptree *nv30mt = (struct nv30_miptree *)mt; + int l; + + pipe_buffer_reference(ws, &nv30mt->buffer, NULL); + for (l = 0; l <= mt->last_level; l++) { + if (nv30mt->level[l].image_offset) + free(nv30mt->level[l].image_offset); + } + free(nv30mt); + } +} + +void +nv30_init_miptree_functions(struct nv30_context *nv30) +{ + nv30->pipe.texture_create = nv30_miptree_create; + nv30->pipe.texture_release = nv30_miptree_release; +} + diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c new file mode 100644 index 00000000000..71fdcfa24df --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -0,0 +1,113 @@ +#include "pipe/p_context.h" +#include "pipe/p_util.h" + +#include "nv30_context.h" + +struct nv30_query { + struct nouveau_resource *object; + unsigned type; + boolean ready; + uint64_t result; +}; + +static inline struct nv30_query * +nv30_query(struct pipe_query *pipe) +{ + return (struct nv30_query *)pipe; +} + +static struct pipe_query * +nv30_query_create(struct pipe_context *pipe, unsigned query_type) +{ + struct nv30_query *q; + + q = CALLOC(1, sizeof(struct nv30_query)); + q->type = query_type; + + return (struct pipe_query *)q; +} + +static void +nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + + if (q->object) + nv30->nvws->res_free(&q->object); + free(q); +} + +static void +nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (nv30->nvws->res_alloc(nv30->query_heap, 1, NULL, &q->object)) + assert(0); + nv30->nvws->notifier_reset(nv30->query, q->object->start); + + BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); + OUT_RING (1); + BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (1); + + q->ready = FALSE; +} + +static void +nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + + BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); + OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING(); +} + +static boolean +nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64 *result) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + struct nouveau_winsys *nvws = nv30->nvws; + + assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (!q->ready) { + unsigned status; + + status = nvws->notifier_status(nv30->query, q->object->start); + if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { + if (wait == FALSE) + return FALSE; + nvws->notifier_wait(nv30->query, q->object->start, + NV_NOTIFY_STATE_STATUS_COMPLETED, + 0); + } + + q->result = nvws->notifier_retval(nv30->query, + q->object->start); + q->ready = TRUE; + nvws->res_free(&q->object); + } + + *result = q->result; + return TRUE; +} + +void +nv30_init_query_functions(struct nv30_context *nv30) +{ + nv30->pipe.create_query = nv30_query_create; + nv30->pipe.destroy_query = nv30_query_destroy; + nv30->pipe.begin_query = nv30_query_begin; + nv30->pipe.end_query = nv30_query_end; + nv30->pipe.get_query_result = nv30_query_result; +} diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h new file mode 100644 index 00000000000..dd3a36f78f3 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_shader.h @@ -0,0 +1,490 @@ +#ifndef __NV30_SHADER_H__ +#define __NV30_SHADER_H__ + +/* Vertex programs instruction set + * + * 128bit opcodes, split into 4 32-bit ones for ease of use. + * + * Non-native instructions + * ABS - MOV + NV40_VP_INST0_DEST_ABS + * POW - EX2 + MUL + LG2 + * SUB - ADD, second source negated + * SWZ - MOV + * XPD - + * + * Register access + * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) + * - Only one CONST can be accessed per-instruction (move extras into TEMPs) + * + * Relative Addressing + * According to the value returned for + * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB + * + * there are only two address registers available. The destination in the + * ARL instruction is set to TEMP <n> (The temp isn't actually written). + * + * When using vanilla ARB_v_p, the proprietary driver will squish both the + * available ADDRESS regs into the first hardware reg in the X and Y + * components. + * + * To use an address reg as an index into consts, the CONST_SRC is set to + * (const_base + offset) and INDEX_CONST is set. + * + * To access the second address reg use ADDR_REG_SELECT_1. A particular + * component of the address regs is selected with ADDR_SWZ. + * + * Only one address register can be accessed per instruction. + * + * Conditional execution (see NV_vertex_program{2,3} for details) Conditional + * execution of an instruction is enabled by setting COND_TEST_ENABLE, and + * selecting the condition which will allow the test to pass with + * COND_{FL,LT,...}. It is possible to swizzle the values in the condition + * register, which allows for testing against an individual component. + * + * Branching: + * + * The BRA/CAL instructions seem to follow a slightly different opcode + * layout. The destination instruction ID (IADDR) overlaps a source field. + * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO + * command, and is incremented automatically on each UPLOAD_INST FIFO + * command. + * + * Conditional branching is achieved by using the condition tests described + * above. There doesn't appear to be dedicated looping instructions, but + * this can be done using a temp reg + conditional branching. + * + * Subroutines may be uploaded before the main program itself, but the first + * executed instruction is determined by the PROGRAM_START_ID FIFO command. + * + */ + +/* DWORD 0 */ + +#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ +#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ +#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ +#define NV30_VP_INST_VEC_RESULT (1 << 20) +#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 +#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) +#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) +#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) +#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) +#define NV30_VP_INST_COND_SHIFT 11 +#define NV30_VP_INST_COND_MASK (0x07 << 11) +# define NV30_VP_INST_COND_FL 0 /* guess */ +# define NV30_VP_INST_COND_LT 1 +# define NV30_VP_INST_COND_EQ 2 +# define NV30_VP_INST_COND_LE 3 +# define NV30_VP_INST_COND_GT 4 +# define NV30_VP_INST_COND_NE 5 +# define NV30_VP_INST_COND_GE 6 +# define NV30_VP_INST_COND_TR 7 /* guess */ +#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 +#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) +#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 +#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) +#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 +#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) +#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) +#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) +#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 +#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) +#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 +#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) + +/* DWORD 1 */ +#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 +#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) +# define NV30_VP_INST_OP_NOP 0x00 +# define NV30_VP_INST_OP_RCP 0x02 +# define NV30_VP_INST_OP_RCC 0x03 +# define NV30_VP_INST_OP_RSQ 0x04 +# define NV30_VP_INST_OP_EXP 0x05 +# define NV30_VP_INST_OP_LOG 0x06 +# define NV30_VP_INST_OP_LIT 0x07 +# define NV30_VP_INST_OP_BRA 0x09 +# define NV30_VP_INST_OP_CAL 0x0B +# define NV30_VP_INST_OP_RET 0x0C +# define NV30_VP_INST_OP_LG2 0x0D +# define NV30_VP_INST_OP_EX2 0x0E +# define NV30_VP_INST_OP_SIN 0x0F +# define NV30_VP_INST_OP_COS 0x10 +#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 +#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) +# define NV30_VP_INST_OP_NOPV 0x00 +# define NV30_VP_INST_OP_MOV 0x01 +# define NV30_VP_INST_OP_MUL 0x02 +# define NV30_VP_INST_OP_ADD 0x03 +# define NV30_VP_INST_OP_MAD 0x04 +# define NV30_VP_INST_OP_DP3 0x05 +# define NV30_VP_INST_OP_DP4 0x07 +# define NV30_VP_INST_OP_DPH 0x06 +# define NV30_VP_INST_OP_DST 0x08 +# define NV30_VP_INST_OP_MIN 0x09 +# define NV30_VP_INST_OP_MAX 0x0A +# define NV30_VP_INST_OP_SLT 0x0B +# define NV30_VP_INST_OP_SGE 0x0C +# define NV30_VP_INST_OP_ARL 0x0D +# define NV30_VP_INST_OP_FRC 0x0E +# define NV30_VP_INST_OP_FLR 0x0F +# define NV30_VP_INST_OP_SEQ 0x10 +# define NV30_VP_INST_OP_SFL 0x11 +# define NV30_VP_INST_OP_SGT 0x12 +# define NV30_VP_INST_OP_SLE 0x13 +# define NV30_VP_INST_OP_SNE 0x14 +# define NV30_VP_INST_OP_STR 0x15 +# define NV30_VP_INST_OP_SSG 0x16 +# define NV30_VP_INST_OP_ARR 0x17 +# define NV30_VP_INST_OP_ARA 0x18 +#define NV30_VP_INST_CONST_SRC_SHIFT 14 +#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) +#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ +#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ +# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ +# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ +# define NV30_VP_INST_IN_NORMAL 2 +# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ +# define NV30_VP_INST_IN_COL1 4 +# define NV30_VP_INST_IN_FOGC 5 +# define NV30_VP_INST_IN_TC0 8 +# define NV30_VP_INST_IN_TC(n) (8+n) +#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ + +/* Please note: the IADDR fields overlap other fields because they are used + * only for branch instructions. See Branching: label above + * + * DWORD 2 + */ +#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ +#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ +#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ +#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ +#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ +#define NV30_VP_INST_IADDR_SHIFT 2 +#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ + +/* DWORD 3 */ +#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ +#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ +#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 +#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) +#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 +#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) +#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 +#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) +#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ +#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ +#define NV30_VP_INST_DEST_SHIFT 2 +#define NV30_VP_INST_DEST_MASK (0x0F << 2) +# define NV30_VP_INST_DEST_POS 0 +# define NV30_VP_INST_DEST_BFC0 1 +# define NV30_VP_INST_DEST_BFC1 2 +# define NV30_VP_INST_DEST_COL0 3 +# define NV30_VP_INST_DEST_COL1 4 +# define NV30_VP_INST_DEST_FOGC 5 +# define NV30_VP_INST_DEST_PSZ 6 +# define NV30_VP_INST_DEST_TC(n) (8+n) + +#define NV30_VP_INST_LAST (1 << 0) + +/* Useful to split the source selection regs into their pieces */ +#define NV30_VP_SRC0_HIGH_SHIFT 6 +#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 +#define NV30_VP_SRC0_LOW_MASK 0x0000003F +#define NV30_VP_SRC2_HIGH_SHIFT 4 +#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 +#define NV30_VP_SRC2_LOW_MASK 0x0000000F + + +/* Source-register definition - matches NV20 exactly */ +#define NV30_VP_SRC_NEGATE (1<<14) +#define NV30_VP_SRC_SWZ_X_SHIFT 12 +#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) +#define NV30_VP_SRC_SWZ_Y_SHIFT 10 +#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) +#define NV30_VP_SRC_SWZ_Z_SHIFT 8 +#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) +#define NV30_VP_SRC_SWZ_W_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) +#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) +#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) +#define NV30_VP_SRC_REG_TYPE_SHIFT 0 +#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) +#define NV30_VP_SRC_REG_TYPE_TEMP 1 +#define NV30_VP_SRC_REG_TYPE_INPUT 2 +#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 + * otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO + * is implemented simply by not writing to the relevant components of the destination. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + */ + +//== Opcode / Destination selection == +#define NV30_FP_OP_PROGRAM_END (1 << 0) +#define NV30_FP_OP_OUT_REG_SHIFT 1 +#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ +/* Needs to be set when writing outputs to get expected result.. */ +#define NV30_FP_OP_OUT_REG_HALF (1 << 7) +#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NV30_FP_OP_OUTMASK_SHIFT 9 +#define NV30_FP_OP_OUTMASK_MASK (0xF << 9) +# define NV30_FP_OP_OUT_X (1<<9) +# define NV30_FP_OP_OUT_Y (1<<10) +# define NV30_FP_OP_OUT_Z (1<<11) +# define NV30_FP_OP_OUT_W (1<<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NV30_FP_OP_INPUT_SRC_SHIFT 13 +#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NV30_FP_OP_INPUT_SRC_POSITION 0x0 +# define NV30_FP_OP_INPUT_SRC_COL0 0x1 +# define NV30_FP_OP_INPUT_SRC_COL1 0x2 +# define NV30_FP_OP_INPUT_SRC_FOGC 0x3 +# define NV30_FP_OP_INPUT_SRC_TC0 0x4 +# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +#define NV30_FP_OP_TEX_UNIT_SHIFT 17 +#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ +#define NV30_FP_OP_PRECISION_SHIFT 22 +#define NV30_FP_OP_PRECISION_MASK (3 << 22) +# define NV30_FP_PRECISION_FP32 0 +# define NV30_FP_PRECISION_FP16 1 +# define NV30_FP_PRECISION_FX12 2 +#define NV30_FP_OP_OPCODE_SHIFT 24 +#define NV30_FP_OP_OPCODE_MASK (0x3F << 24) +# define NV30_FP_OP_OPCODE_NOP 0x00 +# define NV30_FP_OP_OPCODE_MOV 0x01 +# define NV30_FP_OP_OPCODE_MUL 0x02 +# define NV30_FP_OP_OPCODE_ADD 0x03 +# define NV30_FP_OP_OPCODE_MAD 0x04 +# define NV30_FP_OP_OPCODE_DP3 0x05 +# define NV30_FP_OP_OPCODE_DP4 0x06 +# define NV30_FP_OP_OPCODE_DST 0x07 +# define NV30_FP_OP_OPCODE_MIN 0x08 +# define NV30_FP_OP_OPCODE_MAX 0x09 +# define NV30_FP_OP_OPCODE_SLT 0x0A +# define NV30_FP_OP_OPCODE_SGE 0x0B +# define NV30_FP_OP_OPCODE_SLE 0x0C +# define NV30_FP_OP_OPCODE_SGT 0x0D +# define NV30_FP_OP_OPCODE_SNE 0x0E +# define NV30_FP_OP_OPCODE_SEQ 0x0F +# define NV30_FP_OP_OPCODE_FRC 0x10 +# define NV30_FP_OP_OPCODE_FLR 0x11 +# define NV30_FP_OP_OPCODE_KIL 0x12 +# define NV30_FP_OP_OPCODE_PK4B 0x13 +# define NV30_FP_OP_OPCODE_UP4B 0x14 +# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ +# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ +# define NV30_FP_OP_OPCODE_TEX 0x17 +# define NV30_FP_OP_OPCODE_TXP 0x18 +# define NV30_FP_OP_OPCODE_TXD 0x19 +# define NV30_FP_OP_OPCODE_RCP 0x1A +# define NV30_FP_OP_OPCODE_RSQ 0x1B +# define NV30_FP_OP_OPCODE_EX2 0x1C +# define NV30_FP_OP_OPCODE_LG2 0x1D +# define NV30_FP_OP_OPCODE_LIT 0x1E +# define NV30_FP_OP_OPCODE_LRP 0x1F +# define NV30_FP_OP_OPCODE_STR 0x20 +# define NV30_FP_OP_OPCODE_SFL 0x21 +# define NV30_FP_OP_OPCODE_COS 0x22 +# define NV30_FP_OP_OPCODE_SIN 0x23 +# define NV30_FP_OP_OPCODE_PK2H 0x24 +# define NV30_FP_OP_OPCODE_UP2H 0x25 +# define NV30_FP_OP_OPCODE_POW 0x26 +# define NV30_FP_OP_OPCODE_PK4UB 0x27 +# define NV30_FP_OP_OPCODE_UP4UB 0x28 +# define NV30_FP_OP_OPCODE_PK2US 0x29 +# define NV30_FP_OP_OPCODE_UP2US 0x2A +# define NV30_FP_OP_OPCODE_DP2A 0x2E +# define NV30_FP_OP_OPCODE_TXB 0x31 +# define NV30_FP_OP_OPCODE_RFL 0x36 +# define NV30_FP_OP_OPCODE_DIV 0x3A +#define NV30_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NV30_FP_OP_OUT_ABS (1 << 29) +#define NV30_FP_OP_COND_SWZ_W_SHIFT 27 +#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NV30_FP_OP_COND_SWZ_X_SHIFT 21 +#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NV30_FP_OP_COND_SHIFT 18 +#define NV30_FP_OP_COND_MASK (0x07 << 18) +# define NV30_FP_OP_COND_FL 0 +# define NV30_FP_OP_COND_LT 1 +# define NV30_FP_OP_COND_EQ 2 +# define NV30_FP_OP_COND_LE 3 +# define NV30_FP_OP_COND_GT 4 +# define NV30_FP_OP_COND_NE 5 +# define NV30_FP_OP_COND_GE 6 +# define NV30_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV30_FP_OP_DST_SCALE_SHIFT 28 +#define NV30_FP_OP_DST_SCALE_MASK (3 << 28) +#define NV30_FP_OP_DST_SCALE_1X 0 +#define NV30_FP_OP_DST_SCALE_2X 1 +#define NV30_FP_OP_DST_SCALE_4X 2 +#define NV30_FP_OP_DST_SCALE_8X 3 +#define NV30_FP_OP_DST_SCALE_INV_2X 5 +#define NV30_FP_OP_DST_SCALE_INV_4X 6 +#define NV30_FP_OP_DST_SCALE_INV_8X 7 + + +/* high order bits of SRC2 */ +#define NV30_FP_OP_INDEX_INPUT (1 << 30) + +//== Register selection == +#define NV30_FP_REG_TYPE_SHIFT 0 +#define NV30_FP_REG_TYPE_MASK (3 << 0) +# define NV30_FP_REG_TYPE_TEMP 0 +# define NV30_FP_REG_TYPE_INPUT 1 +# define NV30_FP_REG_TYPE_CONST 2 +#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */ +#define NV30_FP_REG_SRC_MASK (31 << 2) +#define NV30_FP_REG_SRC_HALF (1 << 8) +#define NV30_FP_REG_SWZ_ALL_SHIFT 9 +#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NV30_FP_REG_SWZ_X_SHIFT 9 +#define NV30_FP_REG_SWZ_X_MASK (3 << 9) +#define NV30_FP_REG_SWZ_Y_SHIFT 11 +#define NV30_FP_REG_SWZ_Y_MASK (3 << 11) +#define NV30_FP_REG_SWZ_Z_SHIFT 13 +#define NV30_FP_REG_SWZ_Z_MASK (3 << 13) +#define NV30_FP_REG_SWZ_W_SHIFT 15 +#define NV30_FP_REG_SWZ_W_MASK (3 << 15) +# define NV30_FP_SWIZZLE_X 0 +# define NV30_FP_SWIZZLE_Y 1 +# define NV30_FP_SWIZZLE_Z 2 +# define NV30_FP_SWIZZLE_W 3 +#define NV30_FP_REG_NEGATE (1 << 17) + +#define NV30SR_NONE 0 +#define NV30SR_OUTPUT 1 +#define NV30SR_INPUT 2 +#define NV30SR_TEMP 3 +#define NV30SR_CONST 4 + +struct nv30_sreg { + int type; + int index; + + int dst_scale; + + int negate; + int abs; + int swz[4]; + + int cc_update; + int cc_update_reg; + int cc_test; + int cc_test_reg; + int cc_swz[4]; +}; + +static INLINE struct nv30_sreg +nv30_sr(int type, int index) +{ + struct nv30_sreg temp = { + .type = type, + .index = index, + .dst_scale = DEF_SCALE, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = DEF_CTEST, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + }; + return temp; +} + +static INLINE struct nv30_sreg +nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w) +{ + struct nv30_sreg dst = src; + + dst.swz[SWZ_X] = src.swz[x]; + dst.swz[SWZ_Y] = src.swz[y]; + dst.swz[SWZ_Z] = src.swz[z]; + dst.swz[SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nv30_sreg +nv30_sr_neg(struct nv30_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nv30_sreg +nv30_sr_abs(struct nv30_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nv30_sreg +nv30_sr_scale(struct nv30_sreg src, int scale) +{ + src.dst_scale = scale; + return src; +} + +#endif diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c new file mode 100644 index 00000000000..53368561e07 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -0,0 +1,739 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +static void * +nv30_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv30_blend_state *cb; + + cb = malloc(sizeof(struct nv30_blend_state)); + + cb->b_enable = cso->blend_enable ? 1 : 0; + cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | + (nvgl_blend_func(cso->rgb_src_factor))); + cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | + (nvgl_blend_func(cso->rgb_dst_factor))); + cb->b_eqn = ((nvgl_blend_eqn(cso->alpha_func) << 16) | + (nvgl_blend_eqn(cso->rgb_func))); + + cb->l_enable = cso->logicop_enable ? 1 : 0; + cb->l_op = nvgl_logicop_func(cso->logicop_func); + + cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); + + cb->d_enable = cso->dither ? 1 : 0; + + return (void *)cb; +} + +static void +nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_blend_state *cb = hwcso; + + BEGIN_RING(rankine, NV34TCL_DITHER_ENABLE, 1); + OUT_RING (cb->d_enable); + + BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); + OUT_RING (cb->b_enable); + OUT_RING (cb->b_srcfunc); + OUT_RING (cb->b_dstfunc); + BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_EQUATION, 1); + OUT_RING (cb->b_eqn); + + BEGIN_RING(rankine, NV34TCL_COLOR_MASK, 1); + OUT_RING (cb->c_mask); + + BEGIN_RING(rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + OUT_RING (cb->l_enable); + OUT_RING (cb->l_op); +} + +static void +nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV34TCL_TX_WRAP_S_CLAMP; + break; +/* case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP; + break;*/ + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + } + + return ret >> NV34TCL_TX_WRAP_S_SHIFT; +} + +static void * +nv30_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv30_sampler_state *ps; + uint32_t filter = 0; + + ps = malloc(sizeof(struct nv30_sampler_state)); + + ps->fmt = 0; + if (!cso->normalized_coords) + ps->fmt |= NV34TCL_TX_FORMAT_RECT; + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | + (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); + + ps->en = 0; + if (cso->max_anisotropy >= 2.0) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + +/* if (cso->max_anisotropy >= 16.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; + } else { + ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; + }*/ + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + ps->filt = filter; + +/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + }*/ + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv30_sampler_state_bind(struct pipe_context *pipe, unsigned unit, + void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_sampler_state *ps = hwcso; + + nv30->tex_sampler[unit] = ps; + nv30->dirty_samplers |= (1 << unit); +} + +static void +nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void +nv30_set_sampler_texture(struct pipe_context *pipe, unsigned unit, + struct pipe_texture *miptree) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->tex_miptree[unit] = (struct nv30_miptree *)miptree; + nv30->dirty_samplers |= (1 << unit); +} + +static void * +nv30_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv30_rasterizer_state *rs; + int i; + + /*XXX: ignored: + * light_twoside + * offset_cw/ccw -nohw + * scissor + * point_smooth -nohw + * multisample + * offset_units / offset_scale + */ + rs = malloc(sizeof(struct nv30_rasterizer_state)); + + rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01; + + rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; + rs->line_smooth_en = cso->line_smooth ? 1 : 0; + rs->line_stipple_en = cso->line_stipple_enable ? 1 : 0; + rs->line_stipple = (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor; + + rs->point_size = *(uint32_t*)&cso->point_size; + + rs->poly_smooth_en = cso->poly_smooth ? 1 : 0; + rs->poly_stipple_en = cso->poly_stipple_enable ? 1 : 0; + + if (cso->front_winding == PIPE_WINDING_CCW) { + rs->front_face = NV34TCL_FRONT_FACE_CCW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw); + } else { + rs->front_face = NV34TCL_FRONT_FACE_CW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw); + } + + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CCW) + rs->cull_face = NV34TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV34TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_CW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CW) + rs->cull_face = NV34TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV34TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_BOTH: + rs->cull_face_en = 1; + rs->cull_face = NV34TCL_CULL_FACE_FRONT_AND_BACK; + break; + case PIPE_WINDING_NONE: + default: + rs->cull_face_en = 0; + rs->cull_face = 0; + break; + } + + if (cso->point_sprite) { + rs->point_sprite = (1 << 0); + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + rs->point_sprite |= (1 << (8 + i)); + } + } else { + rs->point_sprite = 0; + } + + return (void *)rs; +} + +static void +nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_rasterizer_state *rs = hwcso; + + BEGIN_RING(rankine, NV34TCL_SHADE_MODEL, 1); + OUT_RING (rs->shade_model); + + BEGIN_RING(rankine, NV34TCL_LINE_WIDTH, 2); + OUT_RING (rs->line_width); + OUT_RING (rs->line_smooth_en); + BEGIN_RING(rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2); + OUT_RING (rs->line_stipple_en); + OUT_RING (rs->line_stipple); + + BEGIN_RING(rankine, NV34TCL_POINT_SIZE, 1); + OUT_RING (rs->point_size); + + BEGIN_RING(rankine, NV34TCL_POLYGON_MODE_FRONT, 6); + OUT_RING (rs->poly_mode_front); + OUT_RING (rs->poly_mode_back); + OUT_RING (rs->cull_face); + OUT_RING (rs->front_face); + OUT_RING (rs->poly_smooth_en); + OUT_RING (rs->cull_face_en); + + BEGIN_RING(rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + OUT_RING (rs->poly_stipple_en); + + BEGIN_RING(rankine, NV34TCL_POINT_SPRITE, 1); + OUT_RING (rs->point_sprite); +} + +static void +nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void +nv30_translate_stencil(const struct pipe_depth_stencil_alpha_state *cso, + unsigned idx, struct nv30_stencil_push *hw) +{ + hw->enable = cso->stencil[idx].enabled ? 1 : 0; + hw->wmask = cso->stencil[idx].write_mask; + hw->func = nvgl_comparison_op(cso->stencil[idx].func); + hw->ref = cso->stencil[idx].ref_value; + hw->vmask = cso->stencil[idx].value_mask; + hw->fail = nvgl_stencil_op(cso->stencil[idx].fail_op); + hw->zfail = nvgl_stencil_op(cso->stencil[idx].zfail_op); + hw->zpass = nvgl_stencil_op(cso->stencil[idx].zpass_op); +} + +static void * +nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv30_depth_stencil_alpha_state *hw; + + hw = malloc(sizeof(struct nv30_depth_stencil_alpha_state)); + + hw->depth.func = nvgl_comparison_op(cso->depth.func); + hw->depth.write_enable = cso->depth.writemask ? 1 : 0; + hw->depth.test_enable = cso->depth.enabled ? 1 : 0; + + nv30_translate_stencil(cso, 0, &hw->stencil.front); + nv30_translate_stencil(cso, 1, &hw->stencil.back); + + hw->alpha.enabled = cso->alpha.enabled ? 1 : 0; + hw->alpha.func = nvgl_comparison_op(cso->alpha.func); + hw->alpha.ref = float_to_ubyte(cso->alpha.ref); + + return (void *)hw; +} + +static void +nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_depth_stencil_alpha_state *hw = hwcso; + + BEGIN_RING(rankine, NV34TCL_DEPTH_FUNC, 3); + OUT_RINGp ((uint32_t *)&hw->depth, 3); + BEGIN_RING(rankine, NV34TCL_STENCIL_BACK_ENABLE, 16); + OUT_RINGp ((uint32_t *)&hw->stencil.back, 8); + OUT_RINGp ((uint32_t *)&hw->stencil.front, 8); + BEGIN_RING(rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3); + OUT_RINGp ((uint32_t *)&hw->alpha.enabled, 3); +} + +static void +nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv30_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv30_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nv30_vertex_program)); + vp->pipe = cso; + + return (void *)vp; +} + +static void +nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_vertex_program *vp = hwcso; + + nv30->vertprog.current = vp; + nv30->dirty |= NV30_NEW_VERTPROG; +} + +static void +nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_vertex_program *vp = hwcso; + + nv30_vertprog_destroy(nv30, vp); + free(vp); +} + +static void * +nv30_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv30_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv30_fragment_program)); + fp->pipe = cso; + + return (void *)fp; +} + +static void +nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_fragment_program *fp = hwcso; + + nv30->fragprog.current = fp; + nv30->dirty |= NV30_NEW_FRAGPROG; +} + +static void +nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_fragment_program *fp = hwcso; + + nv30_fragprog_destroy(nv30, fp); + free(fp); +} + +static void +nv30_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_COLOR, 1); + OUT_RING ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0)); +} + +static void +nv30_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + if (shader == PIPE_SHADER_VERTEX) { + nv30->vertprog.constant_buf = buf->buffer; + nv30->dirty |= NV30_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv30->fragprog.constant_buf = buf->buffer; + nv30->dirty |= NV30_NEW_FRAGPROG; + } +} + +static void +nv30_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct pipe_surface *rt[4], *zeta; + uint32_t rt_enable, rt_format, w, h; + int i, colour_format = 0, zeta_format = 0; + + rt_enable = 0; + for (i = 0; i < 4; i++) { + if (!fb->cbufs[i]) + continue; + + if (colour_format) { + assert(w == fb->cbufs[i]->width); + assert(h == fb->cbufs[i]->height); + assert(colour_format == fb->cbufs[i]->format); + } else { + w = fb->cbufs[i]->width; + h = fb->cbufs[i]->height; + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); + rt[i] = fb->cbufs[i]; + } + } + + if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | NV34TCL_RT_ENABLE_COLOR2 | + NV34TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV34TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { + BEGIN_RING(rankine, NV34TCL_COLOR0_PITCH, 1); + OUT_RING ( (rt[0]->pitch * rt[0]->cpp) | ( (zeta->pitch * zeta->cpp) << 16) ); + nv30->rt[0] = rt[0]->buffer; + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { + BEGIN_RING(rankine, NV34TCL_COLOR1_PITCH, 2); + OUT_RING (rt[1]->pitch * rt[1]->cpp); + nv30->rt[1] = rt[1]->buffer; + } + + if (zeta_format) + { + nv30->zeta = zeta->buffer; + } + + nv30->rt_enable = rt_enable; + BEGIN_RING(rankine, NV34TCL_RT_ENABLE, 1); + OUT_RING (rt_enable); + BEGIN_RING(rankine, NV34TCL_RT_HORIZ, 3); + OUT_RING ((w << 16) | 0); + OUT_RING ((h << 16) | 0); + OUT_RING (rt_format); + BEGIN_RING(rankine, NV34TCL_VIEWPORT_HORIZ, 2); + OUT_RING ((w << 16) | 0); + OUT_RING ((h << 16) | 0); + BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (((w - 1) << 16) | 0); + OUT_RING (((h - 1) << 16) | 0); +} + +static void +nv30_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + BEGIN_RING(rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); + OUT_RINGp ((uint32_t *)stipple->stipple, 32); +} + +static void +nv30_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2); + OUT_RING (((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (((s->maxy - s->miny) << 16) | s->miny); +} + +static void +nv30_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + BEGIN_RING(rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); + OUT_RINGf (vpt->translate[0]); + OUT_RINGf (vpt->translate[1]); + OUT_RINGf (vpt->translate[2]); + OUT_RINGf (vpt->translate[3]); + OUT_RINGf (vpt->scale[0]); + OUT_RINGf (vpt->scale[1]); + OUT_RINGf (vpt->scale[2]); + OUT_RINGf (vpt->scale[3]); +} + +static void +nv30_set_vertex_buffer(struct pipe_context *pipe, unsigned index, + const struct pipe_vertex_buffer *vb) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->vtxbuf[index] = *vb; + + nv30->dirty |= NV30_NEW_ARRAYS; +} + +static void +nv30_set_vertex_element(struct pipe_context *pipe, unsigned index, + const struct pipe_vertex_element *ve) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->vtxelt[index] = *ve; + + nv30->dirty |= NV30_NEW_ARRAYS; +} + +void +nv30_init_state_functions(struct nv30_context *nv30) +{ + nv30->pipe.create_blend_state = nv30_blend_state_create; + nv30->pipe.bind_blend_state = nv30_blend_state_bind; + nv30->pipe.delete_blend_state = nv30_blend_state_delete; + + nv30->pipe.create_sampler_state = nv30_sampler_state_create; + nv30->pipe.bind_sampler_state = nv30_sampler_state_bind; + nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; + nv30->pipe.set_sampler_texture = nv30_set_sampler_texture; + + nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; + nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; + nv30->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete; + + nv30->pipe.create_depth_stencil_alpha_state = + nv30_depth_stencil_alpha_state_create; + nv30->pipe.bind_depth_stencil_alpha_state = + nv30_depth_stencil_alpha_state_bind; + nv30->pipe.delete_depth_stencil_alpha_state = + nv30_depth_stencil_alpha_state_delete; + + nv30->pipe.create_vs_state = nv30_vp_state_create; + nv30->pipe.bind_vs_state = nv30_vp_state_bind; + nv30->pipe.delete_vs_state = nv30_vp_state_delete; + + nv30->pipe.create_fs_state = nv30_fp_state_create; + nv30->pipe.bind_fs_state = nv30_fp_state_bind; + nv30->pipe.delete_fs_state = nv30_fp_state_delete; + + nv30->pipe.set_blend_color = nv30_set_blend_color; + nv30->pipe.set_clip_state = nv30_set_clip_state; + nv30->pipe.set_constant_buffer = nv30_set_constant_buffer; + nv30->pipe.set_framebuffer_state = nv30_set_framebuffer_state; + nv30->pipe.set_polygon_stipple = nv30_set_polygon_stipple; + nv30->pipe.set_scissor_state = nv30_set_scissor_state; + nv30->pipe.set_viewport_state = nv30_set_viewport_state; + + nv30->pipe.set_vertex_buffer = nv30_set_vertex_buffer; + nv30->pipe.set_vertex_element = nv30_set_vertex_element; +} + diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h new file mode 100644 index 00000000000..233600f69ab --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -0,0 +1,147 @@ +#ifndef __NV30_STATE_H__ +#define __NV30_STATE_H__ + +#include "pipe/p_state.h" + +struct nv30_blend_state { + uint32_t b_enable; + uint32_t b_srcfunc; + uint32_t b_dstfunc; + uint32_t b_eqn; + + uint32_t l_enable; + uint32_t l_op; + + uint32_t c_mask; + + uint32_t d_enable; +}; + +struct nv30_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv30_rasterizer_state { + uint32_t shade_model; + + uint32_t line_width; + uint32_t line_smooth_en; + uint32_t line_stipple_en; + uint32_t line_stipple; + + uint32_t point_size; + + uint32_t poly_smooth_en; + uint32_t poly_stipple_en; + + uint32_t poly_mode_front; + uint32_t poly_mode_back; + + uint32_t front_face; + uint32_t cull_face; + uint32_t cull_face_en; + + uint32_t point_sprite; +}; + +struct nv30_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv30_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv30_vertex_program { + const struct pipe_shader_state *pipe; + + boolean translated; + struct nv30_vertex_program_exec *insns; + unsigned nr_insns; + struct nv30_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; +}; + +struct nv30_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv30_fragment_program { + const struct pipe_shader_state *pipe; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv30_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; +}; + +struct nv30_stencil_push { + uint32_t enable; + uint32_t wmask; + uint32_t func; + uint32_t ref; + uint32_t vmask; + uint32_t fail; + uint32_t zfail; + uint32_t zpass; +}; + +struct nv30_depth_stencil_alpha_state { + struct { + uint32_t func; + uint32_t write_enable; + uint32_t test_enable; + } depth; + + struct { + struct nv30_stencil_push back; + struct nv30_stencil_push front; + } stencil; + + struct { + uint32_t enabled; + uint32_t func; + uint32_t ref; + } alpha; +}; + +struct nv30_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c new file mode 100644 index 00000000000..70b98836f0f --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -0,0 +1,83 @@ +#include "nv30_context.h" +#include "nv30_state.h" + +void +nv30_emit_hw_state(struct nv30_context *nv30) +{ + int i; + + if (nv30->dirty & NV30_NEW_FRAGPROG) { + nv30_fragprog_bind(nv30, nv30->fragprog.current); + /*XXX: clear NV30_NEW_FRAGPROG if no new program uploaded */ + } + + if (nv30->dirty_samplers || (nv30->dirty & NV30_NEW_FRAGPROG)) { + nv30_fragtex_bind(nv30); +/* + BEGIN_RING(rankine, NV34TCL_TX_CACHE_CTL, 1); + OUT_RING (2); + BEGIN_RING(rankine, NV34TCL_TX_CACHE_CTL, 1); + OUT_RING (1);*/ + nv30->dirty &= ~NV30_NEW_FRAGPROG; + } + + if (nv30->dirty & NV30_NEW_VERTPROG) { + nv30_vertprog_bind(nv30, nv30->vertprog.current); + nv30->dirty &= ~NV30_NEW_VERTPROG; + } + + nv30->dirty_samplers = 0; + + /* Emit relocs for every referenced buffer. + * This is to ensure the bufmgr has an accurate idea of how + * the buffer is used. This isn't very efficient, but we don't + * seem to take a significant performance hit. Will be improved + * at some point. Vertex arrays are emitted by nv30_vbo.c + */ + + /* Render targets */ + if (nv30->rt_enable & NV34TCL_RT_ENABLE_COLOR0) { + BEGIN_RING(rankine, NV34TCL_DMA_COLOR0, 1); + OUT_RELOCo(nv30->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(rankine, NV34TCL_COLOR0_OFFSET, 1); + OUT_RELOCl(nv30->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } + + if (nv30->rt_enable & NV34TCL_RT_ENABLE_COLOR1) { + BEGIN_RING(rankine, NV34TCL_DMA_COLOR1, 1); + OUT_RELOCo(nv30->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(rankine, NV34TCL_COLOR1_OFFSET, 1); + OUT_RELOCl(nv30->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } + + if (nv30->zeta) { + BEGIN_RING(rankine, NV34TCL_DMA_ZETA, 1); + OUT_RELOCo(nv30->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(rankine, NV34TCL_ZETA_OFFSET, 1); + OUT_RELOCl(nv30->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + /* XXX allocate LMA */ +/* BEGIN_RING(rankine, NV34TCL_LMA_DEPTH_OFFSET, 1); + OUT_RING(0);*/ + } + + /* Texture images */ + for (i = 0; i < 16; i++) { + if (!(nv30->fp_samplers & (1 << i))) + continue; + BEGIN_RING(rankine, NV34TCL_TX_OFFSET(i), 2); + OUT_RELOCl(nv30->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv30->tex[i].buffer, nv30->tex[i].format, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | + NOUVEAU_BO_OR, NV34TCL_TX_FORMAT_DMA0, + NV34TCL_TX_FORMAT_DMA1); + } + + /* Fragment program */ + BEGIN_RING(rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); + OUT_RELOC (nv30->fragprog.active->buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, + NV34TCL_FP_ACTIVE_PROGRAM_DMA1); +} + diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c new file mode 100644 index 00000000000..974965679f5 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_surface.c @@ -0,0 +1,137 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nv30_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/p_tile.h" + +static boolean +nv30_surface_format_supported(struct pipe_context *pipe, + enum pipe_format format, uint type) +{ + switch (type) { + case PIPE_SURFACE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + break; + case PIPE_TEXTURE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + } + break; + default: + assert(0); + }; + + return FALSE; +} + +static struct pipe_surface * +nv30_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + pipe_buffer_reference(ws, &ps->buffer, nv30mt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = nv30mt->level[level].pitch / ps->cpp; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv30mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = nv30mt->level[level].image_offset[zslice]; + } else { + ps->offset = nv30mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv30_surface_copy(struct pipe_context *pipe, unsigned do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_winsys *nvws = nv30->nvws; + + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); +} + +static void +nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_winsys *nvws = nv30->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +void +nv30_init_surface_functions(struct nv30_context *nv30) +{ + nv30->pipe.is_format_supported = nv30_surface_format_supported; + nv30->pipe.get_tex_surface = nv30_get_tex_surface; + nv30->pipe.surface_copy = nv30_surface_copy; + nv30->pipe.surface_fill = nv30_surface_fill; +} diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c new file mode 100644 index 00000000000..9e00cdac3fb --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -0,0 +1,425 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +static INLINE int +nv30_vbo_ncomp(uint format) +{ + int ncomp = 0; + + if (pf_size_x(format)) ncomp++; + if (pf_size_y(format)) ncomp++; + if (pf_size_z(format)) ncomp++; + if (pf_size_w(format)) ncomp++; + + return ncomp; +} + +static INLINE int +nv30_vbo_type(uint format) +{ + switch (pf_type(format)) { + case PIPE_FORMAT_TYPE_FLOAT: + return NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT; + case PIPE_FORMAT_TYPE_UNORM: + return NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_UBYTE; + default: + NOUVEAU_ERR("Unknown format 0x%08x\n", format); + return NV40TCL_VTXFMT_TYPE_FLOAT; + } +} + +static boolean +nv30_vbo_static_attrib(struct nv30_context *nv30, int attrib, + struct pipe_vertex_element *ve, + struct pipe_vertex_buffer *vb) +{ + struct pipe_winsys *ws = nv30->pipe.winsys; + int type, ncomp; + void *map; + + type = nv30_vbo_type(ve->src_format); + ncomp = nv30_vbo_ncomp(ve->src_format); + + map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map += vb->buffer_offset + ve->src_offset; + + switch (type) { + case NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT: + { + float *v = map; + + BEGIN_RING(rankine, NV34TCL_VERTEX_ATTR_4F_X(attrib), 4); + switch (ncomp) { + case 4: + OUT_RINGf(v[0]); + OUT_RINGf(v[1]); + OUT_RINGf(v[2]); + OUT_RINGf(v[3]); + break; + case 3: + OUT_RINGf(v[0]); + OUT_RINGf(v[1]); + OUT_RINGf(v[2]); + OUT_RINGf(1.0); + break; + case 2: + OUT_RINGf(v[0]); + OUT_RINGf(v[1]); + OUT_RINGf(0.0); + OUT_RINGf(1.0); + break; + case 1: + OUT_RINGf(v[0]); + OUT_RINGf(0.0); + OUT_RINGf(0.0); + OUT_RINGf(1.0); + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + } + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + + ws->buffer_unmap(ws, vb->buffer); + + return TRUE; +} + +static void +nv30_vbo_arrays_update(struct nv30_context *nv30) +{ + struct nv30_vertex_program *vp = nv30->vertprog.active; + uint32_t inputs, vtxfmt[16]; + int hw, num_hw; + + nv30->vb_enable = 0; + + inputs = vp->ir; + for (hw = 0; hw < 16 && inputs; hw++) { + if (inputs & (1 << hw)) { + num_hw = hw; + inputs &= ~(1 << hw); + } + } + num_hw++; + + inputs = vp->ir; + for (hw = 0; hw < num_hw; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + + if (!(inputs & (1 << hw))) { + vtxfmt[hw] = NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT; + continue; + } + + ve = &nv30->vtxelt[hw]; + vb = &nv30->vtxbuf[ve->vertex_buffer_index]; + + if (vb->pitch == 0) { + vtxfmt[hw] = NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT; + if (nv30_vbo_static_attrib(nv30, hw, ve, vb) == TRUE) + continue; + } + + nv30->vb_enable |= (1 << hw); + nv30->vb[hw].delta = vb->buffer_offset + ve->src_offset; + nv30->vb[hw].buffer = vb->buffer; + + vtxfmt[hw] = ((vb->pitch << NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT) | + (nv30_vbo_ncomp(ve->src_format) << + NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_SHIFT) | + nv30_vbo_type(ve->src_format)); + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_ARRAY_FORMAT(0), num_hw); + OUT_RINGp (vtxfmt, num_hw); +} + +static boolean +nv30_vbo_validate_state(struct nv30_context *nv30, + struct pipe_buffer *ib, unsigned ib_format) +{ + unsigned inputs; + + nv30_emit_hw_state(nv30); + + if (nv30->dirty & NV30_NEW_ARRAYS) { + nv30_vbo_arrays_update(nv30); + nv30->dirty &= ~NV30_NEW_ARRAYS; + } + + inputs = nv30->vb_enable; + while (inputs) { + unsigned a = ffs(inputs) - 1; + + inputs &= ~(1 << a); + + BEGIN_RING(rankine, NV34TCL_VERTEX_BUFFER_ADDRESS(a), 1); + OUT_RELOC (nv30->vb[a].buffer, nv30->vb[a].delta, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0, + NV34TCL_VERTEX_BUFFER_ADDRESS_DMA1); + } + + if (ib) { + BEGIN_RING(rankine, NV40TCL_IDXBUF_ADDRESS, 2); + OUT_RELOCl(ib, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD); + OUT_RELOCd(ib, ib_format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_OR, + 0, NV40TCL_IDXBUF_FORMAT_DMA1); + } + + BEGIN_RING(rankine, 0x1710, 1); + OUT_RING (0); /* vtx cache flush */ + + return TRUE; +} + +boolean +nv30_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, + unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + unsigned nr; + boolean ret; + + ret = nv30_vbo_validate_state(nv30, NULL, 0); + if (!ret) { + NOUVEAU_ERR("state validate failed\n"); + return FALSE; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (count & 0xff); + if (nr) { + BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = count >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + pipe->flush(pipe, 0); + return TRUE; +} + +static INLINE void +nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, + unsigned start, unsigned count) +{ + uint8_t *elts = (uint8_t *)ib + start; + int push, i; + + if (count & 1) { + BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; count--; + } + + while (count) { + push = MIN2(count, 2047 * 2); + + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + count -= push; + elts += push; + } +} + +static INLINE void +nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, + unsigned start, unsigned count) +{ + uint16_t *elts = (uint16_t *)ib + start; + int push, i; + + if (count & 1) { + BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; count--; + } + + while (count) { + push = MIN2(count, 2047 * 2); + + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + count -= push; + elts += push; + } +} + +static INLINE void +nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, + unsigned start, unsigned count) +{ + uint32_t *elts = (uint32_t *)ib + start; + int push; + + while (count) { + push = MIN2(count, 2047); + + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); + OUT_RINGp (elts, push); + + count -= push; + elts += push; + } +} + +static boolean +nv30_draw_elements_inline(struct pipe_context *pipe, + struct pipe_buffer *ib, unsigned ib_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + boolean ret; + void *map; + + ret = nv30_vbo_validate_state(nv30, NULL, 0); + if (!ret) { + NOUVEAU_ERR("state validate failed\n"); + return FALSE; + } + + map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + if (!ib) { + NOUVEAU_ERR("failed mapping ib\n"); + return FALSE; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + switch (ib_size) { + case 1: + nv30_draw_elements_u08(nv30, map, start, count); + break; + case 2: + nv30_draw_elements_u16(nv30, map, start, count); + break; + case 4: + nv30_draw_elements_u32(nv30, map, start, count); + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + break; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + ws->buffer_unmap(ws, ib); + + return TRUE; +} + +static boolean +nv30_draw_elements_vbo(struct pipe_context *pipe, + struct pipe_buffer *ib, unsigned ib_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + unsigned nr, type; + boolean ret; + + switch (ib_size) { + case 2: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + return FALSE; + } + + ret = nv30_vbo_validate_state(nv30, ib, type); + if (!ret) { + NOUVEAU_ERR("failed state validation\n"); + return FALSE; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (count & 0xff); + if (nr) { + BEGIN_RING(rankine, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = count >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(rankine, NV40TCL_VB_INDEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + return TRUE; +} + +boolean +nv30_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ +/* if (indexSize != 1) { + nv30_draw_elements_vbo(pipe, indexBuffer, indexSize, + mode, start, count); + } else */{ + nv30_draw_elements_inline(pipe, indexBuffer, indexSize, + mode, start, count); + } + + pipe->flush(pipe, 0); + return TRUE; +} + + diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c new file mode 100644 index 00000000000..4a8269d5dd7 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -0,0 +1,777 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 2. Arb. swz/negation + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv30_shader.h" + +#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv30_sr_neg((s)) +#define abs(s) nv30_sr_abs((s)) + +struct nv30_vpc { + struct nv30_vertex_program *vp; + + struct nv30_vertex_program_exec *vpi; + + unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; + + int high_temp; + int temp_temp_count; + + struct nv30_sreg *imm; + unsigned nr_imm; +}; + +static struct nv30_sreg +temp(struct nv30_vpc *vpc) +{ + int idx; + + idx = vpc->temp_temp_count++; + idx += vpc->high_temp + 1; + return nv30_sr(NV30SR_TEMP, idx); +} + +static struct nv30_sreg +constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nv30_vertex_program *vp = vpc->vp; + struct nv30_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nv30_sr(NV30SR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nv30_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) +{ + struct nv30_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NV30SR_TEMP: + sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); + sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); + break; + case NV30SR_INPUT: + sr |= (NV30_VP_SRC_REG_TYPE_INPUT << + NV30_VP_SRC_REG_TYPE_SHIFT); + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); + break; + case NV30SR_CONST: + sr |= (NV30_VP_SRC_REG_TYPE_CONST << + NV30_VP_SRC_REG_TYPE_SHIFT); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == src.index); + vpc->vpi->const_index = src.index; + break; + case NV30SR_NONE: + sr |= (NV30_VP_SRC_REG_TYPE_INPUT << + NV30_VP_SRC_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV30_VP_SRC_NEGATE; + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | + (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | + (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | + (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); + +/* + * |VVV| + * d�.�b + * \u/ + * + */ + + switch (pos) { + case 0: + hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> + NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; + hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << + NV30_VP_INST_SRC0L_SHIFT; + break; + case 1: + hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; + break; + case 2: + hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> + NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; + hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << + NV30_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) +{ + struct nv30_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NV30SR_TEMP: + hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); + break; + case NV30SR_OUTPUT: + switch (dst.index) { + case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; + case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + default: + break; + } + + hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + break; + default: + assert(0); + } +} + +static void +nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, + struct nv30_sreg dst, int mask, + struct nv30_sreg s0, struct nv30_sreg s1, + struct nv30_sreg s2) +{ + struct nv30_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); + hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | + (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | + (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | + (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); + + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; +// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); + + emit_dst(vpc, hw, slot, dst); + emit_src(vpc, hw, 0, s0); + emit_src(vpc, hw, 1, s1); + emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv30_sreg +tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nv30_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + if (vpc->high_temp < fsrc->SrcRegister.Index) + vpc->high_temp = fsrc->SrcRegister.Index; + src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv30_sreg +tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nv30_sreg dst; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + dst = nv30_sr(NV30SR_OUTPUT, + vpc->output_map[fdst->DstRegister.Index]); + + break; + case TGSI_FILE_TEMPORARY: + dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index); + if (vpc->high_temp < dst.index) + vpc->high_temp = dst.index; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nv30_sreg src[3], dst, tmp; + struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); + int mask; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + vpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + /*XXX: index comparison is broken now that consts come from + * two different register files. + */ + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, 1, OP_EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_SGE: + arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, 0, OP_MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV30_VP_INST_DEST_POS; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_VP_INST_DEST_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_VP_INST_DEST_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_VP_INST_DEST_BFC0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_VP_INST_DEST_BFC1; + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV30_VP_INST_DEST_FOGC; + break; + case TGSI_SEMANTIC_PSIZE: + hw = NV30_VP_INST_DEST_PSZ; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->output_map[fdec->u.DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv30_vertprog_prepare(struct nv30_vpc *vpc) +{ + struct tgsi_parse_context p; + int nr_imm = 0; + + tgsi_parse_init(&p, vpc->vp->pipe->tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg)); + assert(vpc->imm); + } + + return TRUE; +} + +void +nv30_vertprog_translate(struct nv30_context *nv30, + struct nv30_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nv30_vpc *vpc = NULL; + + vpc = CALLOC(1, sizeof(struct nv30_vpc)); + if (!vpc) + return; + vpc->vp = vp; + vpc->high_temp = -1; + + if (!nv30_vertprog_prepare(vpc)) { + free(vpc); + return; + } + + tgsi_parse_init(&parse, vp->pipe->tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &parse.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_OUTPUT: + if (!nv30_vertprog_parse_decl_output(vpc, fdec)) + goto out_err; + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +// assert(imm->Immediate.Size == 4); + vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nv30_vertprog_parse_instruction(vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + free(vpc); +} + +void +nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) +{ + struct nouveau_winsys *nvws = nv30->nvws; + struct pipe_winsys *ws = nv30->pipe.winsys; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + /* Translate TGSI shader into hw bytecode */ + if (!vp->translated) { + nv30_vertprog_translate(nv30, vp); + if (!vp->translated) + assert(0); + } + + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nv30->vertprog.exec_heap; + uint vplen = vp->nr_insns; + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nv30_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->exec); + } + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if (vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nv30->vertprog.data_heap; + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nv30_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. + */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv30_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv30_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NV30_VP_INST_CONST_SRC_SHIFT; + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (nv30->vertprog.constant_buf) { + map = ws->buffer_map(ws, nv30->vertprog.constant_buf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nv30_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (i + vp->data->start); + OUT_RINGp ((uint32_t *)vpd->value, 4); + } + + if (map) { + ws->buffer_unmap(ws, nv30->vertprog.constant_buf); + } + } + + /* Upload vtxprog */ + if (upload_code) { +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); + } +#endif + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (/*vp->exec->start*/0); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (vp->insns[i].data, 4); + } + } + + BEGIN_RING(rankine, NV34TCL_VP_START_FROM_ID, 1); +// OUT_RING (vp->exec->start); + OUT_RING (0); + + nv30->vertprog.active = vp; +} + +void +nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp) +{ + if (vp->nr_consts) + free(vp->consts); + if (vp->nr_insns) + free(vp->insns); +} + diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile new file mode 100644 index 00000000000..2a9de4a2dcb --- /dev/null +++ b/src/gallium/drivers/nv40/Makefile @@ -0,0 +1,29 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv40 + +DRIVER_SOURCES = \ + nv40_clear.c \ + nv40_context.c \ + nv40_draw.c \ + nv40_fragprog.c \ + nv40_fragtex.c \ + nv40_miptree.c \ + nv40_query.c \ + nv40_state.c \ + nv40_state_emit.c \ + nv40_surface.c \ + nv40_vbo.c \ + nv40_vertprog.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c new file mode 100644 index 00000000000..2c4e8f01fda --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" + +void +nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c new file mode 100644 index 00000000000..8b5cc693de0 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -0,0 +1,312 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" + +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 + +static const char * +nv40_get_name(struct pipe_context *pipe) +{ + struct nv40_context *nv40 = nv40_context(pipe); + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", nv40->chipset); + return buffer; +} + +static const char * +nv40_get_vendor(struct pipe_context *pipe) +{ + return "nouveau"; +} + +static int +nv40_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv40_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + case PIPE_CAP_BITMAP_TEXCOORD_BIAS: + return 0.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static void +nv40_flush(struct pipe_context *pipe, unsigned flags) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_winsys *nvws = nv40->nvws; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(curie, 0x1fd8, 1); + OUT_RING (2); + BEGIN_RING(curie, 0x1fd8, 1); + OUT_RING (1); + } + + if (flags & PIPE_FLUSH_WAIT) { + nvws->notifier_reset(nv40->hw->sync, 0); + BEGIN_RING(curie, 0x104, 1); + OUT_RING (0); + BEGIN_RING(curie, 0x100, 1); + OUT_RING (0); + } + + FIRE_RING(); + + if (flags & PIPE_FLUSH_WAIT) + nvws->notifier_wait(nv40->hw->sync, 0, 0, 2000); +} + +static void +nv40_channel_takedown(struct nv40_channel_context *cnv40) +{ + struct nouveau_winsys *nvws = cnv40->nvws; + + nvws->res_free(&cnv40->vp_exec_heap); + nvws->res_free(&cnv40->vp_data_heap); + nvws->res_free(&cnv40->query_heap); + nvws->notifier_free(&cnv40->query); + nvws->notifier_free(&cnv40->sync); + nvws->grobj_free(&cnv40->curie); + free(cnv40); +} + +static struct nv40_channel_context * +nv40_channel_init(struct pipe_winsys *ws, struct nouveau_winsys *nvws, + unsigned chipset) +{ + struct nv40_channel_context *cnv40 = NULL; + struct nouveau_stateobj *so; + unsigned curie_class = 0; + int ret; + + switch (chipset & 0xf0) { + case 0x40: + if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV40TCL; + else + if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + case 0x60: + if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + default: + break; + } + + if (!curie_class) { + NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset); + return NULL; + } + + cnv40 = CALLOC(1, sizeof(struct nv40_channel_context)); + if (!cnv40) + return NULL; + cnv40->chipset = chipset; + cnv40->nvws = nvws; + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &cnv40->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv40_channel_takedown(cnv40); + return NULL; + } + + /* Query objects */ + ret = nvws->notifier_alloc(nvws, 32, &cnv40->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nv40_channel_takedown(cnv40); + return NULL; + } + + ret = nvws->res_init(&cnv40->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nv40_channel_takedown(cnv40); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&cnv40->vp_exec_heap, 0, 512) || + nvws->res_init(&cnv40->vp_data_heap, 0, 256)) { + nv40_channel_takedown(cnv40); + return NULL; + } + + /* 3D object */ + ret = nvws->grobj_alloc(nvws, curie_class, &cnv40->curie); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Static curie initialisation */ + so = so_new(128, 0); + so_method(so, cnv40->curie, NV40TCL_DMA_NOTIFY, 1); + so_data (so, cnv40->sync->handle); + so_method(so, cnv40->curie, NV40TCL_DMA_TEXTURE0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, cnv40->curie, NV40TCL_DMA_COLOR1, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, cnv40->curie, NV40TCL_DMA_COLOR0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, cnv40->curie, NV40TCL_DMA_VTXBUF0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, cnv40->curie, NV40TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, cnv40->query->handle); + so_method(so, cnv40->curie, NV40TCL_DMA_UNK01AC, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, cnv40->curie, NV40TCL_DMA_COLOR2, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + + so_method(so, cnv40->curie, 0x1ea4, 3); + so_data (so, 0x00000010); + so_data (so, 0x01000100); + so_data (so, 0xff800006); + + /* vtxprog output routing */ + so_method(so, cnv40->curie, 0x1fc4, 1); + so_data (so, 0x06144321); + so_method(so, cnv40->curie, 0x1fc8, 2); + so_data (so, 0xedcba987); + so_data (so, 0x00000021); + so_method(so, cnv40->curie, 0x1fd0, 1); + so_data (so, 0x00171615); + so_method(so, cnv40->curie, 0x1fd4, 1); + so_data (so, 0x001b1a19); + + so_method(so, cnv40->curie, 0x1ef8, 1); + so_data (so, 0x0020ffff); + so_method(so, cnv40->curie, 0x1d64, 1); + so_data (so, 0x00d30000); + so_method(so, cnv40->curie, 0x1e94, 1); + so_data (so, 0x00000001); + + so_emit(nvws, so); + so_ref(NULL, &so); + nvws->push_flush(nvws->channel, 0); + + return cnv40; +} + +static void +nv40_destroy(struct pipe_context *pipe) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + if (nv40->draw) + draw_destroy(nv40->draw); + + if (nv40->hw) { + if (--nv40->hw->refcount == 0) + nv40_channel_takedown(nv40->hw); + } + + free(nv40); +} + +struct pipe_context * +nv40_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, + unsigned chipset) +{ + struct nv40_context *nv40; + + nv40 = CALLOC(1, sizeof(struct nv40_context)); + if (!nv40) + return NULL; + + nv40->hw = nv40_channel_init(ws, nvws, chipset); + if (!nv40->hw) { + nv40_destroy(&nv40->pipe); + return NULL; + } + + nv40->chipset = chipset; + nv40->nvws = nvws; + + nv40->pipe.winsys = ws; + nv40->pipe.destroy = nv40_destroy; + nv40->pipe.get_name = nv40_get_name; + nv40->pipe.get_vendor = nv40_get_vendor; + nv40->pipe.get_param = nv40_get_param; + nv40->pipe.get_paramf = nv40_get_paramf; + nv40->pipe.draw_arrays = nv40_draw_arrays; + nv40->pipe.draw_elements = nv40_draw_elements; + nv40->pipe.clear = nv40_clear; + nv40->pipe.flush = nv40_flush; + + nv40_init_query_functions(nv40); + nv40_init_surface_functions(nv40); + nv40_init_state_functions(nv40); + nv40_init_miptree_functions(nv40); + + nv40->draw = draw_create(); + assert(nv40->draw); + draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); + + return &nv40->pipe; +} + diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h new file mode 100644 index 00000000000..f511759e3be --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -0,0 +1,153 @@ +#ifndef __NV40_CONTEXT_H__ +#define __NV40_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv40_channel_context *ctx = nv40->hw +#include "nouveau/nouveau_push.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv40_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +#define NV40_NEW_BLEND (1 << 0) +#define NV40_NEW_RAST (1 << 1) +#define NV40_NEW_ZSA (1 << 2) +#define NV40_NEW_SAMPLER (1 << 3) +#define NV40_NEW_FB (1 << 4) +#define NV40_NEW_STIPPLE (1 << 5) +#define NV40_NEW_SCISSOR (1 << 6) +#define NV40_NEW_VIEWPORT (1 << 7) +#define NV40_NEW_BCOL (1 << 8) +#define NV40_NEW_VERTPROG (1 << 9) +#define NV40_NEW_FRAGPROG (1 << 10) +#define NV40_NEW_ARRAYS (1 << 11) + +struct nv40_channel_context { + struct nouveau_winsys *nvws; + unsigned refcount; + + unsigned chipset; + + /* HW graphics objects */ + struct nouveau_grobj *curie; + struct nouveau_notifier *sync; + + /* Query object resources */ + struct nouveau_notifier *query; + struct nouveau_resource *query_heap; + + /* Vtxprog resources */ + struct nouveau_resource *vp_exec_heap; + struct nouveau_resource *vp_data_heap; +}; + +struct nv40_context { + struct pipe_context pipe; + struct nouveau_winsys *nvws; + + struct nv40_channel_context *hw; + struct draw_context *draw; + + int chipset; + + uint32_t dirty; + + struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned dirty_samplers; + unsigned fp_samplers; + unsigned vp_samplers; + + struct nouveau_stateobj *so_framebuffer; + struct nouveau_stateobj *so_fragtex[16]; + struct nouveau_stateobj *so_vtxbuf; + struct nouveau_stateobj *so_blend; + struct nouveau_stateobj *so_rast; + struct nouveau_stateobj *so_zsa; + struct nouveau_stateobj *so_bcol; + struct nouveau_stateobj *so_scissor; + struct nouveau_stateobj *so_viewport; + struct nouveau_stateobj *so_stipple; + + struct { + struct nv40_vertex_program *active; + + struct nv40_vertex_program *current; + struct pipe_buffer *constant_buf; + } vertprog; + + struct { + struct nv40_fragment_program *active; + + struct nv40_fragment_program *current; + struct pipe_buffer *constant_buf; + } fragprog; + + struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; +}; + +static INLINE struct nv40_context * +nv40_context(struct pipe_context *pipe) +{ + return (struct nv40_context *)pipe; +} + +extern void nv40_init_state_functions(struct nv40_context *nv40); +extern void nv40_init_surface_functions(struct nv40_context *nv40); +extern void nv40_init_miptree_functions(struct nv40_context *nv40); +extern void nv40_init_query_functions(struct nv40_context *nv40); + +/* nv40_draw.c */ +extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); + +/* nv40_vertprog.c */ +extern void nv40_vertprog_translate(struct nv40_context *, + struct nv40_vertex_program *); +extern void nv40_vertprog_bind(struct nv40_context *, + struct nv40_vertex_program *); +extern void nv40_vertprog_destroy(struct nv40_context *, + struct nv40_vertex_program *); + +/* nv40_fragprog.c */ +extern void nv40_fragprog_translate(struct nv40_context *, + struct nv40_fragment_program *); +extern void nv40_fragprog_bind(struct nv40_context *, + struct nv40_fragment_program *); +extern void nv40_fragprog_destroy(struct nv40_context *, + struct nv40_fragment_program *); + +/* nv40_fragtex.c */ +extern void nv40_fragtex_bind(struct nv40_context *); + +/* nv40_state.c and friends */ +extern void nv40_emit_hw_state(struct nv40_context *nv40); +extern void nv40_state_tex_update(struct nv40_context *nv40); + +/* nv40_vbo.c */ +extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv40_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + +/* nv40_clear.c */ +extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv40/nv40_dma.h b/src/gallium/drivers/nv40/nv40_dma.h new file mode 100644 index 00000000000..1fb82677689 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_dma.h @@ -0,0 +1,66 @@ +#ifndef __NV40_DMA_H__ +#define __NV40_DMA_H__ + +#include "pipe/nouveau/nouveau_winsys.h" + +#define OUT_RING(data) do { \ + (*nv40->nvws->channel->pushbuf->cur++) = (data); \ +} while(0) + +#define OUT_RINGp(src,size) do { \ + memcpy(nv40->nvws->channel->pushbuf->cur, (src), (size) * 4); \ + nv40->nvws->channel->pushbuf->cur += (size); \ +} while(0) + +#define OUT_RINGf(data) do { \ + union { float v; uint32_t u; } c; \ + c.v = (data); \ + OUT_RING(c.u); \ +} while(0) + +#define BEGIN_RING(obj,mthd,size) do { \ + if (nv40->nvws->channel->pushbuf->remaining < ((size) + 1)) \ + nv40->nvws->push_flush(nv40->nvws->channel, ((size) + 1)); \ + OUT_RING((nv40->obj->subc << 13) | ((size) << 18) | (mthd)); \ + nv40->nvws->channel->pushbuf->remaining -= ((size) + 1); \ +} while(0) + +#define BEGIN_RING_NI(obj,mthd,size) do { \ + BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ +} while(0) + +#define FIRE_RING() do { \ + nv40->nvws->push_flush(nv40->nvws->channel, 0); \ +} while(0) + +#define OUT_RELOC(bo,data,flags,vor,tor) do { \ + nv40->nvws->push_reloc(nv40->nvws->channel, \ + nv40->nvws->channel->pushbuf->cur, \ + (struct nouveau_bo *)(bo), \ + (data), (flags), (vor), (tor)); \ + OUT_RING(0); \ +} while(0) + +/* Raw data + flags depending on FB/TT buffer */ +#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ + OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ +} while(0) + +/* FB/TT object handle */ +#define OUT_RELOCo(bo,flags) do { \ + OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ + nv40->nvws->channel->vram->handle, \ + nv40->nvws->channel->gart->handle); \ +} while(0) + +/* Low 32-bits of offset */ +#define OUT_RELOCl(bo,delta,flags) do { \ + OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ +} while(0) + +/* High 32-bits of offset */ +#define OUT_RELOCh(bo,delta,flags) do { \ + OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ +} while(0) + +#endif diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c new file mode 100644 index 00000000000..a39bb85e99b --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -0,0 +1,62 @@ +#include "draw/draw_private.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" + +struct nv40_draw_stage { + struct draw_stage draw; + struct nv40_context *nv40; +}; + +static void +nv40_draw_point(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv40_draw_line(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv40_draw_tri(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv40_draw_flush(struct draw_stage *draw, unsigned flags) +{ +} + +static void +nv40_draw_reset_stipple_counter(struct draw_stage *draw) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv40_draw_destroy(struct draw_stage *draw) +{ + free(draw); +} + +struct draw_stage * +nv40_draw_render_stage(struct nv40_context *nv40) +{ + struct nv40_draw_stage *nv40draw = CALLOC_STRUCT(nv40_draw_stage); + + nv40draw->nv40 = nv40; + nv40draw->draw.draw = nv40->draw; + nv40draw->draw.point = nv40_draw_point; + nv40draw->draw.line = nv40_draw_line; + nv40draw->draw.tri = nv40_draw_tri; + nv40draw->draw.flush = nv40_draw_flush; + nv40draw->draw.reset_stipple_counter = nv40_draw_reset_stipple_counter; + nv40draw->draw.destroy = nv40_draw_destroy; + + return &nv40draw->draw; +} + diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c new file mode 100644 index 00000000000..07a418c1e9f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -0,0 +1,842 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" + +#include "nv40_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV40_FP_OP_COND_TR +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) +#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nv40_fpc { + struct nv40_fragment_program *fp; + + uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + + int high_temp; + int temp_temp_count; + int num_regs; + + uint depth_id; + uint colour_id; + + unsigned inst_offset; + + struct { + int pipe; + float vals[4]; + } consts[MAX_CONSTS]; + int nr_consts; + + struct nv40_sreg imm[MAX_IMM]; + unsigned nr_imm; +}; + +static INLINE struct nv40_sreg +temp(struct nv40_fpc *fpc) +{ + int idx; + + idx = fpc->temp_temp_count++; + idx += fpc->high_temp + 1; + return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE struct nv40_sreg +constant(struct nv40_fpc *fpc, int pipe, float vals[4]) +{ + int idx; + + if (fpc->nr_consts == MAX_CONSTS) + assert(0); + idx = fpc->nr_consts++; + + fpc->consts[idx].pipe = pipe; + if (pipe == -1) + memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); + return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ + (d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ + nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) + +static void +grow_insns(struct nv40_fpc *fpc, int size) +{ + struct nv40_fragment_program *fp = fpc->fp; + + fp->insn_len += size; + fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + uint32_t sr = 0; + + switch (src.type) { + case NV40SR_INPUT: + sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); + break; + case NV40SR_OUTPUT: + sr |= NV40_FP_REG_SRC_HALF; + /* fall-through */ + case NV40SR_TEMP: + sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); + sr |= (src.index << NV40_FP_REG_SRC_SHIFT); + break; + case NV40SR_CONST: + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + if (fpc->consts[src.index].pipe >= 0) { + struct nv40_fragment_program_data *fpd; + + fp->consts = realloc(fp->consts, ++fp->nr_consts * + sizeof(*fpd)); + fpd = &fp->consts[fp->nr_consts - 1]; + fpd->offset = fpc->inst_offset + 4; + fpd->index = fpc->consts[src.index].pipe; + memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); + } else { + memcpy(&fp->insn[fpc->inst_offset + 4], + fpc->consts[src.index].vals, + sizeof(uint32_t) * 4); + } + + sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + break; + case NV40SR_NONE: + sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV40_FP_REG_NEGATE; + + if (src.abs) + hw[1] |= (1 << (29 + pos)); + + sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); + + hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + + switch (dst.type) { + case NV40SR_TEMP: + if (fpc->num_regs < (dst.index + 1)) + fpc->num_regs = dst.index + 1; + break; + case NV40SR_OUTPUT: + if (dst.index == 1) { + fp->fp_control |= 0xe; + } else { + hw[0] |= NV40_FP_OP_OUT_REG_HALF; + } + break; + case NV40SR_NONE: + hw[0] |= (1 << 30); + break; + default: + assert(0); + } + + hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); +} + +static void +nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw; + + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + memset(hw, 0, sizeof(uint32_t) * 4); + + if (op == NV40_FP_OP_OPCODE_KIL) + fp->fp_control |= NV40TCL_FP_CONTROL_KIL; + hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); + + if (sat) + hw[0] |= NV40_FP_OP_OUT_SAT; + + if (dst.cc_update) + hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); + + emit_dst(fpc, dst); + emit_src(fpc, 0, s0); + emit_src(fpc, 1, s1); + emit_src(fpc, 2, s2); +} + +static void +nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ + struct nv40_fragment_program *fp = fpc->fp; + + nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + + fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); + fp->samplers |= (1 << unit); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ + struct nv40_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv40_sr(NV40SR_INPUT, + fpc->attrib_map[fsrc->SrcRegister.Index]); + break; + case TGSI_FILE_CONSTANT: + src = constant(fpc, fsrc->SrcRegister.Index, NULL); + break; + case TGSI_FILE_IMMEDIATE: + assert(fsrc->SrcRegister.Index < fpc->nr_imm); + src = fpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index + 1); + if (fpc->high_temp < src.index) + fpc->high_temp = src.index; + break; + /* This is clearly insane, but gallium hands us shaders like this. + * Luckily fragprog results are just temp regs.. + */ + case TGSI_FILE_OUTPUT: + if (fsrc->SrcRegister.Index == fpc->colour_id) + return nv40_sr(NV40SR_OUTPUT, 0); + else + return nv40_sr(NV40SR_OUTPUT, 1); + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { + int idx; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + if (fdst->DstRegister.Index == fpc->colour_id) + return nv40_sr(NV40SR_OUTPUT, 0); + else + return nv40_sr(NV40SR_OUTPUT, 1); + break; + case TGSI_FILE_TEMPORARY: + idx = fdst->DstRegister.Index + 1; + if (fpc->high_temp < idx) + fpc->high_temp = idx; + return nv40_sr(NV40SR_TEMP, idx); + case TGSI_FILE_NULL: + return nv40_sr(NV40SR_NONE, 0); + default: + NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + return nv40_sr(NV40SR_NONE, 0); + } +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, + struct nv40_sreg *src) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, + fsrc->SrcRegisterExtSwz.NegateZ, + fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= (1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= (1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(fpc); + + if (mask) + arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(fpc, 0, STR, *src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv40_sreg one = temp(fpc); + arith(fpc, 0, STR, one, neg_mask, one, none, none); + arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + +static boolean +nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, + const struct tgsi_full_instruction *finst) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg src[3], dst, tmp; + int mask, sat, unit; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + fpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(fpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(fpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + NOUVEAU_MSG("extra src attr %d\n", + fsrc->SrcRegister.Index); + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + case TGSI_FILE_SAMPLER: + unit = fsrc->SrcRegister.Index; + break; + case TGSI_FILE_OUTPUT: + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_CMP: + tmp = temp(fpc); + arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp.cc_update = 1; + arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV40_VP_INST_COND_LT; + arith(fpc, sat, MOV, dst, mask, src[1], none, none); + break; + case TGSI_OPCODE_COS: + arith(fpc, sat, COS, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), + swz(src[1], W, W, W, W), none); + break; + case TGSI_OPCODE_DST: + arith(fpc, sat, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(fpc, sat, EX2, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FLR: + arith(fpc, sat, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(fpc, sat, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_KIL: + arith(fpc, 0, KIL, none, 0, none, none, none); + break; + case TGSI_OPCODE_KILP: + dst = nv40_sr(NV40SR_NONE, 0); + dst.cc_update = 1; + arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; + arith(fpc, 0, KIL, dst, 0, none, none, none); + break; + case TGSI_OPCODE_LG2: + arith(fpc, sat, LG2, dst, mask, src[0], none, none); + break; +// case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LRP: + tmp = temp(fpc); + arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); + arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + break; + case TGSI_OPCODE_MAD: + arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(fpc, sat, MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(fpc); + arith(fpc, 0, LG2, tmp, MASK_X, + swz(src[0], X, X, X, X), none, none); + arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(fpc, sat, EX2, dst, mask, + swz(tmp, X, X, X, X), none, none); + break; + case TGSI_OPCODE_RCP: + arith(fpc, sat, RCP, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_RET: + assert(0); + break; + case TGSI_OPCODE_RFL: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); + arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); + arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, + swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + arith(fpc, sat, MAD, dst, mask, + swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + break; + case TGSI_OPCODE_RSQ: + tmp = temp(fpc); + arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, + abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, EX2, dst, mask, + neg(swz(tmp, X, X, X, X)), none, none); + break; + case TGSI_OPCODE_SCS: + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + break; + case TGSI_OPCODE_SIN: + arith(fpc, sat, SIN, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_SGE: + arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); + break; + case TGSI_OPCODE_TEX: + if (finst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide == + TGSI_EXTSWIZZLE_W) { + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + } else + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXB: + tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_XPD: + tmp = temp(fpc); + arith(fpc, 0, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV40_FP_OP_INPUT_SRC_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_FP_OP_INPUT_SRC_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_FP_OP_INPUT_SRC_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV40_FP_OP_INPUT_SRC_FOGC; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. + SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad input semantic\n"); + return FALSE; + } + + fpc->attrib_map[fdec->u.DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + fpc->depth_id = fdec->u.DeclarationRange.First; + break; + case TGSI_SEMANTIC_COLOR: + fpc->colour_id = fdec->u.DeclarationRange.First; + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + return TRUE; +} + +void +nv40_fragprog_translate(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nv40_fpc *fpc = NULL; + + fpc = CALLOC(1, sizeof(struct nv40_fpc)); + if (!fpc) + return; + fpc->fp = fp; + fpc->high_temp = -1; + fpc->num_regs = 2; + + tgsi_parse_init(&parse, fp->pipe->tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &parse.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_OUTPUT: + if (!nv40_fragprog_parse_decl_output(fpc, fdec)) + goto out_err; + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm; + float vals[4]; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(fpc->nr_imm < MAX_IMM); + + vals[0] = imm->u.ImmediateFloat32[0].Float; + vals[1] = imm->u.ImmediateFloat32[1].Float; + vals[2] = imm->u.ImmediateFloat32[2].Float; + vals[3] = imm->u.ImmediateFloat32[3].Float; + fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + finst = &parse.FullToken.FullInstruction; + if (!nv40_fragprog_parse_instruction(fpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + + /* Terminate final instruction */ + fp->insn[fpc->inst_offset] |= 0x00000001; + + /* Append NOP + END instruction, may or may not be necessary. */ + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + fp->insn[fpc->inst_offset + 0] = 0x00000001; + fp->insn[fpc->inst_offset + 1] = 0x00000000; + fp->insn[fpc->inst_offset + 2] = 0x00000000; + fp->insn[fpc->inst_offset + 3] = 0x00000000; + + fp->translated = TRUE; + fp->on_hw = FALSE; +out_err: + tgsi_parse_free(&parse); + free(fpc); +} + +void +nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp) +{ + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_stateobj *so; + int i; + + if (!fp->translated) { + nv40_fragprog_translate(nv40, fp); + if (!fp->translated) + assert(0); + } + + if (fp->nr_consts) { + float *map = ws->buffer_map(ws, nv40->fragprog.constant_buf, + PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < fp->nr_consts; i++) { + struct nv40_fragment_program_data *fpd = &fp->consts[i]; + uint32_t *p = &fp->insn[fpd->offset]; + uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + + if (!memcmp(p, cb, 4 * sizeof(float))) + continue; + memcpy(p, cb, 4 * sizeof(float)); + fp->on_hw = 0; + } + ws->buffer_unmap(ws, nv40->fragprog.constant_buf); + } + + if (!fp->on_hw) { + const uint32_t le = 1; + uint32_t *map; + + if (!fp->buffer) + fp->buffer = ws->buffer_create(ws, 0x100, 0, + fp->insn_len * 4); + map = ws->buffer_map(ws, fp->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 + for (i = 0; i < fp->insn_len; i++) { + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + } +#endif + + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); + } + } + + ws->buffer_unmap(ws, fp->buffer); + fp->on_hw = TRUE; + } + + so = so_new(4, 1); + so_method(so, nv40->hw->curie, NV40TCL_FP_ADDRESS, 1); + so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); + so_method(so, nv40->hw->curie, NV40TCL_FP_CONTROL, 1); + so_data (so, fp->fp_control); + + so_emit(nv40->nvws, so); + so_ref(so, &fp->so); + so_ref(NULL, &so); + + nv40->fragprog.active = fp; +} + +void +nv40_fragprog_destroy(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + if (fp->insn_len) + free(fp->insn); +} + diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c new file mode 100644 index 00000000000..5af5fbe7465 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -0,0 +1,151 @@ +#include "nv40_context.h" + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV40TCL_TEX_FORMAT_FORMAT_##tf, \ + (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \ + NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \ + NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \ + NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \ +} + +struct nv40_texture_format { + boolean defined; + uint pipe; + int format; + int swizzle; +}; + +static struct nv40_texture_format +nv40_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), + _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), + _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), + _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), + _(U_L8 , L8 , S1, S1, S1, ONE, X, X, X, X), + _(U_A8 , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), + _(U_I8 , L8 , S1, S1, S1, S1, X, X, X, X), + _(U_A8_L8 , A8L8 , S1, S1, S1, S1, X, X, X, Y), + _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), + _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), +// _(RGB_DXT1 , 0x86, S1, S1, S1, ONE, X, Y, Z, W, 0x00, 0x00), +// _(RGBA_DXT1 , 0x86, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), +// _(RGBA_DXT3 , 0x87, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), +// _(RGBA_DXT5 , 0x88, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), + {}, +}; + +static struct nv40_texture_format * +nv40_fragtex_format(uint pipe_format) +{ + struct nv40_texture_format *tf = nv40_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + return NULL; +} + + +static void +nv40_fragtex_build(struct nv40_context *nv40, int unit) +{ + struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; + struct nv40_miptree *nv40mt = nv40->tex_miptree[unit]; + struct pipe_texture *pt = &nv40mt->base; + struct nv40_texture_format *tf; + struct nouveau_stateobj *so; + uint32_t txf, txs, txp; + int swizzled = 0; /*XXX: implement in region code? */ + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + tf = nv40_fragtex_format(pt->format); + if (!tf) + assert(0); + + txf = ps->fmt; + txf |= tf->format | 0x8000; + txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); + + if (1) /* XXX */ + txf |= NV40TCL_TEX_FORMAT_NO_BORDER; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV40TCL_TEX_FORMAT_CUBIC; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= NV40TCL_TEX_FORMAT_DIMS_2D; + break; + case PIPE_TEXTURE_3D: + txf |= NV40TCL_TEX_FORMAT_DIMS_3D; + break; + case PIPE_TEXTURE_1D: + txf |= NV40TCL_TEX_FORMAT_DIMS_1D; + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return; + } + + if (swizzled) { + txp = 0; + } else { + txp = nv40mt->level[0].pitch; + txf |= NV40TCL_TEX_FORMAT_LINEAR; + } + + txs = tf->swizzle; + + so = so_new(16, 2); + so_method(so, nv40->hw->curie, NV40TCL_TEX_OFFSET(unit), 8); + so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, + NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); + so_data (so, ps->wrap); + so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); + so_data (so, txs); + so_data (so, ps->filt | 0x2000 /*voodoo*/); + so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | + pt->height[0]); + so_data (so, ps->bcol); + so_method(so, nv40->hw->curie, NV40TCL_TEX_SIZE1(unit), 1); + so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + + so_emit(nv40->nvws, so); + so_ref (so, &nv40->so_fragtex[unit]); + so_ref (NULL, &so); +} + +void +nv40_fragtex_bind(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog.active; + unsigned samplers, unit; + + samplers = nv40->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so_ref(NULL, &nv40->so_fragtex[unit]); + BEGIN_RING(curie, NV40TCL_TEX_ENABLE(unit), 1); + OUT_RING (0); + } + + samplers = nv40->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + nv40_fragtex_build(nv40, unit); + } + + nv40->fp_samplers = fp->samplers; +} + diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c new file mode 100644 index 00000000000..92e6b3a43df --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -0,0 +1,104 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" + +#include "nv40_context.h" + +static void +nv40_miptree_layout(struct nv40_miptree *nv40mt) +{ + struct pipe_texture *pt = &nv40mt->base; + boolean swizzled = FALSE; + uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint offset = 0; + int nr_faces, l, f; + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else + if (pt->target == PIPE_TEXTURE_3D) { + nr_faces = pt->depth[0]; + } else { + nr_faces = 1; + } + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->depth[l] = depth; + + if (swizzled) + nv40mt->level[l].pitch = pt->width[l] * pt->cpp; + else + nv40mt->level[l].pitch = pt->width[0] * pt->cpp; + nv40mt->level[l].pitch = (nv40mt->level[l].pitch + 63) & ~63; + + nv40mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + nv40mt->level[l].image_offset[f] = offset; + offset += nv40mt->level[l].pitch * pt->height[l]; + } + } + + nv40mt->total_size = offset; +} + +static struct pipe_texture * +nv40_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv40_miptree *mt; + + mt = MALLOC(sizeof(struct nv40_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + nv40_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->total_size); + if (!mt->buffer) { + free(mt); + return NULL; + } + + return &mt->base; +} + +static void +nv40_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + struct pipe_winsys *ws = pipe->winsys; + struct pipe_texture *mt = *pt; + + *pt = NULL; + if (--mt->refcount <= 0) { + struct nv40_miptree *nv40mt = (struct nv40_miptree *)mt; + int l; + + pipe_buffer_reference(ws, &nv40mt->buffer, NULL); + for (l = 0; l <= mt->last_level; l++) { + if (nv40mt->level[l].image_offset) + free(nv40mt->level[l].image_offset); + } + free(nv40mt); + } +} + +void +nv40_init_miptree_functions(struct nv40_context *nv40) +{ + nv40->pipe.texture_create = nv40_miptree_create; + nv40->pipe.texture_release = nv40_miptree_release; +} + diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c new file mode 100644 index 00000000000..8bca2788b93 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -0,0 +1,113 @@ +#include "pipe/p_context.h" + +#include "nv40_context.h" + +struct nv40_query { + struct nouveau_resource *object; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nv40_query * +nv40_query(struct pipe_query *pipe) +{ + return (struct nv40_query *)pipe; +} + +static struct pipe_query * +nv40_query_create(struct pipe_context *pipe, unsigned query_type) +{ + struct nv40_query *q; + + q = CALLOC(1, sizeof(struct nv40_query)); + q->type = query_type; + + return (struct pipe_query *)q; +} + +static void +nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + if (q->object) + nv40->nvws->res_free(&q->object); + free(q); +} + +static void +nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (nv40->nvws->res_alloc(nv40->hw->query_heap, 1, NULL, &q->object)) + assert(0); + nv40->nvws->notifier_reset(nv40->hw->query, q->object->start); + + BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); + OUT_RING (1); + BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); + OUT_RING (1); + + q->ready = FALSE; +} + +static void +nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); + OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | + ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING(); +} + +static boolean +nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64 *result) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + struct nouveau_winsys *nvws = nv40->nvws; + + assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (!q->ready) { + unsigned status; + + status = nvws->notifier_status(nv40->hw->query, + q->object->start); + if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { + if (wait == FALSE) + return FALSE; + nvws->notifier_wait(nv40->hw->query, q->object->start, + NV_NOTIFY_STATE_STATUS_COMPLETED, + 0); + } + + q->result = nvws->notifier_retval(nv40->hw->query, + q->object->start); + q->ready = TRUE; + nvws->res_free(&q->object); + } + + *result = q->result; + return TRUE; +} + +void +nv40_init_query_functions(struct nv40_context *nv40) +{ + nv40->pipe.create_query = nv40_query_create; + nv40->pipe.destroy_query = nv40_query_destroy; + nv40->pipe.begin_query = nv40_query_begin; + nv40->pipe.end_query = nv40_query_end; + nv40->pipe.get_query_result = nv40_query_result; +} diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h new file mode 100644 index 00000000000..5909c70713c --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_shader.h @@ -0,0 +1,554 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30. Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + * In some cases it is possible to put two instructions into one opcode + * slot. The rules for when this is OK is not entirely clear to me yet. + * + * There are separate writemasks and dest temp register fields for each + * grouping of instructions. There is however only one field with the + * ID of a result register. Writing to temp/result regs is selected by + * setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + * The source/dest temp register fields have been extended by 1 bit, to + * give a total of 32 temporary registers. + * + * Relative Addressing + * NV40 can use an address register to index into vertex attribute regs. + * This is done by putting the offset value into INPUT_SRC and setting + * the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * There is a second condition code register on NV40, it's use is enabled + * by setting the COND_REG_SELECT_1 flag. + * + * Texture lookup + * TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT (1 << 30) +/* uncertain.. */ +#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT (1 << 27) +#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV40_VP_INST_SRC2_ABS (1 << 23) +#define NV40_VP_INST_SRC1_ABS (1 << 22) +#define NV40_VP_INST_SRC0_ABS (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) +#define NV40_VP_INST_COND_SHIFT 10 +#define NV40_VP_INST_COND_MASK (0x7 << 10) +# define NV40_VP_INST_COND_FL 0 +# define NV40_VP_INST_COND_LT 1 +# define NV40_VP_INST_COND_EQ 2 +# define NV40_VP_INST_COND_LE 3 +# define NV40_VP_INST_COND_GT 4 +# define NV40_VP_INST_COND_NE 5 +# define NV40_VP_INST_COND_GE 6 +# define NV40_VP_INST_COND_TR 7 +#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 +#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 +#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 +#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 +#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ + NV40_VP_INST_INDEX_INPUT | \ + NV40_VP_INST_COND_REG_SELECT_1 | \ + NV40_VP_INST_ADDR_REG_SELECT_1 | \ + NV40_VP_INST_SRC2_ABS | \ + NV40_VP_INST_SRC1_ABS | \ + NV40_VP_INST_SRC0_ABS | \ + NV40_VP_INST_VEC_DEST_TEMP_MASK | \ + NV40_VP_INST_COND_TEST_ENABLE | \ + NV40_VP_INST_COND_MASK | \ + NV40_VP_INST_COND_SWZ_ALL_MASK | \ + NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 +#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) +# define NV40_VP_INST_OP_NOP 0x00 +# define NV40_VP_INST_OP_MOV 0x01 +# define NV40_VP_INST_OP_MUL 0x02 +# define NV40_VP_INST_OP_ADD 0x03 +# define NV40_VP_INST_OP_MAD 0x04 +# define NV40_VP_INST_OP_DP3 0x05 +# define NV40_VP_INST_OP_DPH 0x06 +# define NV40_VP_INST_OP_DP4 0x07 +# define NV40_VP_INST_OP_DST 0x08 +# define NV40_VP_INST_OP_MIN 0x09 +# define NV40_VP_INST_OP_MAX 0x0A +# define NV40_VP_INST_OP_SLT 0x0B +# define NV40_VP_INST_OP_SGE 0x0C +# define NV40_VP_INST_OP_ARL 0x0D +# define NV40_VP_INST_OP_FRC 0x0E +# define NV40_VP_INST_OP_FLR 0x0F +# define NV40_VP_INST_OP_SEQ 0x10 +# define NV40_VP_INST_OP_SFL 0x11 +# define NV40_VP_INST_OP_SGT 0x12 +# define NV40_VP_INST_OP_SLE 0x13 +# define NV40_VP_INST_OP_SNE 0x14 +# define NV40_VP_INST_OP_STR 0x15 +# define NV40_VP_INST_OP_SSG 0x16 +# define NV40_VP_INST_OP_ARR 0x17 +# define NV40_VP_INST_OP_ARA 0x18 +# define NV40_VP_INST_OP_TXL 0x19 +#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 +#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) +# define NV40_VP_INST_OP_NOP 0x00 +# define NV40_VP_INST_OP_MOV 0x01 +# define NV40_VP_INST_OP_RCP 0x02 +# define NV40_VP_INST_OP_RCC 0x03 +# define NV40_VP_INST_OP_RSQ 0x04 +# define NV40_VP_INST_OP_EXP 0x05 +# define NV40_VP_INST_OP_LOG 0x06 +# define NV40_VP_INST_OP_LIT 0x07 +# define NV40_VP_INST_OP_BRA 0x09 +# define NV40_VP_INST_OP_CAL 0x0B +# define NV40_VP_INST_OP_RET 0x0C +# define NV40_VP_INST_OP_LG2 0x0D +# define NV40_VP_INST_OP_EX2 0x0E +# define NV40_VP_INST_OP_SIN 0x0F +# define NV40_VP_INST_OP_COS 0x10 +# define NV40_VP_INST_OP_PUSHA 0x13 +# define NV40_VP_INST_OP_POPA 0x14 +#define NV40_VP_INST_CONST_SRC_SHIFT 12 +#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT 8 +#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) +# define NV40_VP_INST_IN_POS 0 +# define NV40_VP_INST_IN_WEIGHT 1 +# define NV40_VP_INST_IN_NORMAL 2 +# define NV40_VP_INST_IN_COL0 3 +# define NV40_VP_INST_IN_COL1 4 +# define NV40_VP_INST_IN_FOGC 5 +# define NV40_VP_INST_IN_TC0 8 +# define NV40_VP_INST_IN_TC(n) (8+n) +#define NV40_VP_INST_SRC0H_SHIFT 0 +#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ + NV40_VP_INST_VEC_OPCODE_MASK | \ + NV40_VP_INST_SCA_OPCODE_MASK | \ + NV40_VP_INST_CONST_SRC_MASK | \ + NV40_VP_INST_INPUT_SRC_MASK | \ + NV40_VP_INST_SRC0H_MASK \ + ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT 23 +#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT 6 +#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT 0 +#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT 0 +#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT 29 +#define NV40_VP_INST_IADDRL_MASK (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT 21 +#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) +# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) +# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) +# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) +# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) +# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) +# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) +# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) +# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) +#define NV40_VP_INST_SCA_RESULT (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT 2 +#define NV40_VP_INST_DEST_MASK (31 << 2) +# define NV40_VP_INST_DEST_POS 0 +# define NV40_VP_INST_DEST_COL0 1 +# define NV40_VP_INST_DEST_COL1 2 +# define NV40_VP_INST_DEST_BFC0 3 +# define NV40_VP_INST_DEST_BFC1 4 +# define NV40_VP_INST_DEST_FOGC 5 +# define NV40_VP_INST_DEST_PSZ 6 +# define NV40_VP_INST_DEST_TC0 7 +# define NV40_VP_INST_DEST_TC(n) (7+n) +# define NV40_VP_INST_DEST_TEMP 0x1F +#define NV40_VP_INST_INDEX_CONST (1 << 1) +#define NV40_VP_INST_LAST (1 << 0) +#define NV40_VP_INST3_KNOWN ( \ + NV40_VP_INST_SRC2L_MASK |\ + NV40_VP_INST_SCA_WRITEMASK_MASK |\ + NV40_VP_INST_VEC_WRITEMASK_MASK |\ + NV40_VP_INST_SCA_DEST_TEMP_MASK |\ + NV40_VP_INST_DEST_MASK |\ + NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ +#define NV40_VP_SRC0_HIGH_SHIFT 9 +#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK 0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT 11 +#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 +#define NV40_VP_SRC2_LOW_MASK 0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT 14 +#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT 12 +#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT 10 +#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT 8 +#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 +#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT 0 +#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) +# define NV40_VP_SRC_REG_TYPE_UNK0 0 +# define NV40_VP_SRC_REG_TYPE_TEMP 1 +# define NV40_VP_SRC_REG_TYPE_INPUT 2 +# define NV40_VP_SRC_REG_TYPE_CONST 3 + + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, + * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and + * SWIZZLE_ONE. + * + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as + * SWIZZLE_ZERO is implemented simply by not writing to the relevant components + * of the destination. + * + * Looping + * Loops appear to be fairly expensive on NV40 at least, the proprietary + * driver goes to a lot of effort to avoid using the native looping + * instructions. If the total number of *executed* instructions between + * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + * The maximum loop count is 255. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + * DP2 - MUL + ADD + * NRM + */ + +//== Opcode / Destination selection == +#define NV40_FP_OP_PROGRAM_END (1 << 0) +#define NV40_FP_OP_OUT_REG_SHIFT 1 +#define NV40_FP_OP_OUT_REG_MASK (63 << 1) +/* Needs to be set when writing outputs to get expected result.. */ +#define NV40_FP_OP_OUT_REG_HALF (1 << 7) +#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NV40_FP_OP_OUTMASK_SHIFT 9 +#define NV40_FP_OP_OUTMASK_MASK (0xF << 9) +# define NV40_FP_OP_OUT_X (1 << 9) +# define NV40_FP_OP_OUT_Y (1 <<10) +# define NV40_FP_OP_OUT_Z (1 <<11) +# define NV40_FP_OP_OUT_W (1 <<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NV40_FP_OP_INPUT_SRC_SHIFT 13 +#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NV40_FP_OP_INPUT_SRC_POSITION 0x0 +# define NV40_FP_OP_INPUT_SRC_COL0 0x1 +# define NV40_FP_OP_INPUT_SRC_COL1 0x2 +# define NV40_FP_OP_INPUT_SRC_FOGC 0x3 +# define NV40_FP_OP_INPUT_SRC_TC0 0x4 +# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +# define NV40_FP_OP_INPUT_SRC_FACING 0xE +#define NV40_FP_OP_TEX_UNIT_SHIFT 17 +#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17) +#define NV40_FP_OP_PRECISION_SHIFT 22 +#define NV40_FP_OP_PRECISION_MASK (3 << 22) +# define NV40_FP_PRECISION_FP32 0 +# define NV40_FP_PRECISION_FP16 1 +# define NV40_FP_PRECISION_FX12 2 +#define NV40_FP_OP_OPCODE_SHIFT 24 +#define NV40_FP_OP_OPCODE_MASK (0x3F << 24) +# define NV40_FP_OP_OPCODE_NOP 0x00 +# define NV40_FP_OP_OPCODE_MOV 0x01 +# define NV40_FP_OP_OPCODE_MUL 0x02 +# define NV40_FP_OP_OPCODE_ADD 0x03 +# define NV40_FP_OP_OPCODE_MAD 0x04 +# define NV40_FP_OP_OPCODE_DP3 0x05 +# define NV40_FP_OP_OPCODE_DP4 0x06 +# define NV40_FP_OP_OPCODE_DST 0x07 +# define NV40_FP_OP_OPCODE_MIN 0x08 +# define NV40_FP_OP_OPCODE_MAX 0x09 +# define NV40_FP_OP_OPCODE_SLT 0x0A +# define NV40_FP_OP_OPCODE_SGE 0x0B +# define NV40_FP_OP_OPCODE_SLE 0x0C +# define NV40_FP_OP_OPCODE_SGT 0x0D +# define NV40_FP_OP_OPCODE_SNE 0x0E +# define NV40_FP_OP_OPCODE_SEQ 0x0F +# define NV40_FP_OP_OPCODE_FRC 0x10 +# define NV40_FP_OP_OPCODE_FLR 0x11 +# define NV40_FP_OP_OPCODE_KIL 0x12 +# define NV40_FP_OP_OPCODE_PK4B 0x13 +# define NV40_FP_OP_OPCODE_UP4B 0x14 +/* DDX/DDY can only write to XY */ +# define NV40_FP_OP_OPCODE_DDX 0x15 +# define NV40_FP_OP_OPCODE_DDY 0x16 +# define NV40_FP_OP_OPCODE_TEX 0x17 +# define NV40_FP_OP_OPCODE_TXP 0x18 +# define NV40_FP_OP_OPCODE_TXD 0x19 +# define NV40_FP_OP_OPCODE_RCP 0x1A +# define NV40_FP_OP_OPCODE_EX2 0x1C +# define NV40_FP_OP_OPCODE_LG2 0x1D +# define NV40_FP_OP_OPCODE_STR 0x20 +# define NV40_FP_OP_OPCODE_SFL 0x21 +# define NV40_FP_OP_OPCODE_COS 0x22 +# define NV40_FP_OP_OPCODE_SIN 0x23 +# define NV40_FP_OP_OPCODE_PK2H 0x24 +# define NV40_FP_OP_OPCODE_UP2H 0x25 +# define NV40_FP_OP_OPCODE_PK4UB 0x27 +# define NV40_FP_OP_OPCODE_UP4UB 0x28 +# define NV40_FP_OP_OPCODE_PK2US 0x29 +# define NV40_FP_OP_OPCODE_UP2US 0x2A +# define NV40_FP_OP_OPCODE_DP2A 0x2E +# define NV40_FP_OP_OPCODE_TXL 0x2F +# define NV40_FP_OP_OPCODE_TXB 0x31 +# define NV40_FP_OP_OPCODE_DIV 0x3A +# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +# define NV40_FP_OP_BRA_OPCODE_BRK 0x0 +# define NV40_FP_OP_BRA_OPCODE_CAL 0x1 +# define NV40_FP_OP_BRA_OPCODE_IF 0x2 +# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 +# define NV40_FP_OP_BRA_OPCODE_REP 0x4 +# define NV40_FP_OP_BRA_OPCODE_RET 0x5 +#define NV40_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NV40_FP_OP_OUT_ABS (1 << 29) +#define NV40_FP_OP_COND_SWZ_W_SHIFT 27 +#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NV40_FP_OP_COND_SWZ_X_SHIFT 21 +#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NV40_FP_OP_COND_SHIFT 18 +#define NV40_FP_OP_COND_MASK (0x07 << 18) +# define NV40_FP_OP_COND_FL 0 +# define NV40_FP_OP_COND_LT 1 +# define NV40_FP_OP_COND_EQ 2 +# define NV40_FP_OP_COND_LE 3 +# define NV40_FP_OP_COND_GT 4 +# define NV40_FP_OP_COND_NE 5 +# define NV40_FP_OP_COND_GE 6 +# define NV40_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) +#define NV40_FP_OP_DST_SCALE_SHIFT 28 +#define NV40_FP_OP_DST_SCALE_MASK (3 << 28) +#define NV40_FP_OP_DST_SCALE_1X 0 +#define NV40_FP_OP_DST_SCALE_2X 1 +#define NV40_FP_OP_DST_SCALE_4X 2 +#define NV40_FP_OP_DST_SCALE_8X 3 +#define NV40_FP_OP_DST_SCALE_INV_2X 5 +#define NV40_FP_OP_DST_SCALE_INV_4X 6 +#define NV40_FP_OP_DST_SCALE_INV_8X 7 + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT 19 +#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 +#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 +#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT 2 +#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT 2 +#define NV40_FP_OP_IADDR_MASK (0xFF << 2) + +/* SRC1 REP + * I have no idea why there are 3 count values here.. but they + * have always been filled with the same value in my tests so + * far.. + */ +#define NV40_FP_OP_REP_COUNT1_SHIFT 2 +#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT 10 +#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT 19 +#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT 2 +#define NV40_FP_OP_END_ID_MASK (0xFF << 2) + +// SRC2 high-order +#define NV40_FP_OP_INDEX_INPUT (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 +#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) + +//== Register selection == +#define NV40_FP_REG_TYPE_SHIFT 0 +#define NV40_FP_REG_TYPE_MASK (3 << 0) +# define NV40_FP_REG_TYPE_TEMP 0 +# define NV40_FP_REG_TYPE_INPUT 1 +# define NV40_FP_REG_TYPE_CONST 2 +#define NV40_FP_REG_SRC_SHIFT 2 +#define NV40_FP_REG_SRC_MASK (63 << 2) +#define NV40_FP_REG_SRC_HALF (1 << 8) +#define NV40_FP_REG_SWZ_ALL_SHIFT 9 +#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NV40_FP_REG_SWZ_X_SHIFT 9 +#define NV40_FP_REG_SWZ_X_MASK (3 << 9) +#define NV40_FP_REG_SWZ_Y_SHIFT 11 +#define NV40_FP_REG_SWZ_Y_MASK (3 << 11) +#define NV40_FP_REG_SWZ_Z_SHIFT 13 +#define NV40_FP_REG_SWZ_Z_MASK (3 << 13) +#define NV40_FP_REG_SWZ_W_SHIFT 15 +#define NV40_FP_REG_SWZ_W_MASK (3 << 15) +# define NV40_FP_SWIZZLE_X 0 +# define NV40_FP_SWIZZLE_Y 1 +# define NV40_FP_SWIZZLE_Z 2 +# define NV40_FP_SWIZZLE_W 3 +#define NV40_FP_REG_NEGATE (1 << 17) + +#define NV40SR_NONE 0 +#define NV40SR_OUTPUT 1 +#define NV40SR_INPUT 2 +#define NV40SR_TEMP 3 +#define NV40SR_CONST 4 + +struct nv40_sreg { + int type; + int index; + + int dst_scale; + + int negate; + int abs; + int swz[4]; + + int cc_update; + int cc_update_reg; + int cc_test; + int cc_test_reg; + int cc_swz[4]; +}; + +static INLINE struct nv40_sreg +nv40_sr(int type, int index) +{ + struct nv40_sreg temp = { + .type = type, + .index = index, + .dst_scale = DEF_SCALE, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = DEF_CTEST, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + }; + return temp; +} + +static INLINE struct nv40_sreg +nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) +{ + struct nv40_sreg dst = src; + + dst.swz[SWZ_X] = src.swz[x]; + dst.swz[SWZ_Y] = src.swz[y]; + dst.swz[SWZ_Z] = src.swz[z]; + dst.swz[SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nv40_sreg +nv40_sr_neg(struct nv40_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nv40_sreg +nv40_sr_abs(struct nv40_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nv40_sreg +nv40_sr_scale(struct nv40_sreg src, int scale) +{ + src.dst_scale = scale; + return src; +} + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c new file mode 100644 index 00000000000..713f31dbb12 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -0,0 +1,823 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +static void * +nv40_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_grobj *curie = nv40->hw->curie; + struct nouveau_stateobj *so = so_new(16, 0); + + if (cso->blend_enable) { + so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); + so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | + nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rgb_dst_factor)); + so_method(so, curie, NV40TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 | + nvgl_blend_eqn(cso->rgb_func)); + } else { + so_method(so, curie, NV40TCL_BLEND_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, curie, NV40TCL_COLOR_MASK, 1); + so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + + if (cso->logicop_enable) { + so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 1 : 0); + + return (void *)so; +} + +static void +nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + so_ref(hwcso, &nv40->so_blend); + nv40->dirty |= NV40_NEW_BLEND; +} + +static void +nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nouveau_stateobj *so = hwcso; + + so_ref(NULL, &so); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV40TCL_TEX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV40TCL_TEX_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; + break; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV40TCL_TEX_WRAP_S_REPEAT; + break; + } + + return ret >> NV40TCL_TEX_WRAP_S_SHIFT; +} + +static void * +nv40_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv40_sampler_state *ps; + uint32_t filter = 0; + + ps = MALLOC(sizeof(struct nv40_sampler_state)); + + ps->fmt = 0; + if (!cso->normalized_coords) + ps->fmt |= NV40TCL_TEX_FORMAT_RECT; + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) | + (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT)); + + ps->en = 0; + if (cso->max_anisotropy >= 2.0) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + + if (cso->max_anisotropy >= 16.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; + } else { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; + } + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MAG_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV40TCL_TEX_FILTER_MAG_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST; + break; + } + break; + } + + ps->filt = filter; + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 7; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 19; + } + + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + } + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv40_sampler_state_bind(struct pipe_context *pipe, unsigned unit, + void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_sampler_state *ps = hwcso; + + nv40->tex_sampler[unit] = ps; + nv40->dirty_samplers |= (1 << unit); +} + +static void +nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void +nv40_set_sampler_texture(struct pipe_context *pipe, unsigned unit, + struct pipe_texture *miptree) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->tex_miptree[unit] = (struct nv40_miptree *)miptree; + nv40->dirty_samplers |= (1 << unit); +} + +static void * +nv40_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_stateobj *so = so_new(32, 0); + + /*XXX: ignored: + * light_twoside + * offset_cw/ccw -nohw + * scissor + * point_smooth -nohw + * multisample + * offset_units / offset_scale + */ + + so_method(so, nv40->hw->curie, NV40TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : + NV40TCL_SHADE_MODEL_SMOOTH); + + so_method(so, nv40->hw->curie, NV40TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, nv40->hw->curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + so_method(so, nv40->hw->curie, NV40TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, nv40->hw->curie, NV40TCL_POLYGON_MODE_FRONT, 6); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV40TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, 0); + break; + } + so_data(so, NV40TCL_FRONT_FACE_CCW); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV40TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, 0); + break; + } + so_data(so, NV40TCL_FRONT_FACE_CW); + } + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, cso->cull_mode != PIPE_WINDING_NONE ? 1 : 0); + + so_method(so, nv40->hw->curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, nv40->hw->curie, NV40TCL_POINT_SPRITE, 1); + if (cso->point_sprite) { + unsigned psctl = (1 << 0), i; + + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + psctl |= (1 << (8 + i)); + } + + so_data(so, psctl); + } else { + so_data(so, 0); + } + + return (void *)so; +} + +static void +nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + so_ref(hwcso, &nv40->so_rast); + nv40->dirty |= NV40_NEW_RAST; +} + +static void +nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nouveau_stateobj *so = hwcso; + + so_ref(NULL, &so); +} + +static void * +nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_stateobj *so = so_new(32, 0); + + so_method(so, nv40->hw->curie, NV40TCL_DEPTH_FUNC, 3); + so_data (so, nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 1 : 0); + + so_method(so, nv40->hw->curie, NV40TCL_ALPHA_TEST_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref)); + + if (cso->stencil[0].enabled) { + so_method(so, nv40->hw->curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].write_mask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].value_mask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, nv40->hw->curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, nv40->hw->curie, NV40TCL_STENCIL_BACK_ENABLE, 8); + so_data (so, cso->stencil[1].enabled ? 1 : 0); + so_data (so, cso->stencil[1].write_mask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].value_mask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, nv40->hw->curie, NV40TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + return (void *)so; +} + +static void +nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + so_ref(hwcso, &nv40->so_zsa); + nv40->dirty |= NV40_NEW_ZSA; +} + +static void +nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nouveau_stateobj *so = hwcso; + + so_ref(NULL, &so); +} + +static void * +nv40_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv40_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nv40_vertex_program)); + vp->pipe = cso; + + return (void *)vp; +} + +static void +nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_vertex_program *vp = hwcso; + + nv40->vertprog.current = vp; + nv40->dirty |= NV40_NEW_VERTPROG; +} + +static void +nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_vertex_program *vp = hwcso; + + nv40_vertprog_destroy(nv40, vp); + free(vp); +} + +static void * +nv40_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv40_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv40_fragment_program)); + fp->pipe = cso; + + return (void *)fp; +} + +static void +nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_fragment_program *fp = hwcso; + + nv40->fragprog.current = fp; + nv40->dirty |= NV40_NEW_FRAGPROG; +} + +static void +nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_fragment_program *fp = hwcso; + + nv40_fragprog_destroy(nv40, fp); + free(fp); +} + +static void +nv40_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_stateobj *so = so_new(2, 0); + + so_method(so, nv40->hw->curie, NV40TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nv40->so_bcol); + so_ref(NULL, &so); + nv40->dirty |= NV40_NEW_BCOL; +} + +static void +nv40_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + if (shader == PIPE_SHADER_VERTEX) { + nv40->vertprog.constant_buf = buf->buffer; + nv40->dirty |= NV40_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv40->fragprog.constant_buf = buf->buffer; + nv40->dirty |= NV40_NEW_FRAGPROG; + } +} + +static void +nv40_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct pipe_surface *rt[4], *zeta; + uint32_t rt_enable, rt_format, w, h; + int i, colour_format = 0, zeta_format = 0; + struct nouveau_stateobj *so = so_new(64, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + + rt_enable = 0; + for (i = 0; i < 4; i++) { + if (!fb->cbufs[i]) + continue; + + if (colour_format) { + assert(w == fb->cbufs[i]->width); + assert(h == fb->cbufs[i]->height); + assert(colour_format == fb->cbufs[i]->format); + } else { + w = fb->cbufs[i]->width; + h = fb->cbufs[i]->height; + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); + rt[i] = fb->cbufs[i]; + } + } + + if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | + NV40TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV40TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case 0: + rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { + so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR0, 1); + so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->hw->curie, NV40TCL_COLOR0_PITCH, 2); + so_data (so, rt[0]->pitch * rt[0]->cpp); + so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { + so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR1, 1); + so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->hw->curie, NV40TCL_COLOR1_OFFSET, 2); + so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_data (so, rt[1]->pitch * rt[1]->cpp); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { + so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR2, 1); + so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->hw->curie, NV40TCL_COLOR2_OFFSET, 1); + so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->hw->curie, NV40TCL_COLOR2_PITCH, 1); + so_data (so, rt[2]->pitch * rt[2]->cpp); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { + so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR3, 1); + so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->hw->curie, NV40TCL_COLOR3_OFFSET, 1); + so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->hw->curie, NV40TCL_COLOR3_PITCH, 1); + so_data (so, rt[3]->pitch * rt[3]->cpp); + } + + if (zeta_format) { + so_method(so, nv40->hw->curie, NV40TCL_DMA_ZETA, 1); + so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->hw->curie, NV40TCL_ZETA_OFFSET, 1); + so_reloc (so, zeta->buffer, zeta->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->hw->curie, NV40TCL_ZETA_PITCH, 1); + so_data (so, zeta->pitch * zeta->cpp); + } + + so_method(so, nv40->hw->curie, NV40TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, nv40->hw->curie, NV40TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + + so_ref(so, &nv40->so_framebuffer); + so_ref(NULL, &so); + nv40->dirty |= NV40_NEW_FB; +} + +static void +nv40_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_stateobj *so = so_new(33, 0); + unsigned i; + + so_method(so, nv40->hw->curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, stipple->stipple[i]); + + so_ref(so, &nv40->so_stipple); + so_ref(NULL, &so); + nv40->dirty |= NV40_NEW_STIPPLE; +} + +static void +nv40_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_stateobj *so = so_new(3, 0); + + so_method(so, nv40->hw->curie, NV40TCL_SCISSOR_HORIZ, 2); + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + + so_ref(so, &nv40->so_scissor); + so_ref(NULL, &so); + nv40->dirty |= NV40_NEW_SCISSOR; +} + +static void +nv40_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_stateobj *so = so_new(9, 0); + + so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + + so_ref(so, &nv40->so_viewport); + so_ref(NULL, &so); + nv40->dirty |= NV40_NEW_VIEWPORT; +} + +static void +nv40_set_vertex_buffer(struct pipe_context *pipe, unsigned index, + const struct pipe_vertex_buffer *vb) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->vtxbuf[index] = *vb; + + nv40->dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_vertex_element(struct pipe_context *pipe, unsigned index, + const struct pipe_vertex_element *ve) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->vtxelt[index] = *ve; + + nv40->dirty |= NV40_NEW_ARRAYS; +} + +void +nv40_init_state_functions(struct nv40_context *nv40) +{ + nv40->pipe.create_blend_state = nv40_blend_state_create; + nv40->pipe.bind_blend_state = nv40_blend_state_bind; + nv40->pipe.delete_blend_state = nv40_blend_state_delete; + + nv40->pipe.create_sampler_state = nv40_sampler_state_create; + nv40->pipe.bind_sampler_state = nv40_sampler_state_bind; + nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; + nv40->pipe.set_sampler_texture = nv40_set_sampler_texture; + + nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; + nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; + nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; + + nv40->pipe.create_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_create; + nv40->pipe.bind_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_bind; + nv40->pipe.delete_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_delete; + + nv40->pipe.create_vs_state = nv40_vp_state_create; + nv40->pipe.bind_vs_state = nv40_vp_state_bind; + nv40->pipe.delete_vs_state = nv40_vp_state_delete; + + nv40->pipe.create_fs_state = nv40_fp_state_create; + nv40->pipe.bind_fs_state = nv40_fp_state_bind; + nv40->pipe.delete_fs_state = nv40_fp_state_delete; + + nv40->pipe.set_blend_color = nv40_set_blend_color; + nv40->pipe.set_clip_state = nv40_set_clip_state; + nv40->pipe.set_constant_buffer = nv40_set_constant_buffer; + nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state; + nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple; + nv40->pipe.set_scissor_state = nv40_set_scissor_state; + nv40->pipe.set_viewport_state = nv40_set_viewport_state; + + nv40->pipe.set_vertex_buffer = nv40_set_vertex_buffer; + nv40->pipe.set_vertex_element = nv40_set_vertex_element; +} + diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h new file mode 100644 index 00000000000..e82ab9de98a --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -0,0 +1,80 @@ +#ifndef __NV40_STATE_H__ +#define __NV40_STATE_H__ + +#include "pipe/p_state.h" + +struct nv40_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv40_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv40_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv40_vertex_program { + const struct pipe_shader_state *pipe; + + boolean translated; + struct nv40_vertex_program_exec *insns; + unsigned nr_insns; + struct nv40_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; +}; + +struct nv40_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv40_fragment_program { + const struct pipe_shader_state *pipe; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv40_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + struct nouveau_stateobj *so; +}; + +struct nv40_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c new file mode 100644 index 00000000000..a10c9955480 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -0,0 +1,77 @@ +#include "nv40_context.h" +#include "nv40_state.h" + +/* Emit relocs for every referenced buffer. + * + * This is to ensure the bufmgr has an accurate idea of how + * the buffer is used. These relocs appear in the push buffer as + * NOPs, and will only be turned into state changes if a buffer + * actually moves. + */ +static void +nv40_state_emit_dummy_relocs(struct nv40_context *nv40) +{ + unsigned i; + + so_emit_reloc_markers(nv40->nvws, nv40->so_framebuffer); + for (i = 0; i < 16; i++) { + if (!(nv40->fp_samplers & (1 << i))) + continue; + so_emit_reloc_markers(nv40->nvws, nv40->so_fragtex[i]); + } + so_emit_reloc_markers(nv40->nvws, nv40->fragprog.active->so); +} + +void +nv40_emit_hw_state(struct nv40_context *nv40) +{ + if (nv40->dirty & NV40_NEW_FB) + so_emit(nv40->nvws, nv40->so_framebuffer); + + if (nv40->dirty & NV40_NEW_BLEND) + so_emit(nv40->nvws, nv40->so_blend); + + if (nv40->dirty & NV40_NEW_RAST) + so_emit(nv40->nvws, nv40->so_rast); + + if (nv40->dirty & NV40_NEW_ZSA) + so_emit(nv40->nvws, nv40->so_zsa); + + if (nv40->dirty & NV40_NEW_BCOL) + so_emit(nv40->nvws, nv40->so_bcol); + + if (nv40->dirty & NV40_NEW_SCISSOR) + so_emit(nv40->nvws, nv40->so_scissor); + + if (nv40->dirty & NV40_NEW_VIEWPORT) + so_emit(nv40->nvws, nv40->so_viewport); + + if (nv40->dirty & NV40_NEW_STIPPLE) + so_emit(nv40->nvws, nv40->so_stipple); + + if (nv40->dirty & NV40_NEW_FRAGPROG) { + nv40_fragprog_bind(nv40, nv40->fragprog.current); + /*XXX: clear NV40_NEW_FRAGPROG if no new program uploaded */ + } + + if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) { + nv40_fragtex_bind(nv40); + + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (2); + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (1); + nv40->dirty &= ~NV40_NEW_FRAGPROG; + } + + if (nv40->dirty & NV40_NEW_VERTPROG) { + nv40_vertprog_bind(nv40, nv40->vertprog.current); + nv40->dirty &= ~NV40_NEW_VERTPROG; + } + + nv40->dirty_samplers = 0; + nv40->dirty = 0; + + nv40_state_emit_dummy_relocs(nv40); +} + diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c new file mode 100644 index 00000000000..9726ab4e4dc --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -0,0 +1,137 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nv40_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/p_tile.h" + +static boolean +nv40_surface_format_supported(struct pipe_context *pipe, + enum pipe_format format, uint type) +{ + switch (type) { + case PIPE_SURFACE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + break; + case PIPE_TEXTURE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + } + break; + default: + assert(0); + }; + + return FALSE; +} + +static struct pipe_surface * +nv40_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv40_miptree *nv40mt = (struct nv40_miptree *)pt; + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + pipe_buffer_reference(ws, &ps->buffer, nv40mt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = nv40mt->level[level].pitch / ps->cpp; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv40mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = nv40mt->level[level].image_offset[zslice]; + } else { + ps->offset = nv40mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv40_surface_copy(struct pipe_context *pipe, unsigned do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_winsys *nvws = nv40->nvws; + + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); +} + +static void +nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_winsys *nvws = nv40->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +void +nv40_init_surface_functions(struct nv40_context *nv40) +{ + nv40->pipe.is_format_supported = nv40_surface_format_supported; + nv40->pipe.get_tex_surface = nv40_get_tex_surface; + nv40->pipe.surface_copy = nv40_surface_copy; + nv40->pipe.surface_fill = nv40_surface_fill; +} diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c new file mode 100644 index 00000000000..fa827ef0c5d --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -0,0 +1,424 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +static INLINE int +nv40_vbo_ncomp(uint format) +{ + int ncomp = 0; + + if (pf_size_x(format)) ncomp++; + if (pf_size_y(format)) ncomp++; + if (pf_size_z(format)) ncomp++; + if (pf_size_w(format)) ncomp++; + + return ncomp; +} + +static INLINE int +nv40_vbo_type(uint format) +{ + switch (pf_type(format)) { + case PIPE_FORMAT_TYPE_FLOAT: + return NV40TCL_VTXFMT_TYPE_FLOAT; + case PIPE_FORMAT_TYPE_UNORM: + return NV40TCL_VTXFMT_TYPE_UBYTE; + default: + NOUVEAU_ERR("Unknown format 0x%08x\n", format); + return NV40TCL_VTXFMT_TYPE_FLOAT; + } +} + +static boolean +nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib, + struct pipe_vertex_element *ve, + struct pipe_vertex_buffer *vb) +{ + struct pipe_winsys *ws = nv40->pipe.winsys; + int type, ncomp; + void *map; + + type = nv40_vbo_type(ve->src_format); + ncomp = nv40_vbo_ncomp(ve->src_format); + + map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map += vb->buffer_offset + ve->src_offset; + + switch (type) { + case NV40TCL_VTXFMT_TYPE_FLOAT: + { + float *v = map; + + BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); + switch (ncomp) { + case 4: + OUT_RINGf(v[0]); + OUT_RINGf(v[1]); + OUT_RINGf(v[2]); + OUT_RINGf(v[3]); + break; + case 3: + OUT_RINGf(v[0]); + OUT_RINGf(v[1]); + OUT_RINGf(v[2]); + OUT_RINGf(1.0); + break; + case 2: + OUT_RINGf(v[0]); + OUT_RINGf(v[1]); + OUT_RINGf(0.0); + OUT_RINGf(1.0); + break; + case 1: + OUT_RINGf(v[0]); + OUT_RINGf(0.0); + OUT_RINGf(0.0); + OUT_RINGf(1.0); + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + } + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + + ws->buffer_unmap(ws, vb->buffer); + + return TRUE; +} + +static void +nv40_vbo_arrays_update(struct nv40_context *nv40, struct pipe_buffer *ib, + unsigned ib_format) +{ + struct nv40_vertex_program *vp = nv40->vertprog.active; + struct nouveau_stateobj *vtxbuf, *vtxfmt; + unsigned inputs, hw, num_hw; + unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + inputs = vp->ir; + for (hw = 0; hw < 16 && inputs; hw++) { + if (inputs & (1 << hw)) { + num_hw = hw; + inputs &= ~(1 << hw); + } + } + num_hw++; + + vtxbuf = so_new(20, 18); + so_method(vtxbuf, nv40->hw->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); + vtxfmt = so_new(17, 0); + so_method(vtxfmt, nv40->hw->curie, NV40TCL_VTXFMT(0), num_hw); + + inputs = vp->ir; + for (hw = 0; hw < num_hw; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + + if (!(inputs & (1 << hw))) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); + continue; + } + + ve = &nv40->vtxelt[hw]; + vb = &nv40->vtxbuf[ve->vertex_buffer_index]; + + if (!vb->pitch && nv40_vbo_static_attrib(nv40, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); + continue; + } + + so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV40TCL_VTXBUF_ADDRESS_DMA1); + so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) | + (nv40_vbo_ncomp(ve->src_format) << + NV40TCL_VTXFMT_SIZE_SHIFT) | + nv40_vbo_type(ve->src_format))); + } + + if (ib) { + so_method(vtxbuf, nv40->hw->curie, NV40TCL_IDXBUF_ADDRESS, 2); + so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, + 0, NV40TCL_IDXBUF_FORMAT_DMA1); + } + + so_emit(nv40->nvws, vtxfmt); + so_emit(nv40->nvws, vtxbuf); + so_ref (vtxbuf, &nv40->so_vtxbuf); + so_ref (NULL, &vtxbuf); + so_ref (NULL, &vtxfmt); +} + +static boolean +nv40_vbo_validate_state(struct nv40_context *nv40, + struct pipe_buffer *ib, unsigned ib_format) +{ + unsigned vdn = nv40->dirty & NV40_NEW_ARRAYS; + + nv40_emit_hw_state(nv40); + if (vdn || ib) { + nv40_vbo_arrays_update(nv40, ib, ib_format); + nv40->dirty &= ~NV40_NEW_ARRAYS; + } + + so_emit_reloc_markers(nv40->nvws, nv40->so_vtxbuf); + + BEGIN_RING(curie, 0x1710, 1); + OUT_RING (0); /* vtx cache flush */ + + return TRUE; +} + +boolean +nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, + unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + unsigned nr; + boolean ret; + + ret = nv40_vbo_validate_state(nv40, NULL, 0); + if (!ret) { + NOUVEAU_ERR("state validate failed\n"); + return FALSE; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (count & 0xff); + if (nr) { + BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = count >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + pipe->flush(pipe, 0); + return TRUE; +} + +static INLINE void +nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, + unsigned start, unsigned count) +{ + uint8_t *elts = (uint8_t *)ib + start; + int push, i; + + if (count & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; count--; + } + + while (count) { + push = MIN2(count, 2047 * 2); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + count -= push; + elts += push; + } +} + +static INLINE void +nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, + unsigned start, unsigned count) +{ + uint16_t *elts = (uint16_t *)ib + start; + int push, i; + + if (count & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; count--; + } + + while (count) { + push = MIN2(count, 2047 * 2); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + count -= push; + elts += push; + } +} + +static INLINE void +nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, + unsigned start, unsigned count) +{ + uint32_t *elts = (uint32_t *)ib + start; + int push; + + while (count) { + push = MIN2(count, 2047); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (elts, push); + + count -= push; + elts += push; + } +} + +static boolean +nv40_draw_elements_inline(struct pipe_context *pipe, + struct pipe_buffer *ib, unsigned ib_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + boolean ret; + void *map; + + ret = nv40_vbo_validate_state(nv40, NULL, 0); + if (!ret) { + NOUVEAU_ERR("state validate failed\n"); + return FALSE; + } + + map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + if (!ib) { + NOUVEAU_ERR("failed mapping ib\n"); + return FALSE; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + switch (ib_size) { + case 1: + nv40_draw_elements_u08(nv40, map, start, count); + break; + case 2: + nv40_draw_elements_u16(nv40, map, start, count); + break; + case 4: + nv40_draw_elements_u32(nv40, map, start, count); + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + break; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + ws->buffer_unmap(ws, ib); + + return TRUE; +} + +static boolean +nv40_draw_elements_vbo(struct pipe_context *pipe, + struct pipe_buffer *ib, unsigned ib_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + unsigned nr, type; + boolean ret; + + switch (ib_size) { + case 2: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + return FALSE; + } + + ret = nv40_vbo_validate_state(nv40, ib, type); + if (!ret) { + NOUVEAU_ERR("failed state validation\n"); + return FALSE; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (count & 0xff); + if (nr) { + BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = count >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + return TRUE; +} + +boolean +nv40_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + /* 0x4497 doesn't support real index buffers, and there doesn't appear + * to be support on any chipset for 8-bit indices. + */ + if (nv40->hw->curie->grclass == NV44TCL || indexSize == 1) { + nv40_draw_elements_inline(pipe, indexBuffer, indexSize, + mode, start, count); + } else { + nv40_draw_elements_vbo(pipe, indexBuffer, indexSize, + mode, start, count); + } + + pipe->flush(pipe, 0); + return TRUE; +} + + diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c new file mode 100644 index 00000000000..9f4738b8308 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -0,0 +1,790 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 2. Arb. swz/negation + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) + +struct nv40_vpc { + struct nv40_vertex_program *vp; + + struct nv40_vertex_program_exec *vpi; + + unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; + + int high_temp; + int temp_temp_count; + + struct nv40_sreg *imm; + unsigned nr_imm; +}; + +static struct nv40_sreg +temp(struct nv40_vpc *vpc) +{ + int idx; + + idx = vpc->temp_temp_count++; + idx += vpc->high_temp + 1; + return nv40_sr(NV40SR_TEMP, idx); +} + +static struct nv40_sreg +constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nv40_vertex_program *vp = vpc->vp; + struct nv40_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nv40_sr(NV40SR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +{ + struct nv40_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NV40SR_TEMP: + sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); + sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); + break; + case NV40SR_INPUT: + sr |= (NV40_VP_SRC_REG_TYPE_INPUT << + NV40_VP_SRC_REG_TYPE_SHIFT); + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); + break; + case NV40SR_CONST: + sr |= (NV40_VP_SRC_REG_TYPE_CONST << + NV40_VP_SRC_REG_TYPE_SHIFT); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == src.index); + vpc->vpi->const_index = src.index; + break; + case NV40SR_NONE: + sr |= (NV40_VP_SRC_REG_TYPE_INPUT << + NV40_VP_SRC_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV40_VP_SRC_NEGATE; + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | + (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | + (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | + (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); + + switch (pos) { + case 0: + hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> + NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; + hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << + NV40_VP_INST_SRC0L_SHIFT; + break; + case 1: + hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; + break; + case 2: + hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> + NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; + hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << + NV40_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +{ + struct nv40_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NV40SR_TEMP: + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) { + hw[0] |= (dst.index << + NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } else { + hw[3] |= (dst.index << + NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + } + break; + case NV40SR_OUTPUT: + switch (dst.index) { + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + default: + break; + } + + hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); + if (slot == 0) { + hw[0] |= NV40_VP_INST_VEC_RESULT; + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + } else { + hw[3] |= NV40_VP_INST_SCA_RESULT; + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } + break; + default: + assert(0); + } +} + +static void +nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, + struct nv40_sreg s2) +{ + struct nv40_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); + hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | + (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | + (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | + (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); + + if (slot == 0) { + hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } else { + hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); + hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); + hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } + + emit_dst(vpc, hw, slot, dst); + emit_src(vpc, hw, 0, s0); + emit_src(vpc, hw, 1, s1); + emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nv40_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + if (vpc->high_temp < fsrc->SrcRegister.Index) + vpc->high_temp = fsrc->SrcRegister.Index; + src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index); + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nv40_sreg dst; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + dst = nv40_sr(NV40SR_OUTPUT, + vpc->output_map[fdst->DstRegister.Index]); + + break; + case TGSI_FILE_TEMPORARY: + dst = nv40_sr(NV40SR_TEMP, fdst->DstRegister.Index); + if (vpc->high_temp < dst.index) + vpc->high_temp = dst.index; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nv40_sreg src[3], dst, tmp; + struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + int mask; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + vpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + /*XXX: index comparison is broken now that consts come from + * two different register files. + */ + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, 1, OP_EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_SGE: + arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, 0, OP_MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV40_VP_INST_DEST_POS; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_VP_INST_DEST_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_VP_INST_DEST_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_VP_INST_DEST_BFC0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_VP_INST_DEST_BFC1; + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV40_VP_INST_DEST_FOGC; + break; + case TGSI_SEMANTIC_PSIZE: + hw = NV40_VP_INST_DEST_PSZ; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->output_map[fdec->u.DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv40_vertprog_prepare(struct nv40_vpc *vpc) +{ + struct tgsi_parse_context p; + int nr_imm = 0; + + tgsi_parse_init(&p, vpc->vp->pipe->tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg)); + assert(vpc->imm); + } + + return TRUE; +} + +void +nv40_vertprog_translate(struct nv40_context *nv40, + struct nv40_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nv40_vpc *vpc = NULL; + + vpc = CALLOC(1, sizeof(struct nv40_vpc)); + if (!vpc) + return; + vpc->vp = vp; + vpc->high_temp = -1; + + if (!nv40_vertprog_prepare(vpc)) { + free(vpc); + return; + } + + tgsi_parse_init(&parse, vp->pipe->tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &parse.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_OUTPUT: + if (!nv40_vertprog_parse_decl_output(vpc, fdec)) + goto out_err; + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +// assert(imm->Immediate.Size == 4); + vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nv40_vertprog_parse_instruction(vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + free(vpc); +} + +void +nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) +{ + struct nouveau_winsys *nvws = nv40->nvws; + struct pipe_winsys *ws = nv40->pipe.winsys; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + /* Translate TGSI shader into hw bytecode */ + if (!vp->translated) { + nv40_vertprog_translate(nv40, vp); + if (!vp->translated) + assert(0); + } + + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nv40->hw->vp_exec_heap; + uint vplen = vp->nr_insns; + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nv40_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->exec); + } + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if (vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nv40->hw->vp_data_heap; + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nv40_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. + */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NV40_VP_INST_CONST_SRC_SHIFT; + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (nv40->vertprog.constant_buf) { + map = ws->buffer_map(ws, nv40->vertprog.constant_buf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nv40_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (i + vp->data->start); + OUT_RINGp ((uint32_t *)vpd->value, 4); + } + + if (map) { + ws->buffer_unmap(ws, nv40->vertprog.constant_buf); + } + } + + /* Upload vtxprog */ + if (upload_code) { +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); + } +#endif + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (vp->insns[i].data, 4); + } + } + + BEGIN_RING(curie, NV40TCL_VP_START_FROM_ID, 1); + OUT_RING (vp->exec->start); + BEGIN_RING(curie, NV40TCL_VP_ATTRIB_EN, 2); + OUT_RING (vp->ir); + OUT_RING (vp->or); + + nv40->vertprog.active = vp; +} + +void +nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) +{ + if (vp->nr_consts) + free(vp->consts); + if (vp->nr_insns) + free(vp->insns); +} + diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile new file mode 100644 index 00000000000..68eb49ff2a3 --- /dev/null +++ b/src/gallium/drivers/nv50/Makefile @@ -0,0 +1,25 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv50 + +DRIVER_SOURCES = \ + nv50_clear.c \ + nv50_context.c \ + nv50_draw.c \ + nv50_miptree.c \ + nv50_query.c \ + nv50_state.c \ + nv50_surface.c \ + nv50_vbo.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c new file mode 100644 index 00000000000..552b92f72e2 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv50_context.h" + +void +nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c new file mode 100644 index 00000000000..3c5a54bfd38 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -0,0 +1,202 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#include "nv50_context.h" + +static boolean +nv50_is_format_supported(struct pipe_context *pipe, enum pipe_format format, + uint type) +{ + return FALSE; +} + +static const char * +nv50_get_name(struct pipe_context *pipe) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", nv50->chipset); + return buffer; +} + +static const char * +nv50_get_vendor(struct pipe_context *pipe) +{ + return "nouveau"; +} + +static int +nv50_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 32; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv50_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static void +nv50_flush(struct pipe_context *pipe, unsigned flags) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nouveau_winsys *nvws = nv50->nvws; + + if (flags & PIPE_FLUSH_WAIT) { + nvws->notifier_reset(nv50->sync, 0); + BEGIN_RING(tesla, 0x104, 1); + OUT_RING (0); + BEGIN_RING(tesla, 0x100, 1); + OUT_RING (0); + } + + FIRE_RING(); + + if (flags & PIPE_FLUSH_WAIT) + nvws->notifier_wait(nv50->sync, 0, 0, 2000); +} + +static void +nv50_destroy(struct pipe_context *pipe) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + + draw_destroy(nv50->draw); + free(nv50); +} + +static boolean +nv50_init_hwctx(struct nv50_context *nv50, int tesla_class) +{ + struct nouveau_winsys *nvws = nv50->nvws; + int ret; + + if ((ret = nvws->grobj_alloc(nvws, tesla_class, &nv50->tesla))) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + BEGIN_RING(tesla, NV50TCL_DMA_NOTIFY, 1); + OUT_RING (nv50->sync->handle); + + FIRE_RING (); + return TRUE; +} + +#define GRCLASS5097_CHIPSETS 0x00000000 +#define GRCLASS8297_CHIPSETS 0x00000010 +struct pipe_context * +nv50_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, + unsigned chipset) +{ + struct nv50_context *nv50; + int tesla_class, ret; + + if ((chipset & 0xf0) != 0x50 && (chipset & 0xf0) != 0x80) { + NOUVEAU_ERR("Not a G8x chipset\n"); + return NULL; + } + + if (GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f))) { + tesla_class = 0x5097; + } else + if (GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) { + tesla_class = 0x8297; + } else { + NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); + return NULL; + } + + nv50 = CALLOC_STRUCT(nv50_context); + if (!nv50) + return NULL; + nv50->chipset = chipset; + nv50->nvws = nvws; + + if ((ret = nvws->notifier_alloc(nvws, 1, &nv50->sync))) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + free(nv50); + return NULL; + } + + if (!nv50_init_hwctx(nv50, tesla_class)) { + free(nv50); + return NULL; + } + + nv50->pipe.winsys = pipe_winsys; + + nv50->pipe.destroy = nv50_destroy; + nv50->pipe.is_format_supported = nv50_is_format_supported; + nv50->pipe.get_name = nv50_get_name; + nv50->pipe.get_vendor = nv50_get_vendor; + nv50->pipe.get_param = nv50_get_param; + nv50->pipe.get_paramf = nv50_get_paramf; + + nv50->pipe.draw_arrays = nv50_draw_arrays; + nv50->pipe.draw_elements = nv50_draw_elements; + nv50->pipe.clear = nv50_clear; + + nv50->pipe.flush = nv50_flush; + + nv50_init_miptree_functions(nv50); + nv50_init_surface_functions(nv50); + nv50_init_state_functions(nv50); + nv50_init_query_functions(nv50); + + nv50->draw = draw_create(); + assert(nv50->draw); + draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); + + return &nv50->pipe; +} + + diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h new file mode 100644 index 00000000000..b99254f6191 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -0,0 +1,57 @@ +#ifndef __NV50_CONTEXT_H__ +#define __NV50_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv50_context *ctx = nv50 +#include "nouveau/nouveau_push.h" + +#include "nv50_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +struct nv50_context { + struct pipe_context pipe; + struct nouveau_winsys *nvws; + + struct draw_context *draw; + + int chipset; + struct nouveau_grobj *tesla; + struct nouveau_notifier *sync; +}; + + +extern void nv50_init_miptree_functions(struct nv50_context *nv50); +extern void nv50_init_surface_functions(struct nv50_context *nv50); +extern void nv50_init_state_functions(struct nv50_context *nv50); +extern void nv50_init_query_functions(struct nv50_context *nv50); + +/* nv50_draw.c */ +extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); + +/* nv50_vbo.c */ +extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv50_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + +/* nv50_clear.c */ +extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c new file mode 100644 index 00000000000..c6ed6838c6a --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -0,0 +1,55 @@ +#include "draw/draw_private.h" +#include "pipe/p_util.h" + +#include "nv50_context.h" + +struct nv50_draw_stage { + struct draw_stage draw; + struct nv50_context *nv50; +}; + +static void +nv50_draw_point(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_draw_line(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_draw_tri(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_draw_flush(struct draw_stage *draw, unsigned flags) +{ +} + +static void +nv50_draw_reset_stipple_counter(struct draw_stage *draw) +{ + NOUVEAU_ERR("\n"); +} + +struct draw_stage * +nv50_draw_render_stage(struct nv50_context *nv50) +{ + struct nv50_draw_stage *nv50draw = CALLOC_STRUCT(nv50_draw_stage); + + nv50draw->nv50 = nv50; + nv50draw->draw.draw = nv50->draw; + nv50draw->draw.point = nv50_draw_point; + nv50draw->draw.line = nv50_draw_line; + nv50draw->draw.tri = nv50_draw_tri; + nv50draw->draw.flush = nv50_draw_flush; + nv50draw->draw.reset_stipple_counter = nv50_draw_reset_stipple_counter; + + return &nv50draw->draw; +} + diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c new file mode 100644 index 00000000000..0c034ed4387 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -0,0 +1,25 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + +#include "nv50_context.h" + +static struct pipe_texture * +nv50_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) +{ + NOUVEAU_ERR("unimplemented\n"); + return NULL; +} + +static void +nv50_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + NOUVEAU_ERR("unimplemented\n"); +} + +void +nv50_init_miptree_functions(struct nv50_context *nv50) +{ + nv50->pipe.texture_create = nv50_miptree_create; + nv50->pipe.texture_release = nv50_miptree_release; +} diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c new file mode 100644 index 00000000000..d8c3491c2c5 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -0,0 +1,47 @@ +#include "pipe/p_context.h" + +#include "nv50_context.h" + +static struct pipe_query * +nv50_query_create(struct pipe_context *pipe, unsigned type) +{ + NOUVEAU_ERR("unimplemented\n"); + return NULL; +} + +static void +nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *q) +{ + NOUVEAU_ERR("unimplemented\n"); +} + +static void +nv50_query_begin(struct pipe_context *pipe, struct pipe_query *q) +{ + NOUVEAU_ERR("unimplemented\n"); +} + +static void +nv50_query_end(struct pipe_context *pipe, struct pipe_query *q) +{ + NOUVEAU_ERR("unimplemented\n"); +} + +static boolean +nv50_query_result(struct pipe_context *pipe, struct pipe_query *q, + boolean wait, uint64 *result) +{ + NOUVEAU_ERR("unimplemented\n"); + *result = 0xdeadcafe; + return TRUE; +} + +void +nv50_init_query_functions(struct nv50_context *nv50) +{ + nv50->pipe.create_query = nv50_query_create; + nv50->pipe.destroy_query = nv50_query_destroy; + nv50->pipe.begin_query = nv50_query_begin; + nv50->pipe.end_query = nv50_query_end; + nv50->pipe.get_query_result = nv50_query_result; +} diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c new file mode 100644 index 00000000000..99dcab51b26 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -0,0 +1,213 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + +#include "nv50_context.h" +#include "nv50_state.h" + +static void * +nv50_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + return NULL; +} + +static void +nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ +} + +static void +nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ +} + +static void * +nv50_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + return NULL; +} + +static void +nv50_sampler_state_bind(struct pipe_context *pipe, unsigned unit, + void *hwcso) +{ +} + +static void +nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ +} + +static void +nv50_set_sampler_texture(struct pipe_context *pipe, unsigned unit, + struct pipe_texture *pt) +{ +} + +static void * +nv50_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + return NULL; +} + +static void +nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ +} + +static void +nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ +} + +static void * +nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + return NULL; +} + +static void +nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ +} + +static void +nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ +} + +static void * +nv50_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return NULL; +} + +static void +nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +} + +static void +nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +} + +static void * +nv50_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + return NULL; +} + +static void +nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +} + +static void +nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +} + +static void +nv50_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ +} + +static void +nv50_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ +} + +static void +nv50_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ +} + +static void +nv50_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ +} + +static void +nv50_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ +} + +static void +nv50_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ +} + +static void +nv50_set_vertex_buffer(struct pipe_context *pipe, unsigned index, + const struct pipe_vertex_buffer *vb) +{ +} + +static void +nv50_set_vertex_element(struct pipe_context *pipe, unsigned index, + const struct pipe_vertex_element *ve) +{ +} + +void +nv50_init_state_functions(struct nv50_context *nv50) +{ + nv50->pipe.create_blend_state = nv50_blend_state_create; + nv50->pipe.bind_blend_state = nv50_blend_state_bind; + nv50->pipe.delete_blend_state = nv50_blend_state_delete; + + nv50->pipe.create_sampler_state = nv50_sampler_state_create; + nv50->pipe.bind_sampler_state = nv50_sampler_state_bind; + nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; + nv50->pipe.set_sampler_texture = nv50_set_sampler_texture; + + nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; + nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; + nv50->pipe.delete_rasterizer_state = nv50_rasterizer_state_delete; + + nv50->pipe.create_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_create; + nv50->pipe.bind_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_bind; + nv50->pipe.delete_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_delete; + + nv50->pipe.create_vs_state = nv50_vp_state_create; + nv50->pipe.bind_vs_state = nv50_vp_state_bind; + nv50->pipe.delete_vs_state = nv50_vp_state_delete; + + nv50->pipe.create_fs_state = nv50_fp_state_create; + nv50->pipe.bind_fs_state = nv50_fp_state_bind; + nv50->pipe.delete_fs_state = nv50_fp_state_delete; + + nv50->pipe.set_blend_color = nv50_set_blend_color; + nv50->pipe.set_clip_state = nv50_set_clip_state; + nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; + nv50->pipe.set_framebuffer_state = nv50_set_framebuffer_state; + nv50->pipe.set_polygon_stipple = nv50_set_polygon_stipple; + nv50->pipe.set_scissor_state = nv50_set_scissor_state; + nv50->pipe.set_viewport_state = nv50_set_viewport_state; + + nv50->pipe.set_vertex_buffer = nv50_set_vertex_buffer; + nv50->pipe.set_vertex_element = nv50_set_vertex_element; +} + diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h new file mode 100644 index 00000000000..a3b781d4c61 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state.h @@ -0,0 +1,7 @@ +#ifndef __NV50_STATE_H__ +#define __NV50_STATE_H__ + +#include "pipe/p_state.h" + + +#endif diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c new file mode 100644 index 00000000000..ca92ff02b89 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -0,0 +1,75 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nv50_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/p_tile.h" + +static struct pipe_surface * +nv50_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + NOUVEAU_ERR("unimplemented\n"); + return NULL; +} + +static void +nv50_surface_copy(struct pipe_context *pipe, unsigned flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nouveau_winsys *nvws = nv50->nvws; + + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); +} + +static void +nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nouveau_winsys *nvws = nv50->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +void +nv50_init_surface_functions(struct nv50_context *nv50) +{ + nv50->pipe.get_tex_surface = nv50_get_tex_surface; + nv50->pipe.surface_copy = nv50_surface_copy; + nv50->pipe.surface_fill = nv50_surface_fill; +} diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c new file mode 100644 index 00000000000..6c0dc23a439 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -0,0 +1,24 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "nv50_context.h" +#include "nv50_state.h" + +boolean +nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, + unsigned count) +{ + NOUVEAU_ERR("unimplemented\n"); + return TRUE; +} + +boolean +nv50_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + NOUVEAU_ERR("unimplemented\n"); + return TRUE; +} + diff --git a/src/gallium/winsys/dri/nouveau/Makefile b/src/gallium/winsys/dri/nouveau/Makefile new file mode 100644 index 00000000000..b463f218fd5 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/Makefile @@ -0,0 +1,43 @@ + +TOP = ../../../../.. +include $(TOP)/configs/current + +LIBNAME = nouveau_dri.so + +MINIGLX_SOURCES = + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/nv30/libnv30.a \ + $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nv50/libnv50.a + +DRIVER_SOURCES = \ + nouveau_bo.c \ + nouveau_channel.c \ + nouveau_context.c \ + nouveau_device.c \ + nouveau_dma.c \ + nouveau_fence.c \ + nouveau_grobj.c \ + nouveau_lock.c \ + nouveau_notifier.c \ + nouveau_pushbuf.c \ + nouveau_resource.c \ + nouveau_screen.c \ + nouveau_swapbuffers.c \ + nouveau_winsys.c \ + nouveau_winsys_pipe.c \ + nouveau_winsys_softpipe.c \ + nv04_surface.c \ + nv50_surface.c + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../Makefile.template + +symlinks: diff --git a/src/gallium/winsys/dri/nouveau/nouveau_bo.c b/src/gallium/winsys/dri/nouveau/nouveau_bo.c new file mode 100644 index 00000000000..6887ffa6886 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_bo.c @@ -0,0 +1,402 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <errno.h> +#include <assert.h> + +#include "nouveau_drmif.h" +#include "nouveau_dma.h" +#include "nouveau_local.h" + +static void +nouveau_mem_free(struct nouveau_device *dev, struct drm_nouveau_mem_alloc *ma, + void **map) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + struct drm_nouveau_mem_free mf; + + if (map && *map) { + drmUnmap(*map, ma->size); + *map = NULL; + } + + if (ma->size) { + mf.offset = ma->offset; + mf.flags = ma->flags; + drmCommandWrite(nvdev->fd, DRM_NOUVEAU_MEM_FREE, + &mf, sizeof(mf)); + ma->size = 0; + } +} + +static int +nouveau_mem_alloc(struct nouveau_device *dev, unsigned size, unsigned align, + uint32_t flags, struct drm_nouveau_mem_alloc *ma, void **map) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + int ret; + + ma->alignment = align; + ma->size = size; + ma->flags = flags; + if (map) + ma->flags |= NOUVEAU_MEM_MAPPED; + ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_MEM_ALLOC, ma, + sizeof(struct drm_nouveau_mem_alloc)); + if (ret) + return ret; + + if (map) { + ret = drmMap(nvdev->fd, ma->map_handle, ma->size, map); + if (ret) { + *map = NULL; + nouveau_mem_free(dev, ma, map); + return ret; + } + } + + return 0; +} + +static void +nouveau_bo_tmp_del(void *priv) +{ + struct nouveau_resource *r = priv; + + nouveau_fence_ref(NULL, (struct nouveau_fence **)&r->priv); + nouveau_resource_free(&r); +} + +static struct nouveau_resource * +nouveau_bo_tmp(struct nouveau_channel *chan, unsigned size, + struct nouveau_fence *fence) +{ + struct nouveau_device_priv *nvdev = nouveau_device(chan->device); + struct nouveau_resource *r = NULL; + struct nouveau_fence *ref = NULL; + + if (fence) + nouveau_fence_ref(fence, &ref); + else + nouveau_fence_new(chan, &ref); + assert(ref); + + while (nouveau_resource_alloc(nvdev->sa_heap, size, ref, &r)) { + nouveau_fence_flush(chan); + } + nouveau_fence_signal_cb(ref, nouveau_bo_tmp_del, r); + + return r; +} + +int +nouveau_bo_init(struct nouveau_device *dev) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + int ret; + + ret = nouveau_mem_alloc(dev, 128*1024, 0, NOUVEAU_MEM_AGP | + NOUVEAU_MEM_PCI, &nvdev->sa, &nvdev->sa_map); + if (ret) + return ret; + + ret = nouveau_resource_init(&nvdev->sa_heap, 0, nvdev->sa.size); + if (ret) { + nouveau_mem_free(dev, &nvdev->sa, &nvdev->sa_map); + return ret; + } + + return 0; +} + +void +nouveau_bo_takedown(struct nouveau_device *dev) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + nouveau_mem_free(dev, &nvdev->sa, &nvdev->sa_map); +} + +int +nouveau_bo_new(struct nouveau_device *dev, uint32_t flags, int align, + int size, struct nouveau_bo **bo) +{ + struct nouveau_bo_priv *nvbo; + int ret; + + if (!dev || !bo || *bo) + return -EINVAL; + + nvbo = calloc(1, sizeof(struct nouveau_bo_priv)); + if (!nvbo) + return -ENOMEM; + nvbo->base.device = dev; + nvbo->base.size = size; + nvbo->base.handle = bo_to_ptr(nvbo); + nvbo->drm.alignment = align; + nvbo->refcount = 1; + + ret = nouveau_bo_set_status(&nvbo->base, flags); + if (ret) { + free(nvbo); + return ret; + } + + *bo = &nvbo->base; + return 0; +} + +int +nouveau_bo_user(struct nouveau_device *dev, void *ptr, int size, + struct nouveau_bo **bo) +{ + struct nouveau_bo_priv *nvbo; + + if (!dev || !bo || *bo) + return -EINVAL; + + nvbo = calloc(1, sizeof(*nvbo)); + if (!nvbo) + return -ENOMEM; + nvbo->base.device = dev; + + nvbo->sysmem = ptr; + nvbo->user = 1; + + nvbo->base.size = size; + nvbo->base.offset = nvbo->drm.offset; + nvbo->base.handle = bo_to_ptr(nvbo); + nvbo->refcount = 1; + *bo = &nvbo->base; + return 0; +} + +int +nouveau_bo_ref(struct nouveau_device *dev, uint64_t handle, + struct nouveau_bo **bo) +{ + struct nouveau_bo_priv *nvbo = ptr_to_bo(handle); + + if (!dev || !bo || *bo) + return -EINVAL; + + nvbo->refcount++; + *bo = &nvbo->base; + return 0; +} + +void +nouveau_bo_del(struct nouveau_bo **bo) +{ + struct nouveau_bo_priv *nvbo; + + if (!bo || !*bo) + return; + nvbo = nouveau_bo(*bo); + *bo = NULL; + + if (--nvbo->refcount) + return; + + if (nvbo->fence) + nouveau_fence_wait(&nvbo->fence); + nouveau_mem_free(nvbo->base.device, &nvbo->drm, &nvbo->map); + if (nvbo->sysmem && !nvbo->user) + free(nvbo->sysmem); + free(nvbo); +} + +int +nouveau_bo_map(struct nouveau_bo *bo, uint32_t flags) +{ + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + + if (!nvbo) + return -EINVAL; + + if (flags & NOUVEAU_BO_WR) + nouveau_fence_wait(&nvbo->fence); + else + nouveau_fence_wait(&nvbo->wr_fence); + + if (nvbo->sysmem) + bo->map = nvbo->sysmem; + else + bo->map = nvbo->map; + return 0; +} + +void +nouveau_bo_unmap(struct nouveau_bo *bo) +{ + bo->map = NULL; +} + +static int +nouveau_bo_upload(struct nouveau_bo_priv *nvbo) +{ + if (nvbo->fence) + nouveau_fence_wait(&nvbo->fence); + memcpy(nvbo->map, nvbo->sysmem, nvbo->drm.size); + return 0; +} + +int +nouveau_bo_set_status(struct nouveau_bo *bo, uint32_t flags) +{ + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + struct drm_nouveau_mem_alloc new; + void *new_map = NULL, *new_sysmem = NULL; + unsigned new_flags = 0, ret; + + assert(!bo->map); + + /* Check current memtype vs requested, if they match do nothing */ + if ((nvbo->drm.flags & NOUVEAU_MEM_FB) && (flags & NOUVEAU_BO_VRAM)) + return 0; + if ((nvbo->drm.flags & NOUVEAU_MEM_AGP) && (flags & NOUVEAU_BO_GART)) + return 0; + if (nvbo->drm.size == 0 && nvbo->sysmem && (flags & NOUVEAU_BO_LOCAL)) + return 0; + + memset(&new, 0x00, sizeof(new)); + + /* Allocate new memory */ + if (flags & NOUVEAU_BO_VRAM) + new_flags |= NOUVEAU_MEM_FB; + else + if (flags & NOUVEAU_BO_GART) + new_flags |= (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI); + + if (new_flags) { + ret = nouveau_mem_alloc(bo->device, bo->size, + nvbo->drm.alignment, new_flags, + &new, &new_map); + if (ret) + return ret; + } else { + new_sysmem = malloc(bo->size); + } + + /* Copy old -> new */ + /*XXX: use M2MF */ + if (nvbo->sysmem || nvbo->map) { + nouveau_bo_map(bo, NOUVEAU_BO_RD); + memcpy(new_map, bo->map, bo->size); + nouveau_bo_unmap(bo); + } + + /* Free old memory */ + if (nvbo->fence) + nouveau_fence_wait(&nvbo->fence); + nouveau_mem_free(bo->device, &nvbo->drm, &nvbo->map); + if (nvbo->sysmem) + free(nvbo->sysmem); + + nvbo->drm = new; + nvbo->map = new_map; + nvbo->sysmem = new_sysmem; + bo->flags = flags; + bo->offset = nvbo->drm.offset; + return 0; +} + +static int +nouveau_bo_validate_user(struct nouveau_channel *chan, struct nouveau_bo *bo, + struct nouveau_fence *fence, uint32_t flags) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_device_priv *nvdev = nouveau_device(chan->device); + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + struct nouveau_resource *r; + + if (nvchan->user_charge + bo->size > nvdev->sa.size) + return 1; + nvchan->user_charge += bo->size; + + if (!(flags & NOUVEAU_BO_GART)) + return 1; + + r = nouveau_bo_tmp(chan, bo->size, fence); + if (!r) + return 1; + + memcpy(nvdev->sa_map + r->start, nvbo->sysmem, bo->size); + + nvbo->offset = nvdev->sa.offset + r->start; + nvbo->flags = NOUVEAU_BO_GART; + return 0; +} + +static int +nouveau_bo_validate_bo(struct nouveau_channel *chan, struct nouveau_bo *bo, + struct nouveau_fence *fence, uint32_t flags) +{ + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + int ret; + + ret = nouveau_bo_set_status(bo, flags); + if (ret) + return ret; + + if (nvbo->user) + nouveau_bo_upload(nvbo); + + nvbo->offset = nvbo->drm.offset; + if (nvbo->drm.flags & (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI)) + nvbo->flags = NOUVEAU_BO_GART; + else + nvbo->flags = NOUVEAU_BO_VRAM; + + return 0; +} + +int +nouveau_bo_validate(struct nouveau_channel *chan, struct nouveau_bo *bo, + struct nouveau_fence *fence, uint32_t flags) +{ + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + int ret; + + assert(bo->map == NULL); + + if (nvbo->user) { + ret = nouveau_bo_validate_user(chan, bo, fence, flags); + if (ret) { + ret = nouveau_bo_validate_bo(chan, bo, fence, flags); + if (ret) + return ret; + } + } else { + ret = nouveau_bo_validate_bo(chan, bo, fence, flags); + if (ret) + return ret; + } + + if (flags & NOUVEAU_BO_WR) + nouveau_fence_ref(fence, &nvbo->wr_fence); + nouveau_fence_ref(fence, &nvbo->fence); + return 0; +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_channel.c b/src/gallium/winsys/dri/nouveau/nouveau_channel.c new file mode 100644 index 00000000000..df80d04add5 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_channel.c @@ -0,0 +1,118 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "nouveau_drmif.h" +#include "nouveau_dma.h" + +int +nouveau_channel_alloc(struct nouveau_device *dev, uint32_t fb_ctxdma, + uint32_t tt_ctxdma, struct nouveau_channel **chan) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + struct nouveau_channel_priv *nvchan; + int ret; + + if (!nvdev || !chan || *chan) + return -EINVAL; + + nvchan = calloc(1, sizeof(struct nouveau_channel_priv)); + if (!nvchan) + return -ENOMEM; + nvchan->base.device = dev; + + nvchan->drm.fb_ctxdma_handle = fb_ctxdma; + nvchan->drm.tt_ctxdma_handle = tt_ctxdma; + ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_CHANNEL_ALLOC, + &nvchan->drm, sizeof(nvchan->drm)); + if (ret) { + free(nvchan); + return ret; + } + + nvchan->base.id = nvchan->drm.channel; + if (nouveau_grobj_ref(&nvchan->base, nvchan->drm.fb_ctxdma_handle, + &nvchan->base.vram) || + nouveau_grobj_ref(&nvchan->base, nvchan->drm.tt_ctxdma_handle, + &nvchan->base.gart)) { + nouveau_channel_free((void *)&nvchan); + return -EINVAL; + } + + ret = drmMap(nvdev->fd, nvchan->drm.ctrl, nvchan->drm.ctrl_size, + (void*)&nvchan->user); + if (ret) { + nouveau_channel_free((void *)&nvchan); + return ret; + } + nvchan->put = &nvchan->user[0x40/4]; + nvchan->get = &nvchan->user[0x44/4]; + nvchan->ref_cnt = &nvchan->user[0x48/4]; + + ret = drmMap(nvdev->fd, nvchan->drm.notifier, nvchan->drm.notifier_size, + (drmAddressPtr)&nvchan->notifier_block); + if (ret) { + nouveau_channel_free((void *)&nvchan); + return ret; + } + + ret = drmMap(nvdev->fd, nvchan->drm.cmdbuf, nvchan->drm.cmdbuf_size, + (void*)&nvchan->pushbuf); + if (ret) { + nouveau_channel_free((void *)&nvchan); + return ret; + } + + nouveau_dma_channel_init(&nvchan->base); + nouveau_pushbuf_init(&nvchan->base); + + *chan = &nvchan->base; + return 0; +} + +void +nouveau_channel_free(struct nouveau_channel **chan) +{ + struct nouveau_channel_priv *nvchan; + struct nouveau_device_priv *nvdev; + struct drm_nouveau_channel_free cf; + + if (!chan || !*chan) + return; + nvchan = nouveau_channel(*chan); + *chan = NULL; + nvdev = nouveau_device(nvchan->base.device); + + FIRE_RING_CH(&nvchan->base); + + nouveau_grobj_free(&nvchan->base.vram); + nouveau_grobj_free(&nvchan->base.gart); + + cf.channel = nvchan->drm.channel; + drmCommandWrite(nvdev->fd, DRM_NOUVEAU_CHANNEL_FREE, &cf, sizeof(cf)); + free(nvchan); +} + + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c new file mode 100644 index 00000000000..01fada5b89b --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.c @@ -0,0 +1,206 @@ +#include "main/glheader.h" +#include "glapi/glthread.h" +#include <GL/internal/glcore.h> +#include "utils.h" + +#include "state_tracker/st_public.h" +#include "pipe/p_defines.h" +#include "pipe/p_context.h" + +#include "nouveau_context.h" +#include "nouveau_dri.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_winsys_pipe.h" + +#ifdef DEBUG +static const struct dri_debug_control debug_control[] = { + { "bo", DEBUG_BO }, + { NULL, 0 } +}; +int __nouveau_debug = 0; +#endif + +GLboolean +nouveau_context_create(const __GLcontextModes *glVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *driScrnPriv = driContextPriv->driScreenPriv; + struct nouveau_screen *nv_screen = driScrnPriv->private; + struct nouveau_context *nv = CALLOC_STRUCT(nouveau_context); + struct nouveau_device_priv *nvdev; + struct pipe_context *pipe = NULL; + struct st_context *st_share = NULL; + int ret; + + if (sharedContextPrivate) { + st_share = ((struct nouveau_context *)sharedContextPrivate)->st; + } + + if ((ret = nouveau_device_get_param(nv_screen->device, + NOUVEAU_GETPARAM_CHIPSET_ID, + &nv->chipset))) { + NOUVEAU_ERR("Error determining chipset id: %d\n", ret); + return GL_FALSE; + } + + if ((ret = nouveau_channel_alloc(nv_screen->device, + 0x8003d001, 0x8003d002, + &nv->channel))) { + NOUVEAU_ERR("Error creating GPU channel: %d\n", ret); + return GL_FALSE; + } + + driContextPriv->driverPrivate = (void *)nv; + nv->nv_screen = nv_screen; + nv->dri_screen = driScrnPriv; + + nvdev = nouveau_device(nv_screen->device); + nvdev->ctx = driContextPriv->hHWContext; + nvdev->lock = (drmLock *)&driScrnPriv->pSAREA->lock; + + driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, + nv->dri_screen->myNum, "nouveau"); +#ifdef DEBUG + __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), + debug_control); +#endif + + /*XXX: Hack up a fake region and buffer object for front buffer. + * This will go away with TTM, replaced with a simple reference + * of the front buffer handle passed to us by the DDX. + */ + { + struct pipe_surface *fb_surf; + struct nouveau_pipe_buffer *fb_buf; + struct nouveau_bo_priv *fb_bo; + + fb_bo = calloc(1, sizeof(struct nouveau_bo_priv)); + fb_bo->drm.offset = nv_screen->front_offset; + fb_bo->drm.flags = NOUVEAU_MEM_FB; + fb_bo->drm.size = nv_screen->front_pitch * + nv_screen->front_height; + fb_bo->refcount = 1; + fb_bo->base.flags = NOUVEAU_BO_PIN | NOUVEAU_BO_VRAM; + fb_bo->base.offset = fb_bo->drm.offset; + fb_bo->base.handle = (unsigned long)fb_bo; + fb_bo->base.size = fb_bo->drm.size; + fb_bo->base.device = nv_screen->device; + + fb_buf = calloc(1, sizeof(struct nouveau_pipe_buffer)); + fb_buf->bo = &fb_bo->base; + + fb_surf = calloc(1, sizeof(struct pipe_surface)); + fb_surf->cpp = nv_screen->front_cpp; + fb_surf->pitch = nv_screen->front_pitch / fb_surf->cpp; + fb_surf->height = nv_screen->front_height; + fb_surf->refcount = 1; + fb_surf->buffer = &fb_buf->base; + + nv->frontbuffer = fb_surf; + } + + if ((ret = nouveau_grobj_alloc(nv->channel, 0x00000000, 0x30, + &nv->NvNull))) { + NOUVEAU_ERR("Error creating NULL object: %d\n", ret); + return GL_FALSE; + } + nv->next_handle = 0x80000000; + + if ((ret = nouveau_notifier_alloc(nv->channel, nv->next_handle++, 1, + &nv->sync_notifier))) { + NOUVEAU_ERR("Error creating channel sync notifier: %d\n", ret); + return GL_FALSE; + } + + if (nv->chipset < 0x50) + ret = nouveau_surface_init_nv04(nv); + else + ret = nouveau_surface_init_nv50(nv); + if (ret) { + return GL_FALSE; + } + + if (!getenv("NOUVEAU_FORCE_SOFTPIPE")) { + pipe = nouveau_pipe_create(nv); + if (!pipe) + NOUVEAU_ERR("Couldn't create hw pipe\n"); + } + + if (!pipe) { + NOUVEAU_MSG("Using softpipe\n"); + pipe = nouveau_create_softpipe(nv); + if (!pipe) { + NOUVEAU_ERR("Error creating pipe, bailing\n"); + return GL_FALSE; + } + } + + pipe->priv = nv; + nv->st = st_create_context(pipe, glVis, st_share); + return GL_TRUE; +} + +void +nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) +{ + struct nouveau_context *nv = driContextPriv->driverPrivate; + + assert(nv); + + st_flush(nv->st, PIPE_FLUSH_WAIT); + st_destroy_context(nv->st); + + nouveau_grobj_free(&nv->NvCtxSurf2D); + nouveau_grobj_free(&nv->NvImageBlit); + nouveau_channel_free(&nv->channel); + + free(nv); +} + +GLboolean +nouveau_context_bind(__DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv) +{ + struct nouveau_context *nv; + struct nouveau_framebuffer *draw, *read; + + if (!driContextPriv) { + st_make_current(NULL, NULL, NULL); + return GL_TRUE; + } + + nv = driContextPriv->driverPrivate; + draw = driDrawPriv->driverPrivate; + read = driReadPriv->driverPrivate; + + st_make_current(nv->st, draw->stfb, read->stfb); + + if ((nv->dri_drawable != driDrawPriv) || + (nv->last_stamp != driDrawPriv->lastStamp)) { + nv->dri_drawable = driDrawPriv; + st_resize_framebuffer(draw->stfb, driDrawPriv->w, + driDrawPriv->h); + nv->last_stamp = driDrawPriv->lastStamp; + } + + if (driDrawPriv != driReadPriv) { + st_resize_framebuffer(read->stfb, driReadPriv->w, + driReadPriv->h); + } + + return GL_TRUE; +} + +GLboolean +nouveau_context_unbind(__DRIcontextPrivate *driContextPriv) +{ + struct nouveau_context *nv = driContextPriv->driverPrivate; + (void)nv; + + st_flush(nv->st, 0); + return GL_TRUE; +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.h b/src/gallium/winsys/dri/nouveau/nouveau_context.h new file mode 100644 index 00000000000..5805f969ba8 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.h @@ -0,0 +1,89 @@ +#ifndef __NOUVEAU_CONTEXT_H__ +#define __NOUVEAU_CONTEXT_H__ + +#include "dri_util.h" +#include "xmlconfig.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau_device.h" +#include "nouveau_drmif.h" +#include "nouveau_dma.h" + +struct nouveau_framebuffer { + struct st_framebuffer *stfb; +}; + +struct nouveau_context { + struct st_context *st; + + /* Misc HW info */ + uint64_t chipset; + + /* DRI stuff */ + __DRIscreenPrivate *dri_screen; + __DRIdrawablePrivate *dri_drawable; + unsigned int last_stamp; + driOptionCache dri_option_cache; + drm_context_t drm_context; + drmLock drm_lock; + GLboolean locked; + struct nouveau_screen *nv_screen; + struct pipe_surface *frontbuffer; + + /* Hardware context */ + struct nouveau_channel *channel; + struct nouveau_notifier *sync_notifier; + struct nouveau_grobj *NvNull; + struct nouveau_grobj *NvCtxSurf2D; + struct nouveau_grobj *NvImageBlit; + struct nouveau_grobj *NvGdiRect; + struct nouveau_grobj *NvM2MF; + struct nouveau_grobj *Nv2D; + uint32_t next_handle; + uint32_t next_subchannel; + uint32_t next_sequence; + + /* pipe_surface accel */ + struct pipe_surface *surf_src, *surf_dst; + unsigned surf_src_offset, surf_dst_offset; + int (*surface_copy_prep)(struct nouveau_context *, + struct pipe_surface *dst, + struct pipe_surface *src); + void (*surface_copy)(struct nouveau_context *, unsigned dx, unsigned dy, + unsigned sx, unsigned sy, unsigned w, unsigned h); + void (*surface_copy_done)(struct nouveau_context *); + int (*surface_fill)(struct nouveau_context *, struct pipe_surface *, + unsigned, unsigned, unsigned, unsigned, unsigned); +}; + +extern GLboolean nouveau_context_create(const __GLcontextModes *, + __DRIcontextPrivate *, void *); +extern void nouveau_context_destroy(__DRIcontextPrivate *); +extern GLboolean nouveau_context_bind(__DRIcontextPrivate *, + __DRIdrawablePrivate *draw, + __DRIdrawablePrivate *read); +extern GLboolean nouveau_context_unbind(__DRIcontextPrivate *); + +#ifdef DEBUG +extern int __nouveau_debug; + +#define DEBUG_BO (1 << 0) + +#define DBG(flag, ...) do { \ + if (__nouveau_debug & (DEBUG_##flag)) \ + NOUVEAU_ERR(__VA_ARGS__); \ +} while(0) +#else +#define DBG(flag, ...) +#endif + +extern void LOCK_HARDWARE(struct nouveau_context *); +extern void UNLOCK_HARDWARE(struct nouveau_context *); + +extern int nouveau_surface_init_nv04(struct nouveau_context *); +extern int nouveau_surface_init_nv50(struct nouveau_context *); + +extern uint32_t *nouveau_pipe_dma_beginp(struct nouveau_grobj *, int, int); +extern void nouveau_pipe_dma_kickoff(struct nouveau_channel *); + +#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_device.c b/src/gallium/winsys/dri/nouveau/nouveau_device.c new file mode 100644 index 00000000000..409e4415f76 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_device.c @@ -0,0 +1,146 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> + +#include "nouveau_drmif.h" + +int +nouveau_device_open_existing(struct nouveau_device **dev, int close, + int fd, drm_context_t ctx) +{ + struct nouveau_device_priv *nvdev; + int ret; + + if (!dev || *dev) + return -EINVAL; + + nvdev = calloc(1, sizeof(*nvdev)); + if (!nvdev) + return -ENOMEM; + nvdev->fd = fd; + nvdev->ctx = ctx; + nvdev->needs_close = close; + + drmCommandNone(nvdev->fd, DRM_NOUVEAU_CARD_INIT); + + if ((ret = nouveau_bo_init(&nvdev->base))) { + nouveau_device_close((void *)&nvdev); + return ret; + } + + *dev = &nvdev->base; + return 0; +} + +int +nouveau_device_open(struct nouveau_device **dev, const char *busid) +{ + drm_context_t ctx; + int fd, ret; + + if (!dev || *dev) + return -EINVAL; + + fd = drmOpen("nouveau", busid); + if (fd < 0) + return -EINVAL; + + ret = drmCreateContext(fd, &ctx); + if (ret) { + drmClose(fd); + return ret; + } + + ret = nouveau_device_open_existing(dev, 1, fd, ctx); + if (ret) { + drmDestroyContext(fd, ctx); + drmClose(fd); + return ret; + } + + return 0; +} + +void +nouveau_device_close(struct nouveau_device **dev) +{ + struct nouveau_device_priv *nvdev; + + if (dev || !*dev) + return; + nvdev = nouveau_device(*dev); + *dev = NULL; + + nouveau_bo_takedown(&nvdev->base); + + if (nvdev->needs_close) { + drmDestroyContext(nvdev->fd, nvdev->ctx); + drmClose(nvdev->fd); + } + free(nvdev); +} + +int +nouveau_device_get_param(struct nouveau_device *dev, + uint64_t param, uint64_t *value) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + struct drm_nouveau_getparam g; + int ret; + + if (!nvdev || !value) + return -EINVAL; + + g.param = param; + ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_GETPARAM, + &g, sizeof(g)); + if (ret) + return ret; + + *value = g.value; + return 0; +} + +int +nouveau_device_set_param(struct nouveau_device *dev, + uint64_t param, uint64_t value) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + struct drm_nouveau_setparam s; + int ret; + + if (!nvdev) + return -EINVAL; + + s.param = param; + s.value = value; + ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_SETPARAM, + &s, sizeof(s)); + if (ret) + return ret; + + return 0; +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_device.h b/src/gallium/winsys/dri/nouveau/nouveau_device.h new file mode 100644 index 00000000000..744a89f74bf --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_device.h @@ -0,0 +1,29 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_DEVICE_H__ +#define __NOUVEAU_DEVICE_H__ + +struct nouveau_device { +}; + +#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_dma.c b/src/gallium/winsys/dri/nouveau/nouveau_dma.c new file mode 100644 index 00000000000..f8a8ba04f6d --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_dma.c @@ -0,0 +1,219 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdint.h> +#include <assert.h> +#include <errno.h> + +#include "nouveau_drmif.h" +#include "nouveau_dma.h" +#include "nouveau_local.h" + +static inline uint32_t +READ_GET(struct nouveau_channel_priv *nvchan) +{ + return *nvchan->get; +} + +static inline void +WRITE_PUT(struct nouveau_channel_priv *nvchan, uint32_t val) +{ + uint32_t put = ((val << 2) + nvchan->dma->base); + volatile int dum; + + NOUVEAU_DMA_BARRIER; + dum = READ_GET(nvchan); + + *nvchan->put = put; + nvchan->dma->put = val; +#ifdef NOUVEAU_DMA_TRACE + NOUVEAU_MSG("WRITE_PUT %d/0x%08x\n", nvchan->drm.channel, put); +#endif + + NOUVEAU_DMA_BARRIER; +} + +static inline int +LOCAL_GET(struct nouveau_dma_priv *dma, uint32_t *val) +{ + uint32_t get = *val; + + if (get >= dma->base && get <= (dma->base + (dma->max << 2))) { + *val = (get - dma->base) >> 2; + return 1; + } + + return 0; +} + +void +nouveau_dma_channel_init(struct nouveau_channel *chan) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + int i; + + nvchan->dma = &nvchan->dma_master; + nvchan->dma->base = nvchan->drm.put_base; + nvchan->dma->cur = nvchan->dma->put = 0; + nvchan->dma->max = (nvchan->drm.cmdbuf_size >> 2) - 2; + nvchan->dma->free = nvchan->dma->max - nvchan->dma->cur; + + RING_SPACE_CH(chan, RING_SKIPS); + for (i = 0; i < RING_SKIPS; i++) + OUT_RING_CH(chan, 0); +} + +#define CHECK_TIMEOUT() do { \ + if ((NOUVEAU_TIME_MSEC() - t_start) > NOUVEAU_DMA_TIMEOUT) \ + return - EBUSY; \ +} while(0) + +int +nouveau_dma_wait(struct nouveau_channel *chan, int size) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + uint32_t get, t_start; + + FIRE_RING_CH(chan); + + t_start = NOUVEAU_TIME_MSEC(); + while (dma->free < size) { + CHECK_TIMEOUT(); + + get = READ_GET(nvchan); + if (!LOCAL_GET(dma, &get)) + continue; + + if (dma->put >= get) { + dma->free = dma->max - dma->cur; + + if (dma->free < size) { +#ifdef NOUVEAU_DMA_DEBUG + dma->push_free = 1; +#endif + OUT_RING_CH(chan, 0x20000000 | dma->base); + if (get <= RING_SKIPS) { + /*corner case - will be idle*/ + if (dma->put <= RING_SKIPS) + WRITE_PUT(nvchan, + RING_SKIPS + 1); + + do { + CHECK_TIMEOUT(); + get = READ_GET(nvchan); + if (!LOCAL_GET(dma, &get)) + get = 0; + } while (get <= RING_SKIPS); + } + + WRITE_PUT(nvchan, RING_SKIPS); + dma->cur = dma->put = RING_SKIPS; + dma->free = get - (RING_SKIPS + 1); + } + } else { + dma->free = get - dma->cur - 1; + } + } + + return 0; +} + +#ifdef NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF +static void +nouveau_dma_parse_pushbuf(struct nouveau_channel *chan, int get, int put) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + unsigned mthd_count = 0; + + while (get != put) { + uint32_t gpuget = (get << 2) + nvchan->drm.put_base; + uint32_t data; + + if (get < 0 || get >= nvchan->drm.cmdbuf_size) { + NOUVEAU_ERR("DMA_PT 0x%08x\n", gpuget); + assert(0); + } + data = nvchan->pushbuf[get++]; + + if (mthd_count) { + NOUVEAU_MSG("0x%08x 0x%08x\n", gpuget, data); + mthd_count--; + continue; + } + + switch (data & 0x60000000) { + case 0x00000000: + mthd_count = (data >> 18) & 0x7ff; + NOUVEAU_MSG("0x%08x 0x%08x MTHD " + "Sc %d Mthd 0x%04x Size %d\n", + gpuget, data, (data>>13) & 7, data & 0x1ffc, + mthd_count); + break; + case 0x20000000: + get = (data & 0x1ffffffc) >> 2; + NOUVEAU_MSG("0x%08x 0x%08x JUMP 0x%08x\n", + gpuget, data, data & 0x1ffffffc); + continue; + case 0x40000000: + mthd_count = (data >> 18) & 0x7ff; + NOUVEAU_MSG("0x%08x 0x%08x NINC " + "Sc %d Mthd 0x%04x Size %d\n", + gpuget, data, (data>>13) & 7, data & 0x1ffc, + mthd_count); + break; + case 0x60000000: + /* DMA_OPCODE_CALL apparently, doesn't seem to work on + * my NV40 at least.. + */ + /* fall-through */ + default: + NOUVEAU_MSG("DMA_PUSHER 0x%08x 0x%08x\n", + gpuget, data); + assert(0); + } + } +} +#endif + +void +nouveau_dma_kickoff(struct nouveau_channel *chan) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + + if (dma->cur == dma->put) + return; + +#ifdef NOUVEAU_DMA_DEBUG + if (dma->push_free) { + NOUVEAU_ERR("Packet incomplete: %d left\n", dma->push_free); + return; + } +#endif + +#ifdef NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF + nouveau_dma_parse_pushbuf(chan, dma->put, dma->cur); +#endif + + WRITE_PUT(nvchan, dma->cur); +} diff --git a/src/gallium/winsys/dri/nouveau/nouveau_dma.h b/src/gallium/winsys/dri/nouveau/nouveau_dma.h new file mode 100644 index 00000000000..cfa6d26e828 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_dma.h @@ -0,0 +1,143 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_DMA_H__ +#define __NOUVEAU_DMA_H__ + +#include <string.h> +#include "nouveau_drmif.h" +#include "nouveau_local.h" + +#define RING_SKIPS 8 + +extern int nouveau_dma_wait(struct nouveau_channel *chan, int size); +extern void nouveau_dma_subc_bind(struct nouveau_grobj *); +extern void nouveau_dma_channel_init(struct nouveau_channel *); +extern void nouveau_dma_kickoff(struct nouveau_channel *); + +#ifdef NOUVEAU_DMA_DEBUG +static char faulty[1024]; +#endif + +static inline void +nouveau_dma_out(struct nouveau_channel *chan, uint32_t data) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + +#ifdef NOUVEAU_DMA_DEBUG + if (dma->push_free == 0) { + NOUVEAU_ERR("No space left in packet at %s\n", faulty); + return; + } + dma->push_free--; +#endif +#ifdef NOUVEAU_DMA_TRACE + { + uint32_t offset = (dma->cur << 2) + dma->base; + NOUVEAU_MSG("\tOUT_RING %d/0x%08x -> 0x%08x\n", + nvchan->drm.channel, offset, data); + } +#endif + nvchan->pushbuf[dma->cur + (dma->base - nvchan->drm.put_base)/4] = data; + dma->cur++; +} + +static inline void +nouveau_dma_outp(struct nouveau_channel *chan, uint32_t *ptr, int size) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + (void)dma; + +#ifdef NOUVEAU_DMA_DEBUG + if (dma->push_free < size) { + NOUVEAU_ERR("Packet too small. Free=%d, Need=%d\n", + dma->push_free, size); + return; + } +#endif +#ifdef NOUVEAU_DMA_TRACE + while (size--) { + nouveau_dma_out(chan, *ptr); + ptr++; + } +#else + memcpy(&nvchan->pushbuf[dma->cur], ptr, size << 2); +#ifdef NOUVEAU_DMA_DEBUG + dma->push_free -= size; +#endif + dma->cur += size; +#endif +} + +static inline void +nouveau_dma_space(struct nouveau_channel *chan, int size) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + + if (dma->free < size) { + if (nouveau_dma_wait(chan, size) && chan->hang_notify) + chan->hang_notify(chan); + } + dma->free -= size; +#ifdef NOUVEAU_DMA_DEBUG + dma->push_free = size; +#endif +} + +static inline void +nouveau_dma_begin(struct nouveau_channel *chan, struct nouveau_grobj *grobj, + int method, int size, const char* file, int line) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + (void)dma; + +#ifdef NOUVEAU_DMA_TRACE + NOUVEAU_MSG("BEGIN_RING %d/%08x/%d/0x%04x/%d\n", nvchan->drm.channel, + grobj->handle, grobj->subc, method, size); +#endif + +#ifdef NOUVEAU_DMA_DEBUG + if (dma->push_free) { + NOUVEAU_ERR("Previous packet incomplete: %d left at %s\n", + dma->push_free, faulty); + return; + } + sprintf(faulty,"%s:%d",file,line); +#endif + + nouveau_dma_space(chan, (size + 1)); + nouveau_dma_out(chan, (size << 18) | (grobj->subc << 13) | method); +} + +#define RING_SPACE_CH(ch,sz) nouveau_dma_space((ch), (sz)) +#define BEGIN_RING_CH(ch,gr,m,sz) nouveau_dma_begin((ch), (gr), (m), (sz), __FUNCTION__, __LINE__ ) +#define OUT_RING_CH(ch, data) nouveau_dma_out((ch), (data)) +#define OUT_RINGp_CH(ch,ptr,dwords) nouveau_dma_outp((ch), (void*)(ptr), \ + (dwords)) +#define FIRE_RING_CH(ch) nouveau_dma_kickoff((ch)) +#define WAIT_RING_CH(ch,sz) nouveau_dma_wait((ch), (sz)) + +#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_dri.h b/src/gallium/winsys/dri/nouveau/nouveau_dri.h new file mode 100644 index 00000000000..1207c2d609c --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_dri.h @@ -0,0 +1,28 @@ +#ifndef _NOUVEAU_DRI_ +#define _NOUVEAU_DRI_ + +#include "xf86drm.h" +#include "drm.h" +#include "nouveau_drm.h" + +struct nouveau_dri { + uint32_t device_id; /**< \brief PCI device ID */ + uint32_t width; /**< \brief width in pixels of display */ + uint32_t height; /**< \brief height in scanlines of display */ + uint32_t depth; /**< \brief depth of display (8, 15, 16, 24) */ + uint32_t bpp; /**< \brief bit depth of display (8, 16, 24, 32) */ + + uint32_t bus_type; /**< \brief ths bus type */ + uint32_t bus_mode; /**< \brief bus mode (used for AGP, maybe also for PCI-E ?) */ + + uint32_t front_offset; /**< \brief front buffer offset */ + uint32_t front_pitch; /**< \brief front buffer pitch */ + uint32_t back_offset; /**< \brief private back buffer offset */ + uint32_t back_pitch; /**< \brief private back buffer pitch */ + uint32_t depth_offset; /**< \brief private depth buffer offset */ + uint32_t depth_pitch; /**< \brief private depth buffer pitch */ + +}; + +#endif + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_drmif.h b/src/gallium/winsys/dri/nouveau/nouveau_drmif.h new file mode 100644 index 00000000000..37e404fc6cb --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_drmif.h @@ -0,0 +1,304 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_DRMIF_H__ +#define __NOUVEAU_DRMIF_H__ + +#include <stdint.h> +#include <xf86drm.h> +#include <nouveau_drm.h> + +#include "nouveau_device.h" +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_grobj.h" +#include "nouveau/nouveau_notifier.h" +#include "nouveau/nouveau_bo.h" +#include "nouveau/nouveau_resource.h" +#include "nouveau/nouveau_pushbuf.h" + +struct nouveau_device_priv { + struct nouveau_device base; + + int fd; + drm_context_t ctx; + drmLock *lock; + int needs_close; + + struct drm_nouveau_mem_alloc sa; + void *sa_map; + struct nouveau_resource *sa_heap; +}; +#define nouveau_device(n) ((struct nouveau_device_priv *)(n)) + +extern int +nouveau_device_open_existing(struct nouveau_device **, int close, + int fd, drm_context_t ctx); + +extern int +nouveau_device_open(struct nouveau_device **, const char *busid); + +extern void +nouveau_device_close(struct nouveau_device **); + +extern int +nouveau_device_get_param(struct nouveau_device *, uint64_t param, uint64_t *v); + +extern int +nouveau_device_set_param(struct nouveau_device *, uint64_t param, uint64_t val); + +struct nouveau_fence { + struct nouveau_channel *channel; +}; + +struct nouveau_fence_cb { + struct nouveau_fence_cb *next; + void (*func)(void *); + void *priv; +}; + +struct nouveau_fence_priv { + struct nouveau_fence base; + int refcount; + + struct nouveau_fence *next; + struct nouveau_fence_cb *signal_cb; + + uint32_t sequence; + int emitted; + int signalled; +}; +#define nouveau_fence(n) ((struct nouveau_fence_priv *)(n)) + +extern int +nouveau_fence_new(struct nouveau_channel *, struct nouveau_fence **); + +extern int +nouveau_fence_ref(struct nouveau_fence *, struct nouveau_fence **); + +extern int +nouveau_fence_signal_cb(struct nouveau_fence *, void (*)(void *), void *); + +extern void +nouveau_fence_emit(struct nouveau_fence *); + +extern int +nouveau_fence_wait(struct nouveau_fence **); + +extern void +nouveau_fence_flush(struct nouveau_channel *); + +struct nouveau_pushbuf_reloc { + uint64_t next; + uint64_t handle; + uint32_t *ptr; + uint32_t flags; + uint32_t data; + uint32_t vor; + uint32_t tor; +}; + +struct nouveau_pushbuf_bo { + uint64_t next; + uint64_t handle; + uint64_t flags; + uint64_t relocs; + int nr_relocs; +}; + +struct nouveau_pushbuf_priv { + struct nouveau_pushbuf base; + + unsigned nop_jump; + unsigned start; + unsigned size; + + uint64_t buffers; + int nr_buffers; +}; +#define nouveau_pushbuf(n) ((struct nouveau_pushbuf_priv *)(n)) + +#define pbbo_to_ptr(o) ((uint64_t)(unsigned long)(o)) +#define ptr_to_pbbo(h) ((struct nouveau_pushbuf_bo *)(unsigned long)(h)) +#define pbrel_to_ptr(o) ((uint64_t)(unsigned long)(o)) +#define ptr_to_pbrel(h) ((struct nouveau_pushbuf_reloc *)(unsigned long)(h)) +#define bo_to_ptr(o) ((uint64_t)(unsigned long)(o)) +#define ptr_to_bo(h) ((struct nouveau_bo_priv *)(unsigned long)(h)) + +extern int +nouveau_pushbuf_init(struct nouveau_channel *); + +extern int +nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min); + +extern int +nouveau_pushbuf_emit_reloc(struct nouveau_channel *, void *ptr, + struct nouveau_bo *, uint32_t data, uint32_t flags, + uint32_t vor, uint32_t tor); + +struct nouveau_dma_priv { + uint32_t base; + uint32_t max; + uint32_t cur; + uint32_t put; + uint32_t free; + + int push_free; +} dma; + +struct nouveau_channel_priv { + struct nouveau_channel base; + + struct drm_nouveau_channel_alloc drm; + + uint32_t *pushbuf; + void *notifier_block; + + volatile uint32_t *user; + volatile uint32_t *put; + volatile uint32_t *get; + volatile uint32_t *ref_cnt; + + struct nouveau_dma_priv dma_master; + struct nouveau_dma_priv dma_bufmgr; + struct nouveau_dma_priv *dma; + + struct nouveau_fence *fence_head; + struct nouveau_fence *fence_tail; + uint32_t fence_sequence; + + struct nouveau_pushbuf_priv pb; + + unsigned user_charge; +}; +#define nouveau_channel(n) ((struct nouveau_channel_priv *)(n)) + +extern int +nouveau_channel_alloc(struct nouveau_device *, uint32_t fb, uint32_t tt, + struct nouveau_channel **); + +extern void +nouveau_channel_free(struct nouveau_channel **); + +struct nouveau_grobj_priv { + struct nouveau_grobj base; +}; +#define nouveau_grobj(n) ((struct nouveau_grobj_priv *)(n)) + +extern int nouveau_grobj_alloc(struct nouveau_channel *, uint32_t handle, + int class, struct nouveau_grobj **); +extern int nouveau_grobj_ref(struct nouveau_channel *, uint32_t handle, + struct nouveau_grobj **); +extern void nouveau_grobj_free(struct nouveau_grobj **); + + +struct nouveau_notifier_priv { + struct nouveau_notifier base; + + struct drm_nouveau_notifierobj_alloc drm; + volatile void *map; +}; +#define nouveau_notifier(n) ((struct nouveau_notifier_priv *)(n)) + +extern int +nouveau_notifier_alloc(struct nouveau_channel *, uint32_t handle, int count, + struct nouveau_notifier **); + +extern void +nouveau_notifier_free(struct nouveau_notifier **); + +extern void +nouveau_notifier_reset(struct nouveau_notifier *, int id); + +extern uint32_t +nouveau_notifier_status(struct nouveau_notifier *, int id); + +extern uint32_t +nouveau_notifier_return_val(struct nouveau_notifier *, int id); + +extern int +nouveau_notifier_wait_status(struct nouveau_notifier *, int id, int status, + int timeout); + +struct nouveau_bo_priv { + struct nouveau_bo base; + + struct nouveau_fence *fence; + struct nouveau_fence *wr_fence; + + struct drm_nouveau_mem_alloc drm; + void *map; + + void *sysmem; + int user; + + int refcount; + + uint64_t offset; + uint64_t flags; +}; +#define nouveau_bo(n) ((struct nouveau_bo_priv *)(n)) + +extern int +nouveau_bo_init(struct nouveau_device *); + +extern void +nouveau_bo_takedown(struct nouveau_device *); + +extern int +nouveau_bo_new(struct nouveau_device *, uint32_t flags, int align, int size, + struct nouveau_bo **); + +extern int +nouveau_bo_user(struct nouveau_device *, void *ptr, int size, + struct nouveau_bo **); + +extern int +nouveau_bo_ref(struct nouveau_device *, uint64_t handle, struct nouveau_bo **); + +extern int +nouveau_bo_set_status(struct nouveau_bo *, uint32_t flags); + +extern void +nouveau_bo_del(struct nouveau_bo **); + +extern int +nouveau_bo_map(struct nouveau_bo *, uint32_t flags); + +extern void +nouveau_bo_unmap(struct nouveau_bo *); + +extern int +nouveau_bo_validate(struct nouveau_channel *, struct nouveau_bo *, + struct nouveau_fence *fence, uint32_t flags); + +extern int +nouveau_resource_init(struct nouveau_resource **heap, unsigned start, + unsigned size); + +extern int +nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv, + struct nouveau_resource **); + +extern void +nouveau_resource_free(struct nouveau_resource **); + +#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_fence.c b/src/gallium/winsys/dri/nouveau/nouveau_fence.c new file mode 100644 index 00000000000..7714e6f2485 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_fence.c @@ -0,0 +1,215 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> +#include <assert.h> + +#include "nouveau_drmif.h" +#include "nouveau_dma.h" +#include "nouveau_local.h" + +static void +nouveau_fence_del_unsignalled(struct nouveau_fence *fence) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(fence->channel); + struct nouveau_fence *le; + + if (nvchan->fence_head == fence) { + nvchan->fence_head = nouveau_fence(fence)->next; + if (nvchan->fence_head == NULL) + nvchan->fence_tail = NULL; + return; + } + + le = nvchan->fence_head; + while (le && nouveau_fence(le)->next != fence) + le = nouveau_fence(le)->next; + assert(le && nouveau_fence(le)->next == fence); + nouveau_fence(le)->next = nouveau_fence(fence)->next; + if (nvchan->fence_tail == fence) + nvchan->fence_tail = le; +} + +static void +nouveau_fence_del(struct nouveau_fence **fence) +{ + struct nouveau_fence_priv *nvfence; + + if (!fence || !*fence) + return; + nvfence = nouveau_fence(*fence); + *fence = NULL; + + if (--nvfence->refcount) + return; + + if (nvfence->emitted && !nvfence->signalled) { + if (nvfence->signal_cb) { + nvfence->refcount++; + nouveau_fence_wait((void *)&nvfence); + return; + } + + nouveau_fence_del_unsignalled(&nvfence->base); + } + free(nvfence); +} + +int +nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **fence) +{ + struct nouveau_fence_priv *nvfence; + + if (!chan || !fence || *fence) + return -EINVAL; + + nvfence = calloc(1, sizeof(struct nouveau_fence_priv)); + if (!nvfence) + return -ENOMEM; + nvfence->base.channel = chan; + nvfence->refcount = 1; + + *fence = &nvfence->base; + return 0; +} + +int +nouveau_fence_ref(struct nouveau_fence *ref, struct nouveau_fence **fence) +{ + struct nouveau_fence_priv *nvfence; + + if (!fence) + return -EINVAL; + + if (*fence) { + nouveau_fence_del(fence); + *fence = NULL; + } + + if (ref) { + nvfence = nouveau_fence(ref); + nvfence->refcount++; + *fence = &nvfence->base; + } + + return 0; +} + +int +nouveau_fence_signal_cb(struct nouveau_fence *fence, void (*func)(void *), + void *priv) +{ + struct nouveau_fence_priv *nvfence = nouveau_fence(fence); + struct nouveau_fence_cb *cb; + + if (!nvfence || !func) + return -EINVAL; + + cb = malloc(sizeof(struct nouveau_fence_cb)); + if (!cb) + return -ENOMEM; + + cb->func = func; + cb->priv = priv; + cb->next = nvfence->signal_cb; + nvfence->signal_cb = cb; + return 0; +} + +void +nouveau_fence_emit(struct nouveau_fence *fence) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(fence->channel); + struct nouveau_fence_priv *nvfence = nouveau_fence(fence); + + nvfence->emitted = 1; + nvfence->sequence = ++nvchan->fence_sequence; + if (nvfence->sequence == 0xffffffff) + NOUVEAU_ERR("AII wrap unhandled\n"); + + /*XXX: assumes subc 0 is populated */ + RING_SPACE_CH(fence->channel, 2); + OUT_RING_CH (fence->channel, 0x00040050); + OUT_RING_CH (fence->channel, nvfence->sequence); + + if (nvchan->fence_tail) { + nouveau_fence(nvchan->fence_tail)->next = fence; + } else { + nvchan->fence_head = fence; + } + nvchan->fence_tail = fence; +} + +void +nouveau_fence_flush(struct nouveau_channel *chan) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + uint32_t sequence = *nvchan->ref_cnt; + + while (nvchan->fence_head) { + struct nouveau_fence_priv *nvfence; + + nvfence = nouveau_fence(nvchan->fence_head); + if (nvfence->sequence > sequence) + break; + + nouveau_fence_del_unsignalled(&nvfence->base); + nvfence->signalled = 1; + + if (nvfence->signal_cb) { + struct nouveau_fence *fence = NULL; + + nouveau_fence_ref(nvchan->fence_head, &fence); + + while (nvfence->signal_cb) { + struct nouveau_fence_cb *cb; + + cb = nvfence->signal_cb; + nvfence->signal_cb = cb->next; + cb->func(cb->priv); + free(cb); + } + + nouveau_fence_ref(NULL, &fence); + } + } +} + +int +nouveau_fence_wait(struct nouveau_fence **fence) +{ + struct nouveau_fence_priv *nvfence; + + if (!fence || !*fence) + return -EINVAL; + nvfence = nouveau_fence(*fence); + + if (nvfence->emitted) { + while (!nvfence->signalled) + nouveau_fence_flush(nvfence->base.channel); + } + nouveau_fence_ref(NULL, fence); + + return 0; +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_grobj.c b/src/gallium/winsys/dri/nouveau/nouveau_grobj.c new file mode 100644 index 00000000000..55dfeb99aa7 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_grobj.c @@ -0,0 +1,107 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> + +#include "nouveau_drmif.h" + +int +nouveau_grobj_alloc(struct nouveau_channel *chan, uint32_t handle, + int class, struct nouveau_grobj **grobj) +{ + struct nouveau_device_priv *nvdev = nouveau_device(chan->device); + struct nouveau_grobj_priv *nvgrobj; + struct drm_nouveau_grobj_alloc g; + int ret; + + if (!nvdev || !grobj || *grobj) + return -EINVAL; + + nvgrobj = calloc(1, sizeof(*nvgrobj)); + if (!nvgrobj) + return -ENOMEM; + nvgrobj->base.channel = chan; + nvgrobj->base.handle = handle; + nvgrobj->base.grclass = class; + + g.channel = chan->id; + g.handle = handle; + g.class = class; + ret = drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GROBJ_ALLOC, + &g, sizeof(g)); + if (ret) { + nouveau_grobj_free((void *)&grobj); + return ret; + } + + *grobj = &nvgrobj->base; + return 0; +} + +int +nouveau_grobj_ref(struct nouveau_channel *chan, uint32_t handle, + struct nouveau_grobj **grobj) +{ + struct nouveau_grobj_priv *nvgrobj; + + if (!chan || !grobj || *grobj) + return -EINVAL; + + nvgrobj = calloc(1, sizeof(struct nouveau_grobj_priv)); + if (!nvgrobj) + return -ENOMEM; + nvgrobj->base.channel = chan; + nvgrobj->base.handle = handle; + nvgrobj->base.grclass = 0; + + *grobj = &nvgrobj->base; + return 0; +} + +void +nouveau_grobj_free(struct nouveau_grobj **grobj) +{ + struct nouveau_device_priv *nvdev; + struct nouveau_channel_priv *chan; + struct nouveau_grobj_priv *nvgrobj; + + if (!grobj || !*grobj) + return; + nvgrobj = nouveau_grobj(*grobj); + *grobj = NULL; + + + chan = nouveau_channel(nvgrobj->base.channel); + nvdev = nouveau_device(chan->base.device); + + if (nvgrobj->base.grclass) { + struct drm_nouveau_gpuobj_free f; + + f.channel = chan->drm.channel; + f.handle = nvgrobj->base.handle; + drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GPUOBJ_FREE, + &f, sizeof(f)); + } + free(nvgrobj); +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_local.h b/src/gallium/winsys/dri/nouveau/nouveau_local.h new file mode 100644 index 00000000000..59febca2929 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_local.h @@ -0,0 +1,89 @@ +#ifndef __NOUVEAU_LOCAL_H__ +#define __NOUVEAU_LOCAL_H__ + +#include <stdio.h> + +/* Debug output */ +#define NOUVEAU_MSG(fmt, args...) do { \ + fprintf(stdout, "nouveau: "fmt, ##args); \ + fflush(stdout); \ +} while(0) + +#define NOUVEAU_ERR(fmt, args...) do { \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); \ + fflush(stderr); \ +} while(0) + +#define NOUVEAU_TIME_MSEC() 0 + +/* User FIFO control */ +//#define NOUVEAU_DMA_TRACE +//#define NOUVEAU_DMA_DEBUG +//#define NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF +#define NOUVEAU_DMA_BARRIER +#define NOUVEAU_DMA_TIMEOUT 2000 + +/* Push buffer access macros */ +#define OUT_RING(data) do { \ + (*nv->channel->pushbuf->cur++) = (data); \ +} while(0) + +#define OUT_RINGp(src,size) do { \ + memcpy(nv->channel->pushbuf->cur, (src), (size)<<2); \ + nv->channel->pushbuf->cur += (size); \ +} while(0) + +#define OUT_RINGf(data) do { \ + union { float v; uint32_t u; } c; \ + c.v = (data); \ + OUT_RING(c.u); \ +} while(0) + +#define FIRE_RING() do { \ + nouveau_pushbuf_flush(nv->channel, 0); \ +} while(0) + +#define BEGIN_RING_GR(obj,mthd,size) do { \ + if (nv->channel->pushbuf->remaining < ((size) + 1)) \ + nouveau_pushbuf_flush(nv->channel, ((size) + 1)); \ + OUT_RING(((obj)->subc << 13) | ((size) << 18) | (mthd)); \ + nv->channel->pushbuf->remaining -= ((size) + 1); \ +} while(0) + +#define BEGIN_RING(obj,mthd,size) do { \ + BEGIN_RING_GR(nv->obj, (mthd), (size)); \ +} while(0) + +#define BIND_RING(o,s) do { \ + nv->o->subc = (s); \ + BEGIN_RING(o, 0x0000, 1); \ + OUT_RING (nv->o->handle); \ +} while(0) + +#define OUT_RELOC(buf,data,flags,vor,tor) do { \ + nouveau_pipe_emit_reloc(nv->channel, nv->channel->pushbuf->cur++, \ + buf, (data), (flags), (vor), (tor)); \ +} while(0) + +/* Raw data + flags depending on FB/TT buffer */ +#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ + OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ +} while(0) + +/* FB/TT object handle */ +#define OUT_RELOCo(bo,flags) do { \ + OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ + nv->channel->vram->handle, nv->channel->gart->handle); \ +} while(0) + +/* Low 32-bits of offset */ +#define OUT_RELOCl(bo,delta,flags) do { \ + OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ +} while(0) + +/* High 32-bits of offset */ +#define OUT_RELOCh(bo,delta,flags) do { \ + OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ +} while(0) + +#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_lock.c b/src/gallium/winsys/dri/nouveau/nouveau_lock.c new file mode 100644 index 00000000000..9adb9ac8547 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_lock.c @@ -0,0 +1,94 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/glheader.h" +#include "glapi/glthread.h" +#include <GL/internal/glcore.h> + +#include "nouveau_context.h" +#include "nouveau_screen.h" + +_glthread_DECLARE_STATIC_MUTEX( lockMutex ); + +static void +nouveau_contended_lock(struct nouveau_context *nv, GLuint flags) +{ + __DRIdrawablePrivate *dPriv = nv->dri_drawable; + __DRIscreenPrivate *sPriv = nv->dri_screen; + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + drmGetLock(nvdev->fd, nvdev->ctx, flags); + + /* If the window moved, may need to set a new cliprect now. + * + * NOTE: This releases and regains the hw lock, so all state + * checking must be done *after* this call: + */ + if (dPriv) + DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); +} + +/* Lock the hardware and validate our state. + */ +void +LOCK_HARDWARE(struct nouveau_context *nv) +{ + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + char __ret=0; + + _glthread_LOCK_MUTEX(lockMutex); + assert(!nv->locked); + + DRM_CAS(nvdev->lock, nvdev->ctx, + (DRM_LOCK_HELD | nvdev->ctx), __ret); + + if (__ret) + nouveau_contended_lock(nv, 0); + nv->locked = GL_TRUE; +} + + + /* Unlock the hardware using the global current context + */ +void +UNLOCK_HARDWARE(struct nouveau_context *nv) +{ + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + assert(nv->locked); + nv->locked = GL_FALSE; + + DRM_UNLOCK(nvdev->fd, nvdev->lock, nvdev->ctx); + + _glthread_UNLOCK_MUTEX(lockMutex); +} diff --git a/src/gallium/winsys/dri/nouveau/nouveau_notifier.c b/src/gallium/winsys/dri/nouveau/nouveau_notifier.c new file mode 100644 index 00000000000..01e8f38440e --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_notifier.c @@ -0,0 +1,137 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> + +#include "nouveau_drmif.h" +#include "nouveau_local.h" + +#define NOTIFIER(__v) \ + struct nouveau_notifier_priv *nvnotify = nouveau_notifier(notifier); \ + volatile uint32_t *__v = (void*)nvnotify->map + (id * 32) + +int +nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle, + int count, struct nouveau_notifier **notifier) +{ + struct nouveau_notifier_priv *nvnotify; + int ret; + + if (!chan || !notifier || *notifier) + return -EINVAL; + + nvnotify = calloc(1, sizeof(struct nouveau_notifier_priv)); + if (!nvnotify) + return -ENOMEM; + nvnotify->base.channel = chan; + nvnotify->base.handle = handle; + + nvnotify->drm.channel = chan->id; + nvnotify->drm.handle = handle; + nvnotify->drm.count = count; + if ((ret = drmCommandWriteRead(nouveau_device(chan->device)->fd, + DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, + &nvnotify->drm, + sizeof(nvnotify->drm)))) { + nouveau_notifier_free((void *)&nvnotify); + return ret; + } + + nvnotify->map = (void *)nouveau_channel(chan)->notifier_block + + nvnotify->drm.offset; + *notifier = &nvnotify->base; + return 0; +} + +void +nouveau_notifier_free(struct nouveau_notifier **notifier) +{ + + struct nouveau_notifier_priv *nvnotify; + struct nouveau_channel_priv *nvchan; + struct nouveau_device_priv *nvdev; + struct drm_nouveau_gpuobj_free f; + + if (!notifier || !*notifier) + return; + nvnotify = nouveau_notifier(*notifier); + *notifier = NULL; + + nvchan = nouveau_channel(nvnotify->base.channel); + nvdev = nouveau_device(nvchan->base.device); + + f.channel = nvchan->drm.channel; + f.handle = nvnotify->base.handle; + drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GPUOBJ_FREE, &f, sizeof(f)); + free(nvnotify); +} + +void +nouveau_notifier_reset(struct nouveau_notifier *notifier, int id) +{ + NOTIFIER(n); + + n[NV_NOTIFY_TIME_0 /4] = 0x00000000; + n[NV_NOTIFY_TIME_1 /4] = 0x00000000; + n[NV_NOTIFY_RETURN_VALUE/4] = 0x00000000; + n[NV_NOTIFY_STATE /4] = (NV_NOTIFY_STATE_STATUS_IN_PROCESS << + NV_NOTIFY_STATE_STATUS_SHIFT); +} + +uint32_t +nouveau_notifier_status(struct nouveau_notifier *notifier, int id) +{ + NOTIFIER(n); + + return n[NV_NOTIFY_STATE/4] >> NV_NOTIFY_STATE_STATUS_SHIFT; +} + +uint32_t +nouveau_notifier_return_val(struct nouveau_notifier *notifier, int id) +{ + NOTIFIER(n); + + return n[NV_NOTIFY_RETURN_VALUE/4]; +} + +int +nouveau_notifier_wait_status(struct nouveau_notifier *notifier, int id, + int status, int timeout) +{ + NOTIFIER(n); + uint32_t time = 0, t_start = NOUVEAU_TIME_MSEC(); + + while (time <= timeout) { + uint32_t v; + + v = n[NV_NOTIFY_STATE/4] >> NV_NOTIFY_STATE_STATUS_SHIFT; + if (v == status) + return 0; + + if (timeout) + time = NOUVEAU_TIME_MSEC() - t_start; + } + + return -EBUSY; +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c b/src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c new file mode 100644 index 00000000000..7d5eddb92ff --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c @@ -0,0 +1,261 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> +#include <assert.h> + +#include "nouveau_drmif.h" +#include "nouveau_dma.h" + +#define PB_BUFMGR_DWORDS (4096 / 2) +#define PB_MIN_USER_DWORDS 2048 + +static int +nouveau_pushbuf_space(struct nouveau_channel *chan, unsigned min) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; + + assert((min + 1) <= nvchan->dma->max); + + /* Wait for enough space in push buffer */ + min = min < PB_MIN_USER_DWORDS ? PB_MIN_USER_DWORDS : min; + min += 1; /* a bit extra for the NOP */ + if (nvchan->dma->free < min) + WAIT_RING_CH(chan, min); + + /* Insert NOP, may turn into a jump later */ + RING_SPACE_CH(chan, 1); + nvpb->nop_jump = nvchan->dma->cur; + OUT_RING_CH(chan, 0); + + /* Any remaining space is available to the user */ + nvpb->start = nvchan->dma->cur; + nvpb->size = nvchan->dma->free; + nvpb->base.channel = chan; + nvpb->base.remaining = nvpb->size; + nvpb->base.cur = &nvchan->pushbuf[nvpb->start]; + + return 0; +} + +int +nouveau_pushbuf_init(struct nouveau_channel *chan) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *m = &nvchan->dma_master; + struct nouveau_dma_priv *b = &nvchan->dma_bufmgr; + int i; + + if (!nvchan) + return -EINVAL; + + /* Reassign last bit of push buffer for a "separate" bufmgr + * ring buffer + */ + m->max -= PB_BUFMGR_DWORDS; + m->free -= PB_BUFMGR_DWORDS; + + b->base = m->base + ((m->max + 2) << 2); + b->max = PB_BUFMGR_DWORDS - 2; + b->cur = b->put = 0; + b->free = b->max - b->cur; + + /* Some NOPs just to be safe + *XXX: RING_SKIPS + */ + nvchan->dma = b; + RING_SPACE_CH(chan, 8); + for (i = 0; i < 8; i++) + OUT_RING_CH(chan, 0); + nvchan->dma = m; + + nouveau_pushbuf_space(chan, 0); + chan->pushbuf = &nvchan->pb.base; + + return 0; +} + +static uint32_t +nouveau_pushbuf_calc_reloc(struct nouveau_bo *bo, + struct nouveau_pushbuf_reloc *r) +{ + uint32_t push; + + if (r->flags & NOUVEAU_BO_LOW) { + push = bo->offset + r->data; + } else + if (r->flags & NOUVEAU_BO_HIGH) { + push = (bo->offset + r->data) >> 32; + } else { + push = r->data; + } + + if (r->flags & NOUVEAU_BO_OR) { + if (bo->flags & NOUVEAU_BO_VRAM) + push |= r->vor; + else + push |= r->tor; + } + + return push; +} + +/* This would be our TTM "superioctl" */ +int +nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; + struct nouveau_pushbuf_bo *pbbo; + struct nouveau_fence *fence = NULL; + int ret; + + if (nvpb->base.remaining == nvpb->size) + return 0; + + nvpb->size -= nvpb->base.remaining; + nvchan->dma->cur += nvpb->size; + nvchan->dma->free -= nvpb->size; + assert(nvchan->dma->cur <= nvchan->dma->max); + + ret = nouveau_fence_new(chan, &fence); + if (ret) + return ret; + + nvchan->dma = &nvchan->dma_bufmgr; + nvchan->pushbuf[nvpb->nop_jump] = 0x20000000 | + (nvchan->dma->base + (nvchan->dma->cur << 2)); + + /* Validate buffers + apply relocations */ + nvchan->user_charge = 0; + while ((pbbo = ptr_to_pbbo(nvpb->buffers))) { + struct nouveau_pushbuf_reloc *r; + struct nouveau_bo *bo = &ptr_to_bo(pbbo->handle)->base; + + ret = nouveau_bo_validate(chan, bo, fence, pbbo->flags); + assert (ret == 0); + + if (bo->offset == nouveau_bo(bo)->offset && + bo->flags == nouveau_bo(bo)->flags) { + while ((r = ptr_to_pbrel(pbbo->relocs))) { + pbbo->relocs = r->next; + free(r); + } + + nvpb->buffers = pbbo->next; + free(pbbo); + continue; + } + bo->offset = nouveau_bo(bo)->offset; + bo->flags = nouveau_bo(bo)->flags; + + while ((r = ptr_to_pbrel(pbbo->relocs))) { + *r->ptr = nouveau_pushbuf_calc_reloc(bo, r); + pbbo->relocs = r->next; + free(r); + } + + nvpb->buffers = pbbo->next; + free(pbbo); + } + nvpb->nr_buffers = 0; + + /* Switch back to user's ring */ + RING_SPACE_CH(chan, 1); + OUT_RING_CH(chan, 0x20000000 | ((nvpb->start << 2) + + nvchan->dma_master.base)); + nvchan->dma = &nvchan->dma_master; + + /* Fence + kickoff */ + nouveau_fence_emit(fence); + FIRE_RING_CH(chan); + nouveau_fence_ref(NULL, &fence); + + /* Allocate space for next push buffer */ + assert(!nouveau_pushbuf_space(chan, min)); + + return 0; +} + +static struct nouveau_pushbuf_bo * +nouveau_pushbuf_emit_buffer(struct nouveau_channel *chan, struct nouveau_bo *bo) +{ + struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(chan->pushbuf); + struct nouveau_pushbuf_bo *pbbo = ptr_to_pbbo(nvpb->buffers); + + while (pbbo) { + if (pbbo->handle == bo->handle) + return pbbo; + pbbo = ptr_to_pbbo(pbbo->next); + } + + pbbo = malloc(sizeof(struct nouveau_pushbuf_bo)); + pbbo->next = nvpb->buffers; + nvpb->buffers = pbbo_to_ptr(pbbo); + nvpb->nr_buffers++; + + pbbo->handle = bo_to_ptr(bo); + pbbo->flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART; + pbbo->relocs = 0; + pbbo->nr_relocs = 0; + return pbbo; +} + +int +nouveau_pushbuf_emit_reloc(struct nouveau_channel *chan, void *ptr, + struct nouveau_bo *bo, uint32_t data, uint32_t flags, + uint32_t vor, uint32_t tor) +{ + struct nouveau_pushbuf_bo *pbbo; + struct nouveau_pushbuf_reloc *r; + + if (!chan) + return -EINVAL; + + pbbo = nouveau_pushbuf_emit_buffer(chan, bo); + if (!pbbo) + return -EFAULT; + + r = malloc(sizeof(struct nouveau_pushbuf_reloc)); + r->next = pbbo->relocs; + pbbo->relocs = pbrel_to_ptr(r); + pbbo->nr_relocs++; + + pbbo->flags |= (flags & NOUVEAU_BO_RDWR); + pbbo->flags &= (flags | NOUVEAU_BO_RDWR); + + r->handle = bo_to_ptr(r); + r->ptr = ptr; + r->flags = flags; + r->data = data; + r->vor = vor; + r->tor = tor; + + if (flags & NOUVEAU_BO_DUMMY) + *(uint32_t *)ptr = 0; + else + *(uint32_t *)ptr = nouveau_pushbuf_calc_reloc(bo, r); + return 0; +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_resource.c b/src/gallium/winsys/dri/nouveau/nouveau_resource.c new file mode 100644 index 00000000000..5d9d578b4fe --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_resource.c @@ -0,0 +1,111 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> + +#include "nouveau_drmif.h" +#include "nouveau_local.h" + +int +nouveau_resource_init(struct nouveau_resource **heap, + unsigned start, unsigned size) +{ + struct nouveau_resource *r; + + r = calloc(1, sizeof(struct nouveau_resource)); + if (!r) + return 1; + + r->start = start; + r->size = size; + *heap = r; + return 0; +} + +int +nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv, + struct nouveau_resource **res) +{ + struct nouveau_resource *r; + + if (!heap || !size || !res || *res) + return 1; + + while (heap) { + if (!heap->in_use && heap->size >= size) { + r = calloc(1, sizeof(struct nouveau_resource)); + if (!r) + return 1; + + r->start = (heap->start + heap->size) - size; + r->size = size; + r->in_use = 1; + r->priv = priv; + + heap->size -= size; + + r->next = heap->next; + if (heap->next) + heap->next->prev = r; + r->prev = heap; + heap->next = r; + + *res = r; + return 0; + } + + heap = heap->next; + } + + return 1; +} + +void +nouveau_resource_free(struct nouveau_resource **res) +{ + struct nouveau_resource *r; + + if (!res || !*res) + return; + r = *res; + + if (r->prev && !r->prev->in_use) { + r->prev->next = r->next; + if (r->next) + r->next->prev = r->prev; + r->prev->size += r->size; + free(r); + } else + if (r->next && !r->next->in_use) { + r->next->prev = r->prev; + if (r->prev) + r->prev->next = r->next; + r->next->size += r->size; + r->next->start = r->start; + free(r); + } else { + r->in_use = 0; + } + + *res = NULL; +} diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.c b/src/gallium/winsys/dri/nouveau/nouveau_screen.c new file mode 100644 index 00000000000..f06e1784831 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_screen.c @@ -0,0 +1,308 @@ +#include "utils.h" +#include "vblank.h" +#include "xmlpool.h" + +#include "pipe/p_context.h" +#include "state_tracker/st_public.h" +#include "state_tracker/st_cb_fbo.h" + +#include "nouveau_context.h" +#include "nouveau_device.h" +#include "nouveau_drm.h" +#include "nouveau_dri.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_swapbuffers.h" + +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 10 +#error nouveau_drm.h version does not match expected version +#endif + +/* Extension stuff, enabling of extensions handled by Gallium's GL state + * tracker. But, we still need to define the entry points we want. + */ +#define need_GL_ARB_fragment_program +#define need_GL_ARB_multisample +#define need_GL_ARB_occlusion_query +#define need_GL_ARB_point_parameters +#define need_GL_ARB_shader_objects +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_program +#define need_GL_ARB_vertex_shader +#define need_GL_ARB_vertex_buffer_object +#define need_GL_EXT_compiled_vertex_array +#define need_GL_EXT_fog_coord +#define need_GL_EXT_secondary_color +#define need_GL_EXT_framebuffer_object +#define need_GL_VERSION_2_0 +#define need_GL_VERSION_2_1 +#include "extension_helper.h" + +const struct dri_extension card_extensions[] = +{ + { "GL_ARB_multisample", GL_ARB_multisample_functions }, + { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, + { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, + { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, + { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, + { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, + { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, + { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, + { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, + { "GL_EXT_compiled_vertex_array", GL_EXT_compiled_vertex_array_functions }, + { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, + { NULL, 0 } +}; + +PUBLIC const char __driConfigOptions[] = +DRI_CONF_BEGIN +DRI_CONF_END; +static const GLuint __driNConfigOptions = 0; + +extern const struct dri_extension common_extensions[]; +extern const struct dri_extension nv40_extensions[]; + +static GLboolean +nouveau_screen_create(__DRIscreenPrivate *driScrnPriv) +{ + struct nouveau_dri *nv_dri = driScrnPriv->pDevPriv; + struct nouveau_screen *nv_screen; + int ret; + + if (driScrnPriv->devPrivSize != sizeof(struct nouveau_dri)) { + NOUVEAU_ERR("DRI struct mismatch between DDX/DRI\n"); + return GL_FALSE; + } + + nv_screen = CALLOC_STRUCT(nouveau_screen); + if (!nv_screen) + return GL_FALSE; + nv_screen->driScrnPriv = driScrnPriv; + driScrnPriv->private = (void *)nv_screen; + + driParseOptionInfo(&nv_screen->option_cache, + __driConfigOptions, __driNConfigOptions); + + if ((ret = nouveau_device_open_existing(&nv_screen->device, 0, + driScrnPriv->fd, 0))) { + NOUVEAU_ERR("Failed opening nouveau device: %d\n", ret); + return GL_FALSE; + } + + nv_screen->front_offset = nv_dri->front_offset; + nv_screen->front_pitch = nv_dri->front_pitch * (nv_dri->bpp / 8); + nv_screen->front_cpp = nv_dri->bpp / 8; + nv_screen->front_height = nv_dri->height; + + return GL_TRUE; +} + +static void +nouveau_screen_destroy(__DRIscreenPrivate *driScrnPriv) +{ + struct nouveau_screen *nv_screen = driScrnPriv->private; + + driScrnPriv->private = NULL; + FREE(nv_screen); +} + +static GLboolean +nouveau_create_buffer(__DRIscreenPrivate * driScrnPriv, + __DRIdrawablePrivate * driDrawPriv, + const __GLcontextModes *glVis, GLboolean pixmapBuffer) +{ + struct nouveau_framebuffer *nvfb; + enum pipe_format colour, depth, stencil; + + if (pixmapBuffer) + return GL_FALSE; + + nvfb = CALLOC_STRUCT(nouveau_framebuffer); + if (!nvfb) + return GL_FALSE; + + if (glVis->redBits == 5) + colour = PIPE_FORMAT_R5G6B5_UNORM; + else + colour = PIPE_FORMAT_A8R8G8B8_UNORM; + + if (glVis->depthBits == 16) + depth = PIPE_FORMAT_Z16_UNORM; + else if (glVis->depthBits == 24) + depth = PIPE_FORMAT_Z24S8_UNORM; + else + depth = PIPE_FORMAT_NONE; + + if (glVis->stencilBits == 8) + stencil = PIPE_FORMAT_Z24S8_UNORM; + else + stencil = PIPE_FORMAT_NONE; + + nvfb->stfb = st_create_framebuffer(glVis, colour, depth, stencil, + driDrawPriv->w, driDrawPriv->h, + (void*)nvfb); + if (!nvfb->stfb) { + free(nvfb); + return GL_FALSE; + } + + driDrawPriv->driverPrivate = (void *)nvfb; + return GL_TRUE; +} + +static void +nouveau_destroy_buffer(__DRIdrawablePrivate * driDrawPriv) +{ + struct nouveau_framebuffer *nvfb; + + nvfb = (struct nouveau_framebuffer *)driDrawPriv->driverPrivate; + st_unreference_framebuffer(&nvfb->stfb); + free(nvfb); +} + +static struct __DriverAPIRec +nouveau_api = { + .InitDriver = nouveau_screen_create, + .DestroyScreen = nouveau_screen_destroy, + .CreateContext = nouveau_context_create, + .DestroyContext = nouveau_context_destroy, + .CreateBuffer = nouveau_create_buffer, + .DestroyBuffer = nouveau_destroy_buffer, + .SwapBuffers = nouveau_swap_buffers, + .MakeCurrent = nouveau_context_bind, + .UnbindContext = nouveau_context_unbind, + .GetSwapInfo = NULL, + .GetMSC = NULL, + .WaitForMSC = NULL, + .WaitForSBC = NULL, + .SwapBuffersMSC = NULL, + .CopySubBuffer = nouveau_copy_sub_buffer, + .setTexOffset = NULL +}; + +static __GLcontextModes * +nouveau_fill_in_modes(unsigned pixel_bits, unsigned depth_bits, + unsigned stencil_bits, GLboolean have_back_buffer) +{ + __GLcontextModes * modes; + __GLcontextModes * m; + unsigned num_modes; + unsigned depth_buffer_factor; + unsigned back_buffer_factor; + int i; + + static const struct { + GLenum format; + GLenum type; + } fb_format_array[] = { + { GL_RGB , GL_UNSIGNED_SHORT_5_6_5 }, + { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV }, + { GL_BGR , GL_UNSIGNED_INT_8_8_8_8_REV }, + }; + + /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't + * support pageflipping at all. + */ + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML + }; + + u_int8_t depth_bits_array[4] = { 0, 16, 24, 24 }; + u_int8_t stencil_bits_array[4] = { 0, 0, 0, 8 }; + + depth_buffer_factor = 4; + back_buffer_factor = (have_back_buffer) ? 3 : 1; + + num_modes = ((pixel_bits==16) ? 1 : 2) * + depth_buffer_factor * back_buffer_factor * 4; + modes = (*dri_interface->createContextModes)(num_modes, + sizeof(__GLcontextModes)); + m = modes; + + for (i=((pixel_bits==16)?0:1);i<((pixel_bits==16)?1:3);i++) { + if (!driFillInModes(&m, fb_format_array[i].format, + fb_format_array[i].type, + depth_bits_array, + stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, + back_buffer_factor, + GLX_TRUE_COLOR)) { + fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__ ); + return NULL; + } + + if (!driFillInModes(&m, fb_format_array[i].format, + fb_format_array[i].type, + depth_bits_array, + stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, + back_buffer_factor, + GLX_DIRECT_COLOR)) { + fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__ ); + return NULL; + } + } + + return modes; +} +PUBLIC void * +__driCreateNewScreen_20050727(__DRInativeDisplay *dpy, int scrn, + __DRIscreen *psc, const __GLcontextModes * modes, + const __DRIversion * ddx_version, + const __DRIversion * dri_version, + const __DRIversion * drm_version, + const __DRIframebuffer * frame_buffer, + void * pSAREA, int fd, int internal_api_version, + const __DRIinterfaceMethods * interface, + __GLcontextModes ** driver_modes) +{ + __DRIscreenPrivate *psp; + static const __DRIversion ddx_expected = + { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = + { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; + struct nouveau_dri *nv_dri = NULL; + + dri_interface = interface; + + if (!driCheckDriDdxDrmVersions2("nouveau", + dri_version, &dri_expected, + ddx_version, &ddx_expected, + drm_version, &drm_expected)) { + return NULL; + } + + if (drm_expected.patch != drm_version->patch) { + fprintf(stderr, "Incompatible DRM patch level.\n" + "Expected: %d\n" "Current : %d\n", + drm_expected.patch, drm_version->patch); + return NULL; + } + + psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, + ddx_version, dri_version, drm_version, + frame_buffer, pSAREA, fd, + internal_api_version, + &nouveau_api); + if (psp == NULL) + return NULL; + nv_dri = psp->pDevPriv; + + *driver_modes = nouveau_fill_in_modes(nv_dri->bpp, + (nv_dri->bpp == 16) ? 16 : 24, + (nv_dri->bpp == 16) ? 0 : 8, + 1); + + driInitExtensions(NULL, card_extensions, GL_FALSE); + + return (void *)psp; +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.h b/src/gallium/winsys/dri/nouveau/nouveau_screen.h new file mode 100644 index 00000000000..019823bd44d --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_screen.h @@ -0,0 +1,19 @@ +#ifndef __NOUVEAU_SCREEN_H__ +#define __NOUVEAU_SCREEN_H__ + +#include "xmlconfig.h" +#include "nouveau_device.h" + +struct nouveau_screen { + __DRIscreenPrivate *driScrnPriv; + driOptionCache option_cache; + + struct nouveau_device *device; + + uint32_t front_offset; + uint32_t front_pitch; + uint32_t front_cpp; + uint32_t front_height; +}; + +#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c b/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c new file mode 100644 index 00000000000..91bf243f424 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c @@ -0,0 +1,86 @@ +#include "main/glheader.h" +#include "glapi/glthread.h" +#include <GL/internal/glcore.h> + +#include "pipe/p_context.h" +#include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" +#include "state_tracker/st_cb_fbo.h" + +#include "nouveau_context.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_swapbuffers.h" + +void +nouveau_copy_buffer(__DRIdrawablePrivate *dPriv, struct pipe_surface *surf, + const drm_clip_rect_t *rect) +{ + struct nouveau_context *nv = dPriv->driContextPriv->driverPrivate; + drm_clip_rect_t *pbox; + int nbox, i; + + LOCK_HARDWARE(nv); + if (!dPriv->numClipRects) { + UNLOCK_HARDWARE(nv); + return; + } + pbox = dPriv->pClipRects; + nbox = dPriv->numClipRects; + + nv->surface_copy_prep(nv, nv->frontbuffer, surf); + for (i = 0; i < nbox; i++, pbox++) { + int sx, sy, dx, dy, w, h; + + sx = pbox->x1 - dPriv->x; + sy = pbox->y1 - dPriv->y; + dx = pbox->x1; + dy = pbox->y1; + w = pbox->x2 - pbox->x1; + h = pbox->y2 - pbox->y1; + + nv->surface_copy(nv, dx, dy, sx, sy, w, h); + } + + FIRE_RING(); + UNLOCK_HARDWARE(nv); + + if (nv->last_stamp != dPriv->lastStamp) { + struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; + st_resize_framebuffer(nvfb->stfb, dPriv->w, dPriv->h); + nv->last_stamp = dPriv->lastStamp; + } +} + +void +nouveau_copy_sub_buffer(__DRIdrawablePrivate *dPriv, int x, int y, int w, int h) +{ + struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; + struct pipe_surface *surf; + + surf = st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT); + if (surf) { + drm_clip_rect_t rect; + rect.x1 = x; + rect.y1 = y; + rect.x2 = x + w; + rect.y2 = y + h; + + st_notify_swapbuffers(nvfb->stfb); + nouveau_copy_buffer(dPriv, surf, &rect); + } +} + +void +nouveau_swap_buffers(__DRIdrawablePrivate *dPriv) +{ + struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; + struct pipe_surface *surf; + + surf = st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT); + if (surf) { + st_notify_swapbuffers(nvfb->stfb); + nouveau_copy_buffer(dPriv, surf, NULL); + } +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.h b/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.h new file mode 100644 index 00000000000..825d3da6da5 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.h @@ -0,0 +1,10 @@ +#ifndef __NOUVEAU_SWAPBUFFERS_H__ +#define __NOUVEAU_SWAPBUFFERS_H__ + +extern void nouveau_copy_buffer(__DRIdrawablePrivate *, struct pipe_surface *, + const drm_clip_rect_t *); +extern void nouveau_copy_sub_buffer(__DRIdrawablePrivate *, + int x, int y, int w, int h); +extern void nouveau_swap_buffers(__DRIdrawablePrivate *); + +#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c new file mode 100644 index 00000000000..2ca05d84c60 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c @@ -0,0 +1,124 @@ +#include "pipe/p_util.h" + +#include "nouveau_context.h" +#include "nouveau_winsys_pipe.h" + +#include "nouveau/nouveau_winsys.h" + +static int +nouveau_pipe_notifier_alloc(struct nouveau_winsys *nvws, int count, + struct nouveau_notifier **notify) +{ + struct nouveau_context *nv = nvws->nv; + + return nouveau_notifier_alloc(nv->channel, nv->next_handle++, + count, notify); +} + +static int +nouveau_pipe_grobj_alloc(struct nouveau_winsys *nvws, int grclass, + struct nouveau_grobj **grobj) +{ + struct nouveau_context *nv = nvws->nv; + int ret; + + ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, + grclass, grobj); + if (ret) + return ret; + + (*grobj)->subc = nv->next_subchannel++; + assert((*grobj)->subc <= 7); + BEGIN_RING_GR(*grobj, 0x0000, 1); + OUT_RING ((*grobj)->handle); + return 0; +} + +static int +nouveau_pipe_surface_copy(struct nouveau_winsys *nvws, struct pipe_surface *dst, + unsigned dx, unsigned dy, struct pipe_surface *src, + unsigned sx, unsigned sy, unsigned w, unsigned h) +{ + struct nouveau_context *nv = nvws->nv; + + if (nv->surface_copy_prep(nv, dst, src)) + return 1; + nv->surface_copy(nv, dx, dy, sx, sy, w, h); + nv->surface_copy_done(nv); + + return 0; +} + +static int +nouveau_pipe_surface_fill(struct nouveau_winsys *nvws, struct pipe_surface *dst, + unsigned dx, unsigned dy, unsigned w, unsigned h, + unsigned value) +{ + if (nvws->nv->surface_fill(nvws->nv, dst, dx, dy, w, h, value)) + return 1; + return 0; +} + +int +nouveau_pipe_emit_reloc(struct nouveau_channel *chan, void *ptr, + struct pipe_buffer *buf, uint32_t data, + uint32_t flags, uint32_t vor, uint32_t tor) +{ + return nouveau_pushbuf_emit_reloc(chan, ptr, nouveau_buffer(buf)->bo, + data, flags, vor, tor); +} + +struct pipe_context * +nouveau_pipe_create(struct nouveau_context *nv) +{ + struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys); + struct pipe_context *(*hw_create)(struct pipe_winsys *, + struct nouveau_winsys *, + unsigned); + + if (!nvws) + return NULL; + + switch (nv->chipset & 0xf0) { + case 0x30: + hw_create = nv30_create; + break; + case 0x40: + case 0x60: + hw_create = nv40_create; + break; + case 0x50: + case 0x80: + hw_create = nv50_create; + break; + default: + NOUVEAU_ERR("Unknown chipset NV%02x\n", (int)nv->chipset); + return NULL; + } + + nvws->nv = nv; + nvws->channel = nv->channel; + + nvws->res_init = nouveau_resource_init; + nvws->res_alloc = nouveau_resource_alloc; + nvws->res_free = nouveau_resource_free; + + nvws->push_reloc = nouveau_pipe_emit_reloc; + nvws->push_flush = nouveau_pushbuf_flush; + + nvws->grobj_alloc = nouveau_pipe_grobj_alloc; + nvws->grobj_free = nouveau_grobj_free; + + nvws->notifier_alloc = nouveau_pipe_notifier_alloc; + nvws->notifier_free = nouveau_notifier_free; + nvws->notifier_reset = nouveau_notifier_reset; + nvws->notifier_status = nouveau_notifier_status; + nvws->notifier_retval = nouveau_notifier_return_val; + nvws->notifier_wait = nouveau_notifier_wait_status; + + nvws->surface_copy = nouveau_pipe_surface_copy; + nvws->surface_fill = nouveau_pipe_surface_fill; + + return hw_create(nouveau_create_pipe_winsys(nv), nvws, nv->chipset); +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c new file mode 100644 index 00000000000..e1a9271395b --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c @@ -0,0 +1,196 @@ +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" + +#include "nouveau_context.h" +#include "nouveau_device.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_swapbuffers.h" +#include "nouveau_winsys_pipe.h" + +static void +nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, + void *context_private) +{ + struct nouveau_context *nv = context_private; + __DRIdrawablePrivate *dPriv = nv->dri_drawable; + + nouveau_copy_buffer(dPriv, surf, NULL); +} + +static void +nouveau_printf(struct pipe_winsys *pws, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); +} + +static const char * +nouveau_get_name(struct pipe_winsys *pws) +{ + return "Nouveau/DRI"; +} + +static struct pipe_surface * +nouveau_surface_alloc(struct pipe_winsys *ws) +{ + struct pipe_surface *surf; + + surf = CALLOC_STRUCT(pipe_surface); + if (!surf) + return NULL; + + surf->refcount = 1; + surf->winsys = ws; + return surf; +} + +static int +nouveau_surface_alloc_storage(struct pipe_winsys *ws, struct pipe_surface *surf, + unsigned width, unsigned height, + enum pipe_format format, unsigned flags) +{ + unsigned pitch = ((width * pf_get_size(format)) + 63) & ~63; + + surf->format = format; + surf->width = width; + surf->height = height; + surf->cpp = pf_get_size(format); + surf->pitch = pitch / surf->cpp; + + surf->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + pitch * height); + if (!surf->buffer) + return 1; + + return 0; +} + +static void +nouveau_surface_release(struct pipe_winsys *ws, struct pipe_surface **s) +{ + struct pipe_surface *surf = *s; + + *s = NULL; + if (--surf->refcount <= 0) { + if (surf->buffer) + pipe_buffer_reference(ws, &surf->buffer, NULL); + free(surf); + } +} + +static struct pipe_buffer * +nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, + unsigned usage, unsigned size) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_device *dev = nvpws->nv->nv_screen->device; + struct nouveau_pipe_buffer *nvbuf; + uint32_t flags = 0; + + nvbuf = calloc(1, sizeof(*nvbuf)); + if (!nvbuf) + return NULL; + nvbuf->base.refcount = 1; + nvbuf->base.alignment = alignment; + nvbuf->base.usage = usage; + nvbuf->base.size = size; + + flags = NOUVEAU_BO_LOCAL; + if (nouveau_bo_new(dev, flags, alignment, size, &nvbuf->bo)) { + free(nvbuf); + return NULL; + } + + return &nvbuf->base; +} + +static struct pipe_buffer * +nouveau_pipe_bo_user_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_device *dev = nvpws->nv->nv_screen->device; + struct nouveau_pipe_buffer *nvbuf; + + nvbuf = calloc(1, sizeof(*nvbuf)); + if (!nvbuf) + return NULL; + nvbuf->base.refcount = 1; + nvbuf->base.size = bytes; + + if (nouveau_bo_user(dev, ptr, bytes, &nvbuf->bo)) { + free(nvbuf); + return NULL; + } + + return &nvbuf->base; +} + +static void +nouveau_pipe_bo_del(struct pipe_winsys *ws, struct pipe_buffer *buf) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); + + nouveau_bo_del(&nvbuf->bo); + free(nvbuf); +} + +static void * +nouveau_pipe_bo_map(struct pipe_winsys *pws, struct pipe_buffer *buf, + unsigned flags) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); + uint32_t map_flags = 0; + + if (flags & PIPE_BUFFER_USAGE_CPU_READ) + map_flags |= NOUVEAU_BO_RD; + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) + map_flags |= NOUVEAU_BO_WR; + + if (nouveau_bo_map(nvbuf->bo, map_flags)) + return NULL; + return nvbuf->bo->map; +} + +static void +nouveau_pipe_bo_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); + + nouveau_bo_unmap(nvbuf->bo); +} + +struct pipe_winsys * +nouveau_create_pipe_winsys(struct nouveau_context *nv) +{ + struct nouveau_pipe_winsys *nvpws; + struct pipe_winsys *pws; + + nvpws = CALLOC_STRUCT(nouveau_pipe_winsys); + if (!nvpws) + return NULL; + nvpws->nv = nv; + pws = &nvpws->pws; + + pws->flush_frontbuffer = nouveau_flush_frontbuffer; + pws->printf = nouveau_printf; + + pws->surface_alloc = nouveau_surface_alloc; + pws->surface_alloc_storage = nouveau_surface_alloc_storage; + pws->surface_release = nouveau_surface_release; + + pws->buffer_create = nouveau_pipe_bo_create; + pws->buffer_destroy = nouveau_pipe_bo_del; + pws->user_buffer_create = nouveau_pipe_bo_user_create; + pws->buffer_map = nouveau_pipe_bo_map; + pws->buffer_unmap = nouveau_pipe_bo_unmap; + + pws->get_name = nouveau_get_name; + + return &nvpws->pws; +} + diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.h b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.h new file mode 100644 index 00000000000..6a03ac0d773 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.h @@ -0,0 +1,34 @@ +#ifndef NOUVEAU_PIPE_WINSYS_H +#define NOUVEAU_PIPE_WINSYS_H + +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" +#include "nouveau_context.h" + +struct nouveau_pipe_buffer { + struct pipe_buffer base; + struct nouveau_bo *bo; +}; + +static inline struct nouveau_pipe_buffer * +nouveau_buffer(struct pipe_buffer *buf) +{ + return (struct nouveau_pipe_buffer *)buf; +} + +struct nouveau_pipe_winsys { + struct pipe_winsys pws; + + struct nouveau_context *nv; +}; + +extern struct pipe_winsys * +nouveau_create_pipe_winsys(struct nouveau_context *nv); + +struct pipe_context * +nouveau_create_softpipe(struct nouveau_context *nv); + +struct pipe_context * +nouveau_pipe_create(struct nouveau_context *nv); + +#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c new file mode 100644 index 00000000000..0e1b4273d1e --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> + */ + +#include "imports.h" + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "softpipe/sp_winsys.h" + +#include "nouveau_context.h" +#include "nouveau_winsys_pipe.h" + +struct nouveau_softpipe_winsys { + struct softpipe_winsys sws; + struct nouveau_context *nv; +}; + +/** + * Return list of surface formats supported by this driver. + */ +static boolean +nouveau_is_format_supported(struct softpipe_winsys *sws, uint format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + }; + + return FALSE; +} + + + +struct pipe_context * +nouveau_create_softpipe(struct nouveau_context *nv) +{ + struct nouveau_softpipe_winsys *nvsws; + + nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys); + + /* Fill in this struct with callbacks that softpipe will need to + * communicate with the window system, buffer manager, etc. + */ + nvsws->sws.is_format_supported = nouveau_is_format_supported; + nvsws->nv = nv; + + /* Create the softpipe context: + */ + return softpipe_create(nouveau_create_pipe_winsys(nv), &nvsws->sws); +} + diff --git a/src/gallium/winsys/dri/nouveau/nv04_surface.c b/src/gallium/winsys/dri/nouveau/nv04_surface.c new file mode 100644 index 00000000000..fe1ea4ed70f --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nv04_surface.c @@ -0,0 +1,226 @@ +#include "pipe/p_context.h" + +#include "nouveau_context.h" + +static INLINE int +nv04_surface_format(int cpp) +{ + switch (cpp) { + case 1: return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; + case 2: return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; + case 4: return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; + default: + return -1; + } +} + +static INLINE int +nv04_rect_format(int cpp) +{ + switch (cpp) { + case 1: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + case 2: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; + case 4: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + default: + return -1; + } +} + +static void +nv04_surface_copy_m2mf(struct nouveau_context *nv, unsigned dx, unsigned dy, + unsigned sx, unsigned sy, unsigned w, unsigned h) +{ + struct pipe_surface *dst = nv->surf_dst; + struct pipe_surface *src = nv->surf_src; + unsigned dst_offset, src_offset; + + dst_offset = dst->offset + (dy * dst->pitch + dx) * dst->cpp; + src_offset = src->offset + (sy * src->pitch + sx) * src->cpp; + + while (h) { + int count = (h > 2047) ? 2047 : h; + + BEGIN_RING(NvM2MF, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); + OUT_RELOCl(src->buffer, src_offset, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(dst->buffer, dst_offset, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (src->pitch * src->cpp); + OUT_RING (dst->pitch * dst->cpp); + OUT_RING (w * src->cpp); + OUT_RING (count); + OUT_RING (0x0101); + OUT_RING (0); + + h -= count; + src_offset += src->pitch * src->cpp * count; + dst_offset += dst->pitch * dst->cpp * count; + } +} + +static void +nv04_surface_copy_blit(struct nouveau_context *nv, unsigned dx, unsigned dy, + unsigned sx, unsigned sy, unsigned w, unsigned h) +{ + BEGIN_RING(NvImageBlit, 0x0300, 3); + OUT_RING ((sy << 16) | sx); + OUT_RING ((dy << 16) | dx); + OUT_RING (( h << 16) | w); +} + +static int +nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, + struct pipe_surface *src) +{ + int format; + + if (src->cpp != dst->cpp) + return 1; + + /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback + * to NV_MEMORY_TO_MEMORY_FORMAT in this case. + */ + if ((src->offset & 63) || (dst->offset & 63)) { + BEGIN_RING(NvM2MF, + NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); + OUT_RELOCo(src->buffer, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD); + OUT_RELOCo(dst->buffer, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | + NOUVEAU_BO_WR); + + nv->surface_copy = nv04_surface_copy_m2mf; + nv->surf_dst = dst; + nv->surf_src = src; + return 0; + + } + + if ((format = nv04_surface_format(dst->cpp)) < 0) { + NOUVEAU_ERR("Bad cpp = %d\n", dst->cpp); + return 1; + } + nv->surface_copy = nv04_surface_copy_blit; + + BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(src->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (format); + OUT_RING (((dst->pitch * dst->cpp) << 16) | (src->pitch * src->cpp)); + OUT_RELOCl(src->buffer, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + return 0; +} + +static void +nv04_surface_copy_done(struct nouveau_context *nv) +{ + FIRE_RING(); +} + +static int +nv04_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, + unsigned dx, unsigned dy, unsigned w, unsigned h, + unsigned value) +{ + int cs2d_format, gdirect_format; + + if ((cs2d_format = nv04_surface_format(dst->cpp)) < 0) { + NOUVEAU_ERR("Bad cpp = %d\n", dst->cpp); + return 1; + } + + if ((gdirect_format = nv04_rect_format(dst->cpp)) < 0) { + NOUVEAU_ERR("Bad cpp = %d\n", dst->cpp); + return 1; + } + + BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (cs2d_format); + OUT_RING (((dst->pitch * dst->cpp) << 16) | (dst->pitch * dst->cpp)); + OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); + OUT_RING (gdirect_format); + BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); + OUT_RING (value); + BEGIN_RING(NvGdiRect, + NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); + OUT_RING ((dx << 16) | dy); + OUT_RING (( w << 16) | h); + + FIRE_RING(); + return 0; +} + +int +nouveau_surface_init_nv04(struct nouveau_context *nv) +{ + unsigned class; + int ret; + + if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, 0x39, + &nv->NvM2MF))) { + NOUVEAU_ERR("Error creating m2mf object: %d\n", ret); + return 1; + } + BIND_RING (NvM2MF, nv->next_subchannel++); + BEGIN_RING(NvM2MF, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); + OUT_RING (nv->sync_notifier->handle); + + class = nv->chipset < 0x10 ? NV04_CONTEXT_SURFACES_2D : + NV10_CONTEXT_SURFACES_2D; + if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, class, + &nv->NvCtxSurf2D))) { + NOUVEAU_ERR("Error creating 2D surface object: %d\n", ret); + return 1; + } + BIND_RING (NvCtxSurf2D, nv->next_subchannel++); + BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RING (nv->channel->vram->handle); + OUT_RING (nv->channel->vram->handle); + + class = nv->chipset < 0x10 ? NV04_IMAGE_BLIT : + NV12_IMAGE_BLIT; + if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, class, + &nv->NvImageBlit))) { + NOUVEAU_ERR("Error creating blit object: %d\n", ret); + return 1; + } + BIND_RING (NvImageBlit, nv->next_subchannel++); + BEGIN_RING(NvImageBlit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1); + OUT_RING (nv->sync_notifier->handle); + BEGIN_RING(NvImageBlit, NV04_IMAGE_BLIT_SURFACE, 1); + OUT_RING (nv->NvCtxSurf2D->handle); + BEGIN_RING(NvImageBlit, NV04_IMAGE_BLIT_OPERATION, 1); + OUT_RING (NV04_IMAGE_BLIT_OPERATION_SRCCOPY); + + class = NV04_GDI_RECTANGLE_TEXT; + if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, class, + &nv->NvGdiRect))) { + NOUVEAU_ERR("Error creating rect object: %d\n", ret); + return 1; + } + BIND_RING (NvGdiRect, nv->next_subchannel++); + BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); + OUT_RING (nv->sync_notifier->handle); + BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); + OUT_RING (nv->NvCtxSurf2D->handle); + BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); + OUT_RING (NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); + BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); + OUT_RING (NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); + + nv->surface_copy_prep = nv04_surface_copy_prep; + nv->surface_copy = nv04_surface_copy_blit; + nv->surface_copy_done = nv04_surface_copy_done; + nv->surface_fill = nv04_surface_fill; + return 0; +} + diff --git a/src/gallium/winsys/dri/nouveau/nv50_surface.c b/src/gallium/winsys/dri/nouveau/nv50_surface.c new file mode 100644 index 00000000000..15a10028613 --- /dev/null +++ b/src/gallium/winsys/dri/nouveau/nv50_surface.c @@ -0,0 +1,160 @@ +#include "pipe/p_context.h" + +#include "nouveau_context.h" + +static INLINE int +nv50_format(int cpp) +{ + switch (cpp) { + case 4: return NV50_2D_DST_FORMAT_32BPP; + case 3: return NV50_2D_DST_FORMAT_24BPP; + case 2: return NV50_2D_DST_FORMAT_16BPP; + case 1: return NV50_2D_DST_FORMAT_8BPP; + default: + return -1; + } +} + +static int +nv50_surface_copy_prep(struct nouveau_context *nv, + struct pipe_surface *dst, struct pipe_surface *src) +{ + int surf_format; + + assert(src->cpp == dst->cpp); + + surf_format = nv50_format(dst->cpp); + assert(surf_format >= 0); + + BEGIN_RING(Nv2D, NV50_2D_DMA_IN_MEMORY0, 2); + OUT_RELOCo(src->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(Nv2D, NV50_2D_DST_FORMAT, 2); + OUT_RING (surf_format); + OUT_RING (1); + BEGIN_RING(Nv2D, NV50_2D_DST_PITCH, 5); + OUT_RING (dst->pitch * dst->cpp); + OUT_RING (dst->pitch); + OUT_RING (dst->height); + OUT_RELOCh(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(Nv2D, NV50_2D_CLIP_X, 4); + OUT_RING (0); + OUT_RING (0); + OUT_RING (dst->pitch); + OUT_RING (dst->height); + + BEGIN_RING(Nv2D, NV50_2D_SRC_FORMAT, 2); + OUT_RING (surf_format); + OUT_RING (1); + BEGIN_RING(Nv2D, NV50_2D_SRC_PITCH, 5); + OUT_RING (src->pitch * src->cpp); + OUT_RING (src->pitch); + OUT_RING (src->height); + OUT_RELOCh(src->buffer, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(src->buffer, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + + return 0; +} + +static void +nv50_surface_copy(struct nouveau_context *nv, unsigned dx, unsigned dy, + unsigned sx, unsigned sy, unsigned w, unsigned h) +{ + BEGIN_RING(Nv2D, 0x0110, 1); + OUT_RING (0); + BEGIN_RING(Nv2D, NV50_2D_BLIT_DST_X, 12); + OUT_RING (dx); + OUT_RING (dy); + OUT_RING (w); + OUT_RING (h); + OUT_RING (0); + OUT_RING (1); + OUT_RING (0); + OUT_RING (1); + OUT_RING (0); + OUT_RING (sx); + OUT_RING (0); + OUT_RING (sy); +} + +static void +nv50_surface_copy_done(struct nouveau_context *nv) +{ + FIRE_RING(); +} + +static int +nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, + unsigned dx, unsigned dy, unsigned w, unsigned h, + unsigned value) +{ + int surf_format, rect_format; + + surf_format = nv50_format(dst->cpp); + if (surf_format < 0) + return 1; + + rect_format = nv50_format(dst->cpp); + if (rect_format < 0) + return 1; + + BEGIN_RING(Nv2D, NV50_2D_DMA_IN_MEMORY1, 1); + OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(Nv2D, NV50_2D_DST_FORMAT, 2); + OUT_RING (surf_format); + OUT_RING (1); + BEGIN_RING(Nv2D, NV50_2D_DST_PITCH, 5); + OUT_RING (dst->pitch * dst->cpp); + OUT_RING (dst->pitch); + OUT_RING (dst->height); + OUT_RELOCh(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(Nv2D, NV50_2D_CLIP_X, 4); + OUT_RING (0); + OUT_RING (0); + OUT_RING (dst->pitch); + OUT_RING (dst->height); + + BEGIN_RING(Nv2D, 0x0580, 3); + OUT_RING (4); + OUT_RING (rect_format); + OUT_RING (value); + + BEGIN_RING(Nv2D, NV50_2D_RECT_X1, 4); + OUT_RING (dx); + OUT_RING (dy); + OUT_RING (dx + w); + OUT_RING (dy + h); + + FIRE_RING(); + + return 0; +} + +int +nouveau_surface_init_nv50(struct nouveau_context *nv) +{ + int ret; + + ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, NV50_2D, + &nv->Nv2D); + if (ret) + return ret; + BIND_RING (Nv2D, 0); + BEGIN_RING(Nv2D, NV50_2D_DMA_NOTIFY, 1); + OUT_RING (nv->sync_notifier->handle); + BEGIN_RING(Nv2D, NV50_2D_DMA_IN_MEMORY0, 2); + OUT_RING (nv->channel->vram->handle); + OUT_RING (nv->channel->vram->handle); + BEGIN_RING(Nv2D, NV50_2D_OPERATION, 1); + OUT_RING (NV50_2D_OPERATION_SRCCOPY); + + nv->surface_copy_prep = nv50_surface_copy_prep; + nv->surface_copy = nv50_surface_copy; + nv->surface_copy_done = nv50_surface_copy_done; + nv->surface_fill = nv50_surface_fill; + return 0; +} + diff --git a/src/mesa/drivers/dri/nouveau_winsys/Makefile b/src/mesa/drivers/dri/nouveau_winsys/Makefile deleted file mode 100644 index 98ec5a79f5d..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/Makefile +++ /dev/null @@ -1,43 +0,0 @@ - -TOP = ../../../../.. -include $(TOP)/configs/current - -LIBNAME = nouveau_dri.so - -MINIGLX_SOURCES = - -PIPE_DRIVERS = \ - $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a \ - $(TOP)/src/mesa/pipe/nv30/libnv30.a \ - $(TOP)/src/mesa/pipe/nv40/libnv40.a \ - $(TOP)/src/mesa/pipe/nv50/libnv50.a - -DRIVER_SOURCES = \ - nouveau_bo.c \ - nouveau_channel.c \ - nouveau_context.c \ - nouveau_device.c \ - nouveau_dma.c \ - nouveau_fence.c \ - nouveau_grobj.c \ - nouveau_lock.c \ - nouveau_notifier.c \ - nouveau_pushbuf.c \ - nouveau_resource.c \ - nouveau_screen.c \ - nouveau_swapbuffers.c \ - nouveau_winsys.c \ - nouveau_winsys_pipe.c \ - nouveau_winsys_softpipe.c \ - nv04_surface.c \ - nv50_surface.c - -C_SOURCES = \ - $(COMMON_GALLIUM_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -include ../Makefile.template - -symlinks: diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_bo.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_bo.c deleted file mode 100644 index 6887ffa6886..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_bo.c +++ /dev/null @@ -1,402 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdint.h> -#include <stdlib.h> -#include <errno.h> -#include <assert.h> - -#include "nouveau_drmif.h" -#include "nouveau_dma.h" -#include "nouveau_local.h" - -static void -nouveau_mem_free(struct nouveau_device *dev, struct drm_nouveau_mem_alloc *ma, - void **map) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - struct drm_nouveau_mem_free mf; - - if (map && *map) { - drmUnmap(*map, ma->size); - *map = NULL; - } - - if (ma->size) { - mf.offset = ma->offset; - mf.flags = ma->flags; - drmCommandWrite(nvdev->fd, DRM_NOUVEAU_MEM_FREE, - &mf, sizeof(mf)); - ma->size = 0; - } -} - -static int -nouveau_mem_alloc(struct nouveau_device *dev, unsigned size, unsigned align, - uint32_t flags, struct drm_nouveau_mem_alloc *ma, void **map) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - int ret; - - ma->alignment = align; - ma->size = size; - ma->flags = flags; - if (map) - ma->flags |= NOUVEAU_MEM_MAPPED; - ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_MEM_ALLOC, ma, - sizeof(struct drm_nouveau_mem_alloc)); - if (ret) - return ret; - - if (map) { - ret = drmMap(nvdev->fd, ma->map_handle, ma->size, map); - if (ret) { - *map = NULL; - nouveau_mem_free(dev, ma, map); - return ret; - } - } - - return 0; -} - -static void -nouveau_bo_tmp_del(void *priv) -{ - struct nouveau_resource *r = priv; - - nouveau_fence_ref(NULL, (struct nouveau_fence **)&r->priv); - nouveau_resource_free(&r); -} - -static struct nouveau_resource * -nouveau_bo_tmp(struct nouveau_channel *chan, unsigned size, - struct nouveau_fence *fence) -{ - struct nouveau_device_priv *nvdev = nouveau_device(chan->device); - struct nouveau_resource *r = NULL; - struct nouveau_fence *ref = NULL; - - if (fence) - nouveau_fence_ref(fence, &ref); - else - nouveau_fence_new(chan, &ref); - assert(ref); - - while (nouveau_resource_alloc(nvdev->sa_heap, size, ref, &r)) { - nouveau_fence_flush(chan); - } - nouveau_fence_signal_cb(ref, nouveau_bo_tmp_del, r); - - return r; -} - -int -nouveau_bo_init(struct nouveau_device *dev) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - int ret; - - ret = nouveau_mem_alloc(dev, 128*1024, 0, NOUVEAU_MEM_AGP | - NOUVEAU_MEM_PCI, &nvdev->sa, &nvdev->sa_map); - if (ret) - return ret; - - ret = nouveau_resource_init(&nvdev->sa_heap, 0, nvdev->sa.size); - if (ret) { - nouveau_mem_free(dev, &nvdev->sa, &nvdev->sa_map); - return ret; - } - - return 0; -} - -void -nouveau_bo_takedown(struct nouveau_device *dev) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - - nouveau_mem_free(dev, &nvdev->sa, &nvdev->sa_map); -} - -int -nouveau_bo_new(struct nouveau_device *dev, uint32_t flags, int align, - int size, struct nouveau_bo **bo) -{ - struct nouveau_bo_priv *nvbo; - int ret; - - if (!dev || !bo || *bo) - return -EINVAL; - - nvbo = calloc(1, sizeof(struct nouveau_bo_priv)); - if (!nvbo) - return -ENOMEM; - nvbo->base.device = dev; - nvbo->base.size = size; - nvbo->base.handle = bo_to_ptr(nvbo); - nvbo->drm.alignment = align; - nvbo->refcount = 1; - - ret = nouveau_bo_set_status(&nvbo->base, flags); - if (ret) { - free(nvbo); - return ret; - } - - *bo = &nvbo->base; - return 0; -} - -int -nouveau_bo_user(struct nouveau_device *dev, void *ptr, int size, - struct nouveau_bo **bo) -{ - struct nouveau_bo_priv *nvbo; - - if (!dev || !bo || *bo) - return -EINVAL; - - nvbo = calloc(1, sizeof(*nvbo)); - if (!nvbo) - return -ENOMEM; - nvbo->base.device = dev; - - nvbo->sysmem = ptr; - nvbo->user = 1; - - nvbo->base.size = size; - nvbo->base.offset = nvbo->drm.offset; - nvbo->base.handle = bo_to_ptr(nvbo); - nvbo->refcount = 1; - *bo = &nvbo->base; - return 0; -} - -int -nouveau_bo_ref(struct nouveau_device *dev, uint64_t handle, - struct nouveau_bo **bo) -{ - struct nouveau_bo_priv *nvbo = ptr_to_bo(handle); - - if (!dev || !bo || *bo) - return -EINVAL; - - nvbo->refcount++; - *bo = &nvbo->base; - return 0; -} - -void -nouveau_bo_del(struct nouveau_bo **bo) -{ - struct nouveau_bo_priv *nvbo; - - if (!bo || !*bo) - return; - nvbo = nouveau_bo(*bo); - *bo = NULL; - - if (--nvbo->refcount) - return; - - if (nvbo->fence) - nouveau_fence_wait(&nvbo->fence); - nouveau_mem_free(nvbo->base.device, &nvbo->drm, &nvbo->map); - if (nvbo->sysmem && !nvbo->user) - free(nvbo->sysmem); - free(nvbo); -} - -int -nouveau_bo_map(struct nouveau_bo *bo, uint32_t flags) -{ - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - - if (!nvbo) - return -EINVAL; - - if (flags & NOUVEAU_BO_WR) - nouveau_fence_wait(&nvbo->fence); - else - nouveau_fence_wait(&nvbo->wr_fence); - - if (nvbo->sysmem) - bo->map = nvbo->sysmem; - else - bo->map = nvbo->map; - return 0; -} - -void -nouveau_bo_unmap(struct nouveau_bo *bo) -{ - bo->map = NULL; -} - -static int -nouveau_bo_upload(struct nouveau_bo_priv *nvbo) -{ - if (nvbo->fence) - nouveau_fence_wait(&nvbo->fence); - memcpy(nvbo->map, nvbo->sysmem, nvbo->drm.size); - return 0; -} - -int -nouveau_bo_set_status(struct nouveau_bo *bo, uint32_t flags) -{ - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - struct drm_nouveau_mem_alloc new; - void *new_map = NULL, *new_sysmem = NULL; - unsigned new_flags = 0, ret; - - assert(!bo->map); - - /* Check current memtype vs requested, if they match do nothing */ - if ((nvbo->drm.flags & NOUVEAU_MEM_FB) && (flags & NOUVEAU_BO_VRAM)) - return 0; - if ((nvbo->drm.flags & NOUVEAU_MEM_AGP) && (flags & NOUVEAU_BO_GART)) - return 0; - if (nvbo->drm.size == 0 && nvbo->sysmem && (flags & NOUVEAU_BO_LOCAL)) - return 0; - - memset(&new, 0x00, sizeof(new)); - - /* Allocate new memory */ - if (flags & NOUVEAU_BO_VRAM) - new_flags |= NOUVEAU_MEM_FB; - else - if (flags & NOUVEAU_BO_GART) - new_flags |= (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI); - - if (new_flags) { - ret = nouveau_mem_alloc(bo->device, bo->size, - nvbo->drm.alignment, new_flags, - &new, &new_map); - if (ret) - return ret; - } else { - new_sysmem = malloc(bo->size); - } - - /* Copy old -> new */ - /*XXX: use M2MF */ - if (nvbo->sysmem || nvbo->map) { - nouveau_bo_map(bo, NOUVEAU_BO_RD); - memcpy(new_map, bo->map, bo->size); - nouveau_bo_unmap(bo); - } - - /* Free old memory */ - if (nvbo->fence) - nouveau_fence_wait(&nvbo->fence); - nouveau_mem_free(bo->device, &nvbo->drm, &nvbo->map); - if (nvbo->sysmem) - free(nvbo->sysmem); - - nvbo->drm = new; - nvbo->map = new_map; - nvbo->sysmem = new_sysmem; - bo->flags = flags; - bo->offset = nvbo->drm.offset; - return 0; -} - -static int -nouveau_bo_validate_user(struct nouveau_channel *chan, struct nouveau_bo *bo, - struct nouveau_fence *fence, uint32_t flags) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_device_priv *nvdev = nouveau_device(chan->device); - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - struct nouveau_resource *r; - - if (nvchan->user_charge + bo->size > nvdev->sa.size) - return 1; - nvchan->user_charge += bo->size; - - if (!(flags & NOUVEAU_BO_GART)) - return 1; - - r = nouveau_bo_tmp(chan, bo->size, fence); - if (!r) - return 1; - - memcpy(nvdev->sa_map + r->start, nvbo->sysmem, bo->size); - - nvbo->offset = nvdev->sa.offset + r->start; - nvbo->flags = NOUVEAU_BO_GART; - return 0; -} - -static int -nouveau_bo_validate_bo(struct nouveau_channel *chan, struct nouveau_bo *bo, - struct nouveau_fence *fence, uint32_t flags) -{ - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - int ret; - - ret = nouveau_bo_set_status(bo, flags); - if (ret) - return ret; - - if (nvbo->user) - nouveau_bo_upload(nvbo); - - nvbo->offset = nvbo->drm.offset; - if (nvbo->drm.flags & (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI)) - nvbo->flags = NOUVEAU_BO_GART; - else - nvbo->flags = NOUVEAU_BO_VRAM; - - return 0; -} - -int -nouveau_bo_validate(struct nouveau_channel *chan, struct nouveau_bo *bo, - struct nouveau_fence *fence, uint32_t flags) -{ - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - int ret; - - assert(bo->map == NULL); - - if (nvbo->user) { - ret = nouveau_bo_validate_user(chan, bo, fence, flags); - if (ret) { - ret = nouveau_bo_validate_bo(chan, bo, fence, flags); - if (ret) - return ret; - } - } else { - ret = nouveau_bo_validate_bo(chan, bo, fence, flags); - if (ret) - return ret; - } - - if (flags & NOUVEAU_BO_WR) - nouveau_fence_ref(fence, &nvbo->wr_fence); - nouveau_fence_ref(fence, &nvbo->fence); - return 0; -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_channel.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_channel.c deleted file mode 100644 index df80d04add5..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_channel.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> - -#include "nouveau_drmif.h" -#include "nouveau_dma.h" - -int -nouveau_channel_alloc(struct nouveau_device *dev, uint32_t fb_ctxdma, - uint32_t tt_ctxdma, struct nouveau_channel **chan) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - struct nouveau_channel_priv *nvchan; - int ret; - - if (!nvdev || !chan || *chan) - return -EINVAL; - - nvchan = calloc(1, sizeof(struct nouveau_channel_priv)); - if (!nvchan) - return -ENOMEM; - nvchan->base.device = dev; - - nvchan->drm.fb_ctxdma_handle = fb_ctxdma; - nvchan->drm.tt_ctxdma_handle = tt_ctxdma; - ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_CHANNEL_ALLOC, - &nvchan->drm, sizeof(nvchan->drm)); - if (ret) { - free(nvchan); - return ret; - } - - nvchan->base.id = nvchan->drm.channel; - if (nouveau_grobj_ref(&nvchan->base, nvchan->drm.fb_ctxdma_handle, - &nvchan->base.vram) || - nouveau_grobj_ref(&nvchan->base, nvchan->drm.tt_ctxdma_handle, - &nvchan->base.gart)) { - nouveau_channel_free((void *)&nvchan); - return -EINVAL; - } - - ret = drmMap(nvdev->fd, nvchan->drm.ctrl, nvchan->drm.ctrl_size, - (void*)&nvchan->user); - if (ret) { - nouveau_channel_free((void *)&nvchan); - return ret; - } - nvchan->put = &nvchan->user[0x40/4]; - nvchan->get = &nvchan->user[0x44/4]; - nvchan->ref_cnt = &nvchan->user[0x48/4]; - - ret = drmMap(nvdev->fd, nvchan->drm.notifier, nvchan->drm.notifier_size, - (drmAddressPtr)&nvchan->notifier_block); - if (ret) { - nouveau_channel_free((void *)&nvchan); - return ret; - } - - ret = drmMap(nvdev->fd, nvchan->drm.cmdbuf, nvchan->drm.cmdbuf_size, - (void*)&nvchan->pushbuf); - if (ret) { - nouveau_channel_free((void *)&nvchan); - return ret; - } - - nouveau_dma_channel_init(&nvchan->base); - nouveau_pushbuf_init(&nvchan->base); - - *chan = &nvchan->base; - return 0; -} - -void -nouveau_channel_free(struct nouveau_channel **chan) -{ - struct nouveau_channel_priv *nvchan; - struct nouveau_device_priv *nvdev; - struct drm_nouveau_channel_free cf; - - if (!chan || !*chan) - return; - nvchan = nouveau_channel(*chan); - *chan = NULL; - nvdev = nouveau_device(nvchan->base.device); - - FIRE_RING_CH(&nvchan->base); - - nouveau_grobj_free(&nvchan->base.vram); - nouveau_grobj_free(&nvchan->base.gart); - - cf.channel = nvchan->drm.channel; - drmCommandWrite(nvdev->fd, DRM_NOUVEAU_CHANNEL_FREE, &cf, sizeof(cf)); - free(nvchan); -} - - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_context.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_context.c deleted file mode 100644 index 01fada5b89b..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_context.c +++ /dev/null @@ -1,206 +0,0 @@ -#include "main/glheader.h" -#include "glapi/glthread.h" -#include <GL/internal/glcore.h> -#include "utils.h" - -#include "state_tracker/st_public.h" -#include "pipe/p_defines.h" -#include "pipe/p_context.h" - -#include "nouveau_context.h" -#include "nouveau_dri.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" -#include "nouveau_winsys_pipe.h" - -#ifdef DEBUG -static const struct dri_debug_control debug_control[] = { - { "bo", DEBUG_BO }, - { NULL, 0 } -}; -int __nouveau_debug = 0; -#endif - -GLboolean -nouveau_context_create(const __GLcontextModes *glVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate) -{ - __DRIscreenPrivate *driScrnPriv = driContextPriv->driScreenPriv; - struct nouveau_screen *nv_screen = driScrnPriv->private; - struct nouveau_context *nv = CALLOC_STRUCT(nouveau_context); - struct nouveau_device_priv *nvdev; - struct pipe_context *pipe = NULL; - struct st_context *st_share = NULL; - int ret; - - if (sharedContextPrivate) { - st_share = ((struct nouveau_context *)sharedContextPrivate)->st; - } - - if ((ret = nouveau_device_get_param(nv_screen->device, - NOUVEAU_GETPARAM_CHIPSET_ID, - &nv->chipset))) { - NOUVEAU_ERR("Error determining chipset id: %d\n", ret); - return GL_FALSE; - } - - if ((ret = nouveau_channel_alloc(nv_screen->device, - 0x8003d001, 0x8003d002, - &nv->channel))) { - NOUVEAU_ERR("Error creating GPU channel: %d\n", ret); - return GL_FALSE; - } - - driContextPriv->driverPrivate = (void *)nv; - nv->nv_screen = nv_screen; - nv->dri_screen = driScrnPriv; - - nvdev = nouveau_device(nv_screen->device); - nvdev->ctx = driContextPriv->hHWContext; - nvdev->lock = (drmLock *)&driScrnPriv->pSAREA->lock; - - driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, - nv->dri_screen->myNum, "nouveau"); -#ifdef DEBUG - __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), - debug_control); -#endif - - /*XXX: Hack up a fake region and buffer object for front buffer. - * This will go away with TTM, replaced with a simple reference - * of the front buffer handle passed to us by the DDX. - */ - { - struct pipe_surface *fb_surf; - struct nouveau_pipe_buffer *fb_buf; - struct nouveau_bo_priv *fb_bo; - - fb_bo = calloc(1, sizeof(struct nouveau_bo_priv)); - fb_bo->drm.offset = nv_screen->front_offset; - fb_bo->drm.flags = NOUVEAU_MEM_FB; - fb_bo->drm.size = nv_screen->front_pitch * - nv_screen->front_height; - fb_bo->refcount = 1; - fb_bo->base.flags = NOUVEAU_BO_PIN | NOUVEAU_BO_VRAM; - fb_bo->base.offset = fb_bo->drm.offset; - fb_bo->base.handle = (unsigned long)fb_bo; - fb_bo->base.size = fb_bo->drm.size; - fb_bo->base.device = nv_screen->device; - - fb_buf = calloc(1, sizeof(struct nouveau_pipe_buffer)); - fb_buf->bo = &fb_bo->base; - - fb_surf = calloc(1, sizeof(struct pipe_surface)); - fb_surf->cpp = nv_screen->front_cpp; - fb_surf->pitch = nv_screen->front_pitch / fb_surf->cpp; - fb_surf->height = nv_screen->front_height; - fb_surf->refcount = 1; - fb_surf->buffer = &fb_buf->base; - - nv->frontbuffer = fb_surf; - } - - if ((ret = nouveau_grobj_alloc(nv->channel, 0x00000000, 0x30, - &nv->NvNull))) { - NOUVEAU_ERR("Error creating NULL object: %d\n", ret); - return GL_FALSE; - } - nv->next_handle = 0x80000000; - - if ((ret = nouveau_notifier_alloc(nv->channel, nv->next_handle++, 1, - &nv->sync_notifier))) { - NOUVEAU_ERR("Error creating channel sync notifier: %d\n", ret); - return GL_FALSE; - } - - if (nv->chipset < 0x50) - ret = nouveau_surface_init_nv04(nv); - else - ret = nouveau_surface_init_nv50(nv); - if (ret) { - return GL_FALSE; - } - - if (!getenv("NOUVEAU_FORCE_SOFTPIPE")) { - pipe = nouveau_pipe_create(nv); - if (!pipe) - NOUVEAU_ERR("Couldn't create hw pipe\n"); - } - - if (!pipe) { - NOUVEAU_MSG("Using softpipe\n"); - pipe = nouveau_create_softpipe(nv); - if (!pipe) { - NOUVEAU_ERR("Error creating pipe, bailing\n"); - return GL_FALSE; - } - } - - pipe->priv = nv; - nv->st = st_create_context(pipe, glVis, st_share); - return GL_TRUE; -} - -void -nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) -{ - struct nouveau_context *nv = driContextPriv->driverPrivate; - - assert(nv); - - st_flush(nv->st, PIPE_FLUSH_WAIT); - st_destroy_context(nv->st); - - nouveau_grobj_free(&nv->NvCtxSurf2D); - nouveau_grobj_free(&nv->NvImageBlit); - nouveau_channel_free(&nv->channel); - - free(nv); -} - -GLboolean -nouveau_context_bind(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv) -{ - struct nouveau_context *nv; - struct nouveau_framebuffer *draw, *read; - - if (!driContextPriv) { - st_make_current(NULL, NULL, NULL); - return GL_TRUE; - } - - nv = driContextPriv->driverPrivate; - draw = driDrawPriv->driverPrivate; - read = driReadPriv->driverPrivate; - - st_make_current(nv->st, draw->stfb, read->stfb); - - if ((nv->dri_drawable != driDrawPriv) || - (nv->last_stamp != driDrawPriv->lastStamp)) { - nv->dri_drawable = driDrawPriv; - st_resize_framebuffer(draw->stfb, driDrawPriv->w, - driDrawPriv->h); - nv->last_stamp = driDrawPriv->lastStamp; - } - - if (driDrawPriv != driReadPriv) { - st_resize_framebuffer(read->stfb, driReadPriv->w, - driReadPriv->h); - } - - return GL_TRUE; -} - -GLboolean -nouveau_context_unbind(__DRIcontextPrivate *driContextPriv) -{ - struct nouveau_context *nv = driContextPriv->driverPrivate; - (void)nv; - - st_flush(nv->st, 0); - return GL_TRUE; -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_context.h b/src/mesa/drivers/dri/nouveau_winsys/nouveau_context.h deleted file mode 100644 index 7a74f7deecd..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_context.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef __NOUVEAU_CONTEXT_H__ -#define __NOUVEAU_CONTEXT_H__ - -#include "dri_util.h" -#include "xmlconfig.h" - -#include "pipe/nouveau/nouveau_winsys.h" -#include "nouveau_device.h" -#include "nouveau_drmif.h" -#include "nouveau_dma.h" - -struct nouveau_framebuffer { - struct st_framebuffer *stfb; -}; - -struct nouveau_context { - struct st_context *st; - - /* Misc HW info */ - uint64_t chipset; - - /* DRI stuff */ - __DRIscreenPrivate *dri_screen; - __DRIdrawablePrivate *dri_drawable; - unsigned int last_stamp; - driOptionCache dri_option_cache; - drm_context_t drm_context; - drmLock drm_lock; - GLboolean locked; - struct nouveau_screen *nv_screen; - struct pipe_surface *frontbuffer; - - /* Hardware context */ - struct nouveau_channel *channel; - struct nouveau_notifier *sync_notifier; - struct nouveau_grobj *NvNull; - struct nouveau_grobj *NvCtxSurf2D; - struct nouveau_grobj *NvImageBlit; - struct nouveau_grobj *NvGdiRect; - struct nouveau_grobj *NvM2MF; - struct nouveau_grobj *Nv2D; - uint32_t next_handle; - uint32_t next_subchannel; - uint32_t next_sequence; - - /* pipe_surface accel */ - struct pipe_surface *surf_src, *surf_dst; - unsigned surf_src_offset, surf_dst_offset; - int (*surface_copy_prep)(struct nouveau_context *, - struct pipe_surface *dst, - struct pipe_surface *src); - void (*surface_copy)(struct nouveau_context *, unsigned dx, unsigned dy, - unsigned sx, unsigned sy, unsigned w, unsigned h); - void (*surface_copy_done)(struct nouveau_context *); - int (*surface_fill)(struct nouveau_context *, struct pipe_surface *, - unsigned, unsigned, unsigned, unsigned, unsigned); -}; - -extern GLboolean nouveau_context_create(const __GLcontextModes *, - __DRIcontextPrivate *, void *); -extern void nouveau_context_destroy(__DRIcontextPrivate *); -extern GLboolean nouveau_context_bind(__DRIcontextPrivate *, - __DRIdrawablePrivate *draw, - __DRIdrawablePrivate *read); -extern GLboolean nouveau_context_unbind(__DRIcontextPrivate *); - -#ifdef DEBUG -extern int __nouveau_debug; - -#define DEBUG_BO (1 << 0) - -#define DBG(flag, ...) do { \ - if (__nouveau_debug & (DEBUG_##flag)) \ - NOUVEAU_ERR(__VA_ARGS__); \ -} while(0) -#else -#define DBG(flag, ...) -#endif - -extern void LOCK_HARDWARE(struct nouveau_context *); -extern void UNLOCK_HARDWARE(struct nouveau_context *); - -extern int nouveau_surface_init_nv04(struct nouveau_context *); -extern int nouveau_surface_init_nv50(struct nouveau_context *); - -extern uint32_t *nouveau_pipe_dma_beginp(struct nouveau_grobj *, int, int); -extern void nouveau_pipe_dma_kickoff(struct nouveau_channel *); - -#endif diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_device.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_device.c deleted file mode 100644 index 409e4415f76..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_device.c +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> - -#include "nouveau_drmif.h" - -int -nouveau_device_open_existing(struct nouveau_device **dev, int close, - int fd, drm_context_t ctx) -{ - struct nouveau_device_priv *nvdev; - int ret; - - if (!dev || *dev) - return -EINVAL; - - nvdev = calloc(1, sizeof(*nvdev)); - if (!nvdev) - return -ENOMEM; - nvdev->fd = fd; - nvdev->ctx = ctx; - nvdev->needs_close = close; - - drmCommandNone(nvdev->fd, DRM_NOUVEAU_CARD_INIT); - - if ((ret = nouveau_bo_init(&nvdev->base))) { - nouveau_device_close((void *)&nvdev); - return ret; - } - - *dev = &nvdev->base; - return 0; -} - -int -nouveau_device_open(struct nouveau_device **dev, const char *busid) -{ - drm_context_t ctx; - int fd, ret; - - if (!dev || *dev) - return -EINVAL; - - fd = drmOpen("nouveau", busid); - if (fd < 0) - return -EINVAL; - - ret = drmCreateContext(fd, &ctx); - if (ret) { - drmClose(fd); - return ret; - } - - ret = nouveau_device_open_existing(dev, 1, fd, ctx); - if (ret) { - drmDestroyContext(fd, ctx); - drmClose(fd); - return ret; - } - - return 0; -} - -void -nouveau_device_close(struct nouveau_device **dev) -{ - struct nouveau_device_priv *nvdev; - - if (dev || !*dev) - return; - nvdev = nouveau_device(*dev); - *dev = NULL; - - nouveau_bo_takedown(&nvdev->base); - - if (nvdev->needs_close) { - drmDestroyContext(nvdev->fd, nvdev->ctx); - drmClose(nvdev->fd); - } - free(nvdev); -} - -int -nouveau_device_get_param(struct nouveau_device *dev, - uint64_t param, uint64_t *value) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - struct drm_nouveau_getparam g; - int ret; - - if (!nvdev || !value) - return -EINVAL; - - g.param = param; - ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_GETPARAM, - &g, sizeof(g)); - if (ret) - return ret; - - *value = g.value; - return 0; -} - -int -nouveau_device_set_param(struct nouveau_device *dev, - uint64_t param, uint64_t value) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - struct drm_nouveau_setparam s; - int ret; - - if (!nvdev) - return -EINVAL; - - s.param = param; - s.value = value; - ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_SETPARAM, - &s, sizeof(s)); - if (ret) - return ret; - - return 0; -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_device.h b/src/mesa/drivers/dri/nouveau_winsys/nouveau_device.h deleted file mode 100644 index 744a89f74bf..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_device.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_DEVICE_H__ -#define __NOUVEAU_DEVICE_H__ - -struct nouveau_device { -}; - -#endif diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_dma.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_dma.c deleted file mode 100644 index f8a8ba04f6d..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_dma.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdint.h> -#include <assert.h> -#include <errno.h> - -#include "nouveau_drmif.h" -#include "nouveau_dma.h" -#include "nouveau_local.h" - -static inline uint32_t -READ_GET(struct nouveau_channel_priv *nvchan) -{ - return *nvchan->get; -} - -static inline void -WRITE_PUT(struct nouveau_channel_priv *nvchan, uint32_t val) -{ - uint32_t put = ((val << 2) + nvchan->dma->base); - volatile int dum; - - NOUVEAU_DMA_BARRIER; - dum = READ_GET(nvchan); - - *nvchan->put = put; - nvchan->dma->put = val; -#ifdef NOUVEAU_DMA_TRACE - NOUVEAU_MSG("WRITE_PUT %d/0x%08x\n", nvchan->drm.channel, put); -#endif - - NOUVEAU_DMA_BARRIER; -} - -static inline int -LOCAL_GET(struct nouveau_dma_priv *dma, uint32_t *val) -{ - uint32_t get = *val; - - if (get >= dma->base && get <= (dma->base + (dma->max << 2))) { - *val = (get - dma->base) >> 2; - return 1; - } - - return 0; -} - -void -nouveau_dma_channel_init(struct nouveau_channel *chan) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - int i; - - nvchan->dma = &nvchan->dma_master; - nvchan->dma->base = nvchan->drm.put_base; - nvchan->dma->cur = nvchan->dma->put = 0; - nvchan->dma->max = (nvchan->drm.cmdbuf_size >> 2) - 2; - nvchan->dma->free = nvchan->dma->max - nvchan->dma->cur; - - RING_SPACE_CH(chan, RING_SKIPS); - for (i = 0; i < RING_SKIPS; i++) - OUT_RING_CH(chan, 0); -} - -#define CHECK_TIMEOUT() do { \ - if ((NOUVEAU_TIME_MSEC() - t_start) > NOUVEAU_DMA_TIMEOUT) \ - return - EBUSY; \ -} while(0) - -int -nouveau_dma_wait(struct nouveau_channel *chan, int size) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - uint32_t get, t_start; - - FIRE_RING_CH(chan); - - t_start = NOUVEAU_TIME_MSEC(); - while (dma->free < size) { - CHECK_TIMEOUT(); - - get = READ_GET(nvchan); - if (!LOCAL_GET(dma, &get)) - continue; - - if (dma->put >= get) { - dma->free = dma->max - dma->cur; - - if (dma->free < size) { -#ifdef NOUVEAU_DMA_DEBUG - dma->push_free = 1; -#endif - OUT_RING_CH(chan, 0x20000000 | dma->base); - if (get <= RING_SKIPS) { - /*corner case - will be idle*/ - if (dma->put <= RING_SKIPS) - WRITE_PUT(nvchan, - RING_SKIPS + 1); - - do { - CHECK_TIMEOUT(); - get = READ_GET(nvchan); - if (!LOCAL_GET(dma, &get)) - get = 0; - } while (get <= RING_SKIPS); - } - - WRITE_PUT(nvchan, RING_SKIPS); - dma->cur = dma->put = RING_SKIPS; - dma->free = get - (RING_SKIPS + 1); - } - } else { - dma->free = get - dma->cur - 1; - } - } - - return 0; -} - -#ifdef NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF -static void -nouveau_dma_parse_pushbuf(struct nouveau_channel *chan, int get, int put) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - unsigned mthd_count = 0; - - while (get != put) { - uint32_t gpuget = (get << 2) + nvchan->drm.put_base; - uint32_t data; - - if (get < 0 || get >= nvchan->drm.cmdbuf_size) { - NOUVEAU_ERR("DMA_PT 0x%08x\n", gpuget); - assert(0); - } - data = nvchan->pushbuf[get++]; - - if (mthd_count) { - NOUVEAU_MSG("0x%08x 0x%08x\n", gpuget, data); - mthd_count--; - continue; - } - - switch (data & 0x60000000) { - case 0x00000000: - mthd_count = (data >> 18) & 0x7ff; - NOUVEAU_MSG("0x%08x 0x%08x MTHD " - "Sc %d Mthd 0x%04x Size %d\n", - gpuget, data, (data>>13) & 7, data & 0x1ffc, - mthd_count); - break; - case 0x20000000: - get = (data & 0x1ffffffc) >> 2; - NOUVEAU_MSG("0x%08x 0x%08x JUMP 0x%08x\n", - gpuget, data, data & 0x1ffffffc); - continue; - case 0x40000000: - mthd_count = (data >> 18) & 0x7ff; - NOUVEAU_MSG("0x%08x 0x%08x NINC " - "Sc %d Mthd 0x%04x Size %d\n", - gpuget, data, (data>>13) & 7, data & 0x1ffc, - mthd_count); - break; - case 0x60000000: - /* DMA_OPCODE_CALL apparently, doesn't seem to work on - * my NV40 at least.. - */ - /* fall-through */ - default: - NOUVEAU_MSG("DMA_PUSHER 0x%08x 0x%08x\n", - gpuget, data); - assert(0); - } - } -} -#endif - -void -nouveau_dma_kickoff(struct nouveau_channel *chan) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - - if (dma->cur == dma->put) - return; - -#ifdef NOUVEAU_DMA_DEBUG - if (dma->push_free) { - NOUVEAU_ERR("Packet incomplete: %d left\n", dma->push_free); - return; - } -#endif - -#ifdef NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF - nouveau_dma_parse_pushbuf(chan, dma->put, dma->cur); -#endif - - WRITE_PUT(nvchan, dma->cur); -} diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_dma.h b/src/mesa/drivers/dri/nouveau_winsys/nouveau_dma.h deleted file mode 100644 index cfa6d26e828..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_dma.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_DMA_H__ -#define __NOUVEAU_DMA_H__ - -#include <string.h> -#include "nouveau_drmif.h" -#include "nouveau_local.h" - -#define RING_SKIPS 8 - -extern int nouveau_dma_wait(struct nouveau_channel *chan, int size); -extern void nouveau_dma_subc_bind(struct nouveau_grobj *); -extern void nouveau_dma_channel_init(struct nouveau_channel *); -extern void nouveau_dma_kickoff(struct nouveau_channel *); - -#ifdef NOUVEAU_DMA_DEBUG -static char faulty[1024]; -#endif - -static inline void -nouveau_dma_out(struct nouveau_channel *chan, uint32_t data) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - -#ifdef NOUVEAU_DMA_DEBUG - if (dma->push_free == 0) { - NOUVEAU_ERR("No space left in packet at %s\n", faulty); - return; - } - dma->push_free--; -#endif -#ifdef NOUVEAU_DMA_TRACE - { - uint32_t offset = (dma->cur << 2) + dma->base; - NOUVEAU_MSG("\tOUT_RING %d/0x%08x -> 0x%08x\n", - nvchan->drm.channel, offset, data); - } -#endif - nvchan->pushbuf[dma->cur + (dma->base - nvchan->drm.put_base)/4] = data; - dma->cur++; -} - -static inline void -nouveau_dma_outp(struct nouveau_channel *chan, uint32_t *ptr, int size) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - (void)dma; - -#ifdef NOUVEAU_DMA_DEBUG - if (dma->push_free < size) { - NOUVEAU_ERR("Packet too small. Free=%d, Need=%d\n", - dma->push_free, size); - return; - } -#endif -#ifdef NOUVEAU_DMA_TRACE - while (size--) { - nouveau_dma_out(chan, *ptr); - ptr++; - } -#else - memcpy(&nvchan->pushbuf[dma->cur], ptr, size << 2); -#ifdef NOUVEAU_DMA_DEBUG - dma->push_free -= size; -#endif - dma->cur += size; -#endif -} - -static inline void -nouveau_dma_space(struct nouveau_channel *chan, int size) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - - if (dma->free < size) { - if (nouveau_dma_wait(chan, size) && chan->hang_notify) - chan->hang_notify(chan); - } - dma->free -= size; -#ifdef NOUVEAU_DMA_DEBUG - dma->push_free = size; -#endif -} - -static inline void -nouveau_dma_begin(struct nouveau_channel *chan, struct nouveau_grobj *grobj, - int method, int size, const char* file, int line) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - (void)dma; - -#ifdef NOUVEAU_DMA_TRACE - NOUVEAU_MSG("BEGIN_RING %d/%08x/%d/0x%04x/%d\n", nvchan->drm.channel, - grobj->handle, grobj->subc, method, size); -#endif - -#ifdef NOUVEAU_DMA_DEBUG - if (dma->push_free) { - NOUVEAU_ERR("Previous packet incomplete: %d left at %s\n", - dma->push_free, faulty); - return; - } - sprintf(faulty,"%s:%d",file,line); -#endif - - nouveau_dma_space(chan, (size + 1)); - nouveau_dma_out(chan, (size << 18) | (grobj->subc << 13) | method); -} - -#define RING_SPACE_CH(ch,sz) nouveau_dma_space((ch), (sz)) -#define BEGIN_RING_CH(ch,gr,m,sz) nouveau_dma_begin((ch), (gr), (m), (sz), __FUNCTION__, __LINE__ ) -#define OUT_RING_CH(ch, data) nouveau_dma_out((ch), (data)) -#define OUT_RINGp_CH(ch,ptr,dwords) nouveau_dma_outp((ch), (void*)(ptr), \ - (dwords)) -#define FIRE_RING_CH(ch) nouveau_dma_kickoff((ch)) -#define WAIT_RING_CH(ch,sz) nouveau_dma_wait((ch), (sz)) - -#endif diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_dri.h b/src/mesa/drivers/dri/nouveau_winsys/nouveau_dri.h deleted file mode 100644 index 1207c2d609c..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_dri.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _NOUVEAU_DRI_ -#define _NOUVEAU_DRI_ - -#include "xf86drm.h" -#include "drm.h" -#include "nouveau_drm.h" - -struct nouveau_dri { - uint32_t device_id; /**< \brief PCI device ID */ - uint32_t width; /**< \brief width in pixels of display */ - uint32_t height; /**< \brief height in scanlines of display */ - uint32_t depth; /**< \brief depth of display (8, 15, 16, 24) */ - uint32_t bpp; /**< \brief bit depth of display (8, 16, 24, 32) */ - - uint32_t bus_type; /**< \brief ths bus type */ - uint32_t bus_mode; /**< \brief bus mode (used for AGP, maybe also for PCI-E ?) */ - - uint32_t front_offset; /**< \brief front buffer offset */ - uint32_t front_pitch; /**< \brief front buffer pitch */ - uint32_t back_offset; /**< \brief private back buffer offset */ - uint32_t back_pitch; /**< \brief private back buffer pitch */ - uint32_t depth_offset; /**< \brief private depth buffer offset */ - uint32_t depth_pitch; /**< \brief private depth buffer pitch */ - -}; - -#endif - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_drmif.h b/src/mesa/drivers/dri/nouveau_winsys/nouveau_drmif.h deleted file mode 100644 index 67e19f1cfe8..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_drmif.h +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_DRMIF_H__ -#define __NOUVEAU_DRMIF_H__ - -#include <stdint.h> -#include <xf86drm.h> -#include <nouveau_drm.h> - -#include "nouveau_device.h" -#include "pipe/nouveau/nouveau_channel.h" -#include "pipe/nouveau/nouveau_grobj.h" -#include "pipe/nouveau/nouveau_notifier.h" -#include "pipe/nouveau/nouveau_bo.h" -#include "pipe/nouveau/nouveau_resource.h" -#include "pipe/nouveau/nouveau_pushbuf.h" - -struct nouveau_device_priv { - struct nouveau_device base; - - int fd; - drm_context_t ctx; - drmLock *lock; - int needs_close; - - struct drm_nouveau_mem_alloc sa; - void *sa_map; - struct nouveau_resource *sa_heap; -}; -#define nouveau_device(n) ((struct nouveau_device_priv *)(n)) - -extern int -nouveau_device_open_existing(struct nouveau_device **, int close, - int fd, drm_context_t ctx); - -extern int -nouveau_device_open(struct nouveau_device **, const char *busid); - -extern void -nouveau_device_close(struct nouveau_device **); - -extern int -nouveau_device_get_param(struct nouveau_device *, uint64_t param, uint64_t *v); - -extern int -nouveau_device_set_param(struct nouveau_device *, uint64_t param, uint64_t val); - -struct nouveau_fence { - struct nouveau_channel *channel; -}; - -struct nouveau_fence_cb { - struct nouveau_fence_cb *next; - void (*func)(void *); - void *priv; -}; - -struct nouveau_fence_priv { - struct nouveau_fence base; - int refcount; - - struct nouveau_fence *next; - struct nouveau_fence_cb *signal_cb; - - uint32_t sequence; - int emitted; - int signalled; -}; -#define nouveau_fence(n) ((struct nouveau_fence_priv *)(n)) - -extern int -nouveau_fence_new(struct nouveau_channel *, struct nouveau_fence **); - -extern int -nouveau_fence_ref(struct nouveau_fence *, struct nouveau_fence **); - -extern int -nouveau_fence_signal_cb(struct nouveau_fence *, void (*)(void *), void *); - -extern void -nouveau_fence_emit(struct nouveau_fence *); - -extern int -nouveau_fence_wait(struct nouveau_fence **); - -extern void -nouveau_fence_flush(struct nouveau_channel *); - -struct nouveau_pushbuf_reloc { - uint64_t next; - uint64_t handle; - uint32_t *ptr; - uint32_t flags; - uint32_t data; - uint32_t vor; - uint32_t tor; -}; - -struct nouveau_pushbuf_bo { - uint64_t next; - uint64_t handle; - uint64_t flags; - uint64_t relocs; - int nr_relocs; -}; - -struct nouveau_pushbuf_priv { - struct nouveau_pushbuf base; - - unsigned nop_jump; - unsigned start; - unsigned size; - - uint64_t buffers; - int nr_buffers; -}; -#define nouveau_pushbuf(n) ((struct nouveau_pushbuf_priv *)(n)) - -#define pbbo_to_ptr(o) ((uint64_t)(unsigned long)(o)) -#define ptr_to_pbbo(h) ((struct nouveau_pushbuf_bo *)(unsigned long)(h)) -#define pbrel_to_ptr(o) ((uint64_t)(unsigned long)(o)) -#define ptr_to_pbrel(h) ((struct nouveau_pushbuf_reloc *)(unsigned long)(h)) -#define bo_to_ptr(o) ((uint64_t)(unsigned long)(o)) -#define ptr_to_bo(h) ((struct nouveau_bo_priv *)(unsigned long)(h)) - -extern int -nouveau_pushbuf_init(struct nouveau_channel *); - -extern int -nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min); - -extern int -nouveau_pushbuf_emit_reloc(struct nouveau_channel *, void *ptr, - struct nouveau_bo *, uint32_t data, uint32_t flags, - uint32_t vor, uint32_t tor); - -struct nouveau_dma_priv { - uint32_t base; - uint32_t max; - uint32_t cur; - uint32_t put; - uint32_t free; - - int push_free; -} dma; - -struct nouveau_channel_priv { - struct nouveau_channel base; - - struct drm_nouveau_channel_alloc drm; - - uint32_t *pushbuf; - void *notifier_block; - - volatile uint32_t *user; - volatile uint32_t *put; - volatile uint32_t *get; - volatile uint32_t *ref_cnt; - - struct nouveau_dma_priv dma_master; - struct nouveau_dma_priv dma_bufmgr; - struct nouveau_dma_priv *dma; - - struct nouveau_fence *fence_head; - struct nouveau_fence *fence_tail; - uint32_t fence_sequence; - - struct nouveau_pushbuf_priv pb; - - unsigned user_charge; -}; -#define nouveau_channel(n) ((struct nouveau_channel_priv *)(n)) - -extern int -nouveau_channel_alloc(struct nouveau_device *, uint32_t fb, uint32_t tt, - struct nouveau_channel **); - -extern void -nouveau_channel_free(struct nouveau_channel **); - -struct nouveau_grobj_priv { - struct nouveau_grobj base; -}; -#define nouveau_grobj(n) ((struct nouveau_grobj_priv *)(n)) - -extern int nouveau_grobj_alloc(struct nouveau_channel *, uint32_t handle, - int class, struct nouveau_grobj **); -extern int nouveau_grobj_ref(struct nouveau_channel *, uint32_t handle, - struct nouveau_grobj **); -extern void nouveau_grobj_free(struct nouveau_grobj **); - - -struct nouveau_notifier_priv { - struct nouveau_notifier base; - - struct drm_nouveau_notifierobj_alloc drm; - volatile void *map; -}; -#define nouveau_notifier(n) ((struct nouveau_notifier_priv *)(n)) - -extern int -nouveau_notifier_alloc(struct nouveau_channel *, uint32_t handle, int count, - struct nouveau_notifier **); - -extern void -nouveau_notifier_free(struct nouveau_notifier **); - -extern void -nouveau_notifier_reset(struct nouveau_notifier *, int id); - -extern uint32_t -nouveau_notifier_status(struct nouveau_notifier *, int id); - -extern uint32_t -nouveau_notifier_return_val(struct nouveau_notifier *, int id); - -extern int -nouveau_notifier_wait_status(struct nouveau_notifier *, int id, int status, - int timeout); - -struct nouveau_bo_priv { - struct nouveau_bo base; - - struct nouveau_fence *fence; - struct nouveau_fence *wr_fence; - - struct drm_nouveau_mem_alloc drm; - void *map; - - void *sysmem; - int user; - - int refcount; - - uint64_t offset; - uint64_t flags; -}; -#define nouveau_bo(n) ((struct nouveau_bo_priv *)(n)) - -extern int -nouveau_bo_init(struct nouveau_device *); - -extern void -nouveau_bo_takedown(struct nouveau_device *); - -extern int -nouveau_bo_new(struct nouveau_device *, uint32_t flags, int align, int size, - struct nouveau_bo **); - -extern int -nouveau_bo_user(struct nouveau_device *, void *ptr, int size, - struct nouveau_bo **); - -extern int -nouveau_bo_ref(struct nouveau_device *, uint64_t handle, struct nouveau_bo **); - -extern int -nouveau_bo_set_status(struct nouveau_bo *, uint32_t flags); - -extern void -nouveau_bo_del(struct nouveau_bo **); - -extern int -nouveau_bo_map(struct nouveau_bo *, uint32_t flags); - -extern void -nouveau_bo_unmap(struct nouveau_bo *); - -extern int -nouveau_bo_validate(struct nouveau_channel *, struct nouveau_bo *, - struct nouveau_fence *fence, uint32_t flags); - -extern int -nouveau_resource_init(struct nouveau_resource **heap, unsigned start, - unsigned size); - -extern int -nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv, - struct nouveau_resource **); - -extern void -nouveau_resource_free(struct nouveau_resource **); - -#endif diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_fence.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_fence.c deleted file mode 100644 index 7714e6f2485..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_fence.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <errno.h> -#include <assert.h> - -#include "nouveau_drmif.h" -#include "nouveau_dma.h" -#include "nouveau_local.h" - -static void -nouveau_fence_del_unsignalled(struct nouveau_fence *fence) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(fence->channel); - struct nouveau_fence *le; - - if (nvchan->fence_head == fence) { - nvchan->fence_head = nouveau_fence(fence)->next; - if (nvchan->fence_head == NULL) - nvchan->fence_tail = NULL; - return; - } - - le = nvchan->fence_head; - while (le && nouveau_fence(le)->next != fence) - le = nouveau_fence(le)->next; - assert(le && nouveau_fence(le)->next == fence); - nouveau_fence(le)->next = nouveau_fence(fence)->next; - if (nvchan->fence_tail == fence) - nvchan->fence_tail = le; -} - -static void -nouveau_fence_del(struct nouveau_fence **fence) -{ - struct nouveau_fence_priv *nvfence; - - if (!fence || !*fence) - return; - nvfence = nouveau_fence(*fence); - *fence = NULL; - - if (--nvfence->refcount) - return; - - if (nvfence->emitted && !nvfence->signalled) { - if (nvfence->signal_cb) { - nvfence->refcount++; - nouveau_fence_wait((void *)&nvfence); - return; - } - - nouveau_fence_del_unsignalled(&nvfence->base); - } - free(nvfence); -} - -int -nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **fence) -{ - struct nouveau_fence_priv *nvfence; - - if (!chan || !fence || *fence) - return -EINVAL; - - nvfence = calloc(1, sizeof(struct nouveau_fence_priv)); - if (!nvfence) - return -ENOMEM; - nvfence->base.channel = chan; - nvfence->refcount = 1; - - *fence = &nvfence->base; - return 0; -} - -int -nouveau_fence_ref(struct nouveau_fence *ref, struct nouveau_fence **fence) -{ - struct nouveau_fence_priv *nvfence; - - if (!fence) - return -EINVAL; - - if (*fence) { - nouveau_fence_del(fence); - *fence = NULL; - } - - if (ref) { - nvfence = nouveau_fence(ref); - nvfence->refcount++; - *fence = &nvfence->base; - } - - return 0; -} - -int -nouveau_fence_signal_cb(struct nouveau_fence *fence, void (*func)(void *), - void *priv) -{ - struct nouveau_fence_priv *nvfence = nouveau_fence(fence); - struct nouveau_fence_cb *cb; - - if (!nvfence || !func) - return -EINVAL; - - cb = malloc(sizeof(struct nouveau_fence_cb)); - if (!cb) - return -ENOMEM; - - cb->func = func; - cb->priv = priv; - cb->next = nvfence->signal_cb; - nvfence->signal_cb = cb; - return 0; -} - -void -nouveau_fence_emit(struct nouveau_fence *fence) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(fence->channel); - struct nouveau_fence_priv *nvfence = nouveau_fence(fence); - - nvfence->emitted = 1; - nvfence->sequence = ++nvchan->fence_sequence; - if (nvfence->sequence == 0xffffffff) - NOUVEAU_ERR("AII wrap unhandled\n"); - - /*XXX: assumes subc 0 is populated */ - RING_SPACE_CH(fence->channel, 2); - OUT_RING_CH (fence->channel, 0x00040050); - OUT_RING_CH (fence->channel, nvfence->sequence); - - if (nvchan->fence_tail) { - nouveau_fence(nvchan->fence_tail)->next = fence; - } else { - nvchan->fence_head = fence; - } - nvchan->fence_tail = fence; -} - -void -nouveau_fence_flush(struct nouveau_channel *chan) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - uint32_t sequence = *nvchan->ref_cnt; - - while (nvchan->fence_head) { - struct nouveau_fence_priv *nvfence; - - nvfence = nouveau_fence(nvchan->fence_head); - if (nvfence->sequence > sequence) - break; - - nouveau_fence_del_unsignalled(&nvfence->base); - nvfence->signalled = 1; - - if (nvfence->signal_cb) { - struct nouveau_fence *fence = NULL; - - nouveau_fence_ref(nvchan->fence_head, &fence); - - while (nvfence->signal_cb) { - struct nouveau_fence_cb *cb; - - cb = nvfence->signal_cb; - nvfence->signal_cb = cb->next; - cb->func(cb->priv); - free(cb); - } - - nouveau_fence_ref(NULL, &fence); - } - } -} - -int -nouveau_fence_wait(struct nouveau_fence **fence) -{ - struct nouveau_fence_priv *nvfence; - - if (!fence || !*fence) - return -EINVAL; - nvfence = nouveau_fence(*fence); - - if (nvfence->emitted) { - while (!nvfence->signalled) - nouveau_fence_flush(nvfence->base.channel); - } - nouveau_fence_ref(NULL, fence); - - return 0; -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_grobj.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_grobj.c deleted file mode 100644 index 55dfeb99aa7..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_grobj.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <errno.h> - -#include "nouveau_drmif.h" - -int -nouveau_grobj_alloc(struct nouveau_channel *chan, uint32_t handle, - int class, struct nouveau_grobj **grobj) -{ - struct nouveau_device_priv *nvdev = nouveau_device(chan->device); - struct nouveau_grobj_priv *nvgrobj; - struct drm_nouveau_grobj_alloc g; - int ret; - - if (!nvdev || !grobj || *grobj) - return -EINVAL; - - nvgrobj = calloc(1, sizeof(*nvgrobj)); - if (!nvgrobj) - return -ENOMEM; - nvgrobj->base.channel = chan; - nvgrobj->base.handle = handle; - nvgrobj->base.grclass = class; - - g.channel = chan->id; - g.handle = handle; - g.class = class; - ret = drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GROBJ_ALLOC, - &g, sizeof(g)); - if (ret) { - nouveau_grobj_free((void *)&grobj); - return ret; - } - - *grobj = &nvgrobj->base; - return 0; -} - -int -nouveau_grobj_ref(struct nouveau_channel *chan, uint32_t handle, - struct nouveau_grobj **grobj) -{ - struct nouveau_grobj_priv *nvgrobj; - - if (!chan || !grobj || *grobj) - return -EINVAL; - - nvgrobj = calloc(1, sizeof(struct nouveau_grobj_priv)); - if (!nvgrobj) - return -ENOMEM; - nvgrobj->base.channel = chan; - nvgrobj->base.handle = handle; - nvgrobj->base.grclass = 0; - - *grobj = &nvgrobj->base; - return 0; -} - -void -nouveau_grobj_free(struct nouveau_grobj **grobj) -{ - struct nouveau_device_priv *nvdev; - struct nouveau_channel_priv *chan; - struct nouveau_grobj_priv *nvgrobj; - - if (!grobj || !*grobj) - return; - nvgrobj = nouveau_grobj(*grobj); - *grobj = NULL; - - - chan = nouveau_channel(nvgrobj->base.channel); - nvdev = nouveau_device(chan->base.device); - - if (nvgrobj->base.grclass) { - struct drm_nouveau_gpuobj_free f; - - f.channel = chan->drm.channel; - f.handle = nvgrobj->base.handle; - drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GPUOBJ_FREE, - &f, sizeof(f)); - } - free(nvgrobj); -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_local.h b/src/mesa/drivers/dri/nouveau_winsys/nouveau_local.h deleted file mode 100644 index 59febca2929..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_local.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef __NOUVEAU_LOCAL_H__ -#define __NOUVEAU_LOCAL_H__ - -#include <stdio.h> - -/* Debug output */ -#define NOUVEAU_MSG(fmt, args...) do { \ - fprintf(stdout, "nouveau: "fmt, ##args); \ - fflush(stdout); \ -} while(0) - -#define NOUVEAU_ERR(fmt, args...) do { \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); \ - fflush(stderr); \ -} while(0) - -#define NOUVEAU_TIME_MSEC() 0 - -/* User FIFO control */ -//#define NOUVEAU_DMA_TRACE -//#define NOUVEAU_DMA_DEBUG -//#define NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF -#define NOUVEAU_DMA_BARRIER -#define NOUVEAU_DMA_TIMEOUT 2000 - -/* Push buffer access macros */ -#define OUT_RING(data) do { \ - (*nv->channel->pushbuf->cur++) = (data); \ -} while(0) - -#define OUT_RINGp(src,size) do { \ - memcpy(nv->channel->pushbuf->cur, (src), (size)<<2); \ - nv->channel->pushbuf->cur += (size); \ -} while(0) - -#define OUT_RINGf(data) do { \ - union { float v; uint32_t u; } c; \ - c.v = (data); \ - OUT_RING(c.u); \ -} while(0) - -#define FIRE_RING() do { \ - nouveau_pushbuf_flush(nv->channel, 0); \ -} while(0) - -#define BEGIN_RING_GR(obj,mthd,size) do { \ - if (nv->channel->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(nv->channel, ((size) + 1)); \ - OUT_RING(((obj)->subc << 13) | ((size) << 18) | (mthd)); \ - nv->channel->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#define BEGIN_RING(obj,mthd,size) do { \ - BEGIN_RING_GR(nv->obj, (mthd), (size)); \ -} while(0) - -#define BIND_RING(o,s) do { \ - nv->o->subc = (s); \ - BEGIN_RING(o, 0x0000, 1); \ - OUT_RING (nv->o->handle); \ -} while(0) - -#define OUT_RELOC(buf,data,flags,vor,tor) do { \ - nouveau_pipe_emit_reloc(nv->channel, nv->channel->pushbuf->cur++, \ - buf, (data), (flags), (vor), (tor)); \ -} while(0) - -/* Raw data + flags depending on FB/TT buffer */ -#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ - OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ -} while(0) - -/* FB/TT object handle */ -#define OUT_RELOCo(bo,flags) do { \ - OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ - nv->channel->vram->handle, nv->channel->gart->handle); \ -} while(0) - -/* Low 32-bits of offset */ -#define OUT_RELOCl(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ -} while(0) - -/* High 32-bits of offset */ -#define OUT_RELOCh(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ -} while(0) - -#endif diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_lock.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_lock.c deleted file mode 100644 index 9adb9ac8547..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_lock.c +++ /dev/null @@ -1,94 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/glheader.h" -#include "glapi/glthread.h" -#include <GL/internal/glcore.h> - -#include "nouveau_context.h" -#include "nouveau_screen.h" - -_glthread_DECLARE_STATIC_MUTEX( lockMutex ); - -static void -nouveau_contended_lock(struct nouveau_context *nv, GLuint flags) -{ - __DRIdrawablePrivate *dPriv = nv->dri_drawable; - __DRIscreenPrivate *sPriv = nv->dri_screen; - struct nouveau_screen *nv_screen = nv->nv_screen; - struct nouveau_device *dev = nv_screen->device; - struct nouveau_device_priv *nvdev = nouveau_device(dev); - - drmGetLock(nvdev->fd, nvdev->ctx, flags); - - /* If the window moved, may need to set a new cliprect now. - * - * NOTE: This releases and regains the hw lock, so all state - * checking must be done *after* this call: - */ - if (dPriv) - DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); -} - -/* Lock the hardware and validate our state. - */ -void -LOCK_HARDWARE(struct nouveau_context *nv) -{ - struct nouveau_screen *nv_screen = nv->nv_screen; - struct nouveau_device *dev = nv_screen->device; - struct nouveau_device_priv *nvdev = nouveau_device(dev); - char __ret=0; - - _glthread_LOCK_MUTEX(lockMutex); - assert(!nv->locked); - - DRM_CAS(nvdev->lock, nvdev->ctx, - (DRM_LOCK_HELD | nvdev->ctx), __ret); - - if (__ret) - nouveau_contended_lock(nv, 0); - nv->locked = GL_TRUE; -} - - - /* Unlock the hardware using the global current context - */ -void -UNLOCK_HARDWARE(struct nouveau_context *nv) -{ - struct nouveau_screen *nv_screen = nv->nv_screen; - struct nouveau_device *dev = nv_screen->device; - struct nouveau_device_priv *nvdev = nouveau_device(dev); - - assert(nv->locked); - nv->locked = GL_FALSE; - - DRM_UNLOCK(nvdev->fd, nvdev->lock, nvdev->ctx); - - _glthread_UNLOCK_MUTEX(lockMutex); -} diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_notifier.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_notifier.c deleted file mode 100644 index 01e8f38440e..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_notifier.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <errno.h> - -#include "nouveau_drmif.h" -#include "nouveau_local.h" - -#define NOTIFIER(__v) \ - struct nouveau_notifier_priv *nvnotify = nouveau_notifier(notifier); \ - volatile uint32_t *__v = (void*)nvnotify->map + (id * 32) - -int -nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle, - int count, struct nouveau_notifier **notifier) -{ - struct nouveau_notifier_priv *nvnotify; - int ret; - - if (!chan || !notifier || *notifier) - return -EINVAL; - - nvnotify = calloc(1, sizeof(struct nouveau_notifier_priv)); - if (!nvnotify) - return -ENOMEM; - nvnotify->base.channel = chan; - nvnotify->base.handle = handle; - - nvnotify->drm.channel = chan->id; - nvnotify->drm.handle = handle; - nvnotify->drm.count = count; - if ((ret = drmCommandWriteRead(nouveau_device(chan->device)->fd, - DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, - &nvnotify->drm, - sizeof(nvnotify->drm)))) { - nouveau_notifier_free((void *)&nvnotify); - return ret; - } - - nvnotify->map = (void *)nouveau_channel(chan)->notifier_block + - nvnotify->drm.offset; - *notifier = &nvnotify->base; - return 0; -} - -void -nouveau_notifier_free(struct nouveau_notifier **notifier) -{ - - struct nouveau_notifier_priv *nvnotify; - struct nouveau_channel_priv *nvchan; - struct nouveau_device_priv *nvdev; - struct drm_nouveau_gpuobj_free f; - - if (!notifier || !*notifier) - return; - nvnotify = nouveau_notifier(*notifier); - *notifier = NULL; - - nvchan = nouveau_channel(nvnotify->base.channel); - nvdev = nouveau_device(nvchan->base.device); - - f.channel = nvchan->drm.channel; - f.handle = nvnotify->base.handle; - drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GPUOBJ_FREE, &f, sizeof(f)); - free(nvnotify); -} - -void -nouveau_notifier_reset(struct nouveau_notifier *notifier, int id) -{ - NOTIFIER(n); - - n[NV_NOTIFY_TIME_0 /4] = 0x00000000; - n[NV_NOTIFY_TIME_1 /4] = 0x00000000; - n[NV_NOTIFY_RETURN_VALUE/4] = 0x00000000; - n[NV_NOTIFY_STATE /4] = (NV_NOTIFY_STATE_STATUS_IN_PROCESS << - NV_NOTIFY_STATE_STATUS_SHIFT); -} - -uint32_t -nouveau_notifier_status(struct nouveau_notifier *notifier, int id) -{ - NOTIFIER(n); - - return n[NV_NOTIFY_STATE/4] >> NV_NOTIFY_STATE_STATUS_SHIFT; -} - -uint32_t -nouveau_notifier_return_val(struct nouveau_notifier *notifier, int id) -{ - NOTIFIER(n); - - return n[NV_NOTIFY_RETURN_VALUE/4]; -} - -int -nouveau_notifier_wait_status(struct nouveau_notifier *notifier, int id, - int status, int timeout) -{ - NOTIFIER(n); - uint32_t time = 0, t_start = NOUVEAU_TIME_MSEC(); - - while (time <= timeout) { - uint32_t v; - - v = n[NV_NOTIFY_STATE/4] >> NV_NOTIFY_STATE_STATUS_SHIFT; - if (v == status) - return 0; - - if (timeout) - time = NOUVEAU_TIME_MSEC() - t_start; - } - - return -EBUSY; -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_pushbuf.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_pushbuf.c deleted file mode 100644 index 7d5eddb92ff..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_pushbuf.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <errno.h> -#include <assert.h> - -#include "nouveau_drmif.h" -#include "nouveau_dma.h" - -#define PB_BUFMGR_DWORDS (4096 / 2) -#define PB_MIN_USER_DWORDS 2048 - -static int -nouveau_pushbuf_space(struct nouveau_channel *chan, unsigned min) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; - - assert((min + 1) <= nvchan->dma->max); - - /* Wait for enough space in push buffer */ - min = min < PB_MIN_USER_DWORDS ? PB_MIN_USER_DWORDS : min; - min += 1; /* a bit extra for the NOP */ - if (nvchan->dma->free < min) - WAIT_RING_CH(chan, min); - - /* Insert NOP, may turn into a jump later */ - RING_SPACE_CH(chan, 1); - nvpb->nop_jump = nvchan->dma->cur; - OUT_RING_CH(chan, 0); - - /* Any remaining space is available to the user */ - nvpb->start = nvchan->dma->cur; - nvpb->size = nvchan->dma->free; - nvpb->base.channel = chan; - nvpb->base.remaining = nvpb->size; - nvpb->base.cur = &nvchan->pushbuf[nvpb->start]; - - return 0; -} - -int -nouveau_pushbuf_init(struct nouveau_channel *chan) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *m = &nvchan->dma_master; - struct nouveau_dma_priv *b = &nvchan->dma_bufmgr; - int i; - - if (!nvchan) - return -EINVAL; - - /* Reassign last bit of push buffer for a "separate" bufmgr - * ring buffer - */ - m->max -= PB_BUFMGR_DWORDS; - m->free -= PB_BUFMGR_DWORDS; - - b->base = m->base + ((m->max + 2) << 2); - b->max = PB_BUFMGR_DWORDS - 2; - b->cur = b->put = 0; - b->free = b->max - b->cur; - - /* Some NOPs just to be safe - *XXX: RING_SKIPS - */ - nvchan->dma = b; - RING_SPACE_CH(chan, 8); - for (i = 0; i < 8; i++) - OUT_RING_CH(chan, 0); - nvchan->dma = m; - - nouveau_pushbuf_space(chan, 0); - chan->pushbuf = &nvchan->pb.base; - - return 0; -} - -static uint32_t -nouveau_pushbuf_calc_reloc(struct nouveau_bo *bo, - struct nouveau_pushbuf_reloc *r) -{ - uint32_t push; - - if (r->flags & NOUVEAU_BO_LOW) { - push = bo->offset + r->data; - } else - if (r->flags & NOUVEAU_BO_HIGH) { - push = (bo->offset + r->data) >> 32; - } else { - push = r->data; - } - - if (r->flags & NOUVEAU_BO_OR) { - if (bo->flags & NOUVEAU_BO_VRAM) - push |= r->vor; - else - push |= r->tor; - } - - return push; -} - -/* This would be our TTM "superioctl" */ -int -nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; - struct nouveau_pushbuf_bo *pbbo; - struct nouveau_fence *fence = NULL; - int ret; - - if (nvpb->base.remaining == nvpb->size) - return 0; - - nvpb->size -= nvpb->base.remaining; - nvchan->dma->cur += nvpb->size; - nvchan->dma->free -= nvpb->size; - assert(nvchan->dma->cur <= nvchan->dma->max); - - ret = nouveau_fence_new(chan, &fence); - if (ret) - return ret; - - nvchan->dma = &nvchan->dma_bufmgr; - nvchan->pushbuf[nvpb->nop_jump] = 0x20000000 | - (nvchan->dma->base + (nvchan->dma->cur << 2)); - - /* Validate buffers + apply relocations */ - nvchan->user_charge = 0; - while ((pbbo = ptr_to_pbbo(nvpb->buffers))) { - struct nouveau_pushbuf_reloc *r; - struct nouveau_bo *bo = &ptr_to_bo(pbbo->handle)->base; - - ret = nouveau_bo_validate(chan, bo, fence, pbbo->flags); - assert (ret == 0); - - if (bo->offset == nouveau_bo(bo)->offset && - bo->flags == nouveau_bo(bo)->flags) { - while ((r = ptr_to_pbrel(pbbo->relocs))) { - pbbo->relocs = r->next; - free(r); - } - - nvpb->buffers = pbbo->next; - free(pbbo); - continue; - } - bo->offset = nouveau_bo(bo)->offset; - bo->flags = nouveau_bo(bo)->flags; - - while ((r = ptr_to_pbrel(pbbo->relocs))) { - *r->ptr = nouveau_pushbuf_calc_reloc(bo, r); - pbbo->relocs = r->next; - free(r); - } - - nvpb->buffers = pbbo->next; - free(pbbo); - } - nvpb->nr_buffers = 0; - - /* Switch back to user's ring */ - RING_SPACE_CH(chan, 1); - OUT_RING_CH(chan, 0x20000000 | ((nvpb->start << 2) + - nvchan->dma_master.base)); - nvchan->dma = &nvchan->dma_master; - - /* Fence + kickoff */ - nouveau_fence_emit(fence); - FIRE_RING_CH(chan); - nouveau_fence_ref(NULL, &fence); - - /* Allocate space for next push buffer */ - assert(!nouveau_pushbuf_space(chan, min)); - - return 0; -} - -static struct nouveau_pushbuf_bo * -nouveau_pushbuf_emit_buffer(struct nouveau_channel *chan, struct nouveau_bo *bo) -{ - struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(chan->pushbuf); - struct nouveau_pushbuf_bo *pbbo = ptr_to_pbbo(nvpb->buffers); - - while (pbbo) { - if (pbbo->handle == bo->handle) - return pbbo; - pbbo = ptr_to_pbbo(pbbo->next); - } - - pbbo = malloc(sizeof(struct nouveau_pushbuf_bo)); - pbbo->next = nvpb->buffers; - nvpb->buffers = pbbo_to_ptr(pbbo); - nvpb->nr_buffers++; - - pbbo->handle = bo_to_ptr(bo); - pbbo->flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART; - pbbo->relocs = 0; - pbbo->nr_relocs = 0; - return pbbo; -} - -int -nouveau_pushbuf_emit_reloc(struct nouveau_channel *chan, void *ptr, - struct nouveau_bo *bo, uint32_t data, uint32_t flags, - uint32_t vor, uint32_t tor) -{ - struct nouveau_pushbuf_bo *pbbo; - struct nouveau_pushbuf_reloc *r; - - if (!chan) - return -EINVAL; - - pbbo = nouveau_pushbuf_emit_buffer(chan, bo); - if (!pbbo) - return -EFAULT; - - r = malloc(sizeof(struct nouveau_pushbuf_reloc)); - r->next = pbbo->relocs; - pbbo->relocs = pbrel_to_ptr(r); - pbbo->nr_relocs++; - - pbbo->flags |= (flags & NOUVEAU_BO_RDWR); - pbbo->flags &= (flags | NOUVEAU_BO_RDWR); - - r->handle = bo_to_ptr(r); - r->ptr = ptr; - r->flags = flags; - r->data = data; - r->vor = vor; - r->tor = tor; - - if (flags & NOUVEAU_BO_DUMMY) - *(uint32_t *)ptr = 0; - else - *(uint32_t *)ptr = nouveau_pushbuf_calc_reloc(bo, r); - return 0; -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_resource.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_resource.c deleted file mode 100644 index 5d9d578b4fe..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_resource.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <errno.h> - -#include "nouveau_drmif.h" -#include "nouveau_local.h" - -int -nouveau_resource_init(struct nouveau_resource **heap, - unsigned start, unsigned size) -{ - struct nouveau_resource *r; - - r = calloc(1, sizeof(struct nouveau_resource)); - if (!r) - return 1; - - r->start = start; - r->size = size; - *heap = r; - return 0; -} - -int -nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv, - struct nouveau_resource **res) -{ - struct nouveau_resource *r; - - if (!heap || !size || !res || *res) - return 1; - - while (heap) { - if (!heap->in_use && heap->size >= size) { - r = calloc(1, sizeof(struct nouveau_resource)); - if (!r) - return 1; - - r->start = (heap->start + heap->size) - size; - r->size = size; - r->in_use = 1; - r->priv = priv; - - heap->size -= size; - - r->next = heap->next; - if (heap->next) - heap->next->prev = r; - r->prev = heap; - heap->next = r; - - *res = r; - return 0; - } - - heap = heap->next; - } - - return 1; -} - -void -nouveau_resource_free(struct nouveau_resource **res) -{ - struct nouveau_resource *r; - - if (!res || !*res) - return; - r = *res; - - if (r->prev && !r->prev->in_use) { - r->prev->next = r->next; - if (r->next) - r->next->prev = r->prev; - r->prev->size += r->size; - free(r); - } else - if (r->next && !r->next->in_use) { - r->next->prev = r->prev; - if (r->prev) - r->prev->next = r->next; - r->next->size += r->size; - r->next->start = r->start; - free(r); - } else { - r->in_use = 0; - } - - *res = NULL; -} diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_screen.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_screen.c deleted file mode 100644 index f06e1784831..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_screen.c +++ /dev/null @@ -1,308 +0,0 @@ -#include "utils.h" -#include "vblank.h" -#include "xmlpool.h" - -#include "pipe/p_context.h" -#include "state_tracker/st_public.h" -#include "state_tracker/st_cb_fbo.h" - -#include "nouveau_context.h" -#include "nouveau_device.h" -#include "nouveau_drm.h" -#include "nouveau_dri.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" -#include "nouveau_swapbuffers.h" - -#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 10 -#error nouveau_drm.h version does not match expected version -#endif - -/* Extension stuff, enabling of extensions handled by Gallium's GL state - * tracker. But, we still need to define the entry points we want. - */ -#define need_GL_ARB_fragment_program -#define need_GL_ARB_multisample -#define need_GL_ARB_occlusion_query -#define need_GL_ARB_point_parameters -#define need_GL_ARB_shader_objects -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_program -#define need_GL_ARB_vertex_shader -#define need_GL_ARB_vertex_buffer_object -#define need_GL_EXT_compiled_vertex_array -#define need_GL_EXT_fog_coord -#define need_GL_EXT_secondary_color -#define need_GL_EXT_framebuffer_object -#define need_GL_VERSION_2_0 -#define need_GL_VERSION_2_1 -#include "extension_helper.h" - -const struct dri_extension card_extensions[] = -{ - { "GL_ARB_multisample", GL_ARB_multisample_functions }, - { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, - { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, - { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, - { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, - { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, - { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, - { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, - { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, - { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, - { "GL_EXT_compiled_vertex_array", GL_EXT_compiled_vertex_array_functions }, - { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, - { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, - { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, - { NULL, 0 } -}; - -PUBLIC const char __driConfigOptions[] = -DRI_CONF_BEGIN -DRI_CONF_END; -static const GLuint __driNConfigOptions = 0; - -extern const struct dri_extension common_extensions[]; -extern const struct dri_extension nv40_extensions[]; - -static GLboolean -nouveau_screen_create(__DRIscreenPrivate *driScrnPriv) -{ - struct nouveau_dri *nv_dri = driScrnPriv->pDevPriv; - struct nouveau_screen *nv_screen; - int ret; - - if (driScrnPriv->devPrivSize != sizeof(struct nouveau_dri)) { - NOUVEAU_ERR("DRI struct mismatch between DDX/DRI\n"); - return GL_FALSE; - } - - nv_screen = CALLOC_STRUCT(nouveau_screen); - if (!nv_screen) - return GL_FALSE; - nv_screen->driScrnPriv = driScrnPriv; - driScrnPriv->private = (void *)nv_screen; - - driParseOptionInfo(&nv_screen->option_cache, - __driConfigOptions, __driNConfigOptions); - - if ((ret = nouveau_device_open_existing(&nv_screen->device, 0, - driScrnPriv->fd, 0))) { - NOUVEAU_ERR("Failed opening nouveau device: %d\n", ret); - return GL_FALSE; - } - - nv_screen->front_offset = nv_dri->front_offset; - nv_screen->front_pitch = nv_dri->front_pitch * (nv_dri->bpp / 8); - nv_screen->front_cpp = nv_dri->bpp / 8; - nv_screen->front_height = nv_dri->height; - - return GL_TRUE; -} - -static void -nouveau_screen_destroy(__DRIscreenPrivate *driScrnPriv) -{ - struct nouveau_screen *nv_screen = driScrnPriv->private; - - driScrnPriv->private = NULL; - FREE(nv_screen); -} - -static GLboolean -nouveau_create_buffer(__DRIscreenPrivate * driScrnPriv, - __DRIdrawablePrivate * driDrawPriv, - const __GLcontextModes *glVis, GLboolean pixmapBuffer) -{ - struct nouveau_framebuffer *nvfb; - enum pipe_format colour, depth, stencil; - - if (pixmapBuffer) - return GL_FALSE; - - nvfb = CALLOC_STRUCT(nouveau_framebuffer); - if (!nvfb) - return GL_FALSE; - - if (glVis->redBits == 5) - colour = PIPE_FORMAT_R5G6B5_UNORM; - else - colour = PIPE_FORMAT_A8R8G8B8_UNORM; - - if (glVis->depthBits == 16) - depth = PIPE_FORMAT_Z16_UNORM; - else if (glVis->depthBits == 24) - depth = PIPE_FORMAT_Z24S8_UNORM; - else - depth = PIPE_FORMAT_NONE; - - if (glVis->stencilBits == 8) - stencil = PIPE_FORMAT_Z24S8_UNORM; - else - stencil = PIPE_FORMAT_NONE; - - nvfb->stfb = st_create_framebuffer(glVis, colour, depth, stencil, - driDrawPriv->w, driDrawPriv->h, - (void*)nvfb); - if (!nvfb->stfb) { - free(nvfb); - return GL_FALSE; - } - - driDrawPriv->driverPrivate = (void *)nvfb; - return GL_TRUE; -} - -static void -nouveau_destroy_buffer(__DRIdrawablePrivate * driDrawPriv) -{ - struct nouveau_framebuffer *nvfb; - - nvfb = (struct nouveau_framebuffer *)driDrawPriv->driverPrivate; - st_unreference_framebuffer(&nvfb->stfb); - free(nvfb); -} - -static struct __DriverAPIRec -nouveau_api = { - .InitDriver = nouveau_screen_create, - .DestroyScreen = nouveau_screen_destroy, - .CreateContext = nouveau_context_create, - .DestroyContext = nouveau_context_destroy, - .CreateBuffer = nouveau_create_buffer, - .DestroyBuffer = nouveau_destroy_buffer, - .SwapBuffers = nouveau_swap_buffers, - .MakeCurrent = nouveau_context_bind, - .UnbindContext = nouveau_context_unbind, - .GetSwapInfo = NULL, - .GetMSC = NULL, - .WaitForMSC = NULL, - .WaitForSBC = NULL, - .SwapBuffersMSC = NULL, - .CopySubBuffer = nouveau_copy_sub_buffer, - .setTexOffset = NULL -}; - -static __GLcontextModes * -nouveau_fill_in_modes(unsigned pixel_bits, unsigned depth_bits, - unsigned stencil_bits, GLboolean have_back_buffer) -{ - __GLcontextModes * modes; - __GLcontextModes * m; - unsigned num_modes; - unsigned depth_buffer_factor; - unsigned back_buffer_factor; - int i; - - static const struct { - GLenum format; - GLenum type; - } fb_format_array[] = { - { GL_RGB , GL_UNSIGNED_SHORT_5_6_5 }, - { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV }, - { GL_BGR , GL_UNSIGNED_INT_8_8_8_8_REV }, - }; - - /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't - * support pageflipping at all. - */ - static const GLenum back_buffer_modes[] = { - GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML - }; - - u_int8_t depth_bits_array[4] = { 0, 16, 24, 24 }; - u_int8_t stencil_bits_array[4] = { 0, 0, 0, 8 }; - - depth_buffer_factor = 4; - back_buffer_factor = (have_back_buffer) ? 3 : 1; - - num_modes = ((pixel_bits==16) ? 1 : 2) * - depth_buffer_factor * back_buffer_factor * 4; - modes = (*dri_interface->createContextModes)(num_modes, - sizeof(__GLcontextModes)); - m = modes; - - for (i=((pixel_bits==16)?0:1);i<((pixel_bits==16)?1:3);i++) { - if (!driFillInModes(&m, fb_format_array[i].format, - fb_format_array[i].type, - depth_bits_array, - stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, - back_buffer_factor, - GLX_TRUE_COLOR)) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - - if (!driFillInModes(&m, fb_format_array[i].format, - fb_format_array[i].type, - depth_bits_array, - stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, - back_buffer_factor, - GLX_DIRECT_COLOR)) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - } - - return modes; -} -PUBLIC void * -__driCreateNewScreen_20050727(__DRInativeDisplay *dpy, int scrn, - __DRIscreen *psc, const __GLcontextModes * modes, - const __DRIversion * ddx_version, - const __DRIversion * dri_version, - const __DRIversion * drm_version, - const __DRIframebuffer * frame_buffer, - void * pSAREA, int fd, int internal_api_version, - const __DRIinterfaceMethods * interface, - __GLcontextModes ** driver_modes) -{ - __DRIscreenPrivate *psp; - static const __DRIversion ddx_expected = - { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; - static const __DRIversion dri_expected = { 4, 0, 0 }; - static const __DRIversion drm_expected = - { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; - struct nouveau_dri *nv_dri = NULL; - - dri_interface = interface; - - if (!driCheckDriDdxDrmVersions2("nouveau", - dri_version, &dri_expected, - ddx_version, &ddx_expected, - drm_version, &drm_expected)) { - return NULL; - } - - if (drm_expected.patch != drm_version->patch) { - fprintf(stderr, "Incompatible DRM patch level.\n" - "Expected: %d\n" "Current : %d\n", - drm_expected.patch, drm_version->patch); - return NULL; - } - - psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, - ddx_version, dri_version, drm_version, - frame_buffer, pSAREA, fd, - internal_api_version, - &nouveau_api); - if (psp == NULL) - return NULL; - nv_dri = psp->pDevPriv; - - *driver_modes = nouveau_fill_in_modes(nv_dri->bpp, - (nv_dri->bpp == 16) ? 16 : 24, - (nv_dri->bpp == 16) ? 0 : 8, - 1); - - driInitExtensions(NULL, card_extensions, GL_FALSE); - - return (void *)psp; -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_screen.h b/src/mesa/drivers/dri/nouveau_winsys/nouveau_screen.h deleted file mode 100644 index 019823bd44d..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_screen.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef __NOUVEAU_SCREEN_H__ -#define __NOUVEAU_SCREEN_H__ - -#include "xmlconfig.h" -#include "nouveau_device.h" - -struct nouveau_screen { - __DRIscreenPrivate *driScrnPriv; - driOptionCache option_cache; - - struct nouveau_device *device; - - uint32_t front_offset; - uint32_t front_pitch; - uint32_t front_cpp; - uint32_t front_height; -}; - -#endif diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_swapbuffers.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_swapbuffers.c deleted file mode 100644 index 91bf243f424..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_swapbuffers.c +++ /dev/null @@ -1,86 +0,0 @@ -#include "main/glheader.h" -#include "glapi/glthread.h" -#include <GL/internal/glcore.h> - -#include "pipe/p_context.h" -#include "state_tracker/st_public.h" -#include "state_tracker/st_context.h" -#include "state_tracker/st_cb_fbo.h" - -#include "nouveau_context.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" -#include "nouveau_swapbuffers.h" - -void -nouveau_copy_buffer(__DRIdrawablePrivate *dPriv, struct pipe_surface *surf, - const drm_clip_rect_t *rect) -{ - struct nouveau_context *nv = dPriv->driContextPriv->driverPrivate; - drm_clip_rect_t *pbox; - int nbox, i; - - LOCK_HARDWARE(nv); - if (!dPriv->numClipRects) { - UNLOCK_HARDWARE(nv); - return; - } - pbox = dPriv->pClipRects; - nbox = dPriv->numClipRects; - - nv->surface_copy_prep(nv, nv->frontbuffer, surf); - for (i = 0; i < nbox; i++, pbox++) { - int sx, sy, dx, dy, w, h; - - sx = pbox->x1 - dPriv->x; - sy = pbox->y1 - dPriv->y; - dx = pbox->x1; - dy = pbox->y1; - w = pbox->x2 - pbox->x1; - h = pbox->y2 - pbox->y1; - - nv->surface_copy(nv, dx, dy, sx, sy, w, h); - } - - FIRE_RING(); - UNLOCK_HARDWARE(nv); - - if (nv->last_stamp != dPriv->lastStamp) { - struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; - st_resize_framebuffer(nvfb->stfb, dPriv->w, dPriv->h); - nv->last_stamp = dPriv->lastStamp; - } -} - -void -nouveau_copy_sub_buffer(__DRIdrawablePrivate *dPriv, int x, int y, int w, int h) -{ - struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; - struct pipe_surface *surf; - - surf = st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT); - if (surf) { - drm_clip_rect_t rect; - rect.x1 = x; - rect.y1 = y; - rect.x2 = x + w; - rect.y2 = y + h; - - st_notify_swapbuffers(nvfb->stfb); - nouveau_copy_buffer(dPriv, surf, &rect); - } -} - -void -nouveau_swap_buffers(__DRIdrawablePrivate *dPriv) -{ - struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; - struct pipe_surface *surf; - - surf = st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT); - if (surf) { - st_notify_swapbuffers(nvfb->stfb); - nouveau_copy_buffer(dPriv, surf, NULL); - } -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_swapbuffers.h b/src/mesa/drivers/dri/nouveau_winsys/nouveau_swapbuffers.h deleted file mode 100644 index 825d3da6da5..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_swapbuffers.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __NOUVEAU_SWAPBUFFERS_H__ -#define __NOUVEAU_SWAPBUFFERS_H__ - -extern void nouveau_copy_buffer(__DRIdrawablePrivate *, struct pipe_surface *, - const drm_clip_rect_t *); -extern void nouveau_copy_sub_buffer(__DRIdrawablePrivate *, - int x, int y, int w, int h); -extern void nouveau_swap_buffers(__DRIdrawablePrivate *); - -#endif diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys.c deleted file mode 100644 index 1494bd48dd4..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys.c +++ /dev/null @@ -1,124 +0,0 @@ -#include "pipe/p_util.h" - -#include "nouveau_context.h" -#include "nouveau_winsys_pipe.h" - -#include "pipe/nouveau/nouveau_winsys.h" - -static int -nouveau_pipe_notifier_alloc(struct nouveau_winsys *nvws, int count, - struct nouveau_notifier **notify) -{ - struct nouveau_context *nv = nvws->nv; - - return nouveau_notifier_alloc(nv->channel, nv->next_handle++, - count, notify); -} - -static int -nouveau_pipe_grobj_alloc(struct nouveau_winsys *nvws, int grclass, - struct nouveau_grobj **grobj) -{ - struct nouveau_context *nv = nvws->nv; - int ret; - - ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, - grclass, grobj); - if (ret) - return ret; - - (*grobj)->subc = nv->next_subchannel++; - assert((*grobj)->subc <= 7); - BEGIN_RING_GR(*grobj, 0x0000, 1); - OUT_RING ((*grobj)->handle); - return 0; -} - -static int -nouveau_pipe_surface_copy(struct nouveau_winsys *nvws, struct pipe_surface *dst, - unsigned dx, unsigned dy, struct pipe_surface *src, - unsigned sx, unsigned sy, unsigned w, unsigned h) -{ - struct nouveau_context *nv = nvws->nv; - - if (nv->surface_copy_prep(nv, dst, src)) - return 1; - nv->surface_copy(nv, dx, dy, sx, sy, w, h); - nv->surface_copy_done(nv); - - return 0; -} - -static int -nouveau_pipe_surface_fill(struct nouveau_winsys *nvws, struct pipe_surface *dst, - unsigned dx, unsigned dy, unsigned w, unsigned h, - unsigned value) -{ - if (nvws->nv->surface_fill(nvws->nv, dst, dx, dy, w, h, value)) - return 1; - return 0; -} - -int -nouveau_pipe_emit_reloc(struct nouveau_channel *chan, void *ptr, - struct pipe_buffer *buf, uint32_t data, - uint32_t flags, uint32_t vor, uint32_t tor) -{ - return nouveau_pushbuf_emit_reloc(chan, ptr, nouveau_buffer(buf)->bo, - data, flags, vor, tor); -} - -struct pipe_context * -nouveau_pipe_create(struct nouveau_context *nv) -{ - struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys); - struct pipe_context *(*hw_create)(struct pipe_winsys *, - struct nouveau_winsys *, - unsigned); - - if (!nvws) - return NULL; - - switch (nv->chipset & 0xf0) { - case 0x30: - hw_create = nv30_create; - break; - case 0x40: - case 0x60: - hw_create = nv40_create; - break; - case 0x50: - case 0x80: - hw_create = nv50_create; - break; - default: - NOUVEAU_ERR("Unknown chipset NV%02x\n", (int)nv->chipset); - return NULL; - } - - nvws->nv = nv; - nvws->channel = nv->channel; - - nvws->res_init = nouveau_resource_init; - nvws->res_alloc = nouveau_resource_alloc; - nvws->res_free = nouveau_resource_free; - - nvws->push_reloc = nouveau_pipe_emit_reloc; - nvws->push_flush = nouveau_pushbuf_flush; - - nvws->grobj_alloc = nouveau_pipe_grobj_alloc; - nvws->grobj_free = nouveau_grobj_free; - - nvws->notifier_alloc = nouveau_pipe_notifier_alloc; - nvws->notifier_free = nouveau_notifier_free; - nvws->notifier_reset = nouveau_notifier_reset; - nvws->notifier_status = nouveau_notifier_status; - nvws->notifier_retval = nouveau_notifier_return_val; - nvws->notifier_wait = nouveau_notifier_wait_status; - - nvws->surface_copy = nouveau_pipe_surface_copy; - nvws->surface_fill = nouveau_pipe_surface_fill; - - return hw_create(nouveau_create_pipe_winsys(nv), nvws, nv->chipset); -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_pipe.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_pipe.c deleted file mode 100644 index e1a9271395b..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_pipe.c +++ /dev/null @@ -1,196 +0,0 @@ -#include "pipe/p_winsys.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" - -#include "nouveau_context.h" -#include "nouveau_device.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" -#include "nouveau_swapbuffers.h" -#include "nouveau_winsys_pipe.h" - -static void -nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, - void *context_private) -{ - struct nouveau_context *nv = context_private; - __DRIdrawablePrivate *dPriv = nv->dri_drawable; - - nouveau_copy_buffer(dPriv, surf, NULL); -} - -static void -nouveau_printf(struct pipe_winsys *pws, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); -} - -static const char * -nouveau_get_name(struct pipe_winsys *pws) -{ - return "Nouveau/DRI"; -} - -static struct pipe_surface * -nouveau_surface_alloc(struct pipe_winsys *ws) -{ - struct pipe_surface *surf; - - surf = CALLOC_STRUCT(pipe_surface); - if (!surf) - return NULL; - - surf->refcount = 1; - surf->winsys = ws; - return surf; -} - -static int -nouveau_surface_alloc_storage(struct pipe_winsys *ws, struct pipe_surface *surf, - unsigned width, unsigned height, - enum pipe_format format, unsigned flags) -{ - unsigned pitch = ((width * pf_get_size(format)) + 63) & ~63; - - surf->format = format; - surf->width = width; - surf->height = height; - surf->cpp = pf_get_size(format); - surf->pitch = pitch / surf->cpp; - - surf->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, - pitch * height); - if (!surf->buffer) - return 1; - - return 0; -} - -static void -nouveau_surface_release(struct pipe_winsys *ws, struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - - *s = NULL; - if (--surf->refcount <= 0) { - if (surf->buffer) - pipe_buffer_reference(ws, &surf->buffer, NULL); - free(surf); - } -} - -static struct pipe_buffer * -nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, - unsigned usage, unsigned size) -{ - struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; - struct nouveau_device *dev = nvpws->nv->nv_screen->device; - struct nouveau_pipe_buffer *nvbuf; - uint32_t flags = 0; - - nvbuf = calloc(1, sizeof(*nvbuf)); - if (!nvbuf) - return NULL; - nvbuf->base.refcount = 1; - nvbuf->base.alignment = alignment; - nvbuf->base.usage = usage; - nvbuf->base.size = size; - - flags = NOUVEAU_BO_LOCAL; - if (nouveau_bo_new(dev, flags, alignment, size, &nvbuf->bo)) { - free(nvbuf); - return NULL; - } - - return &nvbuf->base; -} - -static struct pipe_buffer * -nouveau_pipe_bo_user_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) -{ - struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; - struct nouveau_device *dev = nvpws->nv->nv_screen->device; - struct nouveau_pipe_buffer *nvbuf; - - nvbuf = calloc(1, sizeof(*nvbuf)); - if (!nvbuf) - return NULL; - nvbuf->base.refcount = 1; - nvbuf->base.size = bytes; - - if (nouveau_bo_user(dev, ptr, bytes, &nvbuf->bo)) { - free(nvbuf); - return NULL; - } - - return &nvbuf->base; -} - -static void -nouveau_pipe_bo_del(struct pipe_winsys *ws, struct pipe_buffer *buf) -{ - struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); - - nouveau_bo_del(&nvbuf->bo); - free(nvbuf); -} - -static void * -nouveau_pipe_bo_map(struct pipe_winsys *pws, struct pipe_buffer *buf, - unsigned flags) -{ - struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); - uint32_t map_flags = 0; - - if (flags & PIPE_BUFFER_USAGE_CPU_READ) - map_flags |= NOUVEAU_BO_RD; - if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) - map_flags |= NOUVEAU_BO_WR; - - if (nouveau_bo_map(nvbuf->bo, map_flags)) - return NULL; - return nvbuf->bo->map; -} - -static void -nouveau_pipe_bo_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) -{ - struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); - - nouveau_bo_unmap(nvbuf->bo); -} - -struct pipe_winsys * -nouveau_create_pipe_winsys(struct nouveau_context *nv) -{ - struct nouveau_pipe_winsys *nvpws; - struct pipe_winsys *pws; - - nvpws = CALLOC_STRUCT(nouveau_pipe_winsys); - if (!nvpws) - return NULL; - nvpws->nv = nv; - pws = &nvpws->pws; - - pws->flush_frontbuffer = nouveau_flush_frontbuffer; - pws->printf = nouveau_printf; - - pws->surface_alloc = nouveau_surface_alloc; - pws->surface_alloc_storage = nouveau_surface_alloc_storage; - pws->surface_release = nouveau_surface_release; - - pws->buffer_create = nouveau_pipe_bo_create; - pws->buffer_destroy = nouveau_pipe_bo_del; - pws->user_buffer_create = nouveau_pipe_bo_user_create; - pws->buffer_map = nouveau_pipe_bo_map; - pws->buffer_unmap = nouveau_pipe_bo_unmap; - - pws->get_name = nouveau_get_name; - - return &nvpws->pws; -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_pipe.h b/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_pipe.h deleted file mode 100644 index 6a03ac0d773..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_pipe.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef NOUVEAU_PIPE_WINSYS_H -#define NOUVEAU_PIPE_WINSYS_H - -#include "pipe/p_context.h" -#include "pipe/p_winsys.h" -#include "nouveau_context.h" - -struct nouveau_pipe_buffer { - struct pipe_buffer base; - struct nouveau_bo *bo; -}; - -static inline struct nouveau_pipe_buffer * -nouveau_buffer(struct pipe_buffer *buf) -{ - return (struct nouveau_pipe_buffer *)buf; -} - -struct nouveau_pipe_winsys { - struct pipe_winsys pws; - - struct nouveau_context *nv; -}; - -extern struct pipe_winsys * -nouveau_create_pipe_winsys(struct nouveau_context *nv); - -struct pipe_context * -nouveau_create_softpipe(struct nouveau_context *nv); - -struct pipe_context * -nouveau_pipe_create(struct nouveau_context *nv); - -#endif diff --git a/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_softpipe.c b/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_softpipe.c deleted file mode 100644 index 3908c175088..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nouveau_winsys_softpipe.c +++ /dev/null @@ -1,83 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> - */ - -#include "imports.h" - -#include "pipe/p_defines.h" -#include "pipe/p_format.h" -#include "pipe/softpipe/sp_winsys.h" - -#include "nouveau_context.h" -#include "nouveau_winsys_pipe.h" - -struct nouveau_softpipe_winsys { - struct softpipe_winsys sws; - struct nouveau_context *nv; -}; - -/** - * Return list of surface formats supported by this driver. - */ -static boolean -nouveau_is_format_supported(struct softpipe_winsys *sws, uint format) -{ - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return TRUE; - default: - break; - }; - - return FALSE; -} - - - -struct pipe_context * -nouveau_create_softpipe(struct nouveau_context *nv) -{ - struct nouveau_softpipe_winsys *nvsws; - - nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys); - - /* Fill in this struct with callbacks that softpipe will need to - * communicate with the window system, buffer manager, etc. - */ - nvsws->sws.is_format_supported = nouveau_is_format_supported; - nvsws->nv = nv; - - /* Create the softpipe context: - */ - return softpipe_create(nouveau_create_pipe_winsys(nv), &nvsws->sws); -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nv04_surface.c b/src/mesa/drivers/dri/nouveau_winsys/nv04_surface.c deleted file mode 100644 index fe1ea4ed70f..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nv04_surface.c +++ /dev/null @@ -1,226 +0,0 @@ -#include "pipe/p_context.h" - -#include "nouveau_context.h" - -static INLINE int -nv04_surface_format(int cpp) -{ - switch (cpp) { - case 1: return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; - case 2: return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; - case 4: return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; - default: - return -1; - } -} - -static INLINE int -nv04_rect_format(int cpp) -{ - switch (cpp) { - case 1: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; - case 2: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; - case 4: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; - default: - return -1; - } -} - -static void -nv04_surface_copy_m2mf(struct nouveau_context *nv, unsigned dx, unsigned dy, - unsigned sx, unsigned sy, unsigned w, unsigned h) -{ - struct pipe_surface *dst = nv->surf_dst; - struct pipe_surface *src = nv->surf_src; - unsigned dst_offset, src_offset; - - dst_offset = dst->offset + (dy * dst->pitch + dx) * dst->cpp; - src_offset = src->offset + (sy * src->pitch + sx) * src->cpp; - - while (h) { - int count = (h > 2047) ? 2047 : h; - - BEGIN_RING(NvM2MF, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); - OUT_RELOCl(src->buffer, src_offset, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(dst->buffer, dst_offset, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (src->pitch * src->cpp); - OUT_RING (dst->pitch * dst->cpp); - OUT_RING (w * src->cpp); - OUT_RING (count); - OUT_RING (0x0101); - OUT_RING (0); - - h -= count; - src_offset += src->pitch * src->cpp * count; - dst_offset += dst->pitch * dst->cpp * count; - } -} - -static void -nv04_surface_copy_blit(struct nouveau_context *nv, unsigned dx, unsigned dy, - unsigned sx, unsigned sy, unsigned w, unsigned h) -{ - BEGIN_RING(NvImageBlit, 0x0300, 3); - OUT_RING ((sy << 16) | sx); - OUT_RING ((dy << 16) | dx); - OUT_RING (( h << 16) | w); -} - -static int -nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, - struct pipe_surface *src) -{ - int format; - - if (src->cpp != dst->cpp) - return 1; - - /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback - * to NV_MEMORY_TO_MEMORY_FORMAT in this case. - */ - if ((src->offset & 63) || (dst->offset & 63)) { - BEGIN_RING(NvM2MF, - NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); - OUT_RELOCo(src->buffer, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | - NOUVEAU_BO_WR); - - nv->surface_copy = nv04_surface_copy_m2mf; - nv->surf_dst = dst; - nv->surf_src = src; - return 0; - - } - - if ((format = nv04_surface_format(dst->cpp)) < 0) { - NOUVEAU_ERR("Bad cpp = %d\n", dst->cpp); - return 1; - } - nv->surface_copy = nv04_surface_copy_blit; - - BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RELOCo(src->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); - OUT_RING (format); - OUT_RING (((dst->pitch * dst->cpp) << 16) | (src->pitch * src->cpp)); - OUT_RELOCl(src->buffer, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - return 0; -} - -static void -nv04_surface_copy_done(struct nouveau_context *nv) -{ - FIRE_RING(); -} - -static int -nv04_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, - unsigned dx, unsigned dy, unsigned w, unsigned h, - unsigned value) -{ - int cs2d_format, gdirect_format; - - if ((cs2d_format = nv04_surface_format(dst->cpp)) < 0) { - NOUVEAU_ERR("Bad cpp = %d\n", dst->cpp); - return 1; - } - - if ((gdirect_format = nv04_rect_format(dst->cpp)) < 0) { - NOUVEAU_ERR("Bad cpp = %d\n", dst->cpp); - return 1; - } - - BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); - OUT_RING (cs2d_format); - OUT_RING (((dst->pitch * dst->cpp) << 16) | (dst->pitch * dst->cpp)); - OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); - OUT_RING (gdirect_format); - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); - OUT_RING (value); - BEGIN_RING(NvGdiRect, - NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); - OUT_RING ((dx << 16) | dy); - OUT_RING (( w << 16) | h); - - FIRE_RING(); - return 0; -} - -int -nouveau_surface_init_nv04(struct nouveau_context *nv) -{ - unsigned class; - int ret; - - if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, 0x39, - &nv->NvM2MF))) { - NOUVEAU_ERR("Error creating m2mf object: %d\n", ret); - return 1; - } - BIND_RING (NvM2MF, nv->next_subchannel++); - BEGIN_RING(NvM2MF, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); - OUT_RING (nv->sync_notifier->handle); - - class = nv->chipset < 0x10 ? NV04_CONTEXT_SURFACES_2D : - NV10_CONTEXT_SURFACES_2D; - if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, class, - &nv->NvCtxSurf2D))) { - NOUVEAU_ERR("Error creating 2D surface object: %d\n", ret); - return 1; - } - BIND_RING (NvCtxSurf2D, nv->next_subchannel++); - BEGIN_RING(NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RING (nv->channel->vram->handle); - OUT_RING (nv->channel->vram->handle); - - class = nv->chipset < 0x10 ? NV04_IMAGE_BLIT : - NV12_IMAGE_BLIT; - if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, class, - &nv->NvImageBlit))) { - NOUVEAU_ERR("Error creating blit object: %d\n", ret); - return 1; - } - BIND_RING (NvImageBlit, nv->next_subchannel++); - BEGIN_RING(NvImageBlit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1); - OUT_RING (nv->sync_notifier->handle); - BEGIN_RING(NvImageBlit, NV04_IMAGE_BLIT_SURFACE, 1); - OUT_RING (nv->NvCtxSurf2D->handle); - BEGIN_RING(NvImageBlit, NV04_IMAGE_BLIT_OPERATION, 1); - OUT_RING (NV04_IMAGE_BLIT_OPERATION_SRCCOPY); - - class = NV04_GDI_RECTANGLE_TEXT; - if ((ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, class, - &nv->NvGdiRect))) { - NOUVEAU_ERR("Error creating rect object: %d\n", ret); - return 1; - } - BIND_RING (NvGdiRect, nv->next_subchannel++); - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); - OUT_RING (nv->sync_notifier->handle); - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); - OUT_RING (nv->NvCtxSurf2D->handle); - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); - OUT_RING (NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); - BEGIN_RING(NvGdiRect, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); - OUT_RING (NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); - - nv->surface_copy_prep = nv04_surface_copy_prep; - nv->surface_copy = nv04_surface_copy_blit; - nv->surface_copy_done = nv04_surface_copy_done; - nv->surface_fill = nv04_surface_fill; - return 0; -} - diff --git a/src/mesa/drivers/dri/nouveau_winsys/nv50_surface.c b/src/mesa/drivers/dri/nouveau_winsys/nv50_surface.c deleted file mode 100644 index 15a10028613..00000000000 --- a/src/mesa/drivers/dri/nouveau_winsys/nv50_surface.c +++ /dev/null @@ -1,160 +0,0 @@ -#include "pipe/p_context.h" - -#include "nouveau_context.h" - -static INLINE int -nv50_format(int cpp) -{ - switch (cpp) { - case 4: return NV50_2D_DST_FORMAT_32BPP; - case 3: return NV50_2D_DST_FORMAT_24BPP; - case 2: return NV50_2D_DST_FORMAT_16BPP; - case 1: return NV50_2D_DST_FORMAT_8BPP; - default: - return -1; - } -} - -static int -nv50_surface_copy_prep(struct nouveau_context *nv, - struct pipe_surface *dst, struct pipe_surface *src) -{ - int surf_format; - - assert(src->cpp == dst->cpp); - - surf_format = nv50_format(dst->cpp); - assert(surf_format >= 0); - - BEGIN_RING(Nv2D, NV50_2D_DMA_IN_MEMORY0, 2); - OUT_RELOCo(src->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(Nv2D, NV50_2D_DST_FORMAT, 2); - OUT_RING (surf_format); - OUT_RING (1); - BEGIN_RING(Nv2D, NV50_2D_DST_PITCH, 5); - OUT_RING (dst->pitch * dst->cpp); - OUT_RING (dst->pitch); - OUT_RING (dst->height); - OUT_RELOCh(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(Nv2D, NV50_2D_CLIP_X, 4); - OUT_RING (0); - OUT_RING (0); - OUT_RING (dst->pitch); - OUT_RING (dst->height); - - BEGIN_RING(Nv2D, NV50_2D_SRC_FORMAT, 2); - OUT_RING (surf_format); - OUT_RING (1); - BEGIN_RING(Nv2D, NV50_2D_SRC_PITCH, 5); - OUT_RING (src->pitch * src->cpp); - OUT_RING (src->pitch); - OUT_RING (src->height); - OUT_RELOCh(src->buffer, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCl(src->buffer, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - - return 0; -} - -static void -nv50_surface_copy(struct nouveau_context *nv, unsigned dx, unsigned dy, - unsigned sx, unsigned sy, unsigned w, unsigned h) -{ - BEGIN_RING(Nv2D, 0x0110, 1); - OUT_RING (0); - BEGIN_RING(Nv2D, NV50_2D_BLIT_DST_X, 12); - OUT_RING (dx); - OUT_RING (dy); - OUT_RING (w); - OUT_RING (h); - OUT_RING (0); - OUT_RING (1); - OUT_RING (0); - OUT_RING (1); - OUT_RING (0); - OUT_RING (sx); - OUT_RING (0); - OUT_RING (sy); -} - -static void -nv50_surface_copy_done(struct nouveau_context *nv) -{ - FIRE_RING(); -} - -static int -nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, - unsigned dx, unsigned dy, unsigned w, unsigned h, - unsigned value) -{ - int surf_format, rect_format; - - surf_format = nv50_format(dst->cpp); - if (surf_format < 0) - return 1; - - rect_format = nv50_format(dst->cpp); - if (rect_format < 0) - return 1; - - BEGIN_RING(Nv2D, NV50_2D_DMA_IN_MEMORY1, 1); - OUT_RELOCo(dst->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(Nv2D, NV50_2D_DST_FORMAT, 2); - OUT_RING (surf_format); - OUT_RING (1); - BEGIN_RING(Nv2D, NV50_2D_DST_PITCH, 5); - OUT_RING (dst->pitch * dst->cpp); - OUT_RING (dst->pitch); - OUT_RING (dst->height); - OUT_RELOCh(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(dst->buffer, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(Nv2D, NV50_2D_CLIP_X, 4); - OUT_RING (0); - OUT_RING (0); - OUT_RING (dst->pitch); - OUT_RING (dst->height); - - BEGIN_RING(Nv2D, 0x0580, 3); - OUT_RING (4); - OUT_RING (rect_format); - OUT_RING (value); - - BEGIN_RING(Nv2D, NV50_2D_RECT_X1, 4); - OUT_RING (dx); - OUT_RING (dy); - OUT_RING (dx + w); - OUT_RING (dy + h); - - FIRE_RING(); - - return 0; -} - -int -nouveau_surface_init_nv50(struct nouveau_context *nv) -{ - int ret; - - ret = nouveau_grobj_alloc(nv->channel, nv->next_handle++, NV50_2D, - &nv->Nv2D); - if (ret) - return ret; - BIND_RING (Nv2D, 0); - BEGIN_RING(Nv2D, NV50_2D_DMA_NOTIFY, 1); - OUT_RING (nv->sync_notifier->handle); - BEGIN_RING(Nv2D, NV50_2D_DMA_IN_MEMORY0, 2); - OUT_RING (nv->channel->vram->handle); - OUT_RING (nv->channel->vram->handle); - BEGIN_RING(Nv2D, NV50_2D_OPERATION, 1); - OUT_RING (NV50_2D_OPERATION_SRCCOPY); - - nv->surface_copy_prep = nv50_surface_copy_prep; - nv->surface_copy = nv50_surface_copy; - nv->surface_copy_done = nv50_surface_copy_done; - nv->surface_fill = nv50_surface_fill; - return 0; -} - diff --git a/src/mesa/pipe/nouveau/nouveau_bo.h b/src/mesa/pipe/nouveau/nouveau_bo.h deleted file mode 100644 index 18020e9c652..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_bo.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_BO_H__ -#define __NOUVEAU_BO_H__ - -/* Relocation/Buffer type flags */ -#define NOUVEAU_BO_VRAM (1 << 0) -#define NOUVEAU_BO_GART (1 << 1) -#define NOUVEAU_BO_RD (1 << 2) -#define NOUVEAU_BO_WR (1 << 3) -#define NOUVEAU_BO_RDWR (NOUVEAU_BO_RD | NOUVEAU_BO_WR) -#define NOUVEAU_BO_MAP (1 << 4) -#define NOUVEAU_BO_PIN (1 << 5) -#define NOUVEAU_BO_LOW (1 << 6) -#define NOUVEAU_BO_HIGH (1 << 7) -#define NOUVEAU_BO_OR (1 << 8) -#define NOUVEAU_BO_LOCAL (1 << 9) -#define NOUVEAU_BO_DUMMY (1 << 31) - -struct nouveau_bo { - struct nouveau_device *device; - uint64_t handle; - - uint64_t size; - void *map; - - uint32_t flags; - uint64_t offset; -}; - -#endif diff --git a/src/mesa/pipe/nouveau/nouveau_channel.h b/src/mesa/pipe/nouveau/nouveau_channel.h deleted file mode 100644 index b99de9add86..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_channel.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_CHANNEL_H__ -#define __NOUVEAU_CHANNEL_H__ - -struct nouveau_channel { - struct nouveau_device *device; - int id; - - struct nouveau_pushbuf *pushbuf; - - struct nouveau_grobj *vram; - struct nouveau_grobj *gart; - - void *user_private; - void (*hang_notify)(struct nouveau_channel *); -}; - -#endif diff --git a/src/mesa/pipe/nouveau/nouveau_class.h b/src/mesa/pipe/nouveau/nouveau_class.h deleted file mode 100644 index 5998945677b..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_class.h +++ /dev/null @@ -1,6134 +0,0 @@ -/************************************************************************* - - Autogenerated file, do not edit ! - -************************************************************************** - - Copyright (C) 2006-2007 : - Dmitry Baryshkov, - Laurent Carlier, - Matthieu Castet, - Dawid Gajownik, - Jeremy Kolb, - Stephane Loeuillet, - Patrice Mandin, - Stephane Marchesin, - Serge Martin, - Sylvain Munaut, - Simon Raffeiner, - Ben Skeggs, - Erik Waling, - koala_br, - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -*************************************************************************/ - - -#ifndef NOUVEAU_REG_H -#define NOUVEAU_REG_H 1 - - -#define NV01_ROOT 0x00000001 - - - -#define NV01_CONTEXT_DMA 0x00000002 - - - -#define NV01_DEVICE 0x00000003 - - - -#define NV01_TIMER 0x00000004 - -#define NV01_TIMER_SYNCHRONIZE 0x00000100 -#define NV01_TIMER_STOP_ALARM 0x00000104 -#define NV01_TIMER_DMA_NOTIFY 0x00000180 -#define NV01_TIMER_TIME(x) (0x00000300+((x)*4)) -#define NV01_TIMER_TIME__SIZE 0x00000002 -#define NV01_TIMER_ALARM_NOTIFY 0x00000308 - - -#define NV_IMAGE_STENCIL 0x00000010 - -#define NV_IMAGE_STENCIL_NOTIFY 0x00000104 -#define NV_IMAGE_STENCIL_DMA_NOTIFY 0x00000180 -#define NV_IMAGE_STENCIL_IMAGE_OUTPUT 0x00000200 -#define NV_IMAGE_STENCIL_IMAGE_INPUT(x) (0x00000204+((x)*4)) -#define NV_IMAGE_STENCIL_IMAGE_INPUT__SIZE 0x00000002 - - -#define NV_IMAGE_BLEND_AND 0x00000011 - -#define NV_IMAGE_BLEND_AND_NOP 0x00000100 -#define NV_IMAGE_BLEND_AND_NOTIFY 0x00000104 -#define NV_IMAGE_BLEND_AND_DMA_NOTIFY 0x00000180 -#define NV_IMAGE_BLEND_AND_IMAGE_OUTPUT 0x00000200 -#define NV_IMAGE_BLEND_AND_BETA_INPUT 0x00000204 -#define NV_IMAGE_BLEND_AND_IMAGE_INPUT 0x00000208 - - -#define NV01_CONTEXT_BETA1 0x00000012 - -#define NV01_CONTEXT_BETA1_NOP 0x00000100 -#define NV01_CONTEXT_BETA1_NOTIFY 0x00000104 -#define NV01_CONTEXT_BETA1_DMA_NOTIFY 0x00000180 -#define NV01_CONTEXT_BETA1_BETA_1D31 0x00000300 - - -#define NV_IMAGE_ROP_AND 0x00000013 - -#define NV_IMAGE_ROP_AND_NOTIFY 0x00000104 -#define NV_IMAGE_ROP_AND_DMA_NOTIFY 0x00000180 -#define NV_IMAGE_ROP_AND_IMAGE_OUTPUT 0x00000200 -#define NV_IMAGE_ROP_AND_ROP_INPUT 0x00000204 -#define NV_IMAGE_ROP_AND_IMAGE_INPUT(x) (0x00000208+((x)*4)) -#define NV_IMAGE_ROP_AND_IMAGE_INPUT__SIZE 0x00000002 - - -#define NV_IMAGE_COLOR_KEY 0x00000015 - - - -#define NV01_CONTEXT_COLOR_KEY 0x00000017 - -#define NV01_CONTEXT_COLOR_KEY_NOP 0x00000100 -#define NV01_CONTEXT_COLOR_KEY_NOTIFY 0x00000104 -#define NV01_CONTEXT_COLOR_KEY_DMA_NOTIFY 0x00000180 -#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT 0x00000300 -#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A8Y8 0x00000001 -#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X24Y8 0x00000002 -#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A1R5G5B5 0x00000003 -#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X17R5G5B5 0x00000004 -#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A8R8G8B8 0x00000005 -#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X8R8G8B8 0x00000006 -#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A16Y16 0x00000007 -#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16Y16 0x00000008 -#define NV01_CONTEXT_COLOR_KEY_COLOR 0x00000304 - - -#define NV01_CONTEXT_PATTERN 0x00000018 - -#define NV01_CONTEXT_PATTERN_NOP 0x00000100 -#define NV01_CONTEXT_PATTERN_NOTIFY 0x00000104 -#define NV01_CONTEXT_PATTERN_DMA_NOTIFY 0x00000180 -#define NV01_CONTEXT_PATTERN_COLOR_FORMAT 0x00000300 -#define NV01_CONTEXT_PATTERN_MONOCHROME_FORMAT 0x00000304 -#define NV01_CONTEXT_PATTERN_SHAPE 0x00000308 -#define NV01_CONTEXT_PATTERN_COLOR(x) (0x00000310+((x)*4)) -#define NV01_CONTEXT_PATTERN_COLOR__SIZE 0x00000002 -#define NV01_CONTEXT_PATTERN_PATTERN(x) (0x00000318+((x)*4)) -#define NV01_CONTEXT_PATTERN_PATTERN__SIZE 0x00000002 - - -#define NV01_CONTEXT_CLIP_RECTANGLE 0x00000019 - -#define NV01_CONTEXT_CLIP_RECTANGLE_NOP 0x00000100 -#define NV01_CONTEXT_CLIP_RECTANGLE_NOTIFY 0x00000104 -#define NV01_CONTEXT_CLIP_RECTANGLE_DMA_NOTIFY 0x00000180 -#define NV01_CONTEXT_CLIP_RECTANGLE_POINT 0x00000300 -#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X_SHIFT 0 -#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X_MASK 0x0000ffff -#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y_SHIFT 16 -#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y_MASK 0xffff0000 -#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE 0x00000304 -#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W_SHIFT 0 -#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W_MASK 0x0000ffff -#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H_SHIFT 16 -#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H_MASK 0xffff0000 - - -#define NV01_RENDER_SOLID_LINE 0x0000001c - -#define NV01_RENDER_SOLID_LINE_NOP 0x00000100 -#define NV01_RENDER_SOLID_LINE_NOTIFY 0x00000104 -#define NV01_RENDER_SOLID_LINE_PATCH 0x0000010c -#define NV01_RENDER_SOLID_LINE_DMA_NOTIFY 0x00000180 -#define NV01_RENDER_SOLID_LINE_CLIP_RECTANGLE 0x00000184 -#define NV01_RENDER_SOLID_LINE_PATTERN 0x00000188 -#define NV01_RENDER_SOLID_LINE_ROP 0x0000018c -#define NV01_RENDER_SOLID_LINE_BETA1 0x00000190 -#define NV01_RENDER_SOLID_LINE_SURFACE 0x00000194 -#define NV01_RENDER_SOLID_LINE_OPERATION 0x000002fc -#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_AND 0x00000000 -#define NV01_RENDER_SOLID_LINE_OPERATION_ROP_AND 0x00000001 -#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_AND 0x00000002 -#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY 0x00000003 -#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_PREMULT 0x00000004 -#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_PREMULT 0x00000005 -#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT 0x00000300 -#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A8Y8 0x00000001 -#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X24Y8 0x00000002 -#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A1R5G5B5 0x00000003 -#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X17R5G5B5 0x00000004 -#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A8R8G8B8 0x00000005 -#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X8R8G8B8 0x00000006 -#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A16Y16 0x00000007 -#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16Y16 0x00000008 -#define NV01_RENDER_SOLID_LINE_COLOR 0x00000304 -#define NV01_RENDER_SOLID_LINE_LINE_POINT0(x) (0x00000400+((x)*8)) -#define NV01_RENDER_SOLID_LINE_LINE_POINT0__SIZE 0x00000010 -#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X_SHIFT 0 -#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y_SHIFT 16 -#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_LINE_LINE_POINT1(x) (0x00000404+((x)*8)) -#define NV01_RENDER_SOLID_LINE_LINE_POINT1__SIZE 0x00000010 -#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X_SHIFT 0 -#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y_SHIFT 16 -#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X(x) (0x00000480+((x)*16)) -#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X__SIZE 0x00000010 -#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y(x) (0x00000484+((x)*16)) -#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y__SIZE 0x00000010 -#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X(x) (0x00000488+((x)*16)) -#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X__SIZE 0x00000010 -#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y(x) (0x0000048c+((x)*16)) -#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y__SIZE 0x00000010 -#define NV01_RENDER_SOLID_LINE_POLYLINE(x) (0x00000500+((x)*4)) -#define NV01_RENDER_SOLID_LINE_POLYLINE__SIZE 0x00000020 -#define NV01_RENDER_SOLID_LINE_POLYLINE_X_SHIFT 0 -#define NV01_RENDER_SOLID_LINE_POLYLINE_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_LINE_POLYLINE_Y_SHIFT 16 -#define NV01_RENDER_SOLID_LINE_POLYLINE_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X(x) (0x00000580+((x)*8)) -#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X__SIZE 0x00000010 -#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y(x) (0x00000584+((x)*8)) -#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y__SIZE 0x00000010 -#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR(x) (0x00000600+((x)*8)) -#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR__SIZE 0x00000010 -#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT(x) (0x00000604+((x)*8)) -#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT__SIZE 0x00000010 -#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X_SHIFT 0 -#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y_SHIFT 16 -#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y_MASK 0xffff0000 - - -#define NV01_RENDER_SOLID_TRIANGLE 0x0000001d - -#define NV01_RENDER_SOLID_TRIANGLE_NOP 0x00000100 -#define NV01_RENDER_SOLID_TRIANGLE_NOTIFY 0x00000104 -#define NV01_RENDER_SOLID_TRIANGLE_PATCH 0x0000010c -#define NV01_RENDER_SOLID_TRIANGLE_DMA_NOTIFY 0x00000180 -#define NV01_RENDER_SOLID_TRIANGLE_CLIP_RECTANGLE 0x00000184 -#define NV01_RENDER_SOLID_TRIANGLE_PATTERN 0x00000188 -#define NV01_RENDER_SOLID_TRIANGLE_ROP 0x0000018c -#define NV01_RENDER_SOLID_TRIANGLE_BETA1 0x00000190 -#define NV01_RENDER_SOLID_TRIANGLE_SURFACE 0x00000194 -#define NV01_RENDER_SOLID_TRIANGLE_OPERATION 0x000002fc -#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_AND 0x00000000 -#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_ROP_AND 0x00000001 -#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_AND 0x00000002 -#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY 0x00000003 -#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004 -#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_PREMULT 0x00000005 -#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT 0x00000300 -#define NV01_RENDER_SOLID_TRIANGLE_COLOR 0x00000304 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0 0x00000310 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X_SHIFT 0 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y_SHIFT 16 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1 0x00000314 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X_SHIFT 0 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y_SHIFT 16 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2 0x00000318 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X_SHIFT 0 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y_SHIFT 16 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_X 0x00000320 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_Y 0x00000324 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_X 0x00000328 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_Y 0x0000032c -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_X 0x00000330 -#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_Y 0x00000334 -#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH(x) (0x00000400+((x)*4)) -#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH__SIZE 0x00000020 -#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X_SHIFT 0 -#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y_SHIFT 16 -#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X(x) (0x00000480+((x)*8)) -#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X__SIZE 0x00000010 -#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y(x) (0x00000484+((x)*8)) -#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y__SIZE 0x00000010 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR(x) (0x00000500+((x)*16)) -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR__SIZE 0x00000008 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0(x) (0x00000504+((x)*16)) -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0__SIZE 0x00000008 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X_SHIFT 0 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y_SHIFT 16 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1(x) (0x00000508+((x)*16)) -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1__SIZE 0x00000008 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X_SHIFT 0 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y_SHIFT 16 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2(x) (0x0000050c+((x)*16)) -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2__SIZE 0x00000008 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X_SHIFT 0 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y_SHIFT 16 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR(x) (0x00000580+((x)*8)) -#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR__SIZE 0x00000010 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT(x) (0x00000584+((x)*8)) -#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT__SIZE 0x00000010 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X_SHIFT 0 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y_SHIFT 16 -#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y_MASK 0xffff0000 - - -#define NV01_RENDER_SOLID_RECTANGLE 0x0000001e - -#define NV01_RENDER_SOLID_RECTANGLE_NOP 0x00000100 -#define NV01_RENDER_SOLID_RECTANGLE_NOTIFY 0x00000104 -#define NV01_RENDER_SOLID_RECTANGLE_PATCH 0x0000010c -#define NV01_RENDER_SOLID_RECTANGLE_DMA_NOTIFY 0x00000180 -#define NV01_RENDER_SOLID_RECTANGLE_CLIP_RECTANGLE 0x00000184 -#define NV01_RENDER_SOLID_RECTANGLE_PATTERN 0x00000188 -#define NV01_RENDER_SOLID_RECTANGLE_ROP 0x0000018c -#define NV01_RENDER_SOLID_RECTANGLE_BETA1 0x00000190 -#define NV01_RENDER_SOLID_RECTANGLE_SURFACE 0x00000194 -#define NV01_RENDER_SOLID_RECTANGLE_OPERATION 0x000002fc -#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_AND 0x00000000 -#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_ROP_AND 0x00000001 -#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_AND 0x00000002 -#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY 0x00000003 -#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004 -#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_PREMULT 0x00000005 -#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT 0x00000300 -#define NV01_RENDER_SOLID_RECTANGLE_COLOR 0x00000304 -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT(x) (0x00000400+((x)*8)) -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT__SIZE 0x00000010 -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X_SHIFT 0 -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X_MASK 0x0000ffff -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y_SHIFT 16 -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y_MASK 0xffff0000 -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE(x) (0x00000404+((x)*8)) -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE__SIZE 0x00000010 -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W_SHIFT 0 -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W_MASK 0x0000ffff -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H_SHIFT 16 -#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H_MASK 0xffff0000 - - -#define NV01_IMAGE_BLIT 0x0000001f - -#define NV01_IMAGE_BLIT_NOP 0x00000100 -#define NV01_IMAGE_BLIT_NOTIFY 0x00000104 -#define NV01_IMAGE_BLIT_PATCH 0x0000010c -#define NV01_IMAGE_BLIT_DMA_NOTIFY 0x00000180 -#define NV01_IMAGE_BLIT_COLOR_KEY 0x00000184 -#define NV01_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188 -#define NV01_IMAGE_BLIT_PATTERN 0x0000018c -#define NV01_IMAGE_BLIT_ROP 0x00000190 -#define NV01_IMAGE_BLIT_BETA1 0x00000194 -#define NV01_IMAGE_BLIT_SURFACE 0x0000019c -#define NV01_IMAGE_BLIT_OPERATION 0x000002fc -#define NV01_IMAGE_BLIT_IMAGE_INPUT 0x00000204 -#define NV01_IMAGE_BLIT_POINT_IN 0x00000300 -#define NV01_IMAGE_BLIT_POINT_IN_X_SHIFT 0 -#define NV01_IMAGE_BLIT_POINT_IN_X_MASK 0x0000ffff -#define NV01_IMAGE_BLIT_POINT_IN_Y_SHIFT 16 -#define NV01_IMAGE_BLIT_POINT_IN_Y_MASK 0xffff0000 -#define NV01_IMAGE_BLIT_POINT_OUT 0x00000304 -#define NV01_IMAGE_BLIT_POINT_OUT_X_SHIFT 0 -#define NV01_IMAGE_BLIT_POINT_OUT_X_MASK 0x0000ffff -#define NV01_IMAGE_BLIT_POINT_OUT_Y_SHIFT 16 -#define NV01_IMAGE_BLIT_POINT_OUT_Y_MASK 0xffff0000 -#define NV01_IMAGE_BLIT_SIZE 0x00000308 -#define NV01_IMAGE_BLIT_SIZE_W_SHIFT 0 -#define NV01_IMAGE_BLIT_SIZE_W_MASK 0x0000ffff -#define NV01_IMAGE_BLIT_SIZE_H_SHIFT 16 -#define NV01_IMAGE_BLIT_SIZE_H_MASK 0xffff0000 - - -#define NV01_IMAGE_FROM_CPU 0x00000021 - -#define NV01_IMAGE_FROM_CPU_NOP 0x00000100 -#define NV01_IMAGE_FROM_CPU_NOTIFY 0x00000104 -#define NV01_IMAGE_FROM_CPU_PATCH 0x0000010c -#define NV01_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 -#define NV01_IMAGE_FROM_CPU_COLOR_KEY 0x00000184 -#define NV01_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x00000188 -#define NV01_IMAGE_FROM_CPU_PATTERN 0x0000018c -#define NV01_IMAGE_FROM_CPU_ROP 0x00000190 -#define NV01_IMAGE_FROM_CPU_BETA1 0x00000194 -#define NV01_IMAGE_FROM_CPU_SURFACE 0x00000198 -#define NV01_IMAGE_FROM_CPU_OPERATION 0x000002fc -#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_AND 0x00000000 -#define NV01_IMAGE_FROM_CPU_OPERATION_ROP_AND 0x00000001 -#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_AND 0x00000002 -#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY 0x00000003 -#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_PREMULT 0x00000004 -#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_PREMULT 0x00000005 -#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300 -#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_Y8 0x00000001 -#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A1R5G5B5 0x00000002 -#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X1R5G5B5 0x00000003 -#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A8R8G8B8 0x00000004 -#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X8R8G8B8 0x00000005 -#define NV01_IMAGE_FROM_CPU_POINT 0x00000304 -#define NV01_IMAGE_FROM_CPU_POINT_X_SHIFT 0 -#define NV01_IMAGE_FROM_CPU_POINT_X_MASK 0x0000ffff -#define NV01_IMAGE_FROM_CPU_POINT_Y_SHIFT 16 -#define NV01_IMAGE_FROM_CPU_POINT_Y_MASK 0xffff0000 -#define NV01_IMAGE_FROM_CPU_SIZE_OUT 0x00000308 -#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W_SHIFT 0 -#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W_MASK 0x0000ffff -#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H_SHIFT 16 -#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H_MASK 0xffff0000 -#define NV01_IMAGE_FROM_CPU_SIZE_IN 0x0000030c -#define NV01_IMAGE_FROM_CPU_SIZE_IN_W_SHIFT 0 -#define NV01_IMAGE_FROM_CPU_SIZE_IN_W_MASK 0x0000ffff -#define NV01_IMAGE_FROM_CPU_SIZE_IN_H_SHIFT 16 -#define NV01_IMAGE_FROM_CPU_SIZE_IN_H_MASK 0xffff0000 -#define NV01_IMAGE_FROM_CPU_COLOR(x) (0x00000400+((x)*4)) -#define NV01_IMAGE_FROM_CPU_COLOR__SIZE 0x00000020 - - -#define NV01_NULL 0x00000030 - - - -#define NV03_STRETCHED_IMAGE_FROM_CPU 0x00000036 - -#define NV03_STRETCHED_IMAGE_FROM_CPU_NOP 0x00000100 -#define NV03_STRETCHED_IMAGE_FROM_CPU_NOTIFY 0x00000104 -#define NV03_STRETCHED_IMAGE_FROM_CPU_PATCH 0x0000010c -#define NV03_STRETCHED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 -#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_KEY 0x00000184 -#define NV03_STRETCHED_IMAGE_FROM_CPU_PATTERN 0x00000188 -#define NV03_STRETCHED_IMAGE_FROM_CPU_ROP 0x0000018c -#define NV03_STRETCHED_IMAGE_FROM_CPU_BETA1 0x00000190 -#define NV03_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000194 -#define NV03_STRETCHED_IMAGE_FROM_CPU_OPERATION 0x000002fc -#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300 -#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN 0x00000304 -#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W_SHIFT 0 -#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W_MASK 0x0000ffff -#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H_SHIFT 16 -#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H_MASK 0xffff0000 -#define NV03_STRETCHED_IMAGE_FROM_CPU_DX_DU 0x00000308 -#define NV03_STRETCHED_IMAGE_FROM_CPU_DY_DV 0x0000030c -#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT 0x00000310 -#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X_SHIFT 0 -#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X_MASK 0x0000ffff -#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y_SHIFT 16 -#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y_MASK 0xffff0000 -#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE 0x00000314 -#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W_SHIFT 0 -#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W_MASK 0x0000ffff -#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H_SHIFT 16 -#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H_MASK 0xffff0000 -#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4 0x00000318 -#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X_SHIFT 0 -#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X_MASK 0x0000ffff -#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y_SHIFT 16 -#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y_MASK 0xffff0000 -#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR(x) (0x00000400+((x)*4)) -#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR__SIZE 0x00000020 - - -#define NV03_SCALED_IMAGE_FROM_MEMORY 0x00000037 - -#define NV03_SCALED_IMAGE_FROM_MEMORY_NOP 0x00000100 -#define NV03_SCALED_IMAGE_FROM_MEMORY_NOTIFY 0x00000104 -#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180 -#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184 -#define NV03_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188 -#define NV03_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c -#define NV03_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190 -#define NV03_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000194 -#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300 -#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001 -#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002 -#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003 -#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004 -#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005 -#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006 -#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007 -#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008 -#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009 -#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304 -#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000 -#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001 -#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002 -#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003 -#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004 -#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005 -#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308 -#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_SHIFT 0 -#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_MASK 0x0000ffff -#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT 16 -#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_MASK 0xffff0000 -#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c -#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_SHIFT 0 -#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_MASK 0x0000ffff -#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT 16 -#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_MASK 0xffff0000 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT 0x00000310 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_X_SHIFT 0 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_X_MASK 0x0000ffff -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_Y_SHIFT 16 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_Y_MASK 0xffff0000 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE 0x00000314 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_W_SHIFT 0 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_W_MASK 0x0000ffff -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_H_SHIFT 16 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_H_MASK 0xffff0000 -#define NV03_SCALED_IMAGE_FROM_MEMORY_DELTA_DU_DX 0x00000318 -#define NV03_SCALED_IMAGE_FROM_MEMORY_DELTA_DV_DY 0x0000031c -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE 0x00000400 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_W_SHIFT 0 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_W_MASK 0x0000ffff -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_H_SHIFT 16 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_H_MASK 0xffff0000 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT 0x00000404 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_PITCH_SHIFT 0 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_PITCH_MASK 0x0000ffff -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_SHIFT 16 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_MASK 0x00ff0000 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_CENTER 0x00010000 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_CORNER 0x00020000 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_INTERPOLATOR_SHIFT 24 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_INTERPOLATOR_MASK 0xff000000 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_OFFSET 0x00000408 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT 0x0000040c -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_U_SHIFT 0 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_U_MASK 0x0000ffff -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_V_SHIFT 16 -#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_V_MASK 0xffff0000 - - -#define NV04_DVD_SUBPICTURE 0x00000038 - -#define NV04_DVD_SUBPICTURE_NOP 0x00000100 -#define NV04_DVD_SUBPICTURE_NOTIFY 0x00000104 -#define NV04_DVD_SUBPICTURE_WAIT_FOR_IDLE 0x00000108 -#define NV04_DVD_SUBPICTURE_DMA_NOTIFY 0x00000180 -#define NV04_DVD_SUBPICTURE_DMA_OVERLAY 0x00000184 -#define NV04_DVD_SUBPICTURE_DMA_IMAGEIN 0x00000188 -#define NV04_DVD_SUBPICTURE_DMA_IMAGEOUT 0x0000018c -#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT 0x00000300 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_X_SHIFT 0 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_X_MASK 0x0000ffff -#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_Y_SHIFT 16 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_Y_MASK 0xffff0000 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE 0x00000304 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_W_SHIFT 0 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_W_MASK 0x0000ffff -#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_H_SHIFT 16 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_H_MASK 0xffff0000 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT 0x00000308 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_PITCH_SHIFT 0 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_PITCH_MASK 0x0000ffff -#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_COLOR_SHIFT 16 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_COLOR_MASK 0xffff0000 -#define NV04_DVD_SUBPICTURE_IMAGEOUT_OFFSET 0x0000030c -#define NV04_DVD_SUBPICTURE_IMAGEIN_DELTA_DU_DX 0x00000310 -#define NV04_DVD_SUBPICTURE_IMAGEIN_DELTA_DV_DY 0x00000314 -#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE 0x00000318 -#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_W_SHIFT 0 -#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_W_MASK 0x0000ffff -#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_H_SHIFT 16 -#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_H_MASK 0xffff0000 -#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT 0x0000031c -#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_PITCH_SHIFT 0 -#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_PITCH_MASK 0x0000ffff -#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_COLOR_SHIFT 16 -#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_COLOR_MASK 0xffff0000 -#define NV04_DVD_SUBPICTURE_IMAGEIN_OFFSET 0x00000320 -#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT 0x00000324 -#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_U_SHIFT 0 -#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_U_MASK 0x0000ffff -#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_V_SHIFT 16 -#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_V_MASK 0xffff0000 -#define NV04_DVD_SUBPICTURE_OVERLAY_DELTA_DU_DX 0x00000328 -#define NV04_DVD_SUBPICTURE_OVERLAY_DELTA_DV_DY 0x0000032c -#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE 0x00000330 -#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_W_SHIFT 0 -#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_W_MASK 0x0000ffff -#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_H_SHIFT 16 -#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_H_MASK 0xffff0000 -#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT 0x00000334 -#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_PITCH_SHIFT 0 -#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_PITCH_MASK 0x0000ffff -#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_COLOR_SHIFT 16 -#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_COLOR_MASK 0xffff0000 -#define NV04_DVD_SUBPICTURE_OVERLAY_OFFSET 0x00000338 -#define NV04_DVD_SUBPICTURE_OVERLAY_POINT 0x0000033c -#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_U_SHIFT 0 -#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_U_MASK 0x0000ffff -#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_V_SHIFT 16 -#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_V_MASK 0xffff0000 - - -#define NV04_MEMORY_TO_MEMORY_FORMAT 0x00000039 - -#define NV04_MEMORY_TO_MEMORY_FORMAT_NOP 0x00000100 -#define NV04_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104 -#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY 0x00000180 -#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN 0x00000184 -#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_OUT 0x00000188 -#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c -#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT 0x00000310 -#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN 0x00000314 -#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT 0x00000318 -#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN 0x0000031c -#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT 0x00000320 -#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT 0x00000324 -#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_SHIFT 0 -#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_MASK 0x0000000f -#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_SHIFT 8 -#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_MASK 0x00000f00 -#define NV04_MEMORY_TO_MEMORY_FORMAT_BUF_NOTIFY 0x00000328 - - -#define NV01_MEMORY_LOCAL_BANKED 0x0000003d - - - -#define NV01_MAPPING_SYSTEM 0x0000003e - - - -#define NV03_MEMORY_LOCAL_CURSOR 0x0000003f - - - -#define NV01_MEMORY_LOCAL_LINEAR 0x00000040 - - - -#define NV01_MAPPING_LOCAL 0x00000041 - - - -#define NV04_CONTEXT_SURFACES_2D 0x00000042 - -#define NV04_CONTEXT_SURFACES_2D_NOP 0x00000100 -#define NV04_CONTEXT_SURFACES_2D_NOTIFY 0x00000104 -#define NV04_CONTEXT_SURFACES_2D_PM_TRIGGER 0x00000140 -#define NV04_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180 -#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE 0x00000184 -#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_DESTIN 0x00000188 -#define NV04_CONTEXT_SURFACES_2D_FORMAT 0x00000300 -#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y8 0x00000001 -#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_Z1R5G5B5 0x00000002 -#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_X1R5G5B5 0x00000003 -#define NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5 0x00000004 -#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y16 0x00000005 -#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_Z8R8G8B8 0x00000006 -#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_X8R8G8B8 0x00000007 -#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_Z1A7R8G8B8 0x00000008 -#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_X1A7R8G8B8 0x00000009 -#define NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8 0x0000000a -#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y32 0x0000000b -#define NV04_CONTEXT_SURFACES_2D_PITCH 0x00000304 -#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE_SHIFT 0 -#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE_MASK 0x0000ffff -#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN_SHIFT 16 -#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN_MASK 0xffff0000 -#define NV04_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308 -#define NV04_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c - - -#define NV03_CONTEXT_ROP 0x00000043 - -#define NV03_CONTEXT_ROP_NOP 0x00000100 -#define NV03_CONTEXT_ROP_NOTIFY 0x00000104 -#define NV03_CONTEXT_ROP_DMA_NOTIFY 0x00000180 -#define NV03_CONTEXT_ROP_ROP 0x00000300 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_SHIFT 0 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_MASK 0x0000000f -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_CLEAR 0x00000000 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NOR 0x00000001 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND_INVERTED 0x00000002 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_COPY_INVERTED 0x00000003 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND_REVERSE 0x00000004 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_INVERT 0x00000005 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_XOR 0x00000006 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NAND 0x00000007 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND 0x00000008 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_EQUI 0x00000009 -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NOOP 0x0000000a -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR_INVERTED 0x0000000b -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_COPY 0x0000000c -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR_REVERSE 0x0000000d -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR 0x0000000e -#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_SET 0x0000000f -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_SHIFT 4 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_MASK 0x000000f0 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_CLEAR 0x00000000 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NOR 0x00000010 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND_INVERTED 0x00000020 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_COPY_INVERTED 0x00000030 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND_REVERSE 0x00000040 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_INVERT 0x00000050 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_XOR 0x00000060 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NAND 0x00000070 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND 0x00000080 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_EQUI 0x00000090 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NOOP 0x000000a0 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR_INVERTED 0x000000b0 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_COPY 0x000000c0 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR_REVERSE 0x000000d0 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR 0x000000e0 -#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_SET 0x000000f0 - - -#define NV04_IMAGE_PATTERN 0x00000044 - -#define NV04_IMAGE_PATTERN_NOP 0x00000100 -#define NV04_IMAGE_PATTERN_NOTIFY 0x00000104 -#define NV04_IMAGE_PATTERN_DMA_NOTIFY 0x00000180 -#define NV04_IMAGE_PATTERN_COLOR_FORMAT 0x00000300 -#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A16R5G6B5 0x00000001 -#define NV04_IMAGE_PATTERN_COLOR_FORMAT_X16A1R5G5B5 0x00000002 -#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A8R8G8B8 0x00000003 -#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT 0x00000304 -#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_CGA6 0x00000001 -#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_LE 0x00000002 -#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE 0x00000308 -#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_8X8 0x00000000 -#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_64X1 0x00000001 -#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_1X64 0x00000002 -#define NV04_IMAGE_PATTERN_PATTERN_SELECT 0x0000030c -#define NV04_IMAGE_PATTERN_PATTERN_SELECT_MONO 0x00000001 -#define NV04_IMAGE_PATTERN_PATTERN_SELECT_COLOR 0x00000002 -#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR0 0x00000310 -#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR1 0x00000314 -#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN0 0x00000318 -#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN1 0x0000031c -#define NV04_IMAGE_PATTERN_PATTERN_Y8(x) (0x00000400+((x)*4)) -#define NV04_IMAGE_PATTERN_PATTERN_Y8__SIZE 0x00000010 -#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0_SHIFT 0 -#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0_MASK 0x000000ff -#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1_SHIFT 8 -#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1_MASK 0x0000ff00 -#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2_SHIFT 16 -#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2_MASK 0x00ff0000 -#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3_SHIFT 24 -#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3_MASK 0xff000000 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5(x) (0x00000500+((x)*4)) -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5__SIZE 0x00000020 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0_SHIFT 0 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0_MASK 0x0000001f -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0_SHIFT 5 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0_MASK 0x000007e0 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0_SHIFT 11 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0_MASK 0x0000f800 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1_SHIFT 16 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1_MASK 0x001f0000 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1_SHIFT 21 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1_MASK 0x07e00000 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1_SHIFT 27 -#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1_MASK 0xf8000000 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5(x) (0x00000600+((x)*4)) -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5__SIZE 0x00000020 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0_SHIFT 0 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0_MASK 0x0000001f -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0_SHIFT 5 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0_MASK 0x000003e0 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0_SHIFT 10 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0_MASK 0x00007c00 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1_SHIFT 16 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1_MASK 0x001f0000 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1_SHIFT 21 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1_MASK 0x03e00000 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1_SHIFT 26 -#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1_MASK 0x7c000000 -#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8(x) (0x00000700+((x)*4)) -#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8__SIZE 0x00000040 -#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B_SHIFT 0 -#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B_MASK 0x000000ff -#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G_SHIFT 8 -#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G_MASK 0x0000ff00 -#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R_SHIFT 16 -#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R_MASK 0x00ff0000 - - -#define NV03_VIDEO_LUT_CURSOR_DAC 0x00000046 - -#define NV03_VIDEO_LUT_CURSOR_DAC_SYNCHRONIZE 0x00000100 -#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_IMAGE 0x00000104 -#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_CURSOR 0x00000108 -#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_DAC 0x0000010c -#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_NOTIFY 0x00000180 -#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_IMAGE(x) (0x00000184+((x)*4)) -#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_IMAGE__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_LUT(x) (0x0000018c+((x)*4)) -#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_LUT__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_CURSOR(x) (0x00000194+((x)*4)) -#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_CURSOR__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_GET 0x000002fc -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_OFFSET(x) (0x00000300+((x)*8)) -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_OFFSET__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT(x) (0x00000304+((x)*8)) -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_PITCH_SHIFT 0 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_PITCH_MASK 0x0000ffff -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_COLOR_SHIFT 16 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_COLOR_MASK 0x0fff0000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_NOTIFY_SHIFT 28 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_NOTIFY_MASK 0xf0000000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_OFFSET(x) (0x00000340+((x)*12)) -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_OFFSET__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT(x) (0x00000344+((x)*12)) -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_X_SHIFT 0 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_X_MASK 0x0000ffff -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_Y_SHIFT 16 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_Y_MASK 0xffff0000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_FORMAT(x) (0x00000348+((x)*12)) -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_FORMAT__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A 0x00000358 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_X_SHIFT 0 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_X_MASK 0x0000ffff -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_Y_SHIFT 16 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_Y_MASK 0xffff0000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE(x) (0x00000380+((x)*16)) -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_W_SHIFT 0 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_W_MASK 0x0000ffff -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_H_SHIFT 16 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_H_MASK 0xffff0000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC(x) (0x00000384+((x)*16)) -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_START_SHIFT 0 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_START_MASK 0x0000ffff -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_WIDTH_SHIFT 16 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_WIDTH_MASK 0x0fff0000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_POLARITY_SHIFT 28 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_POLARITY_MASK 0xf0000000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC(x) (0x00000388+((x)*16)) -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_START_SHIFT 0 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_START_MASK 0x0000ffff -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_WIDTH_SHIFT 16 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_WIDTH_MASK 0x0fff0000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_POLARITY_SHIFT 28 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_POLARITY_MASK 0xf0000000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE(x) (0x0000038c+((x)*16)) -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE__SIZE 0x00000002 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_WIDTH_SHIFT 0 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_WIDTH_MASK 0x0000ffff -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_HEIGHT_SHIFT 16 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_HEIGHT_MASK 0x0fff0000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_NOTIFY_SHIFT 28 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_NOTIFY_MASK 0xf0000000 -#define NV03_VIDEO_LUT_CURSOR_DAC_SET_PIXEL_CLOCK 0x000003a0 - - -#define NV03_DX3_TEXTURED_TRIANGLE 0x00000048 - -#define NV03_DX3_TEXTURED_TRIANGLE_NOP 0x00000100 -#define NV03_DX3_TEXTURED_TRIANGLE_NOTIFY 0x00000104 -#define NV03_DX3_TEXTURED_TRIANGLE_PATCH 0x0000010c -#define NV03_DX3_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180 -#define NV03_DX3_TEXTURED_TRIANGLE_DMA_TEXTURE 0x00000184 -#define NV03_DX3_TEXTURED_TRIANGLE_CLIP_RECTANGLE 0x00000188 -#define NV03_DX3_TEXTURED_TRIANGLE_SURFACE 0x0000018c -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_OFFSET 0x00000304 -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT 0x00000308 -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_MASK_SHIFT 0 -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_MASK_MASK 0x0000ffff -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_ENABLE_SHIFT 16 -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_ENABLE_MASK 0x000f0000 -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_SHIFT 20 -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_MASK 0x00f00000 -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MIN_SHIFT 24 -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MIN_MASK 0x0f000000 -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MAX_SHIFT 28 -#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MAX_MASK 0xf0000000 -#define NV03_DX3_TEXTURED_TRIANGLE_FILTER 0x0000030c -#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_X_SHIFT 0 -#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_X_MASK 0x0000001f -#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_Y_SHIFT 8 -#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_Y_MASK 0x00001f00 -#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SIZE_ADJUST_SHIFT 16 -#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SIZE_ADJUST_MASK 0x00ff0000 -#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR 0x00000310 -#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_B_SHIFT 0 -#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_B_MASK 0x000000ff -#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_G_SHIFT 8 -#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_G_MASK 0x0000ff00 -#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_R_SHIFT 16 -#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_R_MASK 0x00ff0000 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT 0x00000314 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_INTERPOLATOR_SHIFT 0 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_INTERPOLATOR_MASK 0x0000000f -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_U_SHIFT 4 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_U_MASK 0x00000030 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_V_SHIFT 6 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_V_MASK 0x000000c0 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SOURCE_COLOR_SHIFT 8 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SOURCE_COLOR_MASK 0x00000f00 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_CULLING_SHIFT 12 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_CULLING_MASK 0x00007000 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_PERSPECTIVE_ENABLE (1 << 15) -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_FUNC_SHIFT 16 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_FUNC_MASK 0x000f0000 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_WRITE_ENABLE_SHIFT 20 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_WRITE_ENABLE_MASK 0x00f00000 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_COLOR_WRITE_ENABLE_SHIFT 24 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_COLOR_WRITE_ENABLE_MASK 0x07000000 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_ROP_SHIFT 27 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_ROP_MASK 0x18000000 -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_BETA (1 << 29) -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_DST_BLEND (1 << 30) -#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SRC_BLEND (1 << 31) -#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL 0x00000318 -#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_REF_SHIFT 0 -#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_REF_MASK 0x000000ff -#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_FUNC_SHIFT 8 -#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_FUNC_MASK 0xffffff00 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR(x) (0x00001000+((x)*32)) -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR__SIZE 0x00000040 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I0_SHIFT 0 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I0_MASK 0x0000000f -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I1_SHIFT 4 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I1_MASK 0x000000f0 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I2_SHIFT 8 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I2_MASK 0x00000f00 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I3_SHIFT 12 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I3_MASK 0x0000f000 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I4_SHIFT 16 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I4_MASK 0x000f0000 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I5_SHIFT 20 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I5_MASK 0x00f00000 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_FOG_SHIFT 24 -#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_FOG_MASK 0xff000000 -#define NV03_DX3_TEXTURED_TRIANGLE_COLOR(x) (0x00001004+((x)*32)) -#define NV03_DX3_TEXTURED_TRIANGLE_COLOR__SIZE 0x00000040 -#define NV03_DX3_TEXTURED_TRIANGLE_X(x) (0x00001008+((x)*32)) -#define NV03_DX3_TEXTURED_TRIANGLE_X__SIZE 0x00000040 -#define NV03_DX3_TEXTURED_TRIANGLE_Y(x) (0x0000100c+((x)*32)) -#define NV03_DX3_TEXTURED_TRIANGLE_Y__SIZE 0x00000040 -#define NV03_DX3_TEXTURED_TRIANGLE_Z(x) (0x00001010+((x)*32)) -#define NV03_DX3_TEXTURED_TRIANGLE_Z__SIZE 0x00000040 -#define NV03_DX3_TEXTURED_TRIANGLE_M(x) (0x00001014+((x)*32)) -#define NV03_DX3_TEXTURED_TRIANGLE_M__SIZE 0x00000040 -#define NV03_DX3_TEXTURED_TRIANGLE_U(x) (0x00001018+((x)*32)) -#define NV03_DX3_TEXTURED_TRIANGLE_U__SIZE 0x00000040 -#define NV03_DX3_TEXTURED_TRIANGLE_V(x) (0x0000101c+((x)*32)) -#define NV03_DX3_TEXTURED_TRIANGLE_V__SIZE 0x00000040 - - -#define NV04_GDI_RECTANGLE_TEXT 0x0000004a - -#define NV04_GDI_RECTANGLE_TEXT_NOP 0x00000100 -#define NV04_GDI_RECTANGLE_TEXT_NOTIFY 0x00000104 -#define NV04_GDI_RECTANGLE_TEXT_PATCH 0x0000010c -#define NV04_GDI_RECTANGLE_TEXT_PM_TRIGGER 0x00000140 -#define NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180 -#define NV04_GDI_RECTANGLE_TEXT_DMA_FONTS 0x00000184 -#define NV04_GDI_RECTANGLE_TEXT_PATTERN 0x00000188 -#define NV04_GDI_RECTANGLE_TEXT_ROP 0x0000018c -#define NV04_GDI_RECTANGLE_TEXT_BETA1 0x00000190 -#define NV04_GDI_RECTANGLE_TEXT_BETA4 0x00000194 -#define NV04_GDI_RECTANGLE_TEXT_SURFACE 0x00000198 -#define NV04_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc -#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_AND 0x00000000 -#define NV04_GDI_RECTANGLE_TEXT_OPERATION_ROP_AND 0x00000001 -#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_AND 0x00000002 -#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY 0x00000003 -#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_PREMULT 0x00000004 -#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_PREMULT 0x00000005 -#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300 -#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5 0x00000001 -#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_X16A1R5G5B5 0x00000002 -#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8 0x00000003 -#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304 -#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_CGA6 0x00000001 -#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE 0x00000002 -#define NV04_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(x) (0x00000400+((x)*8)) -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT__SIZE 0x00000020 -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE(x) (0x00000404+((x)*8)) -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE__SIZE 0x00000020 -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0 0x000005f4 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1 0x000005f8 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_COLOR1_B 0x000005fc -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0(x) (0x00000600+((x)*8)) -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0__SIZE 0x00000020 -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1(x) (0x00000604+((x)*8)) -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1__SIZE 0x00000020 -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x000007ec -#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x000007f0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_COLOR1_C 0x000007f4 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_C 0x000007f8 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_POINT_C 0x000007fc -#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(x) (0x00000800+((x)*4)) -#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__SIZE 0x00000080 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x00000be4 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x00000be8 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_COLOR0_E 0x00000bec -#define NV04_GDI_RECTANGLE_TEXT_COLOR1_E 0x00000bf0 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x00000bf4 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x00000bf8 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_POINT_E 0x00000bfc -#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(x) (0x00000c00+((x)*4)) -#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__SIZE 0x00000080 -#define NV04_GDI_RECTANGLE_TEXT_FONT_F 0x00000ff0 -#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET_MASK 0x0fffffff -#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH_SHIFT 28 -#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH_MASK 0xf0000000 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0 0x00000ff4 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1 0x00000ff8 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_COLOR1_F 0x00000ffc -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F(x) (0x00001000+((x)*4)) -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F__SIZE 0x00000100 -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX_MASK 0x000000ff -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X_SHIFT 8 -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X_MASK 0x000fff00 -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y_SHIFT 20 -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y_MASK 0xfff00000 -#define NV04_GDI_RECTANGLE_TEXT_FONT_G 0x000017f0 -#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET_MASK 0x0fffffff -#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH_SHIFT 28 -#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH_MASK 0xf0000000 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0 0x000017f4 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1 0x000017f8 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_COLOR1_G 0x000017fc -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT(x) (0x00001800+((x)*8)) -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT__SIZE 0x00000100 -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X_SHIFT 0 -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X_MASK 0x0000ffff -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y_SHIFT 16 -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y_MASK 0xffff0000 -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX(x) (0x00001804+((x)*8)) -#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX__SIZE 0x00000100 - - -#define NV03_GDI_RECTANGLE_TEXT 0x0000004b - -#define NV03_GDI_RECTANGLE_TEXT_NOP 0x00000100 -#define NV03_GDI_RECTANGLE_TEXT_NOTIFY 0x00000104 -#define NV03_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180 -#define NV03_GDI_RECTANGLE_TEXT_PATTERN 0x00000184 -#define NV03_GDI_RECTANGLE_TEXT_ROP 0x00000188 -#define NV03_GDI_RECTANGLE_TEXT_BETA1 0x0000018c -#define NV03_GDI_RECTANGLE_TEXT_SURFACE 0x00000190 -#define NV03_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc -#define NV03_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300 -#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304 -#define NV03_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc -#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT 0x00000400 -#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE 0x00000404 -#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B 0x000007f4 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B 0x000007f8 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_COLOR1_B 0x000007fc -#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0 0x00000800 -#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1 0x00000804 -#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x00000bec -#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x00000bf0 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_COLOR1_C 0x00000bf4 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_C 0x00000bf8 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_POINT_C 0x00000bfc -#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(x) (0x00000c00+((x)*4)) -#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__SIZE 0x00000020 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0 0x00000fe8 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1 0x00000fec -#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_COLOR1_D 0x00000ff0 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D 0x00000ff4 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D 0x00000ff8 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_POINT_D 0x00000ffc -#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D(x) (0x00001000+((x)*4)) -#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D__SIZE 0x00000020 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x000013e4 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x000013e8 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_COLOR0_E 0x000013ec -#define NV03_GDI_RECTANGLE_TEXT_COLOR1_E 0x000013f0 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x000013f4 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x000013f8 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_POINT_E 0x000013fc -#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X_SHIFT 0 -#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X_MASK 0x0000ffff -#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y_SHIFT 16 -#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y_MASK 0xffff0000 -#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(x) (0x00001400+((x)*4)) -#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__SIZE 0x00000020 - - -#define NV04_SWIZZLED_SURFACE 0x00000052 - -#define NV04_SWIZZLED_SURFACE_NOP 0x00000100 -#define NV04_SWIZZLED_SURFACE_NOTIFY 0x00000104 -#define NV04_SWIZZLED_SURFACE_DMA_NOTIFY 0x00000180 -#define NV04_SWIZZLED_SURFACE_DMA_IMAGE 0x00000184 -#define NV04_SWIZZLED_SURFACE_FORMAT 0x00000300 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_SHIFT 0 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_MASK 0x000000ff -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y8 0x00000001 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000002 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000003 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_R5G6B5 0x00000004 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y16 0x00000005 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000006 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000007 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000008 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000009 -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_A8R8G8B8 0x0000000a -#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y32 0x0000000b -#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT 16 -#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_MASK 0x00ff0000 -#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT 24 -#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_MASK 0xff000000 -#define NV04_SWIZZLED_SURFACE_OFFSET 0x00000304 - - -#define NV04_CONTEXT_SURFACES_3D 0x00000053 - -#define NV04_CONTEXT_SURFACES_3D_NOP 0x00000100 -#define NV04_CONTEXT_SURFACES_3D_NOTIFY 0x00000104 -#define NV04_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180 -#define NV04_CONTEXT_SURFACES_3D_DMA_COLOR 0x00000184 -#define NV04_CONTEXT_SURFACES_3D_DMA_ZETA 0x00000188 -#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL 0x000002f8 -#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_SHIFT 0 -#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_MASK 0x0000ffff -#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_SHIFT 16 -#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_MASK 0xffff0000 -#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL 0x000002fc -#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_SHIFT 0 -#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_MASK 0x0000ffff -#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_SHIFT 16 -#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_MASK 0xffff0000 -#define NV04_CONTEXT_SURFACES_3D_FORMAT 0x00000300 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_SHIFT 0 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_MASK 0x000000ff -#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000001 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000002 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_R5G6B5 0x00000003 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000004 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000005 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000006 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000007 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_A8R8G8B8 0x00000008 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SHIFT 8 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_MASK 0x0000ff00 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_PITCH 0x00000100 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SWIZZLE 0x00000200 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U_SHIFT 16 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U_MASK 0x00ff0000 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V_SHIFT 24 -#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V_MASK 0xff000000 -#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE 0x00000304 -#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W_SHIFT 0 -#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W_MASK 0x0000ffff -#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H_SHIFT 16 -#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H_MASK 0xffff0000 -#define NV04_CONTEXT_SURFACES_3D_PITCH 0x00000308 -#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR_SHIFT 0 -#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR_MASK 0x0000ffff -#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA_SHIFT 16 -#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA_MASK 0xffff0000 -#define NV04_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x0000030c -#define NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000310 - - -#define NV04_DX5_TEXTURED_TRIANGLE 0x00000054 - -#define NV04_DX5_TEXTURED_TRIANGLE_NOP 0x00000100 -#define NV04_DX5_TEXTURED_TRIANGLE_NOTIFY 0x00000104 -#define NV04_DX5_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180 -#define NV04_DX5_TEXTURED_TRIANGLE_DMA_A 0x00000184 -#define NV04_DX5_TEXTURED_TRIANGLE_DMA_B 0x00000188 -#define NV04_DX5_TEXTURED_TRIANGLE_SURFACE 0x0000018c -#define NV04_DX5_TEXTURED_TRIANGLE_COLORKEY 0x00000300 -#define NV04_DX5_TEXTURED_TRIANGLE_OFFSET 0x00000304 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT 0x00000308 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_DMA_SHIFT 0 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_DMA_MASK 0x00000003 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_MATCH_SHIFT 2 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_MATCH_MASK 0x0000000c -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_SHIFT 4 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_MASK 0x00000030 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CENTER 0x00000010 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER 0x00000020 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_SHIFT 6 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_MASK 0x000000c0 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CENTER 0x00000040 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER 0x00000080 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_SHIFT 8 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_MASK 0x00000f00 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_Y8 0x00000100 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A1R5G5B5 0x00000200 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_X1R5G5B5 0x00000300 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A4R4G4B4 0x00000400 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_R5G6B5 0x00000500 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A8R8G8B8 0x00000600 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_X8R8G8B8 0x00000700 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT 12 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT 16 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_MASK 0x000f0000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT 20 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_MASK 0x00f00000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT 24 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MASK 0x07000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT 0x01000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT 0x02000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE 0x03000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER 0x04000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP 0x05000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_WRAPU (1 << 27) -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT 28 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MASK 0x70000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_REPEAT 0x10000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MIRRORED_REPEAT 0x20000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE 0x30000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_BORDER 0x40000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP 0x50000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_WRAPV (1 << 31) -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER 0x0000030c -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X_SHIFT 0 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X_MASK 0x000000ff -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y_SHIFT 8 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y_MASK 0x00007f00 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE (1 << 15) -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS_SHIFT 16 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS_MASK 0x00ff0000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_SHIFT 24 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_MASK 0x07000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST 0x01000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR 0x02000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE (1 << 27) -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_SHIFT 28 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_MASK 0x70000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST 0x10000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR 0x20000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE (1 << 31) -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND 0x00000310 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_SHIFT 0 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_MASK 0x0000000f -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_MASK_BIT_SHIFT 4 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_MASK_BIT_MASK 0x00000030 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_SHIFT 6 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_MASK 0x000000c0 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT 0x00000040 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD 0x00000080 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_PHONG 0x000000c0 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_SHIFT 8 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_MASK 0x00000f00 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE_SHIFT 12 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE_MASK 0x0000f000 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE_SHIFT 16 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE_MASK 0x000f0000 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_ALPHA_ENABLE_SHIFT 20 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_ALPHA_ENABLE_MASK 0x00f00000 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SRC_SHIFT 24 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SRC_MASK 0x0f000000 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_DST_SHIFT 28 -#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_DST_MASK 0xf0000000 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL 0x00000314 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF_SHIFT 0 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF_MASK 0x000000ff -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT 8 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_MASK 0x00000f00 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE (1 << 12) -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN (1 << 13) -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT 14 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_MASK 0x0000c000 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT 16 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_MASK 0x000f0000 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT 20 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_MASK 0x00300000 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE (1 << 22) -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE (1 << 23) -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT 24 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_MASK 0x3f000000 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT 30 -#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_MASK 0xc0000000 -#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR 0x00000318 -#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_B_SHIFT 0 -#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_B_MASK 0x000000ff -#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_G_SHIFT 8 -#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_G_MASK 0x0000ff00 -#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_R_SHIFT 16 -#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_R_MASK 0x00ff0000 -#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_A_SHIFT 24 -#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_A_MASK 0xff000000 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(x) (0x00000400+((x)*32)) -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX__SIZE 0x00000010 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SY(x) (0x00000404+((x)*32)) -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SY__SIZE 0x00000010 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SZ(x) (0x00000408+((x)*32)) -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SZ__SIZE 0x00000010 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_RHW(x) (0x0000040c+((x)*32)) -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_RHW__SIZE 0x00000010 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR(x) (0x00000410+((x)*32)) -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR__SIZE 0x00000010 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B_SHIFT 0 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B_MASK 0x000000ff -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G_SHIFT 8 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G_MASK 0x0000ff00 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R_SHIFT 16 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R_MASK 0x00ff0000 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A_SHIFT 24 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A_MASK 0xff000000 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR(x) (0x00000414+((x)*32)) -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR__SIZE 0x00000010 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B_SHIFT 0 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B_MASK 0x000000ff -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G_SHIFT 8 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G_MASK 0x0000ff00 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R_SHIFT 16 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R_MASK 0x00ff0000 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG_SHIFT 24 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG_MASK 0xff000000 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TU(x) (0x00000418+((x)*32)) -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TU__SIZE 0x00000010 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TV(x) (0x0000041c+((x)*32)) -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TV__SIZE 0x00000010 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(x) (0x00000600+((x)*4)) -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE__SIZE 0x00000040 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I0_SHIFT 0 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I0_MASK 0x0000000f -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I1_SHIFT 4 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I1_MASK 0x000000f0 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I2_SHIFT 8 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I2_MASK 0x00000f00 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I3_SHIFT 12 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I3_MASK 0x0000f000 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I4_SHIFT 16 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I4_MASK 0x000f0000 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I5_SHIFT 20 -#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000 - - -#define NV04_DX6_MULTITEX_TRIANGLE 0x00000055 - -#define NV04_DX6_MULTITEX_TRIANGLE_NOP 0x00000100 -#define NV04_DX6_MULTITEX_TRIANGLE_NOTIFY 0x00000104 -#define NV04_DX6_MULTITEX_TRIANGLE_DMA_NOTIFY 0x00000180 -#define NV04_DX6_MULTITEX_TRIANGLE_DMA_A 0x00000184 -#define NV04_DX6_MULTITEX_TRIANGLE_DMA_B 0x00000188 -#define NV04_DX6_MULTITEX_TRIANGLE_SURFACE 0x0000018c -#define NV04_DX6_MULTITEX_TRIANGLE_OFFSET(x) (0x00000308+((x)*4)) -#define NV04_DX6_MULTITEX_TRIANGLE_OFFSET__SIZE 0x00000002 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT(x) (0x00000310+((x)*4)) -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT__SIZE 0x00000002 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_DMA_SHIFT 0 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_DMA_MASK 0x0000000f -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_SHIFT 4 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_MASK 0x00000030 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_SHIFT 6 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_MASK 0x000000c0 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_COLOR_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_COLOR_MASK 0x00000f00 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT 12 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT 16 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U_MASK 0x000f0000 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT 20 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V_MASK 0x00f00000 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_SHIFT 24 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_MASK 0x07000000 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_WRAPU (1 << 27) -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_SHIFT 28 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_MASK 0x70000000 -#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_WRAPV (1 << 31) -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER(x) (0x00000318+((x)*4)) -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER__SIZE 0x00000002 -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X_SHIFT 0 -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X_MASK 0x000000ff -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y_MASK 0x00007f00 -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE (1 << 15) -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS_SHIFT 16 -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS_MASK 0x00ff0000 -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MINIFY_SHIFT 24 -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MINIFY_MASK 0x07000000 -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE (1 << 27) -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MAGNIFY_SHIFT 28 -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MAGNIFY_MASK 0x70000000 -#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE (1 << 31) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA 0x00000320 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE0 (1 << 0) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA0 (1 << 1) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_SHIFT 2 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_MASK 0x000000fc -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE1 (1 << 8) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA1 (1 << 9) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_SHIFT 10 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_MASK 0x0000fc00 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE2 (1 << 16) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA2 (1 << 17) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT2_SHIFT 18 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT2_MASK 0x00fc0000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE3 (1 << 24) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA3 (1 << 25) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT3_SHIFT 26 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT3_MASK 0x1c000000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_SHIFT 29 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_MASK 0xe0000000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR 0x00000324 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE0 (1 << 0) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA0 (1 << 1) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT0_SHIFT 2 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT0_MASK 0x000000fc -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE1 (1 << 8) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA1 (1 << 9) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT1_SHIFT 10 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT1_MASK 0x0000fc00 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE2 (1 << 16) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA2 (1 << 17) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT2_SHIFT 18 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT2_MASK 0x00fc0000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE3 (1 << 24) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA3 (1 << 25) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT3_SHIFT 26 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT3_MASK 0x1c000000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_OPERATION_SHIFT 29 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_OPERATION_MASK 0xe0000000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA 0x0000032c -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE0 (1 << 0) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA0 (1 << 1) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT0_SHIFT 2 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT0_MASK 0x000000fc -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE1 (1 << 8) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA1 (1 << 9) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT1_SHIFT 10 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT1_MASK 0x0000fc00 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE2 (1 << 16) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA2 (1 << 17) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT2_SHIFT 18 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT2_MASK 0x00fc0000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE3 (1 << 24) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA3 (1 << 25) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT3_SHIFT 26 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT3_MASK 0x1c000000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_OPERATION_SHIFT 29 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_OPERATION_MASK 0xe0000000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR 0x00000330 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE0 (1 << 0) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA0 (1 << 1) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT0_SHIFT 2 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT0_MASK 0x000000fc -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE1 (1 << 8) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA1 (1 << 9) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT1_SHIFT 10 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT1_MASK 0x0000fc00 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE2 (1 << 16) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA2 (1 << 17) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT2_SHIFT 18 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT2_MASK 0x00fc0000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE3 (1 << 24) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA3 (1 << 25) -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT3_SHIFT 26 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT3_MASK 0x1c000000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_OPERATION_SHIFT 29 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_OPERATION_MASK 0xe0000000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR 0x00000334 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_B_SHIFT 0 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_B_MASK 0x000000ff -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_G_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_G_MASK 0x0000ff00 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_R_SHIFT 16 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_R_MASK 0x00ff0000 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_A_SHIFT 24 -#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_A_MASK 0xff000000 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND 0x00000338 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_MASK_BIT_SHIFT 4 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_MASK_BIT_MASK 0x00000030 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_SHIFT 6 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_MASK 0x000000c0 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_MASK 0x00000f00 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE_SHIFT 12 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE_MASK 0x0000f000 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE_SHIFT 16 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE_MASK 0x000f0000 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_ALPHA_ENABLE_SHIFT 20 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_ALPHA_ENABLE_MASK 0x00f00000 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SRC_SHIFT 24 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SRC_MASK 0x0f000000 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_DST_SHIFT 28 -#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_DST_MASK 0xf0000000 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0 0x0000033c -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF_SHIFT 0 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF_MASK 0x000000ff -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_MASK 0x00000f00 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_TEST_ENABLE (1 << 12) -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ORIGIN (1 << 13) -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE_SHIFT 14 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE_MASK 0x0000c000 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_SHIFT 16 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_MASK 0x000f0000 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_SHIFT 20 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_MASK 0x00300000 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_DITHER_ENABLE (1 << 22) -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_PERSPECTIVE_ENABLE (1 << 23) -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_WRITE_ENABLE (1 << 24) -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_STENCIL_WRITE_ENABLE (1 << 25) -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_WRITE_ENABLE (1 << 26) -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_RED_WRITE_ENABLE (1 << 27) -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_GREEN_WRITE_ENABLE (1 << 28) -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_BLUE_WRITE_ENABLE (1 << 29) -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_SHIFT 30 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_MASK 0xc0000000 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1 0x00000340 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_TEST_ENABLE_SHIFT 0 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_TEST_ENABLE_MASK 0x0000000f -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC_SHIFT 4 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC_MASK 0x000000f0 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF_MASK 0x0000ff00 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ_SHIFT 16 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ_MASK 0x00ff0000 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE_SHIFT 24 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE_MASK 0xff000000 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2 0x00000344 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL_SHIFT 0 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL_MASK 0x0000000f -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL_SHIFT 4 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL_MASK 0x000000f0 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS_MASK 0x00000f00 -#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR 0x00000348 -#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_B_SHIFT 0 -#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_B_MASK 0x000000ff -#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_G_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_G_MASK 0x0000ff00 -#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_R_SHIFT 16 -#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_R_MASK 0x00ff0000 -#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_A_SHIFT 24 -#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_A_MASK 0xff000000 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SX(x) (0x00000400+((x)*40)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SX__SIZE 0x00000008 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SY(x) (0x00000404+((x)*40)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SY__SIZE 0x00000008 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SZ(x) (0x00000408+((x)*40)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SZ__SIZE 0x00000008 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_RHW(x) (0x0000040c+((x)*40)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_RHW__SIZE 0x00000008 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR(x) (0x00000410+((x)*40)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR__SIZE 0x00000008 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B_SHIFT 0 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B_MASK 0x000000ff -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G_MASK 0x0000ff00 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R_SHIFT 16 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R_MASK 0x00ff0000 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A_SHIFT 24 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A_MASK 0xff000000 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR(x) (0x00000414+((x)*40)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR__SIZE 0x00000008 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B_SHIFT 0 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B_MASK 0x000000ff -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G_MASK 0x0000ff00 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R_SHIFT 16 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R_MASK 0x00ff0000 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG_SHIFT 24 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG_MASK 0xff000000 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU0(x) (0x00000418+((x)*40)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU0__SIZE 0x00000008 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV0(x) (0x0000041c+((x)*40)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV0__SIZE 0x00000008 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU1(x) (0x00000420+((x)*40)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU1__SIZE 0x00000008 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV1(x) (0x00000424+((x)*40)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV1__SIZE 0x00000008 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE(x) (0x00000540+((x)*4)) -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE__SIZE 0x00000030 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I0_SHIFT 0 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I0_MASK 0x0000000f -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I1_SHIFT 4 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I1_MASK 0x000000f0 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I2_SHIFT 8 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I2_MASK 0x00000f00 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I3_SHIFT 12 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I3_MASK 0x0000f000 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I4_SHIFT 16 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I4_MASK 0x000f0000 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_SHIFT 20 -#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000 - - -#define NV10TCL 0x00000056 - -#define NV10TCL_NOP 0x00000100 -#define NV10TCL_NOTIFY 0x00000104 -#define NV10TCL_DMA_NOTIFY 0x00000180 -#define NV10TCL_DMA_IN_MEMORY0 0x00000184 -#define NV10TCL_DMA_IN_MEMORY1 0x00000188 -#define NV10TCL_DISPLAY_LIST 0x0000018c -#define NV10TCL_DMA_IN_MEMORY2 0x00000194 -#define NV10TCL_DMA_IN_MEMORY3 0x00000198 -#define NV10TCL_VIEWPORT_HORIZ 0x00000200 -#define NV10TCL_VIEWPORT_HORIZ_X_SHIFT 0 -#define NV10TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff -#define NV10TCL_VIEWPORT_HORIZ_W_SHIFT 16 -#define NV10TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 -#define NV10TCL_VIEWPORT_VERT 0x00000204 -#define NV10TCL_VIEWPORT_VERT_Y_SHIFT 0 -#define NV10TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff -#define NV10TCL_VIEWPORT_VERT_H_SHIFT 16 -#define NV10TCL_VIEWPORT_VERT_H_MASK 0xffff0000 -#define NV10TCL_BUFFER_FORMAT 0x00000208 -#define NV10TCL_BUFFER_PITCH 0x0000020c -#define NV10TCL_BUFFER_PITCH_COLOR_PITCH_SHIFT 0 -#define NV10TCL_BUFFER_PITCH_COLOR_PITCH_MASK 0x0000ffff -#define NV10TCL_BUFFER_PITCH_ZETA_PITCH_SHIFT 16 -#define NV10TCL_BUFFER_PITCH_ZETA_PITCH_MASK 0xffff0000 -#define NV10TCL_COLOR_OFFSET 0x00000210 -#define NV10TCL_ZETA_OFFSET 0x00000214 -#define NV10TCL_TX_OFFSET(x) (0x00000218+((x)*4)) -#define NV10TCL_TX_OFFSET__SIZE 0x00000002 -#define NV10TCL_TX_FORMAT(x) (0x00000220+((x)*4)) -#define NV10TCL_TX_FORMAT__SIZE 0x00000002 -#define NV10TCL_TX_FORMAT_CUBE_MAP (1 << 2) -#define NV10TCL_TX_FORMAT_FORMAT_SHIFT 7 -#define NV10TCL_TX_FORMAT_FORMAT_MASK 0x00000780 -#define NV10TCL_TX_FORMAT_FORMAT_L8 0x00000000 -#define NV10TCL_TX_FORMAT_FORMAT_A8 0x00000080 -#define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000100 -#define NV10TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000180 -#define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000200 -#define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000300 -#define NV10TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000380 -#define NV10TCL_TX_FORMAT_FORMAT_INDEX8 0x00000580 -#define NV10TCL_TX_FORMAT_FORMAT_DXT1 0x00000600 -#define NV10TCL_TX_FORMAT_FORMAT_DXT3 0x00000700 -#define NV10TCL_TX_FORMAT_FORMAT_DXT5 0x00000780 -#define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00000800 -#define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00000900 -#define NV10TCL_TX_FORMAT_FORMAT_L8_RECT 0x00000980 -#define NV10TCL_TX_FORMAT_FORMAT_A8L8 0x00000d00 -#define NV10TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00000d80 -#define NV10TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00000f00 -#define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00000e80 -#define NV10TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00001000 -#define NV10TCL_TX_FORMAT_FORMAT_A16 0x00001900 -#define NV10TCL_TX_FORMAT_FORMAT_A16_RECT 0x00001a80 -#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00002500 -#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00002580 -#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00002600 -#define NV10TCL_TX_FORMAT_NPOT (1 << 11) -#define NV10TCL_TX_FORMAT_MIPMAP_LEVELS_SHIFT 12 -#define NV10TCL_TX_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000 -#define NV10TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 16 -#define NV10TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x000f0000 -#define NV10TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 20 -#define NV10TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x00f00000 -#define NV10TCL_TX_FORMAT_WRAP_S_SHIFT 24 -#define NV10TCL_TX_FORMAT_WRAP_S_MASK 0x0f000000 -#define NV10TCL_TX_FORMAT_WRAP_S_REPEAT 0x01000000 -#define NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT 0x02000000 -#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE 0x03000000 -#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER 0x04000000 -#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP 0x05000000 -#define NV10TCL_TX_FORMAT_WRAP_T_SHIFT 28 -#define NV10TCL_TX_FORMAT_WRAP_T_MASK 0xf0000000 -#define NV10TCL_TX_FORMAT_WRAP_T_REPEAT 0x10000000 -#define NV10TCL_TX_FORMAT_WRAP_T_MIRRORED_REPEAT 0x20000000 -#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE 0x30000000 -#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_BORDER 0x40000000 -#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP 0x50000000 -#define NV10TCL_TX_ENABLE(x) (0x00000228+((x)*4)) -#define NV10TCL_TX_ENABLE__SIZE 0x00000002 -#define NV10TCL_TX_ENABLE_ANISOTROPY_SHIFT 4 -#define NV10TCL_TX_ENABLE_ANISOTROPY_MASK 0x00000030 -#define NV10TCL_TX_ENABLE_ENABLE (1 << 30) -#define NV10TCL_TX_NPOT_PITCH(x) (0x00000230+((x)*4)) -#define NV10TCL_TX_NPOT_PITCH__SIZE 0x00000002 -#define NV10TCL_TX_NPOT_PITCH_PITCH_SHIFT 16 -#define NV10TCL_TX_NPOT_PITCH_PITCH_MASK 0xffff0000 -#define NV10TCL_TX_NPOT_SIZE(x) (0x00000240+((x)*4)) -#define NV10TCL_TX_NPOT_SIZE__SIZE 0x00000002 -#define NV10TCL_TX_NPOT_SIZE_H_SHIFT 0 -#define NV10TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff -#define NV10TCL_TX_NPOT_SIZE_W_SHIFT 16 -#define NV10TCL_TX_NPOT_SIZE_W_MASK 0xffff0000 -#define NV10TCL_TX_FILTER(x) (0x00000248+((x)*4)) -#define NV10TCL_TX_FILTER__SIZE 0x00000002 -#define NV10TCL_TX_FILTER_MINIFY_SHIFT 24 -#define NV10TCL_TX_FILTER_MINIFY_MASK 0x0f000000 -#define NV10TCL_TX_FILTER_MINIFY_NEAREST 0x01000000 -#define NV10TCL_TX_FILTER_MINIFY_LINEAR 0x02000000 -#define NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000 -#define NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000 -#define NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000 -#define NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000 -#define NV10TCL_TX_FILTER_MAGNIFY_SHIFT 28 -#define NV10TCL_TX_FILTER_MAGNIFY_MASK 0xf0000000 -#define NV10TCL_TX_FILTER_MAGNIFY_NEAREST 0x10000000 -#define NV10TCL_TX_FILTER_MAGNIFY_LINEAR 0x20000000 -#define NV10TCL_TX_PALETTE_OFFSET(x) (0x00000250+((x)*4)) -#define NV10TCL_TX_PALETTE_OFFSET__SIZE 0x00000002 -#define NV10TCL_RC_IN_ALPHA(x) (0x00000260+((x)*4)) -#define NV10TCL_RC_IN_ALPHA__SIZE 0x00000002 -#define NV10TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0 -#define NV10TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f -#define NV10TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 -#define NV10TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 -#define NV10TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003 -#define NV10TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004 -#define NV10TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005 -#define NV10TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008 -#define NV10TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 0x00000009 -#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c -#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d -#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e -#define NV10TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f -#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4) -#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000 -#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010 -#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5 -#define NV10TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0 -#define NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 -#define NV10TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 -#define NV10TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 -#define NV10TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 -#define NV10TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 -#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 -#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 -#define NV10TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00 -#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12) -#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000 -#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000 -#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13 -#define NV10TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000 -#define NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 -#define NV10TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 -#define NV10TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 -#define NV10TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 -#define NV10TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 -#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 -#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 -#define NV10TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000 -#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20) -#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000 -#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000 -#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21 -#define NV10TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000 -#define NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 -#define NV10TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 -#define NV10TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 -#define NV10TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 -#define NV10TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 -#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 -#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 -#define NV10TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000 -#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28) -#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000 -#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000 -#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29 -#define NV10TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000 -#define NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 -#define NV10TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 -#define NV10TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 -#define NV10TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 -#define NV10TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 -#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 -#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 -#define NV10TCL_RC_IN_RGB(x) (0x00000268+((x)*4)) -#define NV10TCL_RC_IN_RGB__SIZE 0x00000002 -#define NV10TCL_RC_IN_RGB_D_INPUT_SHIFT 0 -#define NV10TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f -#define NV10TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 -#define NV10TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 -#define NV10TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003 -#define NV10TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004 -#define NV10TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005 -#define NV10TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008 -#define NV10TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009 -#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c -#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d -#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e -#define NV10TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f -#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4) -#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010 -#define NV10TCL_RC_IN_RGB_D_MAPPING_SHIFT 5 -#define NV10TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0 -#define NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 -#define NV10TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 -#define NV10TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 -#define NV10TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 -#define NV10TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 -#define NV10TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 -#define NV10TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 -#define NV10TCL_RC_IN_RGB_C_INPUT_SHIFT 8 -#define NV10TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00 -#define NV10TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 -#define NV10TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 -#define NV10TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300 -#define NV10TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400 -#define NV10TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500 -#define NV10TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800 -#define NV10TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900 -#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00 -#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00 -#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 -#define NV10TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00 -#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12) -#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000 -#define NV10TCL_RC_IN_RGB_C_MAPPING_SHIFT 13 -#define NV10TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000 -#define NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 -#define NV10TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 -#define NV10TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 -#define NV10TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 -#define NV10TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 -#define NV10TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 -#define NV10TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 -#define NV10TCL_RC_IN_RGB_B_INPUT_SHIFT 16 -#define NV10TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000 -#define NV10TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 -#define NV10TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 -#define NV10TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000 -#define NV10TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000 -#define NV10TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000 -#define NV10TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000 -#define NV10TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000 -#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000 -#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000 -#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 -#define NV10TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000 -#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20) -#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000 -#define NV10TCL_RC_IN_RGB_B_MAPPING_SHIFT 21 -#define NV10TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000 -#define NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 -#define NV10TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 -#define NV10TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 -#define NV10TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 -#define NV10TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 -#define NV10TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 -#define NV10TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 -#define NV10TCL_RC_IN_RGB_A_INPUT_SHIFT 24 -#define NV10TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 0x05000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 -#define NV10TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000 -#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28) -#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000 -#define NV10TCL_RC_IN_RGB_A_MAPPING_SHIFT 29 -#define NV10TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000 -#define NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 -#define NV10TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 -#define NV10TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 -#define NV10TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 -#define NV10TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 -#define NV10TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 -#define NV10TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 -#define NV10TCL_RC_COLOR(x) (0x00000270+((x)*4)) -#define NV10TCL_RC_COLOR__SIZE 0x00000002 -#define NV10TCL_RC_COLOR_B_SHIFT 0 -#define NV10TCL_RC_COLOR_B_MASK 0x000000ff -#define NV10TCL_RC_COLOR_G_SHIFT 8 -#define NV10TCL_RC_COLOR_G_MASK 0x0000ff00 -#define NV10TCL_RC_COLOR_R_SHIFT 16 -#define NV10TCL_RC_COLOR_R_MASK 0x00ff0000 -#define NV10TCL_RC_COLOR_A_SHIFT 24 -#define NV10TCL_RC_COLOR_A_MASK 0xff000000 -#define NV10TCL_RC_OUT_ALPHA(x) (0x00000278+((x)*4)) -#define NV10TCL_RC_OUT_ALPHA__SIZE 0x00000002 -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0 -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000 -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001 -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002 -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003 -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004 -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005 -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008 -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009 -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e -#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0 -#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 -#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00 -#define NV10TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12) -#define NV10TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13) -#define NV10TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14) -#define NV10TCL_RC_OUT_ALPHA_BIAS (1 << 15) -#define NV10TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000 -#define NV10TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000 -#define NV10TCL_RC_OUT_ALPHA_SCALE_SHIFT 17 -#define NV10TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000 -#define NV10TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000 -#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000 -#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000 -#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000 -#define NV10TCL_RC_OUT_RGB(x) (0x00000280+((x)*4)) -#define NV10TCL_RC_OUT_RGB__SIZE 0x00000002 -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0 -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000 -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001 -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002 -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003 -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004 -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005 -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008 -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009 -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e -#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0 -#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 -#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00 -#define NV10TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 12) -#define NV10TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13) -#define NV10TCL_RC_OUT_RGB_MUX_SUM (1 << 14) -#define NV10TCL_RC_OUT_RGB_BIAS (1 << 15) -#define NV10TCL_RC_OUT_RGB_BIAS_NONE 0x00000000 -#define NV10TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000 -#define NV10TCL_RC_OUT_RGB_SCALE_SHIFT 17 -#define NV10TCL_RC_OUT_RGB_SCALE_MASK 0x00000000 -#define NV10TCL_RC_OUT_RGB_SCALE_NONE 0x00000000 -#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000 -#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000 -#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000 -#define NV10TCL_RC_OUT_RGB_OPERATION_SHIFT 27 -#define NV10TCL_RC_OUT_RGB_OPERATION_MASK 0x38000000 -#define NV10TCL_RC_FINAL0 0x00000288 -#define NV10TCL_RC_FINAL0_D_INPUT_SHIFT 0 -#define NV10TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f -#define NV10TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 -#define NV10TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 -#define NV10TCL_RC_FINAL0_D_INPUT_FOG 0x00000003 -#define NV10TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004 -#define NV10TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005 -#define NV10TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008 -#define NV10TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009 -#define NV10TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c -#define NV10TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d -#define NV10TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e -#define NV10TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f -#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE (1 << 4) -#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010 -#define NV10TCL_RC_FINAL0_D_MAPPING_SHIFT 5 -#define NV10TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0 -#define NV10TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 -#define NV10TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 -#define NV10TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 -#define NV10TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 -#define NV10TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 -#define NV10TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 -#define NV10TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 -#define NV10TCL_RC_FINAL0_C_INPUT_SHIFT 8 -#define NV10TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00 -#define NV10TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 -#define NV10TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 -#define NV10TCL_RC_FINAL0_C_INPUT_FOG 0x00000300 -#define NV10TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400 -#define NV10TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500 -#define NV10TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800 -#define NV10TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900 -#define NV10TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00 -#define NV10TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00 -#define NV10TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 -#define NV10TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00 -#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12) -#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000 -#define NV10TCL_RC_FINAL0_C_MAPPING_SHIFT 13 -#define NV10TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000 -#define NV10TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 -#define NV10TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 -#define NV10TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 -#define NV10TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 -#define NV10TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 -#define NV10TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 -#define NV10TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 -#define NV10TCL_RC_FINAL0_B_INPUT_SHIFT 16 -#define NV10TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000 -#define NV10TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 -#define NV10TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 -#define NV10TCL_RC_FINAL0_B_INPUT_FOG 0x00030000 -#define NV10TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000 -#define NV10TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000 -#define NV10TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000 -#define NV10TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000 -#define NV10TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000 -#define NV10TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000 -#define NV10TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 -#define NV10TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000 -#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20) -#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000 -#define NV10TCL_RC_FINAL0_B_MAPPING_SHIFT 21 -#define NV10TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000 -#define NV10TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 -#define NV10TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 -#define NV10TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 -#define NV10TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 -#define NV10TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 -#define NV10TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 -#define NV10TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 -#define NV10TCL_RC_FINAL0_A_INPUT_SHIFT 24 -#define NV10TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000 -#define NV10TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 -#define NV10TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 -#define NV10TCL_RC_FINAL0_A_INPUT_FOG 0x03000000 -#define NV10TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000 -#define NV10TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000 -#define NV10TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000 -#define NV10TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000 -#define NV10TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000 -#define NV10TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000 -#define NV10TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 -#define NV10TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000 -#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28) -#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000 -#define NV10TCL_RC_FINAL0_A_MAPPING_SHIFT 29 -#define NV10TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000 -#define NV10TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 -#define NV10TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 -#define NV10TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 -#define NV10TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 -#define NV10TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 -#define NV10TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 -#define NV10TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 -#define NV10TCL_RC_FINAL1 0x0000028c -#define NV10TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7) -#define NV10TCL_RC_FINAL1_G_INPUT_SHIFT 8 -#define NV10TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00 -#define NV10TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100 -#define NV10TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200 -#define NV10TCL_RC_FINAL1_G_INPUT_FOG 0x00000300 -#define NV10TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400 -#define NV10TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500 -#define NV10TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800 -#define NV10TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900 -#define NV10TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00 -#define NV10TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00 -#define NV10TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 -#define NV10TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00 -#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12) -#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000 -#define NV10TCL_RC_FINAL1_G_MAPPING_SHIFT 13 -#define NV10TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000 -#define NV10TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000 -#define NV10TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000 -#define NV10TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 0x00006000 -#define NV10TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 -#define NV10TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 -#define NV10TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 -#define NV10TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000 -#define NV10TCL_RC_FINAL1_F_INPUT_SHIFT 16 -#define NV10TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000 -#define NV10TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000 -#define NV10TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000 -#define NV10TCL_RC_FINAL1_F_INPUT_FOG 0x00030000 -#define NV10TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000 -#define NV10TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000 -#define NV10TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000 -#define NV10TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000 -#define NV10TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000 -#define NV10TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000 -#define NV10TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 -#define NV10TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000 -#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20) -#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000 -#define NV10TCL_RC_FINAL1_F_MAPPING_SHIFT 21 -#define NV10TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000 -#define NV10TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000 -#define NV10TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000 -#define NV10TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000 -#define NV10TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 -#define NV10TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 -#define NV10TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 -#define NV10TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000 -#define NV10TCL_RC_FINAL1_E_INPUT_SHIFT 24 -#define NV10TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000 -#define NV10TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000 -#define NV10TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000 -#define NV10TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000 -#define NV10TCL_RC_FINAL1_E_INPUT_FOG 0x03000000 -#define NV10TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000 -#define NV10TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000 -#define NV10TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000 -#define NV10TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000 -#define NV10TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000 -#define NV10TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000 -#define NV10TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 -#define NV10TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000 -#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28) -#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000 -#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000 -#define NV10TCL_RC_FINAL1_E_MAPPING_SHIFT 29 -#define NV10TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000 -#define NV10TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 -#define NV10TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000 -#define NV10TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000 -#define NV10TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000 -#define NV10TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 -#define NV10TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 -#define NV10TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 -#define NV10TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000 -#define NV10TCL_LIGHT_MODEL 0x00000294 -#define NV10TCL_LIGHT_MODEL_COLOR_CONTROL (1 << 1) -#define NV10TCL_LIGHT_MODEL_LOCAL_VIEWER (1 << 16) -#define NV10TCL_COLOR_MATERIAL_ENABLE 0x00000298 -#define NV10TCL_COLOR_MATERIAL_ENABLE_SPECULAR (1 << 0) -#define NV10TCL_COLOR_MATERIAL_ENABLE_DIFFUSE (1 << 1) -#define NV10TCL_COLOR_MATERIAL_ENABLE_AMBIENT (1 << 2) -#define NV10TCL_COLOR_MATERIAL_ENABLE_EMISSION (1 << 3) -#define NV10TCL_FOG_MODE 0x0000029c -#define NV10TCL_FOG_MODE_EXP 0x00000800 -#define NV10TCL_FOG_MODE_EXP_2 0x00000802 -#define NV10TCL_FOG_MODE_EXP2 0x00000803 -#define NV10TCL_FOG_MODE_LINEAR 0x00000804 -#define NV10TCL_FOG_MODE_LINEAR_2 0x00002601 -#define NV10TCL_FOG_COORD_DIST 0x000002a0 -#define NV10TCL_FOG_ENABLE 0x000002a4 -#define NV10TCL_FOG_COLOR 0x000002a8 -#define NV10TCL_FOG_COLOR_R_SHIFT 0 -#define NV10TCL_FOG_COLOR_R_MASK 0x000000ff -#define NV10TCL_FOG_COLOR_G_SHIFT 8 -#define NV10TCL_FOG_COLOR_G_MASK 0x0000ff00 -#define NV10TCL_FOG_COLOR_B_SHIFT 16 -#define NV10TCL_FOG_COLOR_B_MASK 0x00ff0000 -#define NV10TCL_FOG_COLOR_A_SHIFT 24 -#define NV10TCL_FOG_COLOR_A_MASK 0xff000000 -#define NV10TCL_VIEWPORT_CLIP_MODE 0x000002b4 -#define NV10TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*4)) -#define NV10TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 -#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_SHIFT 0 -#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_MASK 0x000007ff -#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_LEFT_ENABLE (1 << 11) -#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_SHIFT 16 -#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_MASK 0x07ff0000 -#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_RIGHT_ENABLE (1 << 27) -#define NV10TCL_VIEWPORT_CLIP_VERT(x) (0x000002e0+((x)*4)) -#define NV10TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 -#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_T_SHIFT 0 -#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_T_MASK 0x000007ff -#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_TOP_ENABLE (1 << 11) -#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_B_SHIFT 16 -#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_B_MASK 0x07ff0000 -#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_BOTTOM_ENABLE (1 << 27) -#define NV10TCL_ALPHA_FUNC_ENABLE 0x00000300 -#define NV10TCL_BLEND_FUNC_ENABLE 0x00000304 -#define NV10TCL_CULL_FACE_ENABLE 0x00000308 -#define NV10TCL_DEPTH_TEST_ENABLE 0x0000030c -#define NV10TCL_DITHER_ENABLE 0x00000310 -#define NV10TCL_LIGHTING_ENABLE 0x00000314 -#define NV10TCL_POINT_PARAMETERS_ENABLE 0x00000318 -#define NV10TCL_POINT_SMOOTH_ENABLE 0x0000031c -#define NV10TCL_LINE_SMOOTH_ENABLE 0x00000320 -#define NV10TCL_POLYGON_SMOOTH_ENABLE 0x00000324 -#define NV10TCL_VERTEX_WEIGHT_ENABLE 0x00000328 -#define NV10TCL_STENCIL_ENABLE 0x0000032c -#define NV10TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000330 -#define NV10TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000334 -#define NV10TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000338 -#define NV10TCL_ALPHA_FUNC_FUNC 0x0000033c -#define NV10TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200 -#define NV10TCL_ALPHA_FUNC_FUNC_LESS 0x00000201 -#define NV10TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202 -#define NV10TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 -#define NV10TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 -#define NV10TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 -#define NV10TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV10TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 -#define NV10TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 -#define NV10TCL_ALPHA_FUNC_REF 0x00000340 -#define NV10TCL_BLEND_FUNC_SRC 0x00000344 -#define NV10TCL_BLEND_FUNC_SRC_ZERO 0x00000000 -#define NV10TCL_BLEND_FUNC_SRC_ONE 0x00000001 -#define NV10TCL_BLEND_FUNC_SRC_SRC_COLOR 0x00000300 -#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV10TCL_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302 -#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV10TCL_BLEND_FUNC_SRC_DST_ALPHA 0x00000304 -#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV10TCL_BLEND_FUNC_SRC_DST_COLOR 0x00000306 -#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307 -#define NV10TCL_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308 -#define NV10TCL_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001 -#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV10TCL_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003 -#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV10TCL_BLEND_FUNC_DST 0x00000348 -#define NV10TCL_BLEND_FUNC_DST_ZERO 0x00000000 -#define NV10TCL_BLEND_FUNC_DST_ONE 0x00000001 -#define NV10TCL_BLEND_FUNC_DST_SRC_COLOR 0x00000300 -#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV10TCL_BLEND_FUNC_DST_SRC_ALPHA 0x00000302 -#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV10TCL_BLEND_FUNC_DST_DST_ALPHA 0x00000304 -#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV10TCL_BLEND_FUNC_DST_DST_COLOR 0x00000306 -#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307 -#define NV10TCL_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308 -#define NV10TCL_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001 -#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV10TCL_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003 -#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV10TCL_BLEND_COLOR 0x0000034c -#define NV10TCL_BLEND_COLOR_B_SHIFT 0 -#define NV10TCL_BLEND_COLOR_B_MASK 0x000000ff -#define NV10TCL_BLEND_COLOR_G_SHIFT 8 -#define NV10TCL_BLEND_COLOR_G_MASK 0x0000ff00 -#define NV10TCL_BLEND_COLOR_R_SHIFT 16 -#define NV10TCL_BLEND_COLOR_R_MASK 0x00ff0000 -#define NV10TCL_BLEND_COLOR_A_SHIFT 24 -#define NV10TCL_BLEND_COLOR_A_MASK 0xff000000 -#define NV10TCL_BLEND_EQUATION 0x00000350 -#define NV10TCL_BLEND_EQUATION_FUNC_ADD 0x00008006 -#define NV10TCL_BLEND_EQUATION_MIN 0x00008007 -#define NV10TCL_BLEND_EQUATION_MAX 0x00008008 -#define NV10TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a -#define NV10TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b -#define NV10TCL_DEPTH_FUNC 0x00000354 -#define NV10TCL_DEPTH_FUNC_NEVER 0x00000200 -#define NV10TCL_DEPTH_FUNC_LESS 0x00000201 -#define NV10TCL_DEPTH_FUNC_EQUAL 0x00000202 -#define NV10TCL_DEPTH_FUNC_LEQUAL 0x00000203 -#define NV10TCL_DEPTH_FUNC_GREATER 0x00000204 -#define NV10TCL_DEPTH_FUNC_GREATER 0x00000204 -#define NV10TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 -#define NV10TCL_DEPTH_FUNC_GEQUAL 0x00000206 -#define NV10TCL_DEPTH_FUNC_ALWAYS 0x00000207 -#define NV10TCL_COLOR_MASK 0x00000358 -#define NV10TCL_COLOR_MASK_B (1 << 0) -#define NV10TCL_COLOR_MASK_G (1 << 8) -#define NV10TCL_COLOR_MASK_R (1 << 16) -#define NV10TCL_COLOR_MASK_A (1 << 24) -#define NV10TCL_DEPTH_WRITE_ENABLE 0x0000035c -#define NV10TCL_STENCIL_MASK 0x00000360 -#define NV10TCL_STENCIL_FUNC_FUNC 0x00000364 -#define NV10TCL_STENCIL_FUNC_FUNC_NEVER 0x00000200 -#define NV10TCL_STENCIL_FUNC_FUNC_LESS 0x00000201 -#define NV10TCL_STENCIL_FUNC_FUNC_EQUAL 0x00000202 -#define NV10TCL_STENCIL_FUNC_FUNC_LEQUAL 0x00000203 -#define NV10TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 -#define NV10TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 -#define NV10TCL_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV10TCL_STENCIL_FUNC_FUNC_GEQUAL 0x00000206 -#define NV10TCL_STENCIL_FUNC_FUNC_ALWAYS 0x00000207 -#define NV10TCL_STENCIL_FUNC_REF 0x00000368 -#define NV10TCL_STENCIL_FUNC_MASK 0x0000036c -#define NV10TCL_STENCIL_OP_FAIL 0x00000370 -#define NV10TCL_STENCIL_OP_FAIL_ZERO 0x00000000 -#define NV10TCL_STENCIL_OP_FAIL_INVERT 0x0000150a -#define NV10TCL_STENCIL_OP_FAIL_KEEP 0x00001e00 -#define NV10TCL_STENCIL_OP_FAIL_REPLACE 0x00001e01 -#define NV10TCL_STENCIL_OP_FAIL_INCR 0x00001e02 -#define NV10TCL_STENCIL_OP_FAIL_DECR 0x00001e03 -#define NV10TCL_STENCIL_OP_FAIL_INCR_WRAP 0x00008507 -#define NV10TCL_STENCIL_OP_FAIL_DECR_WRAP 0x00008508 -#define NV10TCL_STENCIL_OP_ZFAIL 0x00000374 -#define NV10TCL_STENCIL_OP_ZFAIL_ZERO 0x00000000 -#define NV10TCL_STENCIL_OP_ZFAIL_INVERT 0x0000150a -#define NV10TCL_STENCIL_OP_ZFAIL_KEEP 0x00001e00 -#define NV10TCL_STENCIL_OP_ZFAIL_REPLACE 0x00001e01 -#define NV10TCL_STENCIL_OP_ZFAIL_INCR 0x00001e02 -#define NV10TCL_STENCIL_OP_ZFAIL_DECR 0x00001e03 -#define NV10TCL_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507 -#define NV10TCL_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508 -#define NV10TCL_STENCIL_OP_ZPASS 0x00000378 -#define NV10TCL_STENCIL_OP_ZPASS_ZERO 0x00000000 -#define NV10TCL_STENCIL_OP_ZPASS_INVERT 0x0000150a -#define NV10TCL_STENCIL_OP_ZPASS_KEEP 0x00001e00 -#define NV10TCL_STENCIL_OP_ZPASS_REPLACE 0x00001e01 -#define NV10TCL_STENCIL_OP_ZPASS_INCR 0x00001e02 -#define NV10TCL_STENCIL_OP_ZPASS_DECR 0x00001e03 -#define NV10TCL_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507 -#define NV10TCL_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508 -#define NV10TCL_SHADE_MODEL 0x0000037c -#define NV10TCL_SHADE_MODEL_FLAT 0x00001d00 -#define NV10TCL_SHADE_MODEL_SMOOTH 0x00001d01 -#define NV10TCL_LINE_WIDTH 0x00000380 -#define NV10TCL_POLYGON_OFFSET_FACTOR 0x00000384 -#define NV10TCL_POLYGON_OFFSET_UNITS 0x00000388 -#define NV10TCL_POLYGON_MODE_FRONT 0x0000038c -#define NV10TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 -#define NV10TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 -#define NV10TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 -#define NV10TCL_POLYGON_MODE_BACK 0x00000390 -#define NV10TCL_POLYGON_MODE_BACK_POINT 0x00001b00 -#define NV10TCL_POLYGON_MODE_BACK_LINE 0x00001b01 -#define NV10TCL_POLYGON_MODE_BACK_FILL 0x00001b02 -#define NV10TCL_DEPTH_RANGE_NEAR 0x00000394 -#define NV10TCL_DEPTH_RANGE_FAR 0x00000398 -#define NV10TCL_CULL_FACE 0x0000039c -#define NV10TCL_CULL_FACE_FRONT 0x00000404 -#define NV10TCL_CULL_FACE_BACK 0x00000405 -#define NV10TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 -#define NV10TCL_FRONT_FACE 0x000003a0 -#define NV10TCL_FRONT_FACE_CW 0x00000900 -#define NV10TCL_FRONT_FACE_CCW 0x00000901 -#define NV10TCL_NORMALIZE_ENABLE 0x000003a4 -#define NV10TCL_COLOR_MATERIAL_R 0x000003a8 -#define NV10TCL_COLOR_MATERIAL_G 0x000003ac -#define NV10TCL_COLOR_MATERIAL_B 0x000003b0 -#define NV10TCL_COLOR_MATERIAL_A 0x000003b4 -#define NV10TCL_COLOR_CONTROL 0x000003b8 -#define NV10TCL_ENABLED_LIGHTS 0x000003bc -#define NV10TCL_ENABLED_LIGHTS_LIGHT0 (1 << 0) -#define NV10TCL_ENABLED_LIGHTS_LIGHT1 (1 << 2) -#define NV10TCL_ENABLED_LIGHTS_LIGHT2 (1 << 4) -#define NV10TCL_ENABLED_LIGHTS_LIGHT3 (1 << 6) -#define NV10TCL_ENABLED_LIGHTS_LIGHT4 (1 << 8) -#define NV10TCL_ENABLED_LIGHTS_LIGHT5 (1 << 10) -#define NV10TCL_ENABLED_LIGHTS_LIGHT6 (1 << 12) -#define NV10TCL_ENABLED_LIGHTS_LIGHT7 (1 << 14) -#define NV10TCL_CLIP_PLANE_ENABLE(x) (0x000003c0+((x)*4)) -#define NV10TCL_CLIP_PLANE_ENABLE__SIZE 0x00000008 -#define NV10TCL_CLIP_PLANE_ENABLE_FALSE 0x00000000 -#define NV10TCL_CLIP_PLANE_ENABLE_EYE_LINEAR 0x00002400 -#define NV10TCL_CLIP_PLANE_ENABLE_OBJECT_LINEAR 0x00002401 -#define NV10TCL_TX_MATRIX_ENABLE(x) (0x000003e0+((x)*4)) -#define NV10TCL_TX_MATRIX_ENABLE__SIZE 0x00000002 -#define NV10TCL_VIEW_MATRIX_ENABLE 0x000003e8 -#define NV10TCL_VIEW_MATRIX_ENABLE_MODELVIEW1 (1 << 0) -#define NV10TCL_VIEW_MATRIX_ENABLE_MODELVIEW0 (1 << 1) -#define NV10TCL_VIEW_MATRIX_ENABLE_PROJECTION (1 << 2) -#define NV10TCL_POINT_SIZE 0x000003ec -#define NV10TCL_MODELVIEW0_MATRIX(x) (0x00000400+((x)*4)) -#define NV10TCL_MODELVIEW0_MATRIX__SIZE 0x00000010 -#define NV10TCL_MODELVIEW1_MATRIX(x) (0x00000440+((x)*4)) -#define NV10TCL_MODELVIEW1_MATRIX__SIZE 0x00000010 -#define NV10TCL_INVERSE_MODELVIEW0_MATRIX(x) (0x00000480+((x)*4)) -#define NV10TCL_INVERSE_MODELVIEW0_MATRIX__SIZE 0x00000010 -#define NV10TCL_INVERSE_MODELVIEW1_MATRIX(x) (0x000004c0+((x)*4)) -#define NV10TCL_INVERSE_MODELVIEW1_MATRIX__SIZE 0x00000010 -#define NV10TCL_PROJECTION_MATRIX(x) (0x00000500+((x)*4)) -#define NV10TCL_PROJECTION_MATRIX__SIZE 0x00000010 -#define NV10TCL_TX0_MATRIX(x) (0x00000540+((x)*4)) -#define NV10TCL_TX0_MATRIX__SIZE 0x00000010 -#define NV10TCL_TX1_MATRIX(x) (0x00000580+((x)*4)) -#define NV10TCL_TX1_MATRIX__SIZE 0x00000010 -#define NV10TCL_CLIP_PLANE_A(x) (0x00000600+((x)*16)) -#define NV10TCL_CLIP_PLANE_A__SIZE 0x00000008 -#define NV10TCL_CLIP_PLANE_B(x) (0x00000604+((x)*16)) -#define NV10TCL_CLIP_PLANE_B__SIZE 0x00000008 -#define NV10TCL_CLIP_PLANE_C(x) (0x00000608+((x)*16)) -#define NV10TCL_CLIP_PLANE_C__SIZE 0x00000008 -#define NV10TCL_CLIP_PLANE_D(x) (0x0000060c+((x)*16)) -#define NV10TCL_CLIP_PLANE_D__SIZE 0x00000008 -#define NV10TCL_FOG_EQUATION_CONSTANT 0x00000680 -#define NV10TCL_FOG_EQUATION_LINEAR 0x00000684 -#define NV10TCL_FOG_EQUATION_QUADRATIC 0x00000688 -#define NV10TCL_FRONT_MATERIAL_SHININESS(x) (0x000006a0+((x)*4)) -#define NV10TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006 -#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000006c4 -#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000006c8 -#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000006cc -#define NV10TCL_VIEWPORT_SCALE_X 0x000006e8 -#define NV10TCL_VIEWPORT_SCALE_Y 0x000006ec -#define NV10TCL_VIEWPORT_SCALE_Z 0x000006f0 -#define NV10TCL_VIEWPORT_SCALE_W 0x000006f4 -#define NV10TCL_POINT_PARAMETER(x) (0x000006f8+((x)*4)) -#define NV10TCL_POINT_PARAMETER__SIZE 0x00000008 -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00000800+((x)*128)) -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008 -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00000804+((x)*128)) -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008 -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00000808+((x)*128)) -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008 -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000080c+((x)*128)) -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008 -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00000810+((x)*128)) -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008 -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00000814+((x)*128)) -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008 -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00000818+((x)*128)) -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008 -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000081c+((x)*128)) -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008 -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00000820+((x)*128)) -#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008 -#define NV10TCL_LIGHT_HALF_VECTOR_X(x) (0x00000828+((x)*128)) -#define NV10TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008 -#define NV10TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000082c+((x)*128)) -#define NV10TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008 -#define NV10TCL_LIGHT_HALF_VECTOR_Z(x) (0x00000830+((x)*128)) -#define NV10TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008 -#define NV10TCL_LIGHT_DIRECTION_X(x) (0x00000834+((x)*128)) -#define NV10TCL_LIGHT_DIRECTION_X__SIZE 0x00000008 -#define NV10TCL_LIGHT_DIRECTION_Y(x) (0x00000838+((x)*128)) -#define NV10TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008 -#define NV10TCL_LIGHT_DIRECTION_Z(x) (0x0000083c+((x)*128)) -#define NV10TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008 -#define NV10TCL_LIGHT_SPOT_CUTOFF_A(x) (0x00000840+((x)*128)) -#define NV10TCL_LIGHT_SPOT_CUTOFF_A__SIZE 0x00000008 -#define NV10TCL_LIGHT_SPOT_CUTOFF_B(x) (0x00000844+((x)*128)) -#define NV10TCL_LIGHT_SPOT_CUTOFF_B__SIZE 0x00000008 -#define NV10TCL_LIGHT_SPOT_CUTOFF_C(x) (0x00000848+((x)*128)) -#define NV10TCL_LIGHT_SPOT_CUTOFF_C__SIZE 0x00000008 -#define NV10TCL_LIGHT_SPOT_DIR_X(x) (0x0000084c+((x)*128)) -#define NV10TCL_LIGHT_SPOT_DIR_X__SIZE 0x00000008 -#define NV10TCL_LIGHT_SPOT_DIR_Y(x) (0x00000850+((x)*128)) -#define NV10TCL_LIGHT_SPOT_DIR_Y__SIZE 0x00000008 -#define NV10TCL_LIGHT_SPOT_DIR_Z(x) (0x00000854+((x)*128)) -#define NV10TCL_LIGHT_SPOT_DIR_Z__SIZE 0x00000008 -#define NV10TCL_LIGHT_SPOT_CUTOFF_D(x) (0x00000858+((x)*128)) -#define NV10TCL_LIGHT_SPOT_CUTOFF_D__SIZE 0x00000008 -#define NV10TCL_LIGHT_POSITION_X(x) (0x0000085c+((x)*128)) -#define NV10TCL_LIGHT_POSITION_X__SIZE 0x00000008 -#define NV10TCL_LIGHT_POSITION_Y(x) (0x00000860+((x)*128)) -#define NV10TCL_LIGHT_POSITION_Y__SIZE 0x00000008 -#define NV10TCL_LIGHT_POSITION_Z(x) (0x00000864+((x)*128)) -#define NV10TCL_LIGHT_POSITION_Z__SIZE 0x00000008 -#define NV10TCL_LIGHT_ATTENUATION_CONSTANT(x) (0x00000868+((x)*128)) -#define NV10TCL_LIGHT_ATTENUATION_CONSTANT__SIZE 0x00000008 -#define NV10TCL_LIGHT_ATTENUATION_LINEAR(x) (0x0000086c+((x)*128)) -#define NV10TCL_LIGHT_ATTENUATION_LINEAR__SIZE 0x00000008 -#define NV10TCL_LIGHT_ATTENUATION_QUADRATIC(x) (0x00000870+((x)*128)) -#define NV10TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008 -#define NV10TCL_VERTEX_POS_3F_X 0x00000c00 -#define NV10TCL_VERTEX_POS_3F_Y 0x00000c04 -#define NV10TCL_VERTEX_POS_3F_Z 0x00000c08 -#define NV10TCL_VERTEX_POS_4F_X 0x00000c18 -#define NV10TCL_VERTEX_POS_4F_Y 0x00000c1c -#define NV10TCL_VERTEX_POS_4F_Z 0x00000c20 -#define NV10TCL_VERTEX_POS_4F_W 0x00000c24 -#define NV10TCL_VERTEX_NOR_3F_X 0x00000c30 -#define NV10TCL_VERTEX_NOR_3F_Y 0x00000c34 -#define NV10TCL_VERTEX_NOR_3F_Z 0x00000c38 -#define NV10TCL_VERTEX_NOR_3I_XY 0x00000c40 -#define NV10TCL_VERTEX_NOR_3I_XY_X_SHIFT 0 -#define NV10TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff -#define NV10TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16 -#define NV10TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000 -#define NV10TCL_VERTEX_NOR_3I_Z 0x00000c44 -#define NV10TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0 -#define NV10TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff -#define NV10TCL_VERTEX_COL_4F_R 0x00000c50 -#define NV10TCL_VERTEX_COL_4F_G 0x00000c54 -#define NV10TCL_VERTEX_COL_4F_B 0x00000c58 -#define NV10TCL_VERTEX_COL_4F_A 0x00000c5c -#define NV10TCL_VERTEX_COL_3F_R 0x00000c60 -#define NV10TCL_VERTEX_COL_3F_G 0x00000c64 -#define NV10TCL_VERTEX_COL_3F_B 0x00000c68 -#define NV10TCL_VERTEX_COL_4I 0x00000c6c -#define NV10TCL_VERTEX_COL_4I_R_SHIFT 0 -#define NV10TCL_VERTEX_COL_4I_R_MASK 0x000000ff -#define NV10TCL_VERTEX_COL_4I_G_SHIFT 8 -#define NV10TCL_VERTEX_COL_4I_G_MASK 0x0000ff00 -#define NV10TCL_VERTEX_COL_4I_B_SHIFT 16 -#define NV10TCL_VERTEX_COL_4I_B_MASK 0x00ff0000 -#define NV10TCL_VERTEX_COL_4I_A_SHIFT 24 -#define NV10TCL_VERTEX_COL_4I_A_MASK 0xff000000 -#define NV10TCL_VERTEX_COL2_3F_R 0x00000c80 -#define NV10TCL_VERTEX_COL2_3F_G 0x00000c84 -#define NV10TCL_VERTEX_COL2_3F_B 0x00000c88 -#define NV10TCL_VERTEX_COL2_3I 0x00000c8c -#define NV10TCL_VERTEX_COL2_3I_R_SHIFT 0 -#define NV10TCL_VERTEX_COL2_3I_R_MASK 0x000000ff -#define NV10TCL_VERTEX_COL2_3I_G_SHIFT 8 -#define NV10TCL_VERTEX_COL2_3I_G_MASK 0x0000ff00 -#define NV10TCL_VERTEX_COL2_3I_B_SHIFT 16 -#define NV10TCL_VERTEX_COL2_3I_B_MASK 0x00ff0000 -#define NV10TCL_VERTEX_TX0_2F_S 0x00000c90 -#define NV10TCL_VERTEX_TX0_2F_T 0x00000c94 -#define NV10TCL_VERTEX_TX0_2I 0x00000c98 -#define NV10TCL_VERTEX_TX0_2I_S_SHIFT 0 -#define NV10TCL_VERTEX_TX0_2I_S_MASK 0x0000ffff -#define NV10TCL_VERTEX_TX0_2I_T_SHIFT 16 -#define NV10TCL_VERTEX_TX0_2I_T_MASK 0xffff0000 -#define NV10TCL_VERTEX_TX0_4F_S 0x00000ca0 -#define NV10TCL_VERTEX_TX0_4F_T 0x00000ca4 -#define NV10TCL_VERTEX_TX0_4F_R 0x00000ca8 -#define NV10TCL_VERTEX_TX0_4F_Q 0x00000cac -#define NV10TCL_VERTEX_TX0_4I_ST 0x00000cb0 -#define NV10TCL_VERTEX_TX0_4I_ST_S_SHIFT 0 -#define NV10TCL_VERTEX_TX0_4I_ST_S_MASK 0x0000ffff -#define NV10TCL_VERTEX_TX0_4I_ST_T_SHIFT 16 -#define NV10TCL_VERTEX_TX0_4I_ST_T_MASK 0xffff0000 -#define NV10TCL_VERTEX_TX0_4I_RQ 0x00000cb4 -#define NV10TCL_VERTEX_TX0_4I_RQ_R_SHIFT 0 -#define NV10TCL_VERTEX_TX0_4I_RQ_R_MASK 0x0000ffff -#define NV10TCL_VERTEX_TX0_4I_RQ_Q_SHIFT 16 -#define NV10TCL_VERTEX_TX0_4I_RQ_Q_MASK 0xffff0000 -#define NV10TCL_VERTEX_TX1_2F_S 0x00000cb8 -#define NV10TCL_VERTEX_TX1_2F_T 0x00000cbc -#define NV10TCL_VERTEX_TX1_2I 0x00000cc0 -#define NV10TCL_VERTEX_TX1_2I_S_SHIFT 0 -#define NV10TCL_VERTEX_TX1_2I_S_MASK 0x0000ffff -#define NV10TCL_VERTEX_TX1_2I_T_SHIFT 16 -#define NV10TCL_VERTEX_TX1_2I_T_MASK 0xffff0000 -#define NV10TCL_VERTEX_TX1_4F_S 0x00000cc8 -#define NV10TCL_VERTEX_TX1_4F_T 0x00000ccc -#define NV10TCL_VERTEX_TX1_4F_R 0x00000cd0 -#define NV10TCL_VERTEX_TX1_4F_Q 0x00000cd4 -#define NV10TCL_VERTEX_TX1_4I_ST 0x00000cd8 -#define NV10TCL_VERTEX_TX1_4I_ST_S_SHIFT 0 -#define NV10TCL_VERTEX_TX1_4I_ST_S_MASK 0x0000ffff -#define NV10TCL_VERTEX_TX1_4I_ST_T_SHIFT 16 -#define NV10TCL_VERTEX_TX1_4I_ST_T_MASK 0xffff0000 -#define NV10TCL_VERTEX_TX1_4I_RQ 0x00000cdc -#define NV10TCL_VERTEX_TX1_4I_RQ_R_SHIFT 0 -#define NV10TCL_VERTEX_TX1_4I_RQ_R_MASK 0x0000ffff -#define NV10TCL_VERTEX_TX1_4I_RQ_Q_SHIFT 16 -#define NV10TCL_VERTEX_TX1_4I_RQ_Q_MASK 0xffff0000 -#define NV10TCL_VERTEX_FOG_1F 0x00000ce0 -#define NV10TCL_VERTEX_WGH_1F 0x00000ce4 -#define NV10TCL_EDGEFLAG_ENABLE 0x00000cec -#define NV10TCL_VERTEX_ARRAY_VALIDATE 0x00000cf0 -#define NV10TCL_VERTEX_ARRAY_OFFSET_POS 0x00000d00 -#define NV10TCL_VERTEX_ARRAY_FORMAT_POS 0x00000d04 -#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_SHIFT 0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_MASK 0x0000000f -#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_FIELDS_SHIFT 4 -#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_FIELDS_MASK 0x000000f0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_STRIDE_SHIFT 8 -#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_STRIDE_MASK 0x0000ff00 -#define NV10TCL_VERTEX_ARRAY_OFFSET_COL 0x00000d08 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL 0x00000d0c -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_TYPE_SHIFT 0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_TYPE_MASK 0x0000000f -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_FIELDS_SHIFT 4 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_FIELDS_MASK 0x000000f0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_STRIDE_SHIFT 8 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_STRIDE_MASK 0x0000ff00 -#define NV10TCL_VERTEX_ARRAY_OFFSET_COL2 0x00000d10 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2 0x00000d14 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_TYPE_SHIFT 0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_TYPE_MASK 0x0000000f -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_FIELDS_SHIFT 4 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_FIELDS_MASK 0x000000f0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_STRIDE_SHIFT 8 -#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_STRIDE_MASK 0x0000ff00 -#define NV10TCL_VERTEX_ARRAY_OFFSET_TX0 0x00000d18 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0 0x00000d1c -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_TYPE_SHIFT 0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_TYPE_MASK 0x0000000f -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_FIELDS_SHIFT 4 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_FIELDS_MASK 0x000000f0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_STRIDE_SHIFT 8 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_STRIDE_MASK 0x0000ff00 -#define NV10TCL_VERTEX_ARRAY_OFFSET_TX1 0x00000d20 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1 0x00000d24 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_TYPE_SHIFT 0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_TYPE_MASK 0x0000000f -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_FIELDS_SHIFT 4 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_FIELDS_MASK 0x000000f0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_STRIDE_SHIFT 8 -#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_STRIDE_MASK 0x0000ff00 -#define NV10TCL_VERTEX_ARRAY_OFFSET_NOR 0x00000d28 -#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR 0x00000d2c -#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_TYPE_SHIFT 0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_TYPE_MASK 0x0000000f -#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_FIELDS_SHIFT 4 -#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_FIELDS_MASK 0x000000f0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_STRIDE_SHIFT 8 -#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_STRIDE_MASK 0x0000ff00 -#define NV10TCL_VERTEX_ARRAY_OFFSET_WGH 0x00000d30 -#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH 0x00000d34 -#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_TYPE_SHIFT 0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_TYPE_MASK 0x0000000f -#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_FIELDS_SHIFT 4 -#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_FIELDS_MASK 0x000000f0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_STRIDE_SHIFT 8 -#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_STRIDE_MASK 0x0000ff00 -#define NV10TCL_VERTEX_ARRAY_OFFSET_FOG 0x00000d38 -#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG 0x00000d3c -#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_TYPE_SHIFT 0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_TYPE_MASK 0x0000000f -#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_FIELDS_SHIFT 4 -#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_FIELDS_MASK 0x000000f0 -#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_STRIDE_SHIFT 8 -#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_STRIDE_MASK 0x0000ff00 -#define NV10TCL_VERTEX_BEGIN_END 0x00000dfc -#define NV10TCL_VERTEX_BEGIN_END_STOP 0x00000000 -#define NV10TCL_VERTEX_BEGIN_END_POINTS 0x00000001 -#define NV10TCL_VERTEX_BEGIN_END_LINES 0x00000002 -#define NV10TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003 -#define NV10TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004 -#define NV10TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005 -#define NV10TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006 -#define NV10TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007 -#define NV10TCL_VERTEX_BEGIN_END_QUADS 0x00000008 -#define NV10TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009 -#define NV10TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a -#define NV10TCL_DRAW_INDEX 0x00000e00 -#define NV10TCL_DRAW_INDEX_I0_SHIFT 0 -#define NV10TCL_DRAW_INDEX_I0_MASK 0x0000ffff -#define NV10TCL_DRAW_INDEX_I1_SHIFT 24 -#define NV10TCL_DRAW_INDEX_I1_MASK 0xff000000 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END 0x000013fc -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_STOP 0x00000000 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_POINTS 0x00000001 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINES 0x00000002 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINE_LOOP 0x00000003 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINE_STRIP 0x00000004 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLES 0x00000005 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLE_STRIP 0x00000006 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLE_FAN 0x00000007 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_QUADS 0x00000008 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_QUAD_STRIP 0x00000009 -#define NV10TCL_VERTEX_BUFFER_BEGIN_END_POLYGON 0x0000000a -#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS 0x00001400 -#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_FIRST_SHIFT 0 -#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_FIRST_MASK 0x0000ffff -#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_LAST_SHIFT 24 -#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_LAST_MASK 0xff000000 -#define NV10TCL_VERTEX_ARRAY_DATA 0x00001800 - - -#define NV04_CONTEXT_COLOR_KEY 0x00000057 - - - -#define NV03_CONTEXT_SURFACES_2D 0x00000058 - -#define NV03_CONTEXT_SURFACES_2D_SYNCHRONIZE 0x00000100 -#define NV03_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180 -#define NV03_CONTEXT_SURFACES_2D_DMA_SOURCE 0x00000184 -#define NV03_CONTEXT_SURFACES_2D_DMA_DESTIN 0x00000188 -#define NV03_CONTEXT_SURFACES_2D_COLOR_FORMAT 0x00000300 -#define NV03_CONTEXT_SURFACES_2D_PITCH 0x00000304 -#define NV03_CONTEXT_SURFACES_2D_PITCH_SOURCE_SHIFT 0 -#define NV03_CONTEXT_SURFACES_2D_PITCH_SOURCE_MASK 0x0000ffff -#define NV03_CONTEXT_SURFACES_2D_PITCH_DESTIN_SHIFT 16 -#define NV03_CONTEXT_SURFACES_2D_PITCH_DESTIN_MASK 0xffff0000 -#define NV03_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308 -#define NV03_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c - - -#define NV03_CONTEXT_SURFACES_3D 0x0000005a - -#define NV03_CONTEXT_SURFACES_3D_SYNCHRONIZE 0x00000100 -#define NV03_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180 -#define NV03_CONTEXT_SURFACES_3D_DMA_SURFACE 0x00000184 -#define NV03_CONTEXT_SURFACES_3D_PITCH 0x00000300 -#define NV03_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x00000304 -#define NV03_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000308 - - -#define NV04_RENDER_SOLID_LINE 0x0000005c - -#define NV04_RENDER_SOLID_LINE_SURFACE 0x00000198 - - -#define NV04_RENDER_SOLID_TRIANGLE 0x0000005d - - - -#define NV04_RENDER_SOLID_RECTANGLE 0x0000005e - -#define NV04_RENDER_SOLID_RECTANGLE_SURFACE 0x00000198 - - -#define NV04_IMAGE_BLIT 0x0000005f - -#define NV04_IMAGE_BLIT_NOP 0x00000100 -#define NV04_IMAGE_BLIT_NOTIFY 0x00000104 -#define NV04_IMAGE_BLIT_DMA_NOTIFY 0x00000180 -#define NV04_IMAGE_BLIT_COLOR_KEY 0x00000184 -#define NV04_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188 -#define NV04_IMAGE_BLIT_PATTERN 0x0000018c -#define NV04_IMAGE_BLIT_ROP 0x00000190 -#define NV04_IMAGE_BLIT_BETA4 0x00000198 -#define NV04_IMAGE_BLIT_SURFACE 0x0000019c -#define NV04_IMAGE_BLIT_OPERATION 0x000002fc -#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY_AND 0x00000000 -#define NV04_IMAGE_BLIT_OPERATION_ROP_AND 0x00000001 -#define NV04_IMAGE_BLIT_OPERATION_BLEND_AND 0x00000002 -#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY 0x00000003 -#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY_PREMULT 0x00000004 -#define NV04_IMAGE_BLIT_OPERATION_BLEND_PREMULT 0x00000005 - - -#define NV04_INDEXED_IMAGE_FROM_CPU 0x00000060 - -#define NV04_INDEXED_IMAGE_FROM_CPU_NOP 0x00000100 -#define NV04_INDEXED_IMAGE_FROM_CPU_NOTIFY 0x00000104 -#define NV04_INDEXED_IMAGE_FROM_CPU_PATCH 0x0000010c -#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 -#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_LUT 0x00000184 -#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR_FORMAT 0x000003e8 -#define NV04_INDEXED_IMAGE_FROM_CPU_INDEX_FORMAT 0x000003ec -#define NV04_INDEXED_IMAGE_FROM_CPU_LUT_OFFSET 0x000003f0 -#define NV04_INDEXED_IMAGE_FROM_CPU_POINT 0x000003f4 -#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_OUT 0x000003f8 -#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_IN 0x000003fc -#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR 0x00000400 - - -#define NV04_IMAGE_FROM_CPU 0x00000061 - -#define NV04_IMAGE_FROM_CPU_BETA4 0x00000198 -#define NV04_IMAGE_FROM_CPU_SURFACE 0x0000019c - - -#define NV10_CONTEXT_SURFACES_2D 0x00000062 - - - -#define NV05_SCALED_IMAGE_FROM_MEMORY 0x00000063 - -#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc -#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000 -#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001 -#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002 - - -#define NV01_IMAGE_SRCCOPY_AND 0x00000064 - -#define NV01_IMAGE_SRCCOPY_AND_NOTIFY 0x00000104 -#define NV01_IMAGE_SRCCOPY_AND_DMA_NOTIFY 0x00000180 -#define NV01_IMAGE_SRCCOPY_AND_IMAGE_OUTPUT 0x00000200 -#define NV01_IMAGE_SRCCOPY_AND_IMAGE_INPUT 0x00000204 - - -#define NV05_INDEXED_IMAGE_FROM_CPU 0x00000064 - -#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_KEY 0x00000188 -#define NV05_INDEXED_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x0000018c -#define NV05_INDEXED_IMAGE_FROM_CPU_PATTERN 0x00000190 -#define NV05_INDEXED_IMAGE_FROM_CPU_ROP 0x00000194 -#define NV05_INDEXED_IMAGE_FROM_CPU_BETA1 0x00000198 -#define NV05_INDEXED_IMAGE_FROM_CPU_BETA4 0x0000019c -#define NV05_INDEXED_IMAGE_FROM_CPU_SURFACE 0x000001a0 -#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000003e0 -#define NV05_INDEXED_IMAGE_FROM_CPU_OPERATION 0x000003e4 -#define NV05_INDEXED_IMAGE_FROM_CPU_INDICES 0x00000400 - - -#define NV05_IMAGE_FROM_CPU 0x00000065 - -#define NV05_IMAGE_FROM_CPU_BETA4 0x00000198 -#define NV05_IMAGE_FROM_CPU_SURFACE 0x0000019c - - -#define NV05_STRETCHED_IMAGE_FROM_CPU 0x00000066 - -#define NV05_STRETCHED_IMAGE_FROM_CPU_BETA4 0x00000194 -#define NV05_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000198 -#define NV05_STRETCHED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8 - - -#define NV04_IMAGE_BLEND_PREMULT 0x00000067 - -#define NV04_IMAGE_BLEND_PREMULT_NOP 0x00000100 -#define NV04_IMAGE_BLEND_PREMULT_NOTIFY 0x00000104 -#define NV04_IMAGE_BLEND_PREMULT_DMA_NOTIFY 0x00000180 -#define NV04_IMAGE_BLEND_PREMULT_IMAGE_OUTPUT 0x00000200 -#define NV04_IMAGE_BLEND_PREMULT_BETA_INPUT 0x00000204 -#define NV04_IMAGE_BLEND_PREMULT_IMAGE_INPUT 0x00000208 - - -#define NV03_CHANNEL_PIO 0x0000006a - - - -#define NV03_CHANNEL_DMA 0x0000006b - - - -#define NV04_BETA_SOLID 0x00000072 - -#define NV04_BETA_SOLID_NOP 0x00000100 -#define NV04_BETA_SOLID_NOTIFY 0x00000104 -#define NV04_BETA_SOLID_DMA_NOTIFY 0x00000180 -#define NV04_BETA_SOLID_BETA_OUTPUT 0x00000200 -#define NV04_BETA_SOLID_BETA_FACTOR 0x00000300 - - -#define NV04_STRETCHED_IMAGE_FROM_CPU 0x00000076 - - - -#define NV04_SCALED_IMAGE_FROM_MEMORY 0x00000077 - -#define NV04_SCALED_IMAGE_FROM_MEMORY_NOP 0x00000100 -#define NV04_SCALED_IMAGE_FROM_MEMORY_NOTIFY 0x00000104 -#define NV04_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180 -#define NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184 -#define NV04_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188 -#define NV04_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c -#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190 -#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA4 0x00000194 -#define NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000198 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008 -#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005 -#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308 -#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_SHIFT 0 -#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_MASK 0x0000ffff -#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT 16 -#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_MASK 0xffff0000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c -#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_SHIFT 0 -#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_MASK 0x0000ffff -#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT 16 -#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_MASK 0xffff0000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT 0x00000310 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X_SHIFT 0 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X_MASK 0x0000ffff -#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT 16 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_MASK 0xffff0000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE 0x00000314 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W_SHIFT 0 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W_MASK 0x0000ffff -#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT 16 -#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_MASK 0xffff0000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_DU_DX 0x00000318 -#define NV04_SCALED_IMAGE_FROM_MEMORY_DV_DY 0x0000031c -#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE 0x00000400 -#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_W_SHIFT 0 -#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_W_MASK 0x0000ffff -#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT 16 -#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_MASK 0xffff0000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT 0x00000404 -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH_SHIFT 0 -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH_MASK 0x0000ffff -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_SHIFT 16 -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_MASK 0x00ff0000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER 0x00010000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CORNER 0x00020000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_SHIFT 24 -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_MASK 0xff000000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE 0x00000000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_BILINEAR 0x01000000 -#define NV04_SCALED_IMAGE_FROM_MEMORY_ADDRESS 0x00000408 -#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT 0x0000040c -#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_X_SHIFT 0 -#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_X_MASK 0x0000ffff -#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_Y_SHIFT 16 -#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_Y_MASK 0xffff0000 - - -#define NV10_TEXTURE_FROM_CPU 0x0000007b - -#define NV10_TEXTURE_FROM_CPU_NOP 0x00000100 -#define NV10_TEXTURE_FROM_CPU_NOTIFY 0x00000104 -#define NV10_TEXTURE_FROM_CPU_WAIT_FOR_IDLE 0x00000108 -#define NV10_TEXTURE_FROM_CPU_PM_TRIGGER 0x00000140 -#define NV10_TEXTURE_FROM_CPU_DMA_NOTIFY 0x00000180 -#define NV10_TEXTURE_FROM_CPU_SURFACE 0x00000184 -#define NV10_TEXTURE_FROM_CPU_COLOR_FORMAT 0x00000300 -#define NV10_TEXTURE_FROM_CPU_POINT 0x00000304 -#define NV10_TEXTURE_FROM_CPU_POINT_X_SHIFT 0 -#define NV10_TEXTURE_FROM_CPU_POINT_X_MASK 0x0000ffff -#define NV10_TEXTURE_FROM_CPU_POINT_Y_SHIFT 16 -#define NV10_TEXTURE_FROM_CPU_POINT_Y_MASK 0xffff0000 -#define NV10_TEXTURE_FROM_CPU_SIZE 0x00000308 -#define NV10_TEXTURE_FROM_CPU_SIZE_W_SHIFT 0 -#define NV10_TEXTURE_FROM_CPU_SIZE_W_MASK 0x0000ffff -#define NV10_TEXTURE_FROM_CPU_SIZE_H_SHIFT 16 -#define NV10_TEXTURE_FROM_CPU_SIZE_H_MASK 0xffff0000 -#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL 0x0000030c -#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X_SHIFT 0 -#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X_MASK 0x0000ffff -#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W_SHIFT 16 -#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W_MASK 0xffff0000 -#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL 0x00000310 -#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y_SHIFT 0 -#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y_MASK 0x0000ffff -#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H_SHIFT 16 -#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H_MASK 0xffff0000 -#define NV10_TEXTURE_FROM_CPU_COLOR(x) (0x00000400+((x)*4)) -#define NV10_TEXTURE_FROM_CPU_COLOR__SIZE 0x00000700 - - -#define NV10_VIDEO_DISPLAY 0x0000007c - - - -#define NV10_DVD_SUBPICTURE 0x00000088 - - - -#define NV10_SCALED_IMAGE_FROM_MEMORY 0x00000089 - -#define NV10_SCALED_IMAGE_FROM_MEMORY_WAIT_FOR_IDLE 0x00000108 - - -#define NV10_IMAGE_FROM_CPU 0x0000008a - -#define NV10_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8 - - -#define NV10_CONTEXT_SURFACES_3D 0x00000093 - - - -#define NV10_DX5_TEXTURE_TRIANGLE 0x00000094 - - - -#define NV10_DX6_MULTI_TEXTURE_TRIANGLE 0x00000095 - - - -#define NV11TCL 0x00000096 - -#define NV11TCL_COLOR_LOGIC_OP_ENABLE 0x00000d40 -#define NV11TCL_COLOR_LOGIC_OP_OP 0x00000d44 -#define NV11TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500 -#define NV11TCL_COLOR_LOGIC_OP_OP_AND 0x00001501 -#define NV11TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502 -#define NV11TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503 -#define NV11TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504 -#define NV11TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505 -#define NV11TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506 -#define NV11TCL_COLOR_LOGIC_OP_OP_OR 0x00001507 -#define NV11TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508 -#define NV11TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509 -#define NV11TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a -#define NV11TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b -#define NV11TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c -#define NV11TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d -#define NV11TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e -#define NV11TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f - - -#define NV20TCL 0x00000097 - -#define NV20TCL_NOP 0x00000100 -#define NV20TCL_NOTIFY 0x00000104 -#define NV20TCL_DMA_NOTIFY 0x00000180 -#define NV20TCL_DMA_IN_MEMORY0 0x00000184 -#define NV20TCL_DMA_IN_MEMORY1 0x00000188 -#define NV20TCL_DMA_IN_MEMORY2 0x00000194 -#define NV20TCL_DMA_IN_MEMORY3 0x00000198 -#define NV20TCL_DMA_IN_MEMORY6 0x000001a4 -#define NV20TCL_DMA_IN_MEMORY7 0x000001a8 -#define NV20TCL_VIEWPORT_HORIZ 0x00000200 -#define NV20TCL_VIEWPORT_VERT 0x00000204 -#define NV20TCL_BUFFER_FORMAT 0x00000208 -#define NV20TCL_BUFFER_PITCH 0x0000020c -#define NV20TCL_COLOR_OFFSET 0x00000210 -#define NV20TCL_ZETA_OFFSET 0x00000214 -#define NV20TCL_RC_IN_ALPHA(x) (0x00000260+((x)*4)) -#define NV20TCL_RC_IN_ALPHA__SIZE 0x00000008 -#define NV20TCL_RC_FINAL0 0x00000288 -#define NV20TCL_RC_FINAL1 0x0000028c -#define NV20TCL_LIGHT_CONTROL 0x00000294 -#define NV20TCL_FOG_MODE 0x0000029c -#define NV20TCL_FOG_COORD_DIST 0x000002a0 -#define NV20TCL_FOG_ENABLE 0x000002a4 -#define NV20TCL_FOG_COLOR 0x000002a8 -#define NV20TCL_VIEWPORT_CLIP_MODE 0x000002b4 -#define NV20TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*4)) -#define NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 -#define NV20TCL_VIEWPORT_CLIP_VERT(x) (0x000002e0+((x)*4)) -#define NV20TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 -#define NV20TCL_ALPHA_FUNC_ENABLE 0x00000300 -#define NV20TCL_BLEND_FUNC_ENABLE 0x00000304 -#define NV20TCL_CULL_FACE_ENABLE 0x00000308 -#define NV20TCL_DEPTH_TEST_ENABLE 0x0000030c -#define NV20TCL_DITHER_ENABLE 0x00000310 -#define NV20TCL_LIGHTING_ENABLE 0x00000314 -#define NV20TCL_POINT_PARAMETERS_ENABLE 0x00000318 -#define NV20TCL_LINE_SMOOTH_ENABLE 0x00000320 -#define NV20TCL_POLYGON_SMOOTH_ENABLE 0x00000324 -#define NV20TCL_STENCIL_ENABLE 0x0000032c -#define NV20TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000330 -#define NV20TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000334 -#define NV20TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000338 -#define NV20TCL_ALPHA_FUNC_FUNC 0x0000033c -#define NV20TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200 -#define NV20TCL_ALPHA_FUNC_FUNC_LESS 0x00000201 -#define NV20TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202 -#define NV20TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 -#define NV20TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 -#define NV20TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 -#define NV20TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV20TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 -#define NV20TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 -#define NV20TCL_ALPHA_FUNC_REF 0x00000340 -#define NV20TCL_BLEND_FUNC_SRC 0x00000344 -#define NV20TCL_BLEND_FUNC_SRC_ZERO 0x00000000 -#define NV20TCL_BLEND_FUNC_SRC_ONE 0x00000001 -#define NV20TCL_BLEND_FUNC_SRC_SRC_COLOR 0x00000300 -#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV20TCL_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302 -#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV20TCL_BLEND_FUNC_SRC_DST_ALPHA 0x00000304 -#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV20TCL_BLEND_FUNC_SRC_DST_COLOR 0x00000306 -#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307 -#define NV20TCL_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308 -#define NV20TCL_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001 -#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV20TCL_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003 -#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV20TCL_BLEND_FUNC_DST 0x00000348 -#define NV20TCL_BLEND_FUNC_DST_ZERO 0x00000000 -#define NV20TCL_BLEND_FUNC_DST_ONE 0x00000001 -#define NV20TCL_BLEND_FUNC_DST_SRC_COLOR 0x00000300 -#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV20TCL_BLEND_FUNC_DST_SRC_ALPHA 0x00000302 -#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV20TCL_BLEND_FUNC_DST_DST_ALPHA 0x00000304 -#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV20TCL_BLEND_FUNC_DST_DST_COLOR 0x00000306 -#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307 -#define NV20TCL_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308 -#define NV20TCL_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001 -#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV20TCL_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003 -#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV20TCL_BLEND_COLOR 0x0000034c -#define NV20TCL_BLEND_EQUATION 0x00000350 -#define NV20TCL_BLEND_EQUATION_FUNC_ADD 0x00008006 -#define NV20TCL_BLEND_EQUATION_MIN 0x00008007 -#define NV20TCL_BLEND_EQUATION_MAX 0x00008008 -#define NV20TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a -#define NV20TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b -#define NV20TCL_DEPTH_FUNC 0x00000354 -#define NV20TCL_DEPTH_FUNC_NEVER 0x00000200 -#define NV20TCL_DEPTH_FUNC_LESS 0x00000201 -#define NV20TCL_DEPTH_FUNC_EQUAL 0x00000202 -#define NV20TCL_DEPTH_FUNC_LEQUAL 0x00000203 -#define NV20TCL_DEPTH_FUNC_GREATER 0x00000204 -#define NV20TCL_DEPTH_FUNC_GREATER 0x00000204 -#define NV20TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 -#define NV20TCL_DEPTH_FUNC_GEQUAL 0x00000206 -#define NV20TCL_DEPTH_FUNC_ALWAYS 0x00000207 -#define NV20TCL_COLOR_MASK 0x00000358 -#define NV20TCL_DEPTH_WRITE_ENABLE 0x0000035c -#define NV20TCL_STENCIL_MASK 0x00000360 -#define NV20TCL_STENCIL_FUNC_FUNC 0x00000364 -#define NV20TCL_STENCIL_FUNC_FUNC_NEVER 0x00000200 -#define NV20TCL_STENCIL_FUNC_FUNC_LESS 0x00000201 -#define NV20TCL_STENCIL_FUNC_FUNC_EQUAL 0x00000202 -#define NV20TCL_STENCIL_FUNC_FUNC_LEQUAL 0x00000203 -#define NV20TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 -#define NV20TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 -#define NV20TCL_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV20TCL_STENCIL_FUNC_FUNC_GEQUAL 0x00000206 -#define NV20TCL_STENCIL_FUNC_FUNC_ALWAYS 0x00000207 -#define NV20TCL_STENCIL_FUNC_REF 0x00000368 -#define NV20TCL_STENCIL_FUNC_MASK 0x0000036c -#define NV20TCL_STENCIL_OP_FAIL 0x00000370 -#define NV20TCL_STENCIL_OP_FAIL_ZERO 0x00000000 -#define NV20TCL_STENCIL_OP_FAIL_INVERT 0x0000150a -#define NV20TCL_STENCIL_OP_FAIL_KEEP 0x00001e00 -#define NV20TCL_STENCIL_OP_FAIL_REPLACE 0x00001e01 -#define NV20TCL_STENCIL_OP_FAIL_INCR 0x00001e02 -#define NV20TCL_STENCIL_OP_FAIL_DECR 0x00001e03 -#define NV20TCL_STENCIL_OP_FAIL_INCR_WRAP 0x00008507 -#define NV20TCL_STENCIL_OP_FAIL_DECR_WRAP 0x00008508 -#define NV20TCL_STENCIL_OP_ZFAIL 0x00000374 -#define NV20TCL_STENCIL_OP_ZFAIL_ZERO 0x00000000 -#define NV20TCL_STENCIL_OP_ZFAIL_INVERT 0x0000150a -#define NV20TCL_STENCIL_OP_ZFAIL_KEEP 0x00001e00 -#define NV20TCL_STENCIL_OP_ZFAIL_REPLACE 0x00001e01 -#define NV20TCL_STENCIL_OP_ZFAIL_INCR 0x00001e02 -#define NV20TCL_STENCIL_OP_ZFAIL_DECR 0x00001e03 -#define NV20TCL_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507 -#define NV20TCL_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508 -#define NV20TCL_STENCIL_OP_ZPASS 0x00000378 -#define NV20TCL_STENCIL_OP_ZPASS_ZERO 0x00000000 -#define NV20TCL_STENCIL_OP_ZPASS_INVERT 0x0000150a -#define NV20TCL_STENCIL_OP_ZPASS_KEEP 0x00001e00 -#define NV20TCL_STENCIL_OP_ZPASS_REPLACE 0x00001e01 -#define NV20TCL_STENCIL_OP_ZPASS_INCR 0x00001e02 -#define NV20TCL_STENCIL_OP_ZPASS_DECR 0x00001e03 -#define NV20TCL_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507 -#define NV20TCL_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508 -#define NV20TCL_SHADE_MODEL 0x0000037c -#define NV20TCL_SHADE_MODEL_FLAT 0x00001d00 -#define NV20TCL_SHADE_MODEL_SMOOTH 0x00001d01 -#define NV20TCL_LINE_WIDTH 0x00000380 -#define NV20TCL_POLYGON_OFFSET_FACTOR 0x00000384 -#define NV20TCL_POLYGON_OFFSET_UNITS 0x00000388 -#define NV20TCL_POLYGON_MODE_FRONT 0x0000038c -#define NV20TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 -#define NV20TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 -#define NV20TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 -#define NV20TCL_POLYGON_MODE_BACK 0x00000390 -#define NV20TCL_POLYGON_MODE_BACK_POINT 0x00001b00 -#define NV20TCL_POLYGON_MODE_BACK_LINE 0x00001b01 -#define NV20TCL_POLYGON_MODE_BACK_FILL 0x00001b02 -#define NV20TCL_DEPTH_RANGE_NEAR 0x00000394 -#define NV20TCL_DEPTH_RANGE_FAR 0x00000398 -#define NV20TCL_CULL_FACE 0x0000039c -#define NV20TCL_CULL_FACE_FRONT 0x00000404 -#define NV20TCL_CULL_FACE_BACK 0x00000405 -#define NV20TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 -#define NV20TCL_FRONT_FACE 0x000003a0 -#define NV20TCL_FRONT_FACE_CW 0x00000900 -#define NV20TCL_FRONT_FACE_CCW 0x00000901 -#define NV20TCL_NORMALIZE_ENABLE 0x000003a4 -#define NV20TCL_SEPARATE_SPECULAR_ENABLE 0x000003b8 -#define NV20TCL_ENABLED_LIGHTS 0x000003bc -#define NV20TCL_CLIP_PLANE_ENABLE(x) (0x000003c0+((x)*4)) -#define NV20TCL_CLIP_PLANE_ENABLE__SIZE 0x00000010 -#define NV20TCL_TX_MATRIX_ENABLE(x) (0x00000420+((x)*4)) -#define NV20TCL_TX_MATRIX_ENABLE__SIZE 0x00000004 -#define NV20TCL_POINT_SIZE 0x0000043c -#define NV20TCL_MODELVIEW_MATRIX(x) (0x00000480+((x)*4)) -#define NV20TCL_MODELVIEW_MATRIX__SIZE 0x00000010 -#define NV20TCL_PROJECTION_MATRIX(x) (0x00000680+((x)*4)) -#define NV20TCL_PROJECTION_MATRIX__SIZE 0x00000010 -#define NV20TCL_TX0_MATRIX(x) (0x000006c0+((x)*4)) -#define NV20TCL_TX0_MATRIX__SIZE 0x00000010 -#define NV20TCL_TX1_MATRIX(x) (0x00000700+((x)*4)) -#define NV20TCL_TX1_MATRIX__SIZE 0x00000010 -#define NV20TCL_TX2_MATRIX(x) (0x00000740+((x)*4)) -#define NV20TCL_TX2_MATRIX__SIZE 0x00000010 -#define NV20TCL_TX3_MATRIX(x) (0x00000780+((x)*4)) -#define NV20TCL_TX3_MATRIX__SIZE 0x00000010 -#define NV20TCL_FOG_EQUATION_CONSTANT 0x000009c0 -#define NV20TCL_FOG_EQUATION_LINEAR 0x000009c4 -#define NV20TCL_FOG_EQUATION_QUADRATIC 0x000009c8 -#define NV20TCL_VIEWPORT_SCALE0_X 0x00000a20 -#define NV20TCL_VIEWPORT_SCALE0_Y 0x00000a24 -#define NV20TCL_VIEWPORT_SCALE0_Z 0x00000a28 -#define NV20TCL_VIEWPORT_SCALE0_W 0x00000a2c -#define NV20TCL_POINT_PARAMETER(x) (0x00000a30+((x)*4)) -#define NV20TCL_POINT_PARAMETER__SIZE 0x00000007 -#define NV20TCL_RC_CONSTANT_COLOR0(x) (0x00000a60+((x)*4)) -#define NV20TCL_RC_CONSTANT_COLOR0__SIZE 0x00000008 -#define NV20TCL_RC_CONSTANT_COLOR1(x) (0x00000a80+((x)*4)) -#define NV20TCL_RC_CONSTANT_COLOR1__SIZE 0x00000008 -#define NV20TCL_RC_OUT_ALPHA(x) (0x00000aa0+((x)*4)) -#define NV20TCL_RC_OUT_ALPHA__SIZE 0x00000008 -#define NV20TCL_RC_IN_RGB(x) (0x00000ac0+((x)*4)) -#define NV20TCL_RC_IN_RGB__SIZE 0x00000008 -#define NV20TCL_VIEWPORT_SCALE1_X 0x00000af0 -#define NV20TCL_VIEWPORT_SCALE1_Y 0x00000af4 -#define NV20TCL_VIEWPORT_SCALE1_Z 0x00000af8 -#define NV20TCL_VIEWPORT_SCALE1_W 0x00000afc -#define NV20TCL_VP_UPLOAD_INST(x) (0x00000b00+((x)*4)) -#define NV20TCL_VP_UPLOAD_INST__SIZE 0x00000004 -#define NV20TCL_VP_UPLOAD_CONST(x) (0x00000b80+((x)*4)) -#define NV20TCL_VP_UPLOAD_CONST__SIZE 0x00000004 -#define NV20TCL_POLYGON_STIPPLE_ENABLE 0x0000147c -#define NV20TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) -#define NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 -#define NV20TCL_VERTEX_POS_3F_X 0x00001500 -#define NV20TCL_VERTEX_POS_3F_Y 0x00001504 -#define NV20TCL_VERTEX_POS_3F_Z 0x00001508 -#define NV20TCL_VERTEX_POS_4F_X 0x00001518 -#define NV20TCL_VERTEX_POS_4F_Y 0x0000151c -#define NV20TCL_VERTEX_POS_4F_Z 0x00001520 -#define NV20TCL_VERTEX_POS_3I_XY 0x00001528 -#define NV20TCL_VERTEX_POS_3I_XY_X_SHIFT 0 -#define NV20TCL_VERTEX_POS_3I_XY_X_MASK 0x0000ffff -#define NV20TCL_VERTEX_POS_3I_XY_Y_SHIFT 16 -#define NV20TCL_VERTEX_POS_3I_XY_Y_MASK 0xffff0000 -#define NV20TCL_VERTEX_POS_3I_Z 0x0000152c -#define NV20TCL_VERTEX_POS_3I_Z_Z_SHIFT 0 -#define NV20TCL_VERTEX_POS_3I_Z_Z_MASK 0x0000ffff -#define NV20TCL_VERTEX_NOR_3F_X 0x00001530 -#define NV20TCL_VERTEX_NOR_3F_Y 0x00001534 -#define NV20TCL_VERTEX_NOR_3F_Z 0x00001538 -#define NV20TCL_VERTEX_NOR_3I_XY 0x00001540 -#define NV20TCL_VERTEX_NOR_3I_XY_X_SHIFT 0 -#define NV20TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff -#define NV20TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16 -#define NV20TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000 -#define NV20TCL_VERTEX_NOR_3I_Z 0x00001544 -#define NV20TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0 -#define NV20TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff -#define NV20TCL_VERTEX_COL_4F_X 0x00001550 -#define NV20TCL_VERTEX_COL_4F_Y 0x00001554 -#define NV20TCL_VERTEX_COL_4F_Z 0x00001558 -#define NV20TCL_VERTEX_COL_4F_W 0x0000155c -#define NV20TCL_VERTEX_COL_3F_X 0x00001560 -#define NV20TCL_VERTEX_COL_3F_Y 0x00001564 -#define NV20TCL_VERTEX_COL_3F_Z 0x00001568 -#define NV20TCL_VERTEX_COL_4I 0x0000156c -#define NV20TCL_VERTEX_COL_4I_R_SHIFT 0 -#define NV20TCL_VERTEX_COL_4I_R_MASK 0x000000ff -#define NV20TCL_VERTEX_COL_4I_G_SHIFT 8 -#define NV20TCL_VERTEX_COL_4I_G_MASK 0x0000ff00 -#define NV20TCL_VERTEX_COL_4I_B_SHIFT 16 -#define NV20TCL_VERTEX_COL_4I_B_MASK 0x00ff0000 -#define NV20TCL_VERTEX_COL_4I_A_SHIFT 24 -#define NV20TCL_VERTEX_COL_4I_A_MASK 0xff000000 -#define NV20TCL_VERTEX_COL2_3F_X 0x00001580 -#define NV20TCL_VERTEX_COL2_3F_Y 0x00001584 -#define NV20TCL_VERTEX_COL2_3F_Z 0x00001588 -#define NV20TCL_VERTEX_COL2_4I 0x0000158c -#define NV20TCL_VERTEX_COL2_4I_R_SHIFT 0 -#define NV20TCL_VERTEX_COL2_4I_R_MASK 0x000000ff -#define NV20TCL_VERTEX_COL2_4I_G_SHIFT 8 -#define NV20TCL_VERTEX_COL2_4I_G_MASK 0x0000ff00 -#define NV20TCL_VERTEX_COL2_4I_B_SHIFT 16 -#define NV20TCL_VERTEX_COL2_4I_B_MASK 0x00ff0000 -#define NV20TCL_VERTEX_COL2_4I_A_SHIFT 24 -#define NV20TCL_VERTEX_COL2_4I_A_MASK 0xff000000 -#define NV20TCL_VERTEX_TX0_2F_S 0x00001590 -#define NV20TCL_VERTEX_TX0_2F_T 0x00001594 -#define NV20TCL_VERTEX_TX0_2I 0x00001598 -#define NV20TCL_VERTEX_TX0_2I_S_SHIFT 0 -#define NV20TCL_VERTEX_TX0_2I_S_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX0_2I_T_SHIFT 16 -#define NV20TCL_VERTEX_TX0_2I_T_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX0_4F_S 0x000015a0 -#define NV20TCL_VERTEX_TX0_4F_T 0x000015a4 -#define NV20TCL_VERTEX_TX0_4F_R 0x000015a8 -#define NV20TCL_VERTEX_TX0_4F_Q 0x000015ac -#define NV20TCL_VERTEX_TX0_4I_ST 0x000015b0 -#define NV20TCL_VERTEX_TX0_4I_ST_S_SHIFT 0 -#define NV20TCL_VERTEX_TX0_4I_ST_S_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX0_4I_ST_T_SHIFT 16 -#define NV20TCL_VERTEX_TX0_4I_ST_T_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX0_4I_RQ 0x000015b4 -#define NV20TCL_VERTEX_TX0_4I_RQ_R_SHIFT 0 -#define NV20TCL_VERTEX_TX0_4I_RQ_R_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX0_4I_RQ_Q_SHIFT 16 -#define NV20TCL_VERTEX_TX0_4I_RQ_Q_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX1_2F_S 0x000015b8 -#define NV20TCL_VERTEX_TX1_2F_T 0x000015bc -#define NV20TCL_VERTEX_TX1_2I 0x000015c0 -#define NV20TCL_VERTEX_TX1_2I_S_SHIFT 0 -#define NV20TCL_VERTEX_TX1_2I_S_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX1_2I_T_SHIFT 16 -#define NV20TCL_VERTEX_TX1_2I_T_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX1_4F_S 0x000015c8 -#define NV20TCL_VERTEX_TX1_4F_T 0x000015cc -#define NV20TCL_VERTEX_TX1_4F_R 0x000015d0 -#define NV20TCL_VERTEX_TX1_4F_Q 0x000015d4 -#define NV20TCL_VERTEX_TX1_4I_ST 0x000015d8 -#define NV20TCL_VERTEX_TX1_4I_ST_S_SHIFT 0 -#define NV20TCL_VERTEX_TX1_4I_ST_S_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX1_4I_ST_T_SHIFT 16 -#define NV20TCL_VERTEX_TX1_4I_ST_T_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX1_4I_RQ 0x000015dc -#define NV20TCL_VERTEX_TX1_4I_RQ_R_SHIFT 0 -#define NV20TCL_VERTEX_TX1_4I_RQ_R_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX1_4I_RQ_Q_SHIFT 16 -#define NV20TCL_VERTEX_TX1_4I_RQ_Q_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX2_2F_S 0x000015e0 -#define NV20TCL_VERTEX_TX2_2F_T 0x000015e4 -#define NV20TCL_VERTEX_TX2_2I 0x000015e8 -#define NV20TCL_VERTEX_TX2_2I_S_SHIFT 0 -#define NV20TCL_VERTEX_TX2_2I_S_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX2_2I_T_SHIFT 16 -#define NV20TCL_VERTEX_TX2_2I_T_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX2_4F_S 0x000015f0 -#define NV20TCL_VERTEX_TX2_4F_T 0x000015f4 -#define NV20TCL_VERTEX_TX2_4F_R 0x000015f8 -#define NV20TCL_VERTEX_TX2_4F_Q 0x000015fc -#define NV20TCL_VERTEX_TX2_4I_ST 0x00001600 -#define NV20TCL_VERTEX_TX2_4I_ST_S_SHIFT 0 -#define NV20TCL_VERTEX_TX2_4I_ST_S_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX2_4I_ST_T_SHIFT 16 -#define NV20TCL_VERTEX_TX2_4I_ST_T_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX2_4I_RQ 0x00001604 -#define NV20TCL_VERTEX_TX2_4I_RQ_R_SHIFT 0 -#define NV20TCL_VERTEX_TX2_4I_RQ_R_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX2_4I_RQ_Q_SHIFT 16 -#define NV20TCL_VERTEX_TX2_4I_RQ_Q_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX3_2F_S 0x00001608 -#define NV20TCL_VERTEX_TX3_2F_T 0x0000160c -#define NV20TCL_VERTEX_TX3_2I 0x00001610 -#define NV20TCL_VERTEX_TX3_2I_S_SHIFT 0 -#define NV20TCL_VERTEX_TX3_2I_S_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX3_2I_T_SHIFT 16 -#define NV20TCL_VERTEX_TX3_2I_T_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX3_4F_S 0x00001620 -#define NV20TCL_VERTEX_TX3_4F_T 0x00001624 -#define NV20TCL_VERTEX_TX3_4F_R 0x00001628 -#define NV20TCL_VERTEX_TX3_4F_Q 0x0000162c -#define NV20TCL_VERTEX_TX3_4I_ST 0x00001630 -#define NV20TCL_VERTEX_TX3_4I_ST_S_SHIFT 0 -#define NV20TCL_VERTEX_TX3_4I_ST_S_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX3_4I_ST_T_SHIFT 16 -#define NV20TCL_VERTEX_TX3_4I_ST_T_MASK 0xffff0000 -#define NV20TCL_VERTEX_TX3_4I_RQ 0x00001634 -#define NV20TCL_VERTEX_TX3_4I_RQ_R_SHIFT 0 -#define NV20TCL_VERTEX_TX3_4I_RQ_R_MASK 0x0000ffff -#define NV20TCL_VERTEX_TX3_4I_RQ_Q_SHIFT 16 -#define NV20TCL_VERTEX_TX3_4I_RQ_Q_MASK 0xffff0000 -#define NV20TCL_EDGEFLAG_ENABLE 0x000016bc -#define NV20TCL_VERTEX_ATTR_OFFSET(x) (0x00001720+((x)*4)) -#define NV20TCL_VERTEX_ATTR_OFFSET__SIZE 0x00000010 -#define NV20TCL_VERTEX_ARRAY_FORMAT(x) (0x00001760+((x)*4)) -#define NV20TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010 -#define NV20TCL_VERTEX_ARRAY_FORMAT_TYPE_SHIFT 0 -#define NV20TCL_VERTEX_ARRAY_FORMAT_TYPE_MASK 0x0000000f -#define NV20TCL_VERTEX_ARRAY_FORMAT_FIELDS_SHIFT 4 -#define NV20TCL_VERTEX_ARRAY_FORMAT_FIELDS_MASK 0x000000f0 -#define NV20TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT 8 -#define NV20TCL_VERTEX_ARRAY_FORMAT_STRIDE_MASK 0x0000ff00 -#define NV20TCL_COLOR_LOGIC_OP_ENABLE 0x000017bc -#define NV20TCL_COLOR_LOGIC_OP_OP 0x000017c0 -#define NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE 0x000017c4 -#define NV20TCL_TX_SHADER_CULL_MODE 0x000017f8 -#define NV20TCL_VERTEX_BEGIN_END 0x000017fc -#define NV20TCL_VERTEX_BUFFER_DRAW_ARRAYS 0x00001810 -#define NV20TCL_VERTEX_ARRAY_DATA 0x00001818 -#define NV20TCL_TX_OFFSET(x) (0x00001b00+((x)*64)) -#define NV20TCL_TX_OFFSET__SIZE 0x00000004 -#define NV20TCL_TX_FORMAT(x) (0x00001b04+((x)*64)) -#define NV20TCL_TX_FORMAT__SIZE 0x00000004 -#define NV20TCL_TX_WRAP(x) (0x00001b08+((x)*64)) -#define NV20TCL_TX_WRAP__SIZE 0x00000004 -#define NV20TCL_TX_ENABLE(x) (0x00001b0c+((x)*64)) -#define NV20TCL_TX_ENABLE__SIZE 0x00000004 -#define NV20TCL_TX_FILTER(x) (0x00001b14+((x)*64)) -#define NV20TCL_TX_FILTER__SIZE 0x00000004 -#define NV20TCL_TX_BORDER_COLOR(x) (0x00001b24+((x)*64)) -#define NV20TCL_TX_BORDER_COLOR__SIZE 0x00000004 -#define NV20TCL_SCISSOR_HORIZ 0x00001c30 -#define NV20TCL_SCISSOR_VERT 0x00001c50 -#define NV20TCL_MULTISAMPLE_CONTROL 0x00001d7c -#define NV20TCL_CLEAR_VALUE 0x00001d90 -#define NV20TCL_CLEAR_BUFFERS 0x00001d94 -#define NV20TCL_RC_COLOR0 0x00001e20 -#define NV20TCL_RC_COLOR1 0x00001e24 -#define NV20TCL_RC_OUT_RGB(x) (0x00001e40+((x)*4)) -#define NV20TCL_RC_OUT_RGB__SIZE 0x00000008 -#define NV20TCL_RC_ENABLE 0x00001e60 -#define NV20TCL_TX_SHADER_OP 0x00001e70 -#define NV20TCL_VP_UPLOAD_CONST_ID 0x00001ea4 - - -#define NV17TCL 0x00000099 - -#define NV17TCL_DMA_IN_MEMORY4 0x000001ac -#define NV17TCL_DMA_IN_MEMORY5 0x000001b0 -#define NV17TCL_COLOR_MASK_ENABLE 0x000002bc -#define NV17TCL_LMA_DEPTH_BUFFER_PITCH 0x00000d5c -#define NV17TCL_LMA_DEPTH_BUFFER_OFFSET 0x00000d60 -#define NV17TCL_LMA_DEPTH_FILL_VALUE 0x00000d68 -#define NV17TCL_LMA_DEPTH_BUFFER_CLEAR 0x00000d6c -#define NV17TCL_LMA_DEPTH_ENABLE 0x00001658 - - -#define NV20_SWIZZLED_SURFACE 0x0000009e - - - -#define NV12_IMAGE_BLIT 0x0000009f - - - -#define NV30_CONTEXT_SURFACES_2D 0x00000362 - - - -#define NV30_STRETCHED_IMAGE_FROM_CPU 0x00000366 - - - -#define NV30_TEXTURE_FROM_CPU 0x0000037b - - - -#define NV30_SCALED_IMAGE_FROM_MEMORY 0x00000389 - - - -#define NV30_IMAGE_FROM_CPU 0x0000038a - - - -#define NV30TCL 0x00000397 - - - -#define NV30_SWIZZLED_SURFACE 0x0000039e - - - -#define NV35TCL 0x00000497 - - - -#define NV25TCL 0x00000597 - -#define NV25TCL_DMA_IN_MEMORY4 0x0000019c -#define NV25TCL_DMA_IN_MEMORY5 0x000001a0 -#define NV25TCL_DMA_IN_MEMORY8 0x000001ac -#define NV25TCL_DMA_IN_MEMORY9 0x000001b0 - - -#define NV34TCL 0x00000697 - -#define NV34TCL_NOP 0x00000100 -#define NV34TCL_NOTIFY 0x00000104 -#define NV34TCL_DMA_NOTIFY 0x00000180 -#define NV34TCL_DMA_TEXTURE0 0x00000184 -#define NV34TCL_DMA_TEXTURE1 0x00000188 -#define NV34TCL_DMA_COLOR1 0x0000018c -#define NV34TCL_DMA_COLOR0 0x00000194 -#define NV34TCL_DMA_ZETA 0x00000198 -#define NV34TCL_DMA_VTXBUF0 0x0000019c -#define NV34TCL_DMA_VTXBUF1 0x000001a0 -#define NV34TCL_DMA_FENCE 0x000001a4 -#define NV34TCL_DMA_QUERY 0x000001a8 -#define NV34TCL_DMA_IN_MEMORY7 0x000001ac -#define NV34TCL_DMA_IN_MEMORY8 0x000001b0 -#define NV34TCL_RT_HORIZ 0x00000200 -#define NV34TCL_RT_HORIZ_X_SHIFT 0 -#define NV34TCL_RT_HORIZ_X_MASK 0x0000ffff -#define NV34TCL_RT_HORIZ_W_SHIFT 16 -#define NV34TCL_RT_HORIZ_W_MASK 0xffff0000 -#define NV34TCL_RT_VERT 0x00000204 -#define NV34TCL_RT_VERT_Y_SHIFT 0 -#define NV34TCL_RT_VERT_Y_MASK 0x0000ffff -#define NV34TCL_RT_VERT_H_SHIFT 16 -#define NV34TCL_RT_VERT_H_MASK 0xffff0000 -#define NV34TCL_RT_FORMAT 0x00000208 -#define NV34TCL_RT_FORMAT_TYPE_SHIFT 8 -#define NV34TCL_RT_FORMAT_TYPE_MASK 0x00000f00 -#define NV34TCL_RT_FORMAT_TYPE_LINEAR 0x00000100 -#define NV34TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200 -#define NV34TCL_RT_FORMAT_ZETA_SHIFT 5 -#define NV34TCL_RT_FORMAT_ZETA_MASK 0x000000e0 -#define NV34TCL_RT_FORMAT_ZETA_Z16 0x00000020 -#define NV34TCL_RT_FORMAT_ZETA_Z24S8 0x00000040 -#define NV34TCL_RT_FORMAT_COLOR_SHIFT 0 -#define NV34TCL_RT_FORMAT_COLOR_MASK 0x0000001f -#define NV34TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003 -#define NV34TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 -#define NV34TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 -#define NV34TCL_RT_FORMAT_COLOR_B8 0x00000009 -#define NV34TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f -#define NV34TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 -#define NV34TCL_COLOR0_PITCH 0x0000020c -#define NV34TCL_COLOR0_PITCH_COLOR0_SHIFT 0 -#define NV34TCL_COLOR0_PITCH_COLOR0_MASK 0x0000ffff -#define NV34TCL_COLOR0_PITCH_ZETA_SHIFT 16 -#define NV34TCL_COLOR0_PITCH_ZETA_MASK 0xffff0000 -#define NV34TCL_COLOR0_OFFSET 0x00000210 -#define NV34TCL_ZETA_OFFSET 0x00000214 -#define NV34TCL_COLOR1_OFFSET 0x00000218 -#define NV34TCL_COLOR1_PITCH 0x0000021c -#define NV34TCL_RT_ENABLE 0x00000220 -#define NV34TCL_RT_ENABLE_MRT (1 << 4) -#define NV34TCL_RT_ENABLE_COLOR3 (1 << 3) -#define NV34TCL_RT_ENABLE_COLOR2 (1 << 2) -#define NV34TCL_RT_ENABLE_COLOR1 (1 << 1) -#define NV34TCL_RT_ENABLE_COLOR0 (1 << 0) -#define NV34TCL_ZETA_PITCH 0x0000022c -#define NV34TCL_LMA_DEPTH_OFFSET 0x00000230 -#define NV34TCL_TX_UNITS_ENABLE 0x0000023c -#define NV34TCL_TX_UNITS_ENABLE_TX0 (1 << 0) -#define NV34TCL_TX_UNITS_ENABLE_TX1 (1 << 1) -#define NV34TCL_TX_UNITS_ENABLE_TX2 (1 << 2) -#define NV34TCL_TX_UNITS_ENABLE_TX3 (1 << 3) -#define NV34TCL_TX_MATRIX_ENABLE(x) (0x00000240+((x)*4)) -#define NV34TCL_TX_MATRIX_ENABLE__SIZE 0x00000004 -#define NV34TCL_UNK0250(x) (0x00000250+((x)*4)) -#define NV34TCL_UNK0250__SIZE 0x00000004 -#define NV34TCL_VIEWPORT_TX_ORIGIN 0x000002b8 -#define NV34TCL_VIEWPORT_TX_ORIGIN_X_SHIFT 0 -#define NV34TCL_VIEWPORT_TX_ORIGIN_X_MASK 0x0000ffff -#define NV34TCL_VIEWPORT_TX_ORIGIN_Y_SHIFT 16 -#define NV34TCL_VIEWPORT_TX_ORIGIN_Y_MASK 0xffff0000 -#define NV34TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*8)) -#define NV34TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 -#define NV34TCL_VIEWPORT_CLIP_HORIZ_L_SHIFT 0 -#define NV34TCL_VIEWPORT_CLIP_HORIZ_L_MASK 0x0000ffff -#define NV34TCL_VIEWPORT_CLIP_HORIZ_R_SHIFT 16 -#define NV34TCL_VIEWPORT_CLIP_HORIZ_R_MASK 0xffff0000 -#define NV34TCL_VIEWPORT_CLIP_VERT(x) (0x000002c4+((x)*8)) -#define NV34TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 -#define NV34TCL_VIEWPORT_CLIP_VERT_T_SHIFT 0 -#define NV34TCL_VIEWPORT_CLIP_VERT_T_MASK 0x0000ffff -#define NV34TCL_VIEWPORT_CLIP_VERT_D_SHIFT 16 -#define NV34TCL_VIEWPORT_CLIP_VERT_D_MASK 0xffff0000 -#define NV34TCL_DITHER_ENABLE 0x00000300 -#define NV34TCL_ALPHA_FUNC_ENABLE 0x00000304 -#define NV34TCL_ALPHA_FUNC_FUNC 0x00000308 -#define NV34TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200 -#define NV34TCL_ALPHA_FUNC_FUNC_LESS 0x00000201 -#define NV34TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202 -#define NV34TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 -#define NV34TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 -#define NV34TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 -#define NV34TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV34TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 -#define NV34TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 -#define NV34TCL_ALPHA_FUNC_REF 0x0000030c -#define NV34TCL_BLEND_FUNC_ENABLE 0x00000310 -#define NV34TCL_BLEND_FUNC_SRC 0x00000314 -#define NV34TCL_BLEND_FUNC_SRC_RGB_SHIFT 0 -#define NV34TCL_BLEND_FUNC_SRC_RGB_MASK 0x0000ffff -#define NV34TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 -#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001 -#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 -#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 -#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV34TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 -#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV34TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 -#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 -#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 -#define NV34TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 -#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV34TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 -#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SHIFT 16 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_MASK 0xffff0000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000 -#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 -#define NV34TCL_BLEND_FUNC_DST 0x00000318 -#define NV34TCL_BLEND_FUNC_DST_RGB_SHIFT 0 -#define NV34TCL_BLEND_FUNC_DST_RGB_MASK 0x0000ffff -#define NV34TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000 -#define NV34TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001 -#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 -#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 -#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV34TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 -#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV34TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 -#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 -#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 -#define NV34TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 -#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV34TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 -#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_SHIFT 16 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_MASK 0xffff0000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00010000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000 -#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 -#define NV34TCL_BLEND_FUNC_COLOR 0x0000031c -#define NV34TCL_BLEND_FUNC_EQUATION 0x00000320 -#define NV34TCL_BLEND_FUNC_EQUATION_FUNC_ADD 0x00008006 -#define NV34TCL_BLEND_FUNC_EQUATION_MIN 0x00008007 -#define NV34TCL_BLEND_FUNC_EQUATION_MAX 0x00008008 -#define NV34TCL_BLEND_FUNC_EQUATION_FUNC_SUBTRACT 0x0000800a -#define NV34TCL_BLEND_FUNC_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b -#define NV34TCL_COLOR_MASK 0x00000324 -#define NV34TCL_COLOR_MASK_B_SHIFT 0 -#define NV34TCL_COLOR_MASK_B_MASK 0x000000ff -#define NV34TCL_COLOR_MASK_G_SHIFT 8 -#define NV34TCL_COLOR_MASK_G_MASK 0x0000ff00 -#define NV34TCL_COLOR_MASK_R_SHIFT 16 -#define NV34TCL_COLOR_MASK_R_MASK 0x00ff0000 -#define NV34TCL_COLOR_MASK_A_SHIFT 24 -#define NV34TCL_COLOR_MASK_A_MASK 0xff000000 -#define NV34TCL_STENCIL_BACK_ENABLE 0x00000328 -#define NV34TCL_STENCIL_BACK_MASK 0x0000032c -#define NV34TCL_STENCIL_BACK_FUNC_FUNC 0x00000330 -#define NV34TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 -#define NV34TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 -#define NV34TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 -#define NV34TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 -#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 -#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 -#define NV34TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 -#define NV34TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 -#define NV34TCL_STENCIL_BACK_FUNC_REF 0x00000334 -#define NV34TCL_STENCIL_BACK_FUNC_MASK 0x00000338 -#define NV34TCL_STENCIL_BACK_OP_FAIL 0x0000033c -#define NV34TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 -#define NV34TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a -#define NV34TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 -#define NV34TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 -#define NV34TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 -#define NV34TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 -#define NV34TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 -#define NV34TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 -#define NV34TCL_STENCIL_BACK_OP_ZFAIL 0x00000340 -#define NV34TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 -#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a -#define NV34TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 -#define NV34TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 -#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 -#define NV34TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 -#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 -#define NV34TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 -#define NV34TCL_STENCIL_BACK_OP_ZPASS 0x00000344 -#define NV34TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 -#define NV34TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a -#define NV34TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 -#define NV34TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 -#define NV34TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 -#define NV34TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 -#define NV34TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 -#define NV34TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 -#define NV34TCL_STENCIL_FRONT_ENABLE 0x00000348 -#define NV34TCL_STENCIL_FRONT_MASK 0x0000034c -#define NV34TCL_STENCIL_FRONT_FUNC_FUNC 0x00000350 -#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 -#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 -#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 -#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 -#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 -#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 -#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 -#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 -#define NV34TCL_STENCIL_FRONT_FUNC_REF 0x00000354 -#define NV34TCL_STENCIL_FRONT_FUNC_MASK 0x00000358 -#define NV34TCL_STENCIL_FRONT_OP_FAIL 0x0000035c -#define NV34TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 -#define NV34TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a -#define NV34TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 -#define NV34TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 -#define NV34TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 -#define NV34TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 -#define NV34TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 -#define NV34TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 -#define NV34TCL_STENCIL_FRONT_OP_ZFAIL 0x00000360 -#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 -#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a -#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 -#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 -#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 -#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 -#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 -#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 -#define NV34TCL_STENCIL_FRONT_OP_ZPASS 0x00000364 -#define NV34TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 -#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a -#define NV34TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 -#define NV34TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 -#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 -#define NV34TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 -#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 -#define NV34TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 -#define NV34TCL_SHADE_MODEL 0x00000368 -#define NV34TCL_SHADE_MODEL_FLAT 0x00001d00 -#define NV34TCL_SHADE_MODEL_SMOOTH 0x00001d01 -#define NV34TCL_FOG_ENABLE 0x0000036c -#define NV34TCL_FOG_COLOR 0x00000370 -#define NV34TCL_COLOR_LOGIC_OP_ENABLE 0x00000374 -#define NV34TCL_COLOR_LOGIC_OP_OP 0x00000378 -#define NV34TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500 -#define NV34TCL_COLOR_LOGIC_OP_OP_AND 0x00001501 -#define NV34TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502 -#define NV34TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503 -#define NV34TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504 -#define NV34TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505 -#define NV34TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506 -#define NV34TCL_COLOR_LOGIC_OP_OP_OR 0x00001507 -#define NV34TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508 -#define NV34TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509 -#define NV34TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a -#define NV34TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b -#define NV34TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c -#define NV34TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d -#define NV34TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e -#define NV34TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f -#define NV34TCL_NORMALIZE_ENABLE 0x0000037c -#define NV34TCL_COLOR_MATERIAL 0x00000390 -#define NV34TCL_COLOR_MATERIAL_FRONT_EMISSION_ENABLE (1 << 0) -#define NV34TCL_COLOR_MATERIAL_FRONT_AMBIENT_ENABLE (1 << 2) -#define NV34TCL_COLOR_MATERIAL_FRONT_DIFFUSE_ENABLE (1 << 4) -#define NV34TCL_COLOR_MATERIAL_FRONT_SPECULAR_ENABLE (1 << 6) -#define NV34TCL_COLOR_MATERIAL_BACK_EMISSION_ENABLE (1 << 8) -#define NV34TCL_COLOR_MATERIAL_BACK_AMBIENT_ENABLE (1 << 10) -#define NV34TCL_COLOR_MATERIAL_BACK_DIFFUSE_ENABLE (1 << 12) -#define NV34TCL_COLOR_MATERIAL_BACK_SPECULAR_ENABLE (1 << 14) -#define NV34TCL_DEPTH_RANGE_NEAR 0x00000394 -#define NV34TCL_DEPTH_RANGE_FAR 0x00000398 -#define NV34TCL_COLOR_MATERIAL_FRONT_R 0x000003a0 -#define NV34TCL_COLOR_MATERIAL_FRONT_G 0x000003a4 -#define NV34TCL_COLOR_MATERIAL_FRONT_B 0x000003a8 -#define NV34TCL_COLOR_MATERIAL_FRONT_A 0x000003b4 -#define NV34TCL_LINE_WIDTH 0x000003b8 -#define NV34TCL_LINE_SMOOTH_ENABLE 0x000003bc -#define NV34TCL_CLIP_PLANE_ENABLE(x) (0x00000400+((x)*4)) -#define NV34TCL_CLIP_PLANE_ENABLE__SIZE 0x00000020 -#define NV34TCL_CLIP_PLANE_ENABLE_FALSE 0x00000000 -#define NV34TCL_CLIP_PLANE_ENABLE_EYE_LINEAR 0x00002400 -#define NV34TCL_CLIP_PLANE_ENABLE_OBJECT_LINEAR 0x00002401 -#define NV34TCL_MODELVIEW_MATRIX(x) (0x00000480+((x)*4)) -#define NV34TCL_MODELVIEW_MATRIX__SIZE 0x00000010 -#define NV34TCL_INVERSE_MODELVIEW_MATRIX(x) (0x00000580+((x)*4)) -#define NV34TCL_INVERSE_MODELVIEW_MATRIX__SIZE 0x0000000c -#define NV34TCL_PROJECTION_MATRIX(x) (0x00000680+((x)*4)) -#define NV34TCL_PROJECTION_MATRIX__SIZE 0x00000010 -#define NV34TCL_TX0_MATRIX(x) (0x000006c0+((x)*4)) -#define NV34TCL_TX0_MATRIX__SIZE 0x00000010 -#define NV34TCL_TX1_MATRIX(x) (0x00000700+((x)*4)) -#define NV34TCL_TX1_MATRIX__SIZE 0x00000010 -#define NV34TCL_TX2_MATRIX(x) (0x00000740+((x)*4)) -#define NV34TCL_TX2_MATRIX__SIZE 0x00000010 -#define NV34TCL_TX3_MATRIX(x) (0x00000780+((x)*4)) -#define NV34TCL_TX3_MATRIX__SIZE 0x00000010 -#define NV34TCL_SCISSOR_HORIZ 0x000008c0 -#define NV34TCL_SCISSOR_HORIZ_X_SHIFT 0 -#define NV34TCL_SCISSOR_HORIZ_X_MASK 0x0000ffff -#define NV34TCL_SCISSOR_HORIZ_W_SHIFT 16 -#define NV34TCL_SCISSOR_HORIZ_W_MASK 0xffff0000 -#define NV34TCL_SCISSOR_VERT 0x000008c4 -#define NV34TCL_SCISSOR_VERT_Y_SHIFT 0 -#define NV34TCL_SCISSOR_VERT_Y_MASK 0x0000ffff -#define NV34TCL_SCISSOR_VERT_H_SHIFT 16 -#define NV34TCL_SCISSOR_VERT_H_MASK 0xffff0000 -#define NV34TCL_FOG_COORD_DIST 0x000008c8 -#define NV34TCL_FOG_MODE 0x000008cc -#define NV34TCL_FOG_MODE_EXP 0x00000800 -#define NV34TCL_FOG_MODE_EXP_2 0x00000802 -#define NV34TCL_FOG_MODE_EXP2 0x00000803 -#define NV34TCL_FOG_MODE_LINEAR 0x00000804 -#define NV34TCL_FOG_MODE_LINEAR_2 0x00002601 -#define NV34TCL_FOG_EQUATION_CONSTANT 0x000008d0 -#define NV34TCL_FOG_EQUATION_LINEAR 0x000008d4 -#define NV34TCL_FOG_EQUATION_QUADRATIC 0x000008d8 -#define NV34TCL_FP_ACTIVE_PROGRAM 0x000008e4 -#define NV34TCL_FP_ACTIVE_PROGRAM_DMA0 (1 << 0) -#define NV34TCL_FP_ACTIVE_PROGRAM_DMA1 (1 << 1) -#define NV34TCL_FP_ACTIVE_PROGRAM_OFFSET_SHIFT 2 -#define NV34TCL_FP_ACTIVE_PROGRAM_OFFSET_MASK 0xfffffffc -#define NV34TCL_RC_COLOR0 0x000008ec -#define NV34TCL_RC_COLOR0_B_SHIFT 0 -#define NV34TCL_RC_COLOR0_B_MASK 0x000000ff -#define NV34TCL_RC_COLOR0_G_SHIFT 8 -#define NV34TCL_RC_COLOR0_G_MASK 0x0000ff00 -#define NV34TCL_RC_COLOR0_R_SHIFT 16 -#define NV34TCL_RC_COLOR0_R_MASK 0x00ff0000 -#define NV34TCL_RC_COLOR0_A_SHIFT 24 -#define NV34TCL_RC_COLOR0_A_MASK 0xff000000 -#define NV34TCL_RC_COLOR1 0x000008f0 -#define NV34TCL_RC_COLOR1_B_SHIFT 0 -#define NV34TCL_RC_COLOR1_B_MASK 0x000000ff -#define NV34TCL_RC_COLOR1_G_SHIFT 8 -#define NV34TCL_RC_COLOR1_G_MASK 0x0000ff00 -#define NV34TCL_RC_COLOR1_R_SHIFT 16 -#define NV34TCL_RC_COLOR1_R_MASK 0x00ff0000 -#define NV34TCL_RC_COLOR1_A_SHIFT 24 -#define NV34TCL_RC_COLOR1_A_MASK 0xff000000 -#define NV34TCL_RC_FINAL0 0x000008f4 -#define NV34TCL_RC_FINAL1 0x000008f8 -#define NV34TCL_RC_ENABLE 0x000008fc -#define NV34TCL_RC_IN_ALPHA(x) (0x00000900+((x)*32)) -#define NV34TCL_RC_IN_ALPHA__SIZE 0x00000008 -#define NV34TCL_RC_IN_RGB(x) (0x00000904+((x)*32)) -#define NV34TCL_RC_IN_RGB__SIZE 0x00000008 -#define NV34TCL_RC_CONSTANT_COLOR0(x) (0x00000908+((x)*32)) -#define NV34TCL_RC_CONSTANT_COLOR0__SIZE 0x00000008 -#define NV34TCL_RC_CONSTANT_COLOR0_B_SHIFT 0 -#define NV34TCL_RC_CONSTANT_COLOR0_B_MASK 0x000000ff -#define NV34TCL_RC_CONSTANT_COLOR0_G_SHIFT 8 -#define NV34TCL_RC_CONSTANT_COLOR0_G_MASK 0x0000ff00 -#define NV34TCL_RC_CONSTANT_COLOR0_R_SHIFT 16 -#define NV34TCL_RC_CONSTANT_COLOR0_R_MASK 0x00ff0000 -#define NV34TCL_RC_CONSTANT_COLOR0_A_SHIFT 24 -#define NV34TCL_RC_CONSTANT_COLOR0_A_MASK 0xff000000 -#define NV34TCL_RC_CONSTANT_COLOR1(x) (0x0000090c+((x)*32)) -#define NV34TCL_RC_CONSTANT_COLOR1__SIZE 0x00000008 -#define NV34TCL_RC_CONSTANT_COLOR1_B_SHIFT 0 -#define NV34TCL_RC_CONSTANT_COLOR1_B_MASK 0x000000ff -#define NV34TCL_RC_CONSTANT_COLOR1_G_SHIFT 8 -#define NV34TCL_RC_CONSTANT_COLOR1_G_MASK 0x0000ff00 -#define NV34TCL_RC_CONSTANT_COLOR1_R_SHIFT 16 -#define NV34TCL_RC_CONSTANT_COLOR1_R_MASK 0x00ff0000 -#define NV34TCL_RC_CONSTANT_COLOR1_A_SHIFT 24 -#define NV34TCL_RC_CONSTANT_COLOR1_A_MASK 0xff000000 -#define NV34TCL_RC_OUT_ALPHA(x) (0x00000910+((x)*32)) -#define NV34TCL_RC_OUT_ALPHA__SIZE 0x00000008 -#define NV34TCL_RC_OUT_RGB(x) (0x00000914+((x)*32)) -#define NV34TCL_RC_OUT_RGB__SIZE 0x00000008 -#define NV34TCL_VIEWPORT_HORIZ 0x00000a00 -#define NV34TCL_VIEWPORT_HORIZ_X_SHIFT 0 -#define NV34TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff -#define NV34TCL_VIEWPORT_HORIZ_W_SHIFT 16 -#define NV34TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 -#define NV34TCL_VIEWPORT_VERT 0x00000a04 -#define NV34TCL_VIEWPORT_VERT_Y_SHIFT 0 -#define NV34TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff -#define NV34TCL_VIEWPORT_VERT_H_SHIFT 16 -#define NV34TCL_VIEWPORT_VERT_H_MASK 0xffff0000 -#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x00000a10 -#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x00000a14 -#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x00000a18 -#define NV34TCL_VIEWPORT_TRANSLATE_X 0x00000a20 -#define NV34TCL_VIEWPORT_TRANSLATE_Y 0x00000a24 -#define NV34TCL_VIEWPORT_TRANSLATE_Z 0x00000a28 -#define NV34TCL_VIEWPORT_TRANSLATE_W 0x00000a2c -#define NV34TCL_VIEWPORT_SCALE_X 0x00000a30 -#define NV34TCL_VIEWPORT_SCALE_Y 0x00000a34 -#define NV34TCL_VIEWPORT_SCALE_Z 0x00000a38 -#define NV34TCL_VIEWPORT_SCALE_W 0x00000a3c -#define NV34TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a60 -#define NV34TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 -#define NV34TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a68 -#define NV34TCL_DEPTH_FUNC 0x00000a6c -#define NV34TCL_DEPTH_FUNC_NEVER 0x00000200 -#define NV34TCL_DEPTH_FUNC_LESS 0x00000201 -#define NV34TCL_DEPTH_FUNC_EQUAL 0x00000202 -#define NV34TCL_DEPTH_FUNC_LEQUAL 0x00000203 -#define NV34TCL_DEPTH_FUNC_GREATER 0x00000204 -#define NV34TCL_DEPTH_FUNC_GREATER 0x00000204 -#define NV34TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 -#define NV34TCL_DEPTH_FUNC_GEQUAL 0x00000206 -#define NV34TCL_DEPTH_FUNC_ALWAYS 0x00000207 -#define NV34TCL_DEPTH_WRITE_ENABLE 0x00000a70 -#define NV34TCL_DEPTH_TEST_ENABLE 0x00000a74 -#define NV34TCL_POLYGON_OFFSET_FACTOR 0x00000a78 -#define NV34TCL_POLYGON_OFFSET_UNITS 0x00000a7c -#define NV34TCL_VERTEX_NOR_3I_XY 0x00000a90 -#define NV34TCL_VERTEX_NOR_3I_XY_X_SHIFT 0 -#define NV34TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff -#define NV34TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16 -#define NV34TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000 -#define NV34TCL_VERTEX_NOR_3I_Z 0x00000a94 -#define NV34TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0 -#define NV34TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff -#define NV34TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4)) -#define NV34TCL_VP_UPLOAD_INST__SIZE 0x00000004 -#define NV34TCL_CLIP_PLANE_A(x) (0x00000e00+((x)*16)) -#define NV34TCL_CLIP_PLANE_A__SIZE 0x00000020 -#define NV34TCL_CLIP_PLANE_B(x) (0x00000e04+((x)*16)) -#define NV34TCL_CLIP_PLANE_B__SIZE 0x00000020 -#define NV34TCL_CLIP_PLANE_C(x) (0x00000e08+((x)*16)) -#define NV34TCL_CLIP_PLANE_C__SIZE 0x00000020 -#define NV34TCL_CLIP_PLANE_D(x) (0x00000e0c+((x)*16)) -#define NV34TCL_CLIP_PLANE_D__SIZE 0x00000020 -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00001000+((x)*64)) -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008 -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00001004+((x)*64)) -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008 -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00001008+((x)*64)) -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008 -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000100c+((x)*64)) -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008 -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00001010+((x)*64)) -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008 -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00001014+((x)*64)) -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008 -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00001018+((x)*64)) -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008 -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000101c+((x)*64)) -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008 -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00001020+((x)*64)) -#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008 -#define NV34TCL_LIGHT_HALF_VECTOR_X(x) (0x00001028+((x)*64)) -#define NV34TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008 -#define NV34TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000102c+((x)*64)) -#define NV34TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008 -#define NV34TCL_LIGHT_HALF_VECTOR_Z(x) (0x00001030+((x)*64)) -#define NV34TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008 -#define NV34TCL_LIGHT_DIRECTION_X(x) (0x00001034+((x)*64)) -#define NV34TCL_LIGHT_DIRECTION_X__SIZE 0x00000008 -#define NV34TCL_LIGHT_DIRECTION_Y(x) (0x00001038+((x)*64)) -#define NV34TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008 -#define NV34TCL_LIGHT_DIRECTION_Z(x) (0x0000103c+((x)*64)) -#define NV34TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008 -#define NV34TCL_LIGHT_SPOT_CUTOFF_A(x) (0x00001200+((x)*64)) -#define NV34TCL_LIGHT_SPOT_CUTOFF_A__SIZE 0x00000008 -#define NV34TCL_LIGHT_SPOT_CUTOFF_B(x) (0x00001204+((x)*64)) -#define NV34TCL_LIGHT_SPOT_CUTOFF_B__SIZE 0x00000008 -#define NV34TCL_LIGHT_SPOT_CUTOFF_C(x) (0x00001208+((x)*64)) -#define NV34TCL_LIGHT_SPOT_CUTOFF_C__SIZE 0x00000008 -#define NV34TCL_LIGHT_SPOT_DIR_X(x) (0x0000120c+((x)*64)) -#define NV34TCL_LIGHT_SPOT_DIR_X__SIZE 0x00000008 -#define NV34TCL_LIGHT_SPOT_DIR_Y(x) (0x00001210+((x)*64)) -#define NV34TCL_LIGHT_SPOT_DIR_Y__SIZE 0x00000008 -#define NV34TCL_LIGHT_SPOT_DIR_Z(x) (0x00001214+((x)*64)) -#define NV34TCL_LIGHT_SPOT_DIR_Z__SIZE 0x00000008 -#define NV34TCL_LIGHT_SPOT_CUTOFF_D(x) (0x00001218+((x)*64)) -#define NV34TCL_LIGHT_SPOT_CUTOFF_D__SIZE 0x00000008 -#define NV34TCL_LIGHT_POSITION_X(x) (0x0000121c+((x)*64)) -#define NV34TCL_LIGHT_POSITION_X__SIZE 0x00000008 -#define NV34TCL_LIGHT_POSITION_Y(x) (0x00001220+((x)*64)) -#define NV34TCL_LIGHT_POSITION_Y__SIZE 0x00000008 -#define NV34TCL_LIGHT_POSITION_Z(x) (0x00001224+((x)*64)) -#define NV34TCL_LIGHT_POSITION_Z__SIZE 0x00000008 -#define NV34TCL_LIGHT_ATTENUATION_CONSTANT(x) (0x00001228+((x)*64)) -#define NV34TCL_LIGHT_ATTENUATION_CONSTANT__SIZE 0x00000008 -#define NV34TCL_LIGHT_ATTENUATION_LINEAR(x) (0x0000122c+((x)*64)) -#define NV34TCL_LIGHT_ATTENUATION_LINEAR__SIZE 0x00000008 -#define NV34TCL_LIGHT_ATTENUATION_QUADRATIC(x) (0x00001230+((x)*64)) -#define NV34TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008 -#define NV34TCL_FRONT_MATERIAL_SHININESS(x) (0x00001400+((x)*4)) -#define NV34TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006 -#define NV34TCL_FP_REG_CONTROL 0x00001450 -#define NV34TCL_FP_REG_CONTROL_UNK1_SHIFT 16 -#define NV34TCL_FP_REG_CONTROL_UNK1_MASK 0xffff0000 -#define NV34TCL_FP_REG_CONTROL_UNK0_SHIFT 0 -#define NV34TCL_FP_REG_CONTROL_UNK0_MASK 0x0000ffff -#define NV34TCL_VP_CLIP_PLANES_ENABLE 0x00001478 -#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 (1 << 1) -#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 (1 << 5) -#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 (1 << 9) -#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 (1 << 13) -#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4 (1 << 17) -#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5 (1 << 21) -#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE6 (1 << 25) -#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE7 (1 << 29) -#define NV34TCL_POLYGON_STIPPLE_ENABLE 0x0000147c -#define NV34TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) -#define NV34TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 -#define NV34TCL_VERTEX_ATTR_3F_X(x) (0x00001500+((x)*16)) -#define NV34TCL_VERTEX_ATTR_3F_X__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_3F_Y(x) (0x00001504+((x)*16)) -#define NV34TCL_VERTEX_ATTR_3F_Y__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_3F_Z(x) (0x00001508+((x)*16)) -#define NV34TCL_VERTEX_ATTR_3F_Z__SIZE 0x00000010 -#define NV34TCL_VP_CLIP_PLANE_A(x) (0x00001600+((x)*16)) -#define NV34TCL_VP_CLIP_PLANE_A__SIZE 0x00000006 -#define NV34TCL_VP_CLIP_PLANE_B(x) (0x00001604+((x)*16)) -#define NV34TCL_VP_CLIP_PLANE_B__SIZE 0x00000006 -#define NV34TCL_VP_CLIP_PLANE_C(x) (0x00001608+((x)*16)) -#define NV34TCL_VP_CLIP_PLANE_C__SIZE 0x00000006 -#define NV34TCL_VP_CLIP_PLANE_D(x) (0x0000160c+((x)*16)) -#define NV34TCL_VP_CLIP_PLANE_D__SIZE 0x00000006 -#define NV34TCL_VERTEX_BUFFER_ADDRESS(x) (0x00001680+((x)*4)) -#define NV34TCL_VERTEX_BUFFER_ADDRESS__SIZE 0x00000010 -#define NV34TCL_VERTEX_BUFFER_ADDRESS_DMA1 (1 << 31) -#define NV34TCL_VERTEX_BUFFER_ADDRESS_OFFSET_SHIFT 0 -#define NV34TCL_VERTEX_BUFFER_ADDRESS_OFFSET_MASK 0x0fffffff -#define NV34TCL_VERTEX_ARRAY_FORMAT(x) (0x00001740+((x)*4)) -#define NV34TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010 -#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_SHIFT 0 -#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_MASK 0x0000000f -#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT 0x00000002 -#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_UBYTE 0x00000004 -#define NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_SHIFT 4 -#define NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_MASK 0x000000f0 -#define NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT 8 -#define NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_MASK 0x0000ff00 -#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0 -#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4 -#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8 -#define NV34TCL_COLOR_MATERIAL_BACK_R 0x000017b0 -#define NV34TCL_COLOR_MATERIAL_BACK_G 0x000017b4 -#define NV34TCL_COLOR_MATERIAL_BACK_B 0x000017b8 -#define NV34TCL_COLOR_MATERIAL_BACK_A 0x000017c0 -#define NV34TCL_QUERY_RESET 0x000017c8 -#define NV34TCL_QUERY_UNK17CC 0x000017cc -#define NV34TCL_QUERY_GET 0x00001800 -#define NV34TCL_QUERY_GET_UNK24_SHIFT 24 -#define NV34TCL_QUERY_GET_UNK24_MASK 0xff000000 -#define NV34TCL_QUERY_GET_OFFSET_SHIFT 0 -#define NV34TCL_QUERY_GET_OFFSET_MASK 0x00ffffff -#define NV34TCL_VERTEX_BEGIN_END 0x00001808 -#define NV34TCL_VERTEX_BEGIN_END_STOP 0x00000000 -#define NV34TCL_VERTEX_BEGIN_END_POINTS 0x00000001 -#define NV34TCL_VERTEX_BEGIN_END_LINES 0x00000002 -#define NV34TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003 -#define NV34TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004 -#define NV34TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005 -#define NV34TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006 -#define NV34TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007 -#define NV34TCL_VERTEX_BEGIN_END_QUADS 0x00000008 -#define NV34TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009 -#define NV34TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a -#define NV34TCL_VB_ELEMENT_U16 0x0000180c -#define NV34TCL_VB_ELEMENT_U16_I0_SHIFT 0 -#define NV34TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff -#define NV34TCL_VB_ELEMENT_U16_I1_SHIFT 16 -#define NV34TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000 -#define NV34TCL_VB_ELEMENT_U32 0x00001810 -#define NV34TCL_VB_VERTEX_BATCH 0x00001814 -#define NV34TCL_VB_VERTEX_BATCH_OFFSET_SHIFT 0 -#define NV34TCL_VB_VERTEX_BATCH_OFFSET_MASK 0x00ffffff -#define NV34TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24 -#define NV34TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000 -#define NV34TCL_VERTEX_DATA 0x00001818 -#define NV34TCL_POLYGON_MODE_FRONT 0x00001828 -#define NV34TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 -#define NV34TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 -#define NV34TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 -#define NV34TCL_POLYGON_MODE_BACK 0x0000182c -#define NV34TCL_POLYGON_MODE_BACK_POINT 0x00001b00 -#define NV34TCL_POLYGON_MODE_BACK_LINE 0x00001b01 -#define NV34TCL_POLYGON_MODE_BACK_FILL 0x00001b02 -#define NV34TCL_CULL_FACE 0x00001830 -#define NV34TCL_CULL_FACE_FRONT 0x00000404 -#define NV34TCL_CULL_FACE_BACK 0x00000405 -#define NV34TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 -#define NV34TCL_FRONT_FACE 0x00001834 -#define NV34TCL_FRONT_FACE_CW 0x00000900 -#define NV34TCL_FRONT_FACE_CCW 0x00000901 -#define NV34TCL_POLYGON_SMOOTH_ENABLE 0x00001838 -#define NV34TCL_CULL_FACE_ENABLE 0x0000183c -#define NV34TCL_VERTEX_ATTR_2F_X(x) (0x00001880+((x)*8)) -#define NV34TCL_VERTEX_ATTR_2F_X__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_2F_Y(x) (0x00001884+((x)*8)) -#define NV34TCL_VERTEX_ATTR_2F_Y__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_2I(x) (0x00001900+((x)*4)) -#define NV34TCL_VERTEX_ATTR_2I__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_2I_Y_SHIFT 16 -#define NV34TCL_VERTEX_ATTR_2I_Y_MASK 0xffff0000 -#define NV34TCL_VERTEX_ATTR_2I_X_SHIFT 0 -#define NV34TCL_VERTEX_ATTR_2I_X_MASK 0x0000ffff -#define NV34TCL_VERTEX_COL_4I(x) (0x0000194c+((x)*4)) -#define NV34TCL_VERTEX_COL_4I__SIZE 0x00000002 -#define NV34TCL_VERTEX_COL_4I_R_SHIFT 0 -#define NV34TCL_VERTEX_COL_4I_R_MASK 0x000000ff -#define NV34TCL_VERTEX_COL_4I_G_SHIFT 8 -#define NV34TCL_VERTEX_COL_4I_G_MASK 0x0000ff00 -#define NV34TCL_VERTEX_COL_4I_B_SHIFT 16 -#define NV34TCL_VERTEX_COL_4I_B_MASK 0x00ff0000 -#define NV34TCL_VERTEX_COL_4I_A_SHIFT 24 -#define NV34TCL_VERTEX_COL_4I_A_MASK 0xff000000 -#define NV34TCL_VERTEX_POS_4I_XY 0x00001980 -#define NV34TCL_VERTEX_POS_4I_XY_X_SHIFT 0 -#define NV34TCL_VERTEX_POS_4I_XY_X_MASK 0x0000ffff -#define NV34TCL_VERTEX_POS_4I_XY_Y_SHIFT 16 -#define NV34TCL_VERTEX_POS_4I_XY_Y_MASK 0xffff0000 -#define NV34TCL_VERTEX_POS_4I_ZW 0x00001984 -#define NV34TCL_VERTEX_POS_4I_ZW_Z_SHIFT 0 -#define NV34TCL_VERTEX_POS_4I_ZW_Z_MASK 0x0000ffff -#define NV34TCL_VERTEX_POS_4I_ZW_W_SHIFT 16 -#define NV34TCL_VERTEX_POS_4I_ZW_W_MASK 0xffff0000 -#define NV34TCL_VERTEX_TX_4I_ST(x) (0x000019c0+((x)*8)) -#define NV34TCL_VERTEX_TX_4I_ST__SIZE 0x00000004 -#define NV34TCL_VERTEX_TX_4I_ST_S_SHIFT 0 -#define NV34TCL_VERTEX_TX_4I_ST_S_MASK 0x0000ffff -#define NV34TCL_VERTEX_TX_4I_ST_T_SHIFT 16 -#define NV34TCL_VERTEX_TX_4I_ST_T_MASK 0xffff0000 -#define NV34TCL_VERTEX_TX_4I_RQ(x) (0x000019c4+((x)*8)) -#define NV34TCL_VERTEX_TX_4I_RQ__SIZE 0x00000004 -#define NV34TCL_VERTEX_TX_4I_RQ_R_SHIFT 0 -#define NV34TCL_VERTEX_TX_4I_RQ_R_MASK 0x0000ffff -#define NV34TCL_VERTEX_TX_4I_RQ_Q_SHIFT 16 -#define NV34TCL_VERTEX_TX_4I_RQ_Q_MASK 0xffff0000 -#define NV34TCL_TX_OFFSET(x) (0x00001a00+((x)*32)) -#define NV34TCL_TX_OFFSET__SIZE 0x00000004 -#define NV34TCL_TX_FORMAT(x) (0x00001a04+((x)*32)) -#define NV34TCL_TX_FORMAT__SIZE 0x00000004 -#define NV34TCL_TX_FORMAT_DMA0 (1 << 0) -#define NV34TCL_TX_FORMAT_DMA1 (1 << 1) -#define NV34TCL_TX_FORMAT_CUBE_MAP (1 << 2) -#define NV34TCL_TX_FORMAT_DIMS_SHIFT 4 -#define NV34TCL_TX_FORMAT_DIMS_MASK 0x000000f0 -#define NV34TCL_TX_FORMAT_DIMS_1D 0x00000010 -#define NV34TCL_TX_FORMAT_DIMS_2D 0x00000020 -#define NV34TCL_TX_FORMAT_DIMS_3D 0x00000030 -#define NV34TCL_TX_FORMAT_FORMAT_SHIFT 8 -#define NV34TCL_TX_FORMAT_FORMAT_MASK 0x0000ff00 -#define NV34TCL_TX_FORMAT_FORMAT_L8 0x00000000 -#define NV34TCL_TX_FORMAT_FORMAT_A8 0x00000100 -#define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000200 -#define NV34TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000300 -#define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000400 -#define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000600 -#define NV34TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000700 -#define NV34TCL_TX_FORMAT_FORMAT_INDEX8 0x00000b00 -#define NV34TCL_TX_FORMAT_FORMAT_DXT1 0x00000c00 -#define NV34TCL_TX_FORMAT_FORMAT_DXT3 0x00000e00 -#define NV34TCL_TX_FORMAT_FORMAT_DXT5 0x00000f00 -#define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000 -#define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200 -#define NV34TCL_TX_FORMAT_FORMAT_L8_RECT 0x00001300 -#define NV34TCL_TX_FORMAT_FORMAT_A8L8 0x00001a00 -#define NV34TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00001b00 -#define NV34TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00 -#define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00 -#define NV34TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00002000 -#define NV34TCL_TX_FORMAT_FORMAT_A16 0x00003200 -#define NV34TCL_TX_FORMAT_FORMAT_A16_RECT 0x00003500 -#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00 -#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00 -#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00 -#define NV34TCL_TX_FORMAT_NPOT (1 << 12) -#define NV34TCL_TX_FORMAT_RECT (1 << 14) -#define NV34TCL_TX_FORMAT_MIPMAP_LEVELS_SHIFT 16 -#define NV34TCL_TX_FORMAT_MIPMAP_LEVELS_MASK 0x000f0000 -#define NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 20 -#define NV34TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x00f00000 -#define NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 24 -#define NV34TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x0f000000 -#define NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT 28 -#define NV34TCL_TX_FORMAT_BASE_SIZE_W_MASK 0xf0000000 -#define NV34TCL_TX_WRAP(x) (0x00001a08+((x)*32)) -#define NV34TCL_TX_WRAP__SIZE 0x00000004 -#define NV34TCL_TX_WRAP_S_SHIFT 0 -#define NV34TCL_TX_WRAP_S_MASK 0x000000ff -#define NV34TCL_TX_WRAP_S_REPEAT 0x00000001 -#define NV34TCL_TX_WRAP_S_MIRRORED_REPEAT 0x00000002 -#define NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE 0x00000003 -#define NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER 0x00000004 -#define NV34TCL_TX_WRAP_S_CLAMP 0x00000005 -#define NV34TCL_TX_WRAP_T_SHIFT 8 -#define NV34TCL_TX_WRAP_T_MASK 0x0000ff00 -#define NV34TCL_TX_WRAP_T_REPEAT 0x00000100 -#define NV34TCL_TX_WRAP_T_MIRRORED_REPEAT 0x00000200 -#define NV34TCL_TX_WRAP_T_CLAMP_TO_EDGE 0x00000300 -#define NV34TCL_TX_WRAP_T_CLAMP_TO_BORDER 0x00000400 -#define NV34TCL_TX_WRAP_T_CLAMP 0x00000500 -#define NV34TCL_TX_WRAP_R_SHIFT 16 -#define NV34TCL_TX_WRAP_R_MASK 0x00ff0000 -#define NV34TCL_TX_WRAP_R_REPEAT 0x00010000 -#define NV34TCL_TX_WRAP_R_MIRRORED_REPEAT 0x00020000 -#define NV34TCL_TX_WRAP_R_CLAMP_TO_EDGE 0x00030000 -#define NV34TCL_TX_WRAP_R_CLAMP_TO_BORDER 0x00040000 -#define NV34TCL_TX_WRAP_R_CLAMP 0x00050000 -#define NV34TCL_TX_ENABLE(x) (0x00001a0c+((x)*32)) -#define NV34TCL_TX_ENABLE__SIZE 0x00000004 -#define NV34TCL_TX_ENABLE_ENABLE (1 << 30) -#define NV34TCL_TX_SWIZZLE(x) (0x00001a10+((x)*32)) -#define NV34TCL_TX_SWIZZLE__SIZE 0x00000004 -#define NV34TCL_TX_SWIZZLE_S0_X_SHIFT 14 -#define NV34TCL_TX_SWIZZLE_S0_X_MASK 0x0000c000 -#define NV34TCL_TX_SWIZZLE_S0_X_ZERO 0x00000000 -#define NV34TCL_TX_SWIZZLE_S0_X_ONE 0x00004000 -#define NV34TCL_TX_SWIZZLE_S0_X_S1 0x00008000 -#define NV34TCL_TX_SWIZZLE_S0_Y_SHIFT 12 -#define NV34TCL_TX_SWIZZLE_S0_Y_MASK 0x00003000 -#define NV34TCL_TX_SWIZZLE_S0_Y_ZERO 0x00000000 -#define NV34TCL_TX_SWIZZLE_S0_Y_ONE 0x00001000 -#define NV34TCL_TX_SWIZZLE_S0_Y_S1 0x00002000 -#define NV34TCL_TX_SWIZZLE_S0_Z_SHIFT 10 -#define NV34TCL_TX_SWIZZLE_S0_Z_MASK 0x00000c00 -#define NV34TCL_TX_SWIZZLE_S0_Z_ZERO 0x00000000 -#define NV34TCL_TX_SWIZZLE_S0_Z_ONE 0x00000400 -#define NV34TCL_TX_SWIZZLE_S0_Z_S1 0x00000800 -#define NV34TCL_TX_SWIZZLE_S0_W_SHIFT 8 -#define NV34TCL_TX_SWIZZLE_S0_W_MASK 0x00000300 -#define NV34TCL_TX_SWIZZLE_S0_W_ZERO 0x00000000 -#define NV34TCL_TX_SWIZZLE_S0_W_ONE 0x00000100 -#define NV34TCL_TX_SWIZZLE_S0_W_S1 0x00000200 -#define NV34TCL_TX_SWIZZLE_S1_X_SHIFT 6 -#define NV34TCL_TX_SWIZZLE_S1_X_MASK 0x000000c0 -#define NV34TCL_TX_SWIZZLE_S1_X_W 0x00000000 -#define NV34TCL_TX_SWIZZLE_S1_X_Z 0x00000040 -#define NV34TCL_TX_SWIZZLE_S1_X_Y 0x00000080 -#define NV34TCL_TX_SWIZZLE_S1_X_X 0x000000c0 -#define NV34TCL_TX_SWIZZLE_S1_Y_SHIFT 4 -#define NV34TCL_TX_SWIZZLE_S1_Y_MASK 0x00000030 -#define NV34TCL_TX_SWIZZLE_S1_Y_W 0x00000000 -#define NV34TCL_TX_SWIZZLE_S1_Y_Z 0x00000010 -#define NV34TCL_TX_SWIZZLE_S1_Y_Y 0x00000020 -#define NV34TCL_TX_SWIZZLE_S1_Y_X 0x00000030 -#define NV34TCL_TX_SWIZZLE_S1_Z_SHIFT 2 -#define NV34TCL_TX_SWIZZLE_S1_Z_MASK 0x0000000c -#define NV34TCL_TX_SWIZZLE_S1_Z_W 0x00000000 -#define NV34TCL_TX_SWIZZLE_S1_Z_Z 0x00000004 -#define NV34TCL_TX_SWIZZLE_S1_Z_Y 0x00000008 -#define NV34TCL_TX_SWIZZLE_S1_Z_X 0x0000000c -#define NV34TCL_TX_SWIZZLE_S1_W_SHIFT 0 -#define NV34TCL_TX_SWIZZLE_S1_W_MASK 0x00000003 -#define NV34TCL_TX_SWIZZLE_S1_W_W 0x00000000 -#define NV34TCL_TX_SWIZZLE_S1_W_Z 0x00000001 -#define NV34TCL_TX_SWIZZLE_S1_W_Y 0x00000002 -#define NV34TCL_TX_SWIZZLE_S1_W_X 0x00000003 -#define NV34TCL_TX_SWIZZLE_RECT_PITCH_SHIFT 16 -#define NV34TCL_TX_SWIZZLE_RECT_PITCH_MASK 0xffff0000 -#define NV34TCL_TX_FILTER(x) (0x00001a14+((x)*32)) -#define NV34TCL_TX_FILTER__SIZE 0x00000004 -#define NV34TCL_TX_FILTER_MINIFY_SHIFT 16 -#define NV34TCL_TX_FILTER_MINIFY_MASK 0x000f0000 -#define NV34TCL_TX_FILTER_MINIFY_NEAREST 0x00010000 -#define NV34TCL_TX_FILTER_MINIFY_LINEAR 0x00020000 -#define NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x00030000 -#define NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x00040000 -#define NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x00050000 -#define NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x00060000 -#define NV34TCL_TX_FILTER_MAGNIFY_SHIFT 24 -#define NV34TCL_TX_FILTER_MAGNIFY_MASK 0x0f000000 -#define NV34TCL_TX_FILTER_MAGNIFY_NEAREST 0x01000000 -#define NV34TCL_TX_FILTER_MAGNIFY_LINEAR 0x02000000 -#define NV34TCL_TX_FILTER_SIGNED_BLUE (1 << 28) -#define NV34TCL_TX_FILTER_SIGNED_GREEN (1 << 29) -#define NV34TCL_TX_FILTER_SIGNED_RED (1 << 30) -#define NV34TCL_TX_FILTER_SIGNED_ALPHA (1 << 31) -#define NV34TCL_TX_NPOT_SIZE(x) (0x00001a18+((x)*32)) -#define NV34TCL_TX_NPOT_SIZE__SIZE 0x00000004 -#define NV34TCL_TX_NPOT_SIZE_H_SHIFT 0 -#define NV34TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff -#define NV34TCL_TX_NPOT_SIZE_W_SHIFT 16 -#define NV34TCL_TX_NPOT_SIZE_W_MASK 0xffff0000 -#define NV34TCL_TX_BORDER_COLOR(x) (0x00001a1c+((x)*32)) -#define NV34TCL_TX_BORDER_COLOR__SIZE 0x00000004 -#define NV34TCL_TX_BORDER_COLOR_B_SHIFT 0 -#define NV34TCL_TX_BORDER_COLOR_B_MASK 0x000000ff -#define NV34TCL_TX_BORDER_COLOR_G_SHIFT 8 -#define NV34TCL_TX_BORDER_COLOR_G_MASK 0x0000ff00 -#define NV34TCL_TX_BORDER_COLOR_R_SHIFT 16 -#define NV34TCL_TX_BORDER_COLOR_R_MASK 0x00ff0000 -#define NV34TCL_TX_BORDER_COLOR_A_SHIFT 24 -#define NV34TCL_TX_BORDER_COLOR_A_MASK 0xff000000 -#define NV34TCL_VERTEX_ATTR_4F_X(x) (0x00001c00+((x)*16)) -#define NV34TCL_VERTEX_ATTR_4F_X__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_4F_Y(x) (0x00001c04+((x)*16)) -#define NV34TCL_VERTEX_ATTR_4F_Y__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_4F_Z(x) (0x00001c08+((x)*16)) -#define NV34TCL_VERTEX_ATTR_4F_Z__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_4F_W(x) (0x00001c0c+((x)*16)) -#define NV34TCL_VERTEX_ATTR_4F_W__SIZE 0x00000010 -#define NV34TCL_FP_CONTROL 0x00001d60 -#define NV34TCL_FP_CONTROL_USES_KIL (1 << 7) -#define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_SHIFT 0 -#define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_MASK 0x0000000f -#define NV34TCL_MULTISAMPLE_CONTROL 0x00001d7c -#define NV34TCL_CLEAR_DEPTH_VALUE 0x00001d8c -#define NV34TCL_CLEAR_COLOR_VALUE 0x00001d90 -#define NV34TCL_CLEAR_COLOR_VALUE_B_SHIFT 0 -#define NV34TCL_CLEAR_COLOR_VALUE_B_MASK 0x000000ff -#define NV34TCL_CLEAR_COLOR_VALUE_G_SHIFT 8 -#define NV34TCL_CLEAR_COLOR_VALUE_G_MASK 0x0000ff00 -#define NV34TCL_CLEAR_COLOR_VALUE_R_SHIFT 16 -#define NV34TCL_CLEAR_COLOR_VALUE_R_MASK 0x00ff0000 -#define NV34TCL_CLEAR_COLOR_VALUE_A_SHIFT 24 -#define NV34TCL_CLEAR_COLOR_VALUE_A_MASK 0xff000000 -#define NV34TCL_CLEAR_BUFFERS 0x00001d94 -#define NV34TCL_CLEAR_BUFFERS_COLOR_A (1 << 7) -#define NV34TCL_CLEAR_BUFFERS_COLOR_B (1 << 6) -#define NV34TCL_CLEAR_BUFFERS_COLOR_G (1 << 5) -#define NV34TCL_CLEAR_BUFFERS_COLOR_R (1 << 4) -#define NV34TCL_CLEAR_BUFFERS_STENCIL (1 << 1) -#define NV34TCL_CLEAR_BUFFERS_DEPTH (1 << 0) -#define NV34TCL_DO_VERTICES 0x00001dac -#define NV34TCL_LINE_STIPPLE_ENABLE 0x00001db4 -#define NV34TCL_LINE_STIPPLE_PATTERN 0x00001db8 -#define NV34TCL_LINE_STIPPLE_PATTERN_FACTOR_SHIFT 0 -#define NV34TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff -#define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16 -#define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000 -#define NV34TCL_BACK_MATERIAL_SHININESS(x) (0x00001e20+((x)*4)) -#define NV34TCL_BACK_MATERIAL_SHININESS__SIZE 0x00000006 -#define NV34TCL_VERTEX_FOG_1F 0x00001e54 -#define NV34TCL_VP_UPLOAD_FROM_ID 0x00001e9c -#define NV34TCL_VP_START_FROM_ID 0x00001ea0 -#define NV34TCL_POINT_PARAMETERS(x) (0x00001ec0+((x)*4)) -#define NV34TCL_POINT_PARAMETERS__SIZE 0x00000008 -#define NV34TCL_POINT_SIZE 0x00001ee0 -#define NV34TCL_POINT_PARAMETERS_ENABLE 0x00001ee4 -#define NV34TCL_POINT_SPRITE 0x00001ee8 -#define NV34TCL_POINT_SPRITE_ENABLE (1 << 0) -#define NV34TCL_POINT_SPRITE_R_MODE_SHIFT 1 -#define NV34TCL_POINT_SPRITE_R_MODE_MASK 0x00000006 -#define NV34TCL_POINT_SPRITE_R_MODE_GL_ZERO 0x00000000 -#define NV34TCL_POINT_SPRITE_R_MODE_GL_R 0x00000002 -#define NV34TCL_POINT_SPRITE_R_MODE_GL_S 0x00000004 -#define NV34TCL_POINT_SPRITE_COORD_REPLACE (1 << 11) -#define NV34TCL_VP_UPLOAD_CONST_ID 0x00001efc -#define NV34TCL_VP_UPLOAD_CONST_X(x) (0x00001f00+((x)*16)) -#define NV34TCL_VP_UPLOAD_CONST_X__SIZE 0x00000004 -#define NV34TCL_VP_UPLOAD_CONST_Y(x) (0x00001f04+((x)*16)) -#define NV34TCL_VP_UPLOAD_CONST_Y__SIZE 0x00000004 -#define NV34TCL_VP_UPLOAD_CONST_Z(x) (0x00001f08+((x)*16)) -#define NV34TCL_VP_UPLOAD_CONST_Z__SIZE 0x00000004 -#define NV34TCL_VP_UPLOAD_CONST_W(x) (0x00001f0c+((x)*16)) -#define NV34TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004 -#define NV34TCL_UNK1f80(x) (0x00001f80+((x)*4)) -#define NV34TCL_UNK1f80__SIZE 0x00000010 -#define NV34TCL_VP_ATTRIB_EN 0x00001ff0 -#define NV34TCL_VP_RESULT_EN 0x00001ff4 - - -#define NV40_CONTEXT_SURFACES_2D 0x00003062 - - - -#define NV40_STRETCHED_IMAGE_FROM_CPU 0x00003066 - - - -#define NV40_TEXTURE_FROM_CPU 0x0000307b - - - -#define NV40_SCALED_IMAGE_FROM_MEMORY 0x00003089 - - - -#define NV40_IMAGE_FROM_CPU 0x0000308a - - - -#define NV40_SWIZZLED_SURFACE 0x0000309e - - - -#define NV40TCL 0x00004097 - -#define NV40TCL_REF_CNT 0x00000050 -#define NV40TCL_NOP 0x00000100 -#define NV40TCL_NOTIFY 0x00000104 -#define NV40TCL_DMA_NOTIFY 0x00000180 -#define NV40TCL_DMA_TEXTURE0 0x00000184 -#define NV40TCL_DMA_TEXTURE1 0x00000188 -#define NV40TCL_DMA_COLOR1 0x0000018c -#define NV40TCL_DMA_COLOR0 0x00000194 -#define NV40TCL_DMA_ZETA 0x00000198 -#define NV40TCL_DMA_VTXBUF0 0x0000019c -#define NV40TCL_DMA_VTXBUF1 0x000001a0 -#define NV40TCL_DMA_FENCE 0x000001a4 -#define NV40TCL_DMA_QUERY 0x000001a8 -#define NV40TCL_DMA_UNK01AC 0x000001ac -#define NV40TCL_DMA_UNK01B0 0x000001b0 -#define NV40TCL_DMA_COLOR2 0x000001b4 -#define NV40TCL_DMA_COLOR3 0x000001b8 -#define NV40TCL_RT_HORIZ 0x00000200 -#define NV40TCL_RT_HORIZ_W_SHIFT 16 -#define NV40TCL_RT_HORIZ_W_MASK 0xffff0000 -#define NV40TCL_RT_HORIZ_X_SHIFT 0 -#define NV40TCL_RT_HORIZ_X_MASK 0x0000ffff -#define NV40TCL_RT_VERT 0x00000204 -#define NV40TCL_RT_VERT_H_SHIFT 16 -#define NV40TCL_RT_VERT_H_MASK 0xffff0000 -#define NV40TCL_RT_VERT_Y_SHIFT 0 -#define NV40TCL_RT_VERT_Y_MASK 0x0000ffff -#define NV40TCL_RT_FORMAT 0x00000208 -#define NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT 24 -#define NV40TCL_RT_FORMAT_LOG2_HEIGHT_MASK 0xff000000 -#define NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT 16 -#define NV40TCL_RT_FORMAT_LOG2_WIDTH_MASK 0x00ff0000 -#define NV40TCL_RT_FORMAT_TYPE_SHIFT 8 -#define NV40TCL_RT_FORMAT_TYPE_MASK 0x00000f00 -#define NV40TCL_RT_FORMAT_TYPE_LINEAR 0x00000100 -#define NV40TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200 -#define NV40TCL_RT_FORMAT_ZETA_SHIFT 5 -#define NV40TCL_RT_FORMAT_ZETA_MASK 0x000000e0 -#define NV40TCL_RT_FORMAT_ZETA_Z16 0x00000020 -#define NV40TCL_RT_FORMAT_ZETA_Z24S8 0x00000040 -#define NV40TCL_RT_FORMAT_COLOR_SHIFT 0 -#define NV40TCL_RT_FORMAT_COLOR_MASK 0x0000001f -#define NV40TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003 -#define NV40TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 -#define NV40TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 -#define NV40TCL_RT_FORMAT_COLOR_B8 0x00000009 -#define NV40TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f -#define NV40TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 -#define NV40TCL_COLOR0_PITCH 0x0000020c -#define NV40TCL_COLOR0_OFFSET 0x00000210 -#define NV40TCL_ZETA_OFFSET 0x00000214 -#define NV40TCL_COLOR1_OFFSET 0x00000218 -#define NV40TCL_COLOR1_PITCH 0x0000021c -#define NV40TCL_RT_ENABLE 0x00000220 -#define NV40TCL_RT_ENABLE_MRT (1 << 4) -#define NV40TCL_RT_ENABLE_COLOR3 (1 << 3) -#define NV40TCL_RT_ENABLE_COLOR2 (1 << 2) -#define NV40TCL_RT_ENABLE_COLOR1 (1 << 1) -#define NV40TCL_RT_ENABLE_COLOR0 (1 << 0) -#define NV40TCL_ZETA_PITCH 0x0000022c -#define NV40TCL_COLOR2_PITCH 0x00000280 -#define NV40TCL_COLOR3_PITCH 0x00000284 -#define NV40TCL_COLOR2_OFFSET 0x00000288 -#define NV40TCL_COLOR3_OFFSET 0x0000028c -#define NV40TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*8)) -#define NV40TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 -#define NV40TCL_VIEWPORT_CLIP_VERT(x) (0x000002c4+((x)*8)) -#define NV40TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 -#define NV40TCL_DITHER_ENABLE 0x00000300 -#define NV40TCL_ALPHA_TEST_ENABLE 0x00000304 -#define NV40TCL_ALPHA_TEST_FUNC 0x00000308 -#define NV40TCL_ALPHA_TEST_FUNC_NEVER 0x00000200 -#define NV40TCL_ALPHA_TEST_FUNC_LESS 0x00000201 -#define NV40TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202 -#define NV40TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203 -#define NV40TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 -#define NV40TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 -#define NV40TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 -#define NV40TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206 -#define NV40TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207 -#define NV40TCL_ALPHA_TEST_REF 0x0000030c -#define NV40TCL_BLEND_ENABLE 0x00000310 -#define NV40TCL_BLEND_FUNC_SRC 0x00000314 -#define NV40TCL_BLEND_FUNC_SRC_RGB_SHIFT 0 -#define NV40TCL_BLEND_FUNC_SRC_RGB_MASK 0x0000ffff -#define NV40TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 -#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001 -#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 -#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 -#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV40TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 -#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV40TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 -#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 -#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 -#define NV40TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 -#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV40TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 -#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SHIFT 16 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_MASK 0xffff0000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000 -#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 -#define NV40TCL_BLEND_FUNC_DST 0x00000318 -#define NV40TCL_BLEND_FUNC_DST_RGB_SHIFT 0 -#define NV40TCL_BLEND_FUNC_DST_RGB_MASK 0x0000ffff -#define NV40TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000 -#define NV40TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001 -#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 -#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 -#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV40TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 -#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV40TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 -#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 -#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 -#define NV40TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 -#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV40TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 -#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_SHIFT 16 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_MASK 0xffff0000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00010000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000 -#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 -#define NV40TCL_BLEND_COLOR 0x0000031c -#define NV40TCL_BLEND_EQUATION 0x00000320 -#define NV40TCL_BLEND_EQUATION_RGB_SHIFT 0 -#define NV40TCL_BLEND_EQUATION_RGB_MASK 0x0000ffff -#define NV40TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 -#define NV40TCL_BLEND_EQUATION_RGB_MIN 0x00008007 -#define NV40TCL_BLEND_EQUATION_RGB_MAX 0x00008008 -#define NV40TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a -#define NV40TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b -#define NV40TCL_BLEND_EQUATION_ALPHA_SHIFT 16 -#define NV40TCL_BLEND_EQUATION_ALPHA_MASK 0xffff0000 -#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x80060000 -#define NV40TCL_BLEND_EQUATION_ALPHA_MIN 0x80070000 -#define NV40TCL_BLEND_EQUATION_ALPHA_MAX 0x80080000 -#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x800a0000 -#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x800b0000 -#define NV40TCL_COLOR_MASK 0x00000324 -#define NV40TCL_COLOR_MASK_BUFFER0_B_SHIFT 0 -#define NV40TCL_COLOR_MASK_BUFFER0_B_MASK 0x000000ff -#define NV40TCL_COLOR_MASK_BUFFER0_G_SHIFT 8 -#define NV40TCL_COLOR_MASK_BUFFER0_G_MASK 0x0000ff00 -#define NV40TCL_COLOR_MASK_BUFFER0_R_SHIFT 16 -#define NV40TCL_COLOR_MASK_BUFFER0_R_MASK 0x00ff0000 -#define NV40TCL_COLOR_MASK_BUFFER0_A_SHIFT 24 -#define NV40TCL_COLOR_MASK_BUFFER0_A_MASK 0xff000000 -#define NV40TCL_STENCIL_FRONT_ENABLE 0x00000328 -#define NV40TCL_STENCIL_FRONT_MASK 0x0000032c -#define NV40TCL_STENCIL_FRONT_FUNC_FUNC 0x00000330 -#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 -#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 -#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 -#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 -#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 -#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 -#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 -#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 -#define NV40TCL_STENCIL_FRONT_FUNC_REF 0x00000334 -#define NV40TCL_STENCIL_FRONT_FUNC_MASK 0x00000338 -#define NV40TCL_STENCIL_FRONT_OP_FAIL 0x0000033c -#define NV40TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 -#define NV40TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a -#define NV40TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 -#define NV40TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 -#define NV40TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 -#define NV40TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 -#define NV40TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 -#define NV40TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 -#define NV40TCL_STENCIL_FRONT_OP_ZFAIL 0x00000340 -#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 -#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a -#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 -#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 -#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 -#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 -#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 -#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 -#define NV40TCL_STENCIL_FRONT_OP_ZPASS 0x00000344 -#define NV40TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 -#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a -#define NV40TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 -#define NV40TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 -#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 -#define NV40TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 -#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 -#define NV40TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 -#define NV40TCL_STENCIL_BACK_ENABLE 0x00000348 -#define NV40TCL_STENCIL_BACK_MASK 0x0000034c -#define NV40TCL_STENCIL_BACK_FUNC_FUNC 0x00000350 -#define NV40TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 -#define NV40TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 -#define NV40TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 -#define NV40TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 -#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 -#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 -#define NV40TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 -#define NV40TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 -#define NV40TCL_STENCIL_BACK_FUNC_REF 0x00000354 -#define NV40TCL_STENCIL_BACK_FUNC_MASK 0x00000358 -#define NV40TCL_STENCIL_BACK_OP_FAIL 0x0000035c -#define NV40TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 -#define NV40TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a -#define NV40TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 -#define NV40TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 -#define NV40TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 -#define NV40TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 -#define NV40TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 -#define NV40TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 -#define NV40TCL_STENCIL_BACK_OP_ZFAIL 0x00000360 -#define NV40TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 -#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a -#define NV40TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 -#define NV40TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 -#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 -#define NV40TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 -#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 -#define NV40TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 -#define NV40TCL_STENCIL_BACK_OP_ZPASS 0x00000364 -#define NV40TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 -#define NV40TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a -#define NV40TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 -#define NV40TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 -#define NV40TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 -#define NV40TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 -#define NV40TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 -#define NV40TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 -#define NV40TCL_SHADE_MODEL 0x00000368 -#define NV40TCL_SHADE_MODEL_FLAT 0x00001d00 -#define NV40TCL_SHADE_MODEL_SMOOTH 0x00001d01 -#define NV40TCL_MRT_COLOR_MASK 0x00000370 -#define NV40TCL_MRT_COLOR_MASK_BUFFER1_A (1 << 4) -#define NV40TCL_MRT_COLOR_MASK_BUFFER1_R (1 << 5) -#define NV40TCL_MRT_COLOR_MASK_BUFFER1_G (1 << 6) -#define NV40TCL_MRT_COLOR_MASK_BUFFER1_B (1 << 7) -#define NV40TCL_MRT_COLOR_MASK_BUFFER2_A (1 << 8) -#define NV40TCL_MRT_COLOR_MASK_BUFFER2_R (1 << 9) -#define NV40TCL_MRT_COLOR_MASK_BUFFER2_G (1 << 10) -#define NV40TCL_MRT_COLOR_MASK_BUFFER2_B (1 << 11) -#define NV40TCL_MRT_COLOR_MASK_BUFFER3_A (1 << 12) -#define NV40TCL_MRT_COLOR_MASK_BUFFER3_R (1 << 13) -#define NV40TCL_MRT_COLOR_MASK_BUFFER3_G (1 << 14) -#define NV40TCL_MRT_COLOR_MASK_BUFFER3_B (1 << 15) -#define NV40TCL_COLOR_LOGIC_OP_ENABLE 0x00000374 -#define NV40TCL_COLOR_LOGIC_OP 0x00000378 -#define NV40TCL_COLOR_LOGIC_OP_CLEAR 0x00001500 -#define NV40TCL_COLOR_LOGIC_OP_AND 0x00001501 -#define NV40TCL_COLOR_LOGIC_OP_AND_REVERSE 0x00001502 -#define NV40TCL_COLOR_LOGIC_OP_COPY 0x00001503 -#define NV40TCL_COLOR_LOGIC_OP_AND_INVERTED 0x00001504 -#define NV40TCL_COLOR_LOGIC_OP_NOOP 0x00001505 -#define NV40TCL_COLOR_LOGIC_OP_XOR 0x00001506 -#define NV40TCL_COLOR_LOGIC_OP_OR 0x00001507 -#define NV40TCL_COLOR_LOGIC_OP_NOR 0x00001508 -#define NV40TCL_COLOR_LOGIC_OP_EQUIV 0x00001509 -#define NV40TCL_COLOR_LOGIC_OP_INVERT 0x0000150a -#define NV40TCL_COLOR_LOGIC_OP_OR_REVERSE 0x0000150b -#define NV40TCL_COLOR_LOGIC_OP_COPY_INVERTED 0x0000150c -#define NV40TCL_COLOR_LOGIC_OP_OR_INVERTED 0x0000150d -#define NV40TCL_COLOR_LOGIC_OP_NAND 0x0000150e -#define NV40TCL_COLOR_LOGIC_OP_SET 0x0000150f -#define NV40TCL_DEPTH_RANGE_NEAR 0x00000394 -#define NV40TCL_DEPTH_RANGE_FAR 0x00000398 -#define NV40TCL_LINE_WIDTH 0x000003b8 -#define NV40TCL_LINE_SMOOTH_ENABLE 0x000003bc -#define NV40TCL_UNK03C0(x) (0x000003c0+((x)*4)) -#define NV40TCL_UNK03C0__SIZE 0x00000010 -#define NV40TCL_UNK0400(x) (0x00000400+((x)*4)) -#define NV40TCL_UNK0400__SIZE 0x00000010 -#define NV40TCL_UNK0440(x) (0x00000440+((x)*4)) -#define NV40TCL_UNK0440__SIZE 0x00000020 -#define NV40TCL_SCISSOR_HORIZ 0x000008c0 -#define NV40TCL_SCISSOR_HORIZ_X_SHIFT 0 -#define NV40TCL_SCISSOR_HORIZ_X_MASK 0x0000ffff -#define NV40TCL_SCISSOR_HORIZ_W_SHIFT 16 -#define NV40TCL_SCISSOR_HORIZ_W_MASK 0xffff0000 -#define NV40TCL_SCISSOR_VERT 0x000008c4 -#define NV40TCL_SCISSOR_VERT_Y_SHIFT 0 -#define NV40TCL_SCISSOR_VERT_Y_MASK 0x0000ffff -#define NV40TCL_SCISSOR_VERT_H_SHIFT 16 -#define NV40TCL_SCISSOR_VERT_H_MASK 0xffff0000 -#define NV40TCL_FOG_MODE 0x000008cc -#define NV40TCL_FOG_EQUATION_CONSTANT 0x000008d0 -#define NV40TCL_FOG_EQUATION_LINEAR 0x000008d4 -#define NV40TCL_FOG_EQUATION_QUADRATIC 0x000008d8 -#define NV40TCL_FP_ADDRESS 0x000008e4 -#define NV40TCL_FP_ADDRESS_OFFSET_SHIFT 8 -#define NV40TCL_FP_ADDRESS_OFFSET_MASK 0xffffff00 -#define NV40TCL_FP_ADDRESS_DMA1 (1 << 1) -#define NV40TCL_FP_ADDRESS_DMA0 (1 << 0) -#define NV40TCL_VIEWPORT_HORIZ 0x00000a00 -#define NV40TCL_VIEWPORT_HORIZ_W_SHIFT 16 -#define NV40TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 -#define NV40TCL_VIEWPORT_HORIZ_X_SHIFT 0 -#define NV40TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff -#define NV40TCL_VIEWPORT_VERT 0x00000a04 -#define NV40TCL_VIEWPORT_VERT_H_SHIFT 16 -#define NV40TCL_VIEWPORT_VERT_H_MASK 0xffff0000 -#define NV40TCL_VIEWPORT_VERT_Y_SHIFT 0 -#define NV40TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff -#define NV40TCL_VIEWPORT_TRANSLATE_X 0x00000a20 -#define NV40TCL_VIEWPORT_TRANSLATE_Y 0x00000a24 -#define NV40TCL_VIEWPORT_TRANSLATE_Z 0x00000a28 -#define NV40TCL_VIEWPORT_TRANSLATE_W 0x00000a2c -#define NV40TCL_VIEWPORT_SCALE_X 0x00000a30 -#define NV40TCL_VIEWPORT_SCALE_Y 0x00000a34 -#define NV40TCL_VIEWPORT_SCALE_Z 0x00000a38 -#define NV40TCL_VIEWPORT_SCALE_W 0x00000a3c -#define NV40TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a60 -#define NV40TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 -#define NV40TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a68 -#define NV40TCL_DEPTH_FUNC 0x00000a6c -#define NV40TCL_DEPTH_FUNC_NEVER 0x00000200 -#define NV40TCL_DEPTH_FUNC_LESS 0x00000201 -#define NV40TCL_DEPTH_FUNC_EQUAL 0x00000202 -#define NV40TCL_DEPTH_FUNC_LEQUAL 0x00000203 -#define NV40TCL_DEPTH_FUNC_GREATER 0x00000204 -#define NV40TCL_DEPTH_FUNC_GREATER 0x00000204 -#define NV40TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 -#define NV40TCL_DEPTH_FUNC_GEQUAL 0x00000206 -#define NV40TCL_DEPTH_FUNC_ALWAYS 0x00000207 -#define NV40TCL_DEPTH_WRITE_ENABLE 0x00000a70 -#define NV40TCL_DEPTH_TEST_ENABLE 0x00000a74 -#define NV40TCL_POLYGON_OFFSET_FACTOR 0x00000a78 -#define NV40TCL_POLYGON_OFFSET_UNITS 0x00000a7c -#define NV40TCL_UNK0B40(x) (0x00000b40+((x)*4)) -#define NV40TCL_UNK0B40__SIZE 0x00000008 -#define NV40TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4)) -#define NV40TCL_VP_UPLOAD_INST__SIZE 0x00000004 -#define NV40TCL_CLIP_PLANE_ENABLE 0x00001478 -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE0 (1 << 2) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE1 (1 << 6) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE2 (1 << 10) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE3 (1 << 14) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE4 (1 << 18) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE5 (1 << 22) -#define NV40TCL_POLYGON_STIPPLE_ENABLE 0x0000147c -#define NV40TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) -#define NV40TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 -#define NV40TCL_VTX_ATTR_3F_X(x) (0x00001500+((x)*16)) -#define NV40TCL_VTX_ATTR_3F_X__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_3F_Y(x) (0x00001504+((x)*16)) -#define NV40TCL_VTX_ATTR_3F_Y__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_3F_Z(x) (0x00001508+((x)*16)) -#define NV40TCL_VTX_ATTR_3F_Z__SIZE 0x00000010 -#define NV40TCL_VTXBUF_ADDRESS(x) (0x00001680+((x)*4)) -#define NV40TCL_VTXBUF_ADDRESS__SIZE 0x00000010 -#define NV40TCL_VTXBUF_ADDRESS_DMA1 (1 << 31) -#define NV40TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0 -#define NV40TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff -#define NV40TCL_VTX_CACHE_INVALIDATE 0x00001714 -#define NV40TCL_VTXFMT(x) (0x00001740+((x)*4)) -#define NV40TCL_VTXFMT__SIZE 0x00000010 -#define NV40TCL_VTXFMT_TYPE_SHIFT 0 -#define NV40TCL_VTXFMT_TYPE_MASK 0x0000000f -#define NV40TCL_VTXFMT_TYPE_FLOAT 0x00000002 -#define NV40TCL_VTXFMT_TYPE_UBYTE 0x00000004 -#define NV40TCL_VTXFMT_SIZE_SHIFT 4 -#define NV40TCL_VTXFMT_SIZE_MASK 0x000000f0 -#define NV40TCL_VTXFMT_STRIDE_SHIFT 8 -#define NV40TCL_VTXFMT_STRIDE_MASK 0x0000ff00 -#define NV40TCL_QUERY_RESET 0x000017c8 -#define NV40TCL_QUERY_UNK17CC 0x000017cc -#define NV40TCL_QUERY_GET 0x00001800 -#define NV40TCL_QUERY_GET_UNK24_SHIFT 24 -#define NV40TCL_QUERY_GET_UNK24_MASK 0xff000000 -#define NV40TCL_QUERY_GET_OFFSET_SHIFT 0 -#define NV40TCL_QUERY_GET_OFFSET_MASK 0x00ffffff -#define NV40TCL_BEGIN_END 0x00001808 -#define NV40TCL_BEGIN_END_STOP 0x00000000 -#define NV40TCL_BEGIN_END_POINTS 0x00000001 -#define NV40TCL_BEGIN_END_LINES 0x00000002 -#define NV40TCL_BEGIN_END_LINE_LOOP 0x00000003 -#define NV40TCL_BEGIN_END_LINE_STRIP 0x00000004 -#define NV40TCL_BEGIN_END_TRIANGLES 0x00000005 -#define NV40TCL_BEGIN_END_TRIANGLE_STRIP 0x00000006 -#define NV40TCL_BEGIN_END_TRIANGLE_FAN 0x00000007 -#define NV40TCL_BEGIN_END_QUADS 0x00000008 -#define NV40TCL_BEGIN_END_QUAD_STRIP 0x00000009 -#define NV40TCL_BEGIN_END_POLYGON 0x0000000a -#define NV40TCL_VB_ELEMENT_U16 0x0000180c -#define NV40TCL_VB_ELEMENT_U16_1_SHIFT 16 -#define NV40TCL_VB_ELEMENT_U16_1_MASK 0xffff0000 -#define NV40TCL_VB_ELEMENT_U16_0_SHIFT 0 -#define NV40TCL_VB_ELEMENT_U16_0_MASK 0x0000ffff -#define NV40TCL_VB_ELEMENT_U32 0x00001810 -#define NV40TCL_VB_VERTEX_BATCH 0x00001814 -#define NV40TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24 -#define NV40TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000 -#define NV40TCL_VB_VERTEX_BATCH_START_SHIFT 0 -#define NV40TCL_VB_VERTEX_BATCH_START_MASK 0x00ffffff -#define NV40TCL_VERTEX_DATA 0x00001818 -#define NV40TCL_IDXBUF_ADDRESS 0x0000181c -#define NV40TCL_IDXBUF_FORMAT 0x00001820 -#define NV40TCL_IDXBUF_FORMAT_TYPE_SHIFT 4 -#define NV40TCL_IDXBUF_FORMAT_TYPE_MASK 0x000000f0 -#define NV40TCL_IDXBUF_FORMAT_TYPE_U32 0x00000000 -#define NV40TCL_IDXBUF_FORMAT_TYPE_U16 0x00000010 -#define NV40TCL_IDXBUF_FORMAT_DMA1 (1 << 0) -#define NV40TCL_VB_INDEX_BATCH 0x00001824 -#define NV40TCL_VB_INDEX_BATCH_COUNT_SHIFT 24 -#define NV40TCL_VB_INDEX_BATCH_COUNT_MASK 0xff000000 -#define NV40TCL_VB_INDEX_BATCH_START_SHIFT 0 -#define NV40TCL_VB_INDEX_BATCH_START_MASK 0x00ffffff -#define NV40TCL_POLYGON_MODE_FRONT 0x00001828 -#define NV40TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 -#define NV40TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 -#define NV40TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 -#define NV40TCL_POLYGON_MODE_BACK 0x0000182c -#define NV40TCL_POLYGON_MODE_BACK_POINT 0x00001b00 -#define NV40TCL_POLYGON_MODE_BACK_LINE 0x00001b01 -#define NV40TCL_POLYGON_MODE_BACK_FILL 0x00001b02 -#define NV40TCL_CULL_FACE 0x00001830 -#define NV40TCL_CULL_FACE_FRONT 0x00000404 -#define NV40TCL_CULL_FACE_BACK 0x00000405 -#define NV40TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 -#define NV40TCL_FRONT_FACE 0x00001834 -#define NV40TCL_FRONT_FACE_CW 0x00000900 -#define NV40TCL_FRONT_FACE_CCW 0x00000901 -#define NV40TCL_POLYGON_SMOOTH_ENABLE 0x00001838 -#define NV40TCL_CULL_FACE_ENABLE 0x0000183c -#define NV40TCL_TEX_SIZE1(x) (0x00001840+((x)*4)) -#define NV40TCL_TEX_SIZE1__SIZE 0x00000008 -#define NV40TCL_TEX_SIZE1_DEPTH_SHIFT 20 -#define NV40TCL_TEX_SIZE1_DEPTH_MASK 0xfff00000 -#define NV40TCL_TEX_SIZE1_PITCH_SHIFT 0 -#define NV40TCL_TEX_SIZE1_PITCH_MASK 0x0000ffff -#define NV40TCL_VTX_ATTR_2F_X(x) (0x00001880+((x)*8)) -#define NV40TCL_VTX_ATTR_2F_X__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_2F_Y(x) (0x00001884+((x)*8)) -#define NV40TCL_VTX_ATTR_2F_Y__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_2I(x) (0x00001900+((x)*4)) -#define NV40TCL_VTX_ATTR_2I__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_2I_Y_SHIFT 16 -#define NV40TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 -#define NV40TCL_VTX_ATTR_2I_X_SHIFT 0 -#define NV40TCL_VTX_ATTR_2I_X_MASK 0x0000ffff -#define NV40TCL_VTX_ATTR_4I_0(x) (0x00001900+((x)*8)) -#define NV40TCL_VTX_ATTR_4I_0__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_4I_0_Y_SHIFT 16 -#define NV40TCL_VTX_ATTR_4I_0_Y_MASK 0xffff0000 -#define NV40TCL_VTX_ATTR_4I_0_X_SHIFT 0 -#define NV40TCL_VTX_ATTR_4I_0_X_MASK 0x0000ffff -#define NV40TCL_VTX_ATTR_4I_1(x) (0x00001904+((x)*8)) -#define NV40TCL_VTX_ATTR_4I_1__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_4I_1_W_SHIFT 16 -#define NV40TCL_VTX_ATTR_4I_1_W_MASK 0xffff0000 -#define NV40TCL_VTX_ATTR_4I_1_Z_SHIFT 0 -#define NV40TCL_VTX_ATTR_4I_1_Z_MASK 0x0000ffff -#define NV40TCL_TEX_OFFSET(x) (0x00001a00+((x)*32)) -#define NV40TCL_TEX_OFFSET__SIZE 0x00000010 -#define NV40TCL_TEX_FORMAT(x) (0x00001a04+((x)*32)) -#define NV40TCL_TEX_FORMAT__SIZE 0x00000010 -#define NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT 16 -#define NV40TCL_TEX_FORMAT_MIPMAP_COUNT_MASK 0x000f0000 -#define NV40TCL_TEX_FORMAT_RECT (1 << 14) -#define NV40TCL_TEX_FORMAT_LINEAR (1 << 13) -#define NV40TCL_TEX_FORMAT_FORMAT_SHIFT 8 -#define NV40TCL_TEX_FORMAT_FORMAT_MASK 0x00001f00 -#define NV40TCL_TEX_FORMAT_FORMAT_L8 0x00000100 -#define NV40TCL_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000200 -#define NV40TCL_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000300 -#define NV40TCL_TEX_FORMAT_FORMAT_R5G6B5 0x00000400 -#define NV40TCL_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000500 -#define NV40TCL_TEX_FORMAT_FORMAT_DXT1 0x00000600 -#define NV40TCL_TEX_FORMAT_FORMAT_DXT3 0x00000700 -#define NV40TCL_TEX_FORMAT_FORMAT_DXT5 0x00000800 -#define NV40TCL_TEX_FORMAT_FORMAT_A8L8 0x00000b00 -#define NV40TCL_TEX_FORMAT_FORMAT_Z24 0x00001000 -#define NV40TCL_TEX_FORMAT_FORMAT_Z16 0x00001200 -#define NV40TCL_TEX_FORMAT_FORMAT_HILO8 0x00001800 -#define NV40TCL_TEX_FORMAT_FORMAT_RGBA16F 0x00001a00 -#define NV40TCL_TEX_FORMAT_FORMAT_RGBA32F 0x00001b00 -#define NV40TCL_TEX_FORMAT_DIMS_SHIFT 4 -#define NV40TCL_TEX_FORMAT_DIMS_MASK 0x000000f0 -#define NV40TCL_TEX_FORMAT_DIMS_1D 0x00000010 -#define NV40TCL_TEX_FORMAT_DIMS_2D 0x00000020 -#define NV40TCL_TEX_FORMAT_DIMS_3D 0x00000030 -#define NV40TCL_TEX_FORMAT_NO_BORDER (1 << 3) -#define NV40TCL_TEX_FORMAT_CUBIC (1 << 2) -#define NV40TCL_TEX_FORMAT_DMA1 (1 << 1) -#define NV40TCL_TEX_FORMAT_DMA0 (1 << 0) -#define NV40TCL_TEX_WRAP(x) (0x00001a08+((x)*32)) -#define NV40TCL_TEX_WRAP__SIZE 0x00000010 -#define NV40TCL_TEX_WRAP_S_SHIFT 0 -#define NV40TCL_TEX_WRAP_S_MASK 0x000000ff -#define NV40TCL_TEX_WRAP_S_REPEAT 0x00000001 -#define NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT 0x00000002 -#define NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE 0x00000003 -#define NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER 0x00000004 -#define NV40TCL_TEX_WRAP_S_CLAMP 0x00000005 -#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE 0x00000006 -#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER 0x00000007 -#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP 0x00000008 -#define NV40TCL_TEX_WRAP_T_SHIFT 8 -#define NV40TCL_TEX_WRAP_T_MASK 0x0000ff00 -#define NV40TCL_TEX_WRAP_T_REPEAT 0x00000100 -#define NV40TCL_TEX_WRAP_T_MIRRORED_REPEAT 0x00000200 -#define NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE 0x00000300 -#define NV40TCL_TEX_WRAP_T_CLAMP_TO_BORDER 0x00000400 -#define NV40TCL_TEX_WRAP_T_CLAMP 0x00000500 -#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP_TO_EDGE 0x00000600 -#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP_TO_BORDER 0x00000700 -#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP 0x00000800 -#define NV40TCL_TEX_WRAP_R_SHIFT 16 -#define NV40TCL_TEX_WRAP_R_MASK 0x00ff0000 -#define NV40TCL_TEX_WRAP_R_REPEAT 0x00010000 -#define NV40TCL_TEX_WRAP_R_MIRRORED_REPEAT 0x00020000 -#define NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE 0x00030000 -#define NV40TCL_TEX_WRAP_R_CLAMP_TO_BORDER 0x00040000 -#define NV40TCL_TEX_WRAP_R_CLAMP 0x00050000 -#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP_TO_EDGE 0x00060000 -#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP_TO_BORDER 0x00070000 -#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP 0x00080000 -#define NV40TCL_TEX_WRAP_RCOMP_SHIFT 28 -#define NV40TCL_TEX_WRAP_RCOMP_MASK 0xf0000000 -#define NV40TCL_TEX_WRAP_RCOMP_NEVER 0x00000000 -#define NV40TCL_TEX_WRAP_RCOMP_GREATER 0x10000000 -#define NV40TCL_TEX_WRAP_RCOMP_EQUAL 0x20000000 -#define NV40TCL_TEX_WRAP_RCOMP_GEQUAL 0x30000000 -#define NV40TCL_TEX_WRAP_RCOMP_LESS 0x40000000 -#define NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL 0x50000000 -#define NV40TCL_TEX_WRAP_RCOMP_LEQUAL 0x60000000 -#define NV40TCL_TEX_WRAP_RCOMP_ALWAYS 0x70000000 -#define NV40TCL_TEX_ENABLE(x) (0x00001a0c+((x)*32)) -#define NV40TCL_TEX_ENABLE__SIZE 0x00000010 -#define NV40TCL_TEX_ENABLE_ENABLE (1 << 31) -#define NV40TCL_TEX_ENABLE_ANISO_SHIFT 4 -#define NV40TCL_TEX_ENABLE_ANISO_MASK 0x000000f0 -#define NV40TCL_TEX_ENABLE_ANISO_NONE 0x00000000 -#define NV40TCL_TEX_ENABLE_ANISO_2X 0x00000010 -#define NV40TCL_TEX_ENABLE_ANISO_4X 0x00000020 -#define NV40TCL_TEX_ENABLE_ANISO_6X 0x00000030 -#define NV40TCL_TEX_ENABLE_ANISO_8X 0x00000040 -#define NV40TCL_TEX_ENABLE_ANISO_10X 0x00000050 -#define NV40TCL_TEX_ENABLE_ANISO_12X 0x00000060 -#define NV40TCL_TEX_ENABLE_ANISO_16X 0x00000070 -#define NV40TCL_TEX_SWIZZLE(x) (0x00001a10+((x)*32)) -#define NV40TCL_TEX_SWIZZLE__SIZE 0x00000010 -#define NV40TCL_TEX_SWIZZLE_S0_X_SHIFT 14 -#define NV40TCL_TEX_SWIZZLE_S0_X_MASK 0x0000c000 -#define NV40TCL_TEX_SWIZZLE_S0_X_ZERO 0x00000000 -#define NV40TCL_TEX_SWIZZLE_S0_X_ONE 0x00004000 -#define NV40TCL_TEX_SWIZZLE_S0_X_S1 0x00008000 -#define NV40TCL_TEX_SWIZZLE_S0_Y_SHIFT 12 -#define NV40TCL_TEX_SWIZZLE_S0_Y_MASK 0x00003000 -#define NV40TCL_TEX_SWIZZLE_S0_Y_ZERO 0x00000000 -#define NV40TCL_TEX_SWIZZLE_S0_Y_ONE 0x00001000 -#define NV40TCL_TEX_SWIZZLE_S0_Y_S1 0x00002000 -#define NV40TCL_TEX_SWIZZLE_S0_Z_SHIFT 10 -#define NV40TCL_TEX_SWIZZLE_S0_Z_MASK 0x00000c00 -#define NV40TCL_TEX_SWIZZLE_S0_Z_ZERO 0x00000000 -#define NV40TCL_TEX_SWIZZLE_S0_Z_ONE 0x00000400 -#define NV40TCL_TEX_SWIZZLE_S0_Z_S1 0x00000800 -#define NV40TCL_TEX_SWIZZLE_S0_W_SHIFT 8 -#define NV40TCL_TEX_SWIZZLE_S0_W_MASK 0x00000300 -#define NV40TCL_TEX_SWIZZLE_S0_W_ZERO 0x00000000 -#define NV40TCL_TEX_SWIZZLE_S0_W_ONE 0x00000100 -#define NV40TCL_TEX_SWIZZLE_S0_W_S1 0x00000200 -#define NV40TCL_TEX_SWIZZLE_S1_X_SHIFT 6 -#define NV40TCL_TEX_SWIZZLE_S1_X_MASK 0x000000c0 -#define NV40TCL_TEX_SWIZZLE_S1_X_W 0x00000000 -#define NV40TCL_TEX_SWIZZLE_S1_X_Z 0x00000040 -#define NV40TCL_TEX_SWIZZLE_S1_X_Y 0x00000080 -#define NV40TCL_TEX_SWIZZLE_S1_X_X 0x000000c0 -#define NV40TCL_TEX_SWIZZLE_S1_Y_SHIFT 4 -#define NV40TCL_TEX_SWIZZLE_S1_Y_MASK 0x00000030 -#define NV40TCL_TEX_SWIZZLE_S1_Y_W 0x00000000 -#define NV40TCL_TEX_SWIZZLE_S1_Y_Z 0x00000010 -#define NV40TCL_TEX_SWIZZLE_S1_Y_Y 0x00000020 -#define NV40TCL_TEX_SWIZZLE_S1_Y_X 0x00000030 -#define NV40TCL_TEX_SWIZZLE_S1_Z_SHIFT 2 -#define NV40TCL_TEX_SWIZZLE_S1_Z_MASK 0x0000000c -#define NV40TCL_TEX_SWIZZLE_S1_Z_W 0x00000000 -#define NV40TCL_TEX_SWIZZLE_S1_Z_Z 0x00000004 -#define NV40TCL_TEX_SWIZZLE_S1_Z_Y 0x00000008 -#define NV40TCL_TEX_SWIZZLE_S1_Z_X 0x0000000c -#define NV40TCL_TEX_SWIZZLE_S1_W_SHIFT 0 -#define NV40TCL_TEX_SWIZZLE_S1_W_MASK 0x00000003 -#define NV40TCL_TEX_SWIZZLE_S1_W_W 0x00000000 -#define NV40TCL_TEX_SWIZZLE_S1_W_Z 0x00000001 -#define NV40TCL_TEX_SWIZZLE_S1_W_Y 0x00000002 -#define NV40TCL_TEX_SWIZZLE_S1_W_X 0x00000003 -#define NV40TCL_TEX_FILTER(x) (0x00001a14+((x)*32)) -#define NV40TCL_TEX_FILTER__SIZE 0x00000010 -#define NV40TCL_TEX_FILTER_SIGNED_ALPHA (1 << 31) -#define NV40TCL_TEX_FILTER_SIGNED_RED (1 << 30) -#define NV40TCL_TEX_FILTER_SIGNED_GREEN (1 << 29) -#define NV40TCL_TEX_FILTER_SIGNED_BLUE (1 << 28) -#define NV40TCL_TEX_FILTER_MIN_SHIFT 16 -#define NV40TCL_TEX_FILTER_MIN_MASK 0x000f0000 -#define NV40TCL_TEX_FILTER_MIN_NEAREST 0x00010000 -#define NV40TCL_TEX_FILTER_MIN_LINEAR 0x00020000 -#define NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST 0x00030000 -#define NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST 0x00040000 -#define NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR 0x00050000 -#define NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR 0x00060000 -#define NV40TCL_TEX_FILTER_MAG_SHIFT 24 -#define NV40TCL_TEX_FILTER_MAG_MASK 0x0f000000 -#define NV40TCL_TEX_FILTER_MAG_NEAREST 0x01000000 -#define NV40TCL_TEX_FILTER_MAG_LINEAR 0x02000000 -#define NV40TCL_TEX_SIZE0(x) (0x00001a18+((x)*32)) -#define NV40TCL_TEX_SIZE0__SIZE 0x00000010 -#define NV40TCL_TEX_SIZE0_H_SHIFT 0 -#define NV40TCL_TEX_SIZE0_H_MASK 0x0000ffff -#define NV40TCL_TEX_SIZE0_W_SHIFT 16 -#define NV40TCL_TEX_SIZE0_W_MASK 0xffff0000 -#define NV40TCL_TEX_BORDER_COLOR(x) (0x00001a1c+((x)*32)) -#define NV40TCL_TEX_BORDER_COLOR__SIZE 0x00000010 -#define NV40TCL_TEX_BORDER_COLOR_B_SHIFT 0 -#define NV40TCL_TEX_BORDER_COLOR_B_MASK 0x000000ff -#define NV40TCL_TEX_BORDER_COLOR_G_SHIFT 8 -#define NV40TCL_TEX_BORDER_COLOR_G_MASK 0x0000ff00 -#define NV40TCL_TEX_BORDER_COLOR_R_SHIFT 16 -#define NV40TCL_TEX_BORDER_COLOR_R_MASK 0x00ff0000 -#define NV40TCL_TEX_BORDER_COLOR_A_SHIFT 24 -#define NV40TCL_TEX_BORDER_COLOR_A_MASK 0xff000000 -#define NV40TCL_VTX_ATTR_4F_X(x) (0x00001c00+((x)*16)) -#define NV40TCL_VTX_ATTR_4F_X__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_4F_Y(x) (0x00001c04+((x)*16)) -#define NV40TCL_VTX_ATTR_4F_Y__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_4F_Z(x) (0x00001c08+((x)*16)) -#define NV40TCL_VTX_ATTR_4F_Z__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_4F_W(x) (0x00001c0c+((x)*16)) -#define NV40TCL_VTX_ATTR_4F_W__SIZE 0x00000010 -#define NV40TCL_FP_CONTROL 0x00001d60 -#define NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT 24 -#define NV40TCL_FP_CONTROL_TEMP_COUNT_MASK 0xff000000 -#define NV40TCL_FP_CONTROL_KIL (1 << 7) -#define NV40TCL_MULTISAMPLE_CONTROL 0x00001d7c -#define NV40TCL_CLEAR_VALUE_DEPTH 0x00001d8c -#define NV40TCL_CLEAR_VALUE_COLOR 0x00001d90 -#define NV40TCL_CLEAR_BUFFERS 0x00001d94 -#define NV40TCL_CLEAR_BUFFERS_COLOR_A (1 << 7) -#define NV40TCL_CLEAR_BUFFERS_COLOR_B (1 << 6) -#define NV40TCL_CLEAR_BUFFERS_COLOR_G (1 << 5) -#define NV40TCL_CLEAR_BUFFERS_COLOR_R (1 << 4) -#define NV40TCL_CLEAR_BUFFERS_STENCIL (1 << 1) -#define NV40TCL_CLEAR_BUFFERS_DEPTH (1 << 0) -#define NV40TCL_LINE_STIPPLE_ENABLE 0x00001db4 -#define NV40TCL_LINE_STIPPLE_PATTERN 0x00001db8 -#define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_SHIFT 0 -#define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff -#define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16 -#define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000 -#define NV40TCL_VP_UPLOAD_FROM_ID 0x00001e9c -#define NV40TCL_VP_START_FROM_ID 0x00001ea0 -#define NV40TCL_POINT_SIZE 0x00001ee0 -#define NV40TCL_POINT_SPRITE 0x00001ee8 -#define NV40TCL_VP_UPLOAD_CONST_ID 0x00001efc -#define NV40TCL_VP_UPLOAD_CONST_X(x) (0x00001f00+((x)*16)) -#define NV40TCL_VP_UPLOAD_CONST_X__SIZE 0x00000004 -#define NV40TCL_VP_UPLOAD_CONST_Y(x) (0x00001f04+((x)*16)) -#define NV40TCL_VP_UPLOAD_CONST_Y__SIZE 0x00000004 -#define NV40TCL_VP_UPLOAD_CONST_Z(x) (0x00001f08+((x)*16)) -#define NV40TCL_VP_UPLOAD_CONST_Z__SIZE 0x00000004 -#define NV40TCL_VP_UPLOAD_CONST_W(x) (0x00001f0c+((x)*16)) -#define NV40TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004 -#define NV40TCL_TEX_CACHE_CTL 0x00001fd8 -#define NV40TCL_VP_ATTRIB_EN 0x00001ff0 -#define NV40TCL_VP_RESULT_EN 0x00001ff4 - - -#define NV44TCL 0x00004497 - - - -#define NV50_2D 0x0000502d - -#define NV50_2D_NOP 0x00000100 -#define NV50_2D_NOTIFY 0x00000104 -#define NV50_2D_DMA_NOTIFY 0x00000180 -#define NV50_2D_DMA_IN_MEMORY0 0x00000184 -#define NV50_2D_DMA_IN_MEMORY1 0x00000188 -#define NV50_2D_DMA_IN_MEMORY2 0x0000018c -#define NV50_2D_DST_FORMAT 0x00000200 -#define NV50_2D_DST_FORMAT_32BPP 0x000000cf -#define NV50_2D_DST_FORMAT_24BPP 0x000000e6 -#define NV50_2D_DST_FORMAT_16BPP 0x000000e8 -#define NV50_2D_DST_FORMAT_8BPP 0x000000f3 -#define NV50_2D_DST_FORMAT_15BPP 0x000000f8 -#define NV50_2D_DST_PITCH 0x00000214 -#define NV50_2D_DST_WIDTH 0x00000218 -#define NV50_2D_DST_HEIGHT 0x0000021c -#define NV50_2D_DST_ADDRESS_HIGH 0x00000220 -#define NV50_2D_DST_ADDRESS_LOW 0x00000224 -#define NV50_2D_SRC_FORMAT 0x00000230 -#define NV50_2D_SRC_FORMAT_32BPP 0x000000cf -#define NV50_2D_SRC_FORMAT_24BPP 0x000000e6 -#define NV50_2D_SRC_FORMAT_16BPP 0x000000e8 -#define NV50_2D_SRC_FORMAT_8BPP 0x000000f3 -#define NV50_2D_SRC_FORMAT_15BPP 0x000000f8 -#define NV50_2D_SRC_PITCH 0x00000244 -#define NV50_2D_SRC_WIDTH 0x00000248 -#define NV50_2D_SRC_HEIGHT 0x0000024c -#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250 -#define NV50_2D_SRC_ADDRESS_LOW 0x00000254 -#define NV50_2D_CLIP_X 0x00000280 -#define NV50_2D_CLIP_Y 0x00000284 -#define NV50_2D_CLIP_Z 0x00000288 -#define NV50_2D_CLIP_W 0x0000028c -#define NV50_2D_ROP 0x000002a0 -#define NV50_2D_OPERATION 0x000002ac -#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000 -#define NV50_2D_OPERATION_ROP_AND 0x00000001 -#define NV50_2D_OPERATION_BLEND_AND 0x00000002 -#define NV50_2D_OPERATION_SRCCOPY 0x00000003 -#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000004 -#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000005 -#define NV50_2D_PATTERN_FORMAT 0x000002e8 -#define NV50_2D_PATTERN_FORMAT_16BPP 0x00000000 -#define NV50_2D_PATTERN_FORMAT_15BPP 0x00000001 -#define NV50_2D_PATTERN_FORMAT_32BPP 0x00000002 -#define NV50_2D_PATTERN_FORMAT_8BPP 0x00000003 -#define NV50_2D_PATTERN_COLOR(x) (0x000002f0+((x)*4)) -#define NV50_2D_PATTERN_COLOR__SIZE 0x00000002 -#define NV50_2D_PATTERN_BITMAP(x) (0x000002f8+((x)*4)) -#define NV50_2D_PATTERN_BITMAP__SIZE 0x00000002 -#define NV50_2D_RECT_FORMAT 0x00000584 -#define NV50_2D_RECT_FORMAT_32BPP 0x000000cf -#define NV50_2D_RECT_FORMAT_24BPP 0x000000e6 -#define NV50_2D_RECT_FORMAT_16BPP 0x000000e8 -#define NV50_2D_RECT_FORMAT_8BPP 0x000000f3 -#define NV50_2D_RECT_FORMAT_15BPP 0x000000f8 -#define NV50_2D_RECT_COLOR 0x00000588 -#define NV50_2D_RECT_X1 0x00000600 -#define NV50_2D_RECT_Y1 0x00000604 -#define NV50_2D_RECT_X2 0x00000608 -#define NV50_2D_RECT_Y2 0x0000060c -#define NV50_2D_BLIT_DST_X 0x000008b0 -#define NV50_2D_BLIT_DST_Y 0x000008b4 -#define NV50_2D_BLIT_DST_W 0x000008b8 -#define NV50_2D_BLIT_DST_H 0x000008bc -#define NV50_2D_BLIT_SRC_X 0x000008d4 -#define NV50_2D_BLIT_SRC_Y 0x000008dc -#define NV50_2D_SIFC_UNK0800 0x00000800 -#define NV50_2D_SIFC_FORMAT 0x00000804 -#define NV50_2D_SIFC_FORMAT_32BPP 0x000000cf -#define NV50_2D_SIFC_FORMAT_24BPP 0x000000e6 -#define NV50_2D_SIFC_FORMAT_16BPP 0x000000e8 -#define NV50_2D_SIFC_FORMAT_8BPP 0x000000f3 -#define NV50_2D_SIFC_FORMAT_15BPP 0x000000f8 -#define NV50_2D_SIFC_WIDTH 0x00000838 -#define NV50_2D_SIFC_HEIGHT 0x0000083c -#define NV50_2D_SIFC_SCALE_UNK0840 0x00000840 -#define NV50_2D_SIFC_SCALE_UNK0844 0x00000844 -#define NV50_2D_SIFC_SCALE_UNK0848 0x00000848 -#define NV50_2D_SIFC_SCALE_UNK084C 0x0000084c -#define NV50_2D_SIFC_UNK0850 0x00000850 -#define NV50_2D_SIFC_DST_X 0x00000854 -#define NV50_2D_SIFC_UNK0858 0x00000858 -#define NV50_2D_SIFC_DST_Y 0x0000085c -#define NV50_2D_SIFC_DATA 0x00000860 - - -#define NV50_MEMORY_TO_MEMORY_FORMAT 0x00005039 - -#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH 0x00000238 -#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x0000023c - - -#define NV50TCL 0x00005097 - -#define NV50TCL_NOP 0x00000100 -#define NV50TCL_NOTIFY 0x00000104 -#define NV50TCL_DMA_NOTIFY 0x00000180 -#define NV50TCL_DMA_IN_MEMORY0(x) (0x00000184+((x)*4)) -#define NV50TCL_DMA_IN_MEMORY0__SIZE 0x0000000b -#define NV50TCL_DMA_IN_MEMORY1(x) (0x000001c0+((x)*4)) -#define NV50TCL_DMA_IN_MEMORY1__SIZE 0x00000008 -#define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32)) -#define NV50TCL_RT_ADDRESS_HIGH__SIZE 0x00000008 -#define NV50TCL_RT_ADDRESS_LOW(x) (0x00000204+((x)*32)) -#define NV50TCL_RT_ADDRESS_LOW__SIZE 0x00000008 -#define NV50TCL_RT_FORMAT(x) (0x00000208+((x)*32)) -#define NV50TCL_RT_FORMAT__SIZE 0x00000008 -#define NV50TCL_RT_UNK3(x) (0x0000020c+((x)*32)) -#define NV50TCL_RT_UNK3__SIZE 0x00000008 -#define NV50TCL_RT_UNK4(x) (0x00000210+((x)*32)) -#define NV50TCL_RT_UNK4__SIZE 0x00000008 -#define NV50TCL_VTX_ATTR_1F(x) (0x00000300+((x)*4)) -#define NV50TCL_VTX_ATTR_1F__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_2F_X(x) (0x00000380+((x)*8)) -#define NV50TCL_VTX_ATTR_2F_X__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_2F_Y(x) (0x00000384+((x)*8)) -#define NV50TCL_VTX_ATTR_2F_Y__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_3F_X(x) (0x00000400+((x)*16)) -#define NV50TCL_VTX_ATTR_3F_X__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_3F_Y(x) (0x00000404+((x)*16)) -#define NV50TCL_VTX_ATTR_3F_Y__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_3F_Z(x) (0x00000408+((x)*16)) -#define NV50TCL_VTX_ATTR_3F_Z__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_3F_W(x) (0x0000040c+((x)*16)) -#define NV50TCL_VTX_ATTR_3F_W__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4F_X(x) (0x00000500+((x)*16)) -#define NV50TCL_VTX_ATTR_4F_X__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4F_Y(x) (0x00000504+((x)*16)) -#define NV50TCL_VTX_ATTR_4F_Y__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4F_Z(x) (0x00000508+((x)*16)) -#define NV50TCL_VTX_ATTR_4F_Z__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4F_W(x) (0x0000050c+((x)*16)) -#define NV50TCL_VTX_ATTR_4F_W__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_2I(x) (0x00000680+((x)*4)) -#define NV50TCL_VTX_ATTR_2I__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_2I_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_2I_X_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_2I_Y_SHIFT 16 -#define NV50TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_4I_0(x) (0x00000700+((x)*8)) -#define NV50TCL_VTX_ATTR_4I_0__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4I_0_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_4I_0_X_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_4I_0_Y_SHIFT 16 -#define NV50TCL_VTX_ATTR_4I_0_Y_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_4I_1(x) (0x00000704+((x)*8)) -#define NV50TCL_VTX_ATTR_4I_1__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4I_1_Z_SHIFT 0 -#define NV50TCL_VTX_ATTR_4I_1_Z_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_4I_1_W_SHIFT 16 -#define NV50TCL_VTX_ATTR_4I_1_W_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_4NI_0(x) (0x00000780+((x)*8)) -#define NV50TCL_VTX_ATTR_4NI_0__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4NI_0_X_SHIFT 0 -#define NV50TCL_VTX_ATTR_4NI_0_X_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_4NI_0_Y_SHIFT 16 -#define NV50TCL_VTX_ATTR_4NI_0_Y_MASK 0xffff0000 -#define NV50TCL_VTX_ATTR_4NI_1(x) (0x00000784+((x)*8)) -#define NV50TCL_VTX_ATTR_4NI_1__SIZE 0x00000010 -#define NV50TCL_VTX_ATTR_4NI_1_Z_SHIFT 0 -#define NV50TCL_VTX_ATTR_4NI_1_Z_MASK 0x0000ffff -#define NV50TCL_VTX_ATTR_4NI_1_W_SHIFT 16 -#define NV50TCL_VTX_ATTR_4NI_1_W_MASK 0xffff0000 -#define NV50TCL_VERTEX_ARRAY_FORMAT(x) (0x00000900+((x)*16)) -#define NV50TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010 -#define NV50TCL_VIEWPORT_UNK0(x) (0x00000a00+((x)*4)) -#define NV50TCL_VIEWPORT_UNK0__SIZE 0x00000003 -#define NV50TCL_VIEWPORT_UNK1(x) (0x00000a0c+((x)*4)) -#define NV50TCL_VIEWPORT_UNK1__SIZE 0x00000003 -#define NV50TCL_VIEWPORT_HORIZ 0x00000c00 -#define NV50TCL_VIEWPORT_HORIZ_X_SHIFT 0 -#define NV50TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff -#define NV50TCL_VIEWPORT_HORIZ_W_SHIFT 16 -#define NV50TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 -#define NV50TCL_VIEWPORT_VERT 0x00000c04 -#define NV50TCL_VIEWPORT_VERT_Y_SHIFT 0 -#define NV50TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff -#define NV50TCL_VIEWPORT_VERT_H_SHIFT 16 -#define NV50TCL_VIEWPORT_VERT_H_MASK 0xffff0000 -#define NV50TCL_DEPTH_RANGE_NEAR 0x00000c08 -#define NV50TCL_DEPTH_RANGE_FAR 0x00000c0c -#define NV50TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8)) -#define NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 -#define NV50TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8)) -#define NV50TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 -#define NV50TCL_VERTEX_BUFFER_FIRST 0x00000d74 -#define NV50TCL_VERTEX_BUFFER_COUNT 0x00000d78 -#define NV50TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4)) -#define NV50TCL_CLEAR_COLOR__SIZE 0x00000004 -#define NV50TCL_CLEAR_DEPTH 0x00000d90 -#define NV50TCL_CLEAR_STENCIL 0x00000da0 -#define NV50TCL_POLYGON_MODE_FRONT 0x00000dac -#define NV50TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 -#define NV50TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 -#define NV50TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 -#define NV50TCL_POLYGON_MODE_BACK 0x00000db0 -#define NV50TCL_POLYGON_MODE_BACK_POINT 0x00001b00 -#define NV50TCL_POLYGON_MODE_BACK_LINE 0x00001b01 -#define NV50TCL_POLYGON_MODE_BACK_FILL 0x00001b02 -#define NV50TCL_POLYGON_SMOOTH_ENABLE 0x00000db4 -#define NV50TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 -#define NV50TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 -#define NV50TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 -#define NV50TCL_SCISSOR_HORIZ 0x00000e04 -#define NV50TCL_SCISSOR_HORIZ_L_SHIFT 0 -#define NV50TCL_SCISSOR_HORIZ_L_MASK 0x0000ffff -#define NV50TCL_SCISSOR_HORIZ_R_SHIFT 16 -#define NV50TCL_SCISSOR_HORIZ_R_MASK 0xffff0000 -#define NV50TCL_SCISSOR_VERT 0x00000e08 -#define NV50TCL_SCISSOR_VERT_T_SHIFT 0 -#define NV50TCL_SCISSOR_VERT_T_MASK 0x0000ffff -#define NV50TCL_SCISSOR_VERT_B_SHIFT 16 -#define NV50TCL_SCISSOR_VERT_B_MASK 0xffff0000 -#define NV50TCL_VP_UPLOAD_CONST_ID 0x00000f00 -#define NV50TCL_VP_UPLOAD_CONST(x) (0x00000f04+((x)*4)) -#define NV50TCL_VP_UPLOAD_CONST__SIZE 0x00000010 -#define NV50TCL_STENCIL_FRONT_FUNC_REF 0x00000f54 -#define NV50TCL_STENCIL_FRONT_MASK 0x00000f58 -#define NV50TCL_STENCIL_FRONT_FUNC_MASK 0x00000f5c -#define NV50TCL_GP_ADDRESS_HIGH 0x00000f70 -#define NV50TCL_GP_ADDRESS_LOW 0x00000f74 -#define NV50TCL_VP_ADDRESS_HIGH 0x00000f7c -#define NV50TCL_VP_ADDRESS_LOW 0x00000f80 -#define NV50TCL_FP_ADDRESS_HIGH 0x00000fa4 -#define NV50TCL_FP_ADDRESS_LOW 0x00000fa8 -#define NV50TCL_ZETA_ADDRESS_HIGH 0x00000fe0 -#define NV50TCL_ZETA_ADDRESS_LOW 0x00000fe4 -#define NV50TCL_UNKFF4 0x00000ff4 -#define NV50TCL_UNKFF4_W_SHIFT 16 -#define NV50TCL_UNKFF4_W_MASK 0xffff0000 -#define NV50TCL_UNKFF8 0x00000ff8 -#define NV50TCL_UNKFF8_H_SHIFT 16 -#define NV50TCL_UNKFF8_H_MASK 0xffff0000 -#define NV50TCL_RT_HORIZ(x) (0x00001240+((x)*8)) -#define NV50TCL_RT_HORIZ__SIZE 0x00000008 -#define NV50TCL_RT_VERT(x) (0x00001244+((x)*8)) -#define NV50TCL_RT_VERT__SIZE 0x00000008 -#define NV50TCL_DEPTH_TEST_ENABLE 0x000012cc -#define NV50TCL_SHADE_MODEL 0x000012d4 -#define NV50TCL_SHADE_MODEL_FLAT 0x00001d00 -#define NV50TCL_SHADE_MODEL_SMOOTH 0x00001d01 -#define NV50TCL_DEPTH_WRITE_ENABLE 0x000012e8 -#define NV50TCL_ALPHA_TEST_ENABLE 0x000012ec -#define NV50TCL_DEPTH_TEST_FUNC 0x0000130c -#define NV50TCL_DEPTH_TEST_FUNC_NEVER 0x00000200 -#define NV50TCL_DEPTH_TEST_FUNC_LESS 0x00000201 -#define NV50TCL_DEPTH_TEST_FUNC_EQUAL 0x00000202 -#define NV50TCL_DEPTH_TEST_FUNC_LEQUAL 0x00000203 -#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204 -#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204 -#define NV50TCL_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 -#define NV50TCL_DEPTH_TEST_FUNC_GEQUAL 0x00000206 -#define NV50TCL_DEPTH_TEST_FUNC_ALWAYS 0x00000207 -#define NV50TCL_ALPHA_TEST_REF 0x00001310 -#define NV50TCL_ALPHA_TEST_FUNC 0x00001314 -#define NV50TCL_ALPHA_TEST_FUNC_NEVER 0x00000200 -#define NV50TCL_ALPHA_TEST_FUNC_LESS 0x00000201 -#define NV50TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202 -#define NV50TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203 -#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 -#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 -#define NV50TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 -#define NV50TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206 -#define NV50TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207 -#define NV50TCL_BLEND_COLOR(x) (0x0000131c+((x)*4)) -#define NV50TCL_BLEND_COLOR__SIZE 0x00000004 -#define NV50TCL_BLEND_EQUATION_RGB 0x00001340 -#define NV50TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 -#define NV50TCL_BLEND_EQUATION_RGB_MIN 0x00008007 -#define NV50TCL_BLEND_EQUATION_RGB_MAX 0x00008008 -#define NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a -#define NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b -#define NV50TCL_BLEND_FUNC_SRC_RGB 0x00001344 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001 -#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 -#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 -#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 -#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV50TCL_BLEND_FUNC_DST_RGB 0x00001348 -#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001 -#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 -#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 -#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 -#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV50TCL_BLEND_EQUATION_ALPHA 0x0000134c -#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 -#define NV50TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007 -#define NV50TCL_BLEND_EQUATION_ALPHA_MAX 0x00008008 -#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a -#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b -#define NV50TCL_BLEND_FUNC_SRC_ALPHA 0x00001350 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00000001 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00000300 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00000302 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00000304 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00000306 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00000307 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00000308 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x00008001 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x00008003 -#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV50TCL_BLEND_FUNC_DST_ALPHA 0x00001358 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00000001 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00000300 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00000302 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00000304 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00000306 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00000307 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00000308 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x00008001 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x00008003 -#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004 -#define NV50TCL_BLEND_ENABLE(x) (0x00001360+((x)*4)) -#define NV50TCL_BLEND_ENABLE__SIZE 0x00000008 -#define NV50TCL_STENCIL_BACK_ENABLE 0x00001380 -#define NV50TCL_STENCIL_BACK_OP_FAIL 0x00001384 -#define NV50TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 -#define NV50TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a -#define NV50TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 -#define NV50TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 -#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 -#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL 0x00001388 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_BACK_OP_ZPASS 0x0000138c -#define NV50TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a -#define NV50TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC 0x00001390 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 -#define NV50TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 -#define NV50TCL_STENCIL_BACK_FUNC_REF 0x00001394 -#define NV50TCL_STENCIL_BACK_MASK 0x00001398 -#define NV50TCL_STENCIL_BACK_FUNC_MASK 0x0000139c -#define NV50TCL_LINE_WIDTH 0x000013b0 -#define NV50TCL_VP_START_ID 0x0000140c -#define NV50TCL_GP_START_ID 0x00001410 -#define NV50TCL_FP_START_ID 0x00001414 -#define NV50TCL_POINT_SIZE 0x00001518 -#define NV50TCL_TEX_CB0_ADDRESS_HIGH 0x0000155c -#define NV50TCL_TEX_CB0_ADDRESS_LOW 0x00001560 -#define NV50TCL_POLYGON_OFFSET_FACTOR 0x0000156c -#define NV50TCL_LINE_SMOOTH_ENABLE 0x00001570 -#define NV50TCL_TEX_CB1_ADDRESS_HIGH 0x00001574 -#define NV50TCL_TEX_CB1_ADDRESS_LOW 0x00001578 -#define NV50TCL_STENCIL_FRONT_ENABLE 0x00001594 -#define NV50TCL_STENCIL_FRONT_OP_FAIL 0x00001598 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a -#define NV50TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL 0x0000159c -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS 0x000015a0 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 -#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC 0x000015a4 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 -#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 -#define NV50TCL_POLYGON_OFFSET_UNITS 0x000015bc -#define NV50TCL_VERTEX_BEGIN 0x000015dc -#define NV50TCL_VERTEX_BEGIN_POINTS 0x00000000 -#define NV50TCL_VERTEX_BEGIN_LINES 0x00000001 -#define NV50TCL_VERTEX_BEGIN_LINE_LOOP 0x00000002 -#define NV50TCL_VERTEX_BEGIN_LINE_STRIP 0x00000003 -#define NV50TCL_VERTEX_BEGIN_TRIANGLES 0x00000004 -#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP 0x00000005 -#define NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN 0x00000006 -#define NV50TCL_VERTEX_BEGIN_QUADS 0x00000007 -#define NV50TCL_VERTEX_BEGIN_QUAD_STRIP 0x00000008 -#define NV50TCL_VERTEX_BEGIN_POLYGON 0x00000009 -#define NV50TCL_VERTEX_END 0x000015e0 -#define NV50TCL_VERTEX_DATA 0x00001640 -#define NV50TCL_VP_ATTR_EN_0 0x00001650 -#define NV50TCL_VP_ATTR_EN_0_7_SHIFT 28 -#define NV50TCL_VP_ATTR_EN_0_7_MASK 0xf0000000 -#define NV50TCL_VP_ATTR_EN_0_7_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_7_XNNN 0x10000000 -#define NV50TCL_VP_ATTR_EN_0_7_NYNN 0x20000000 -#define NV50TCL_VP_ATTR_EN_0_7_XYNN 0x30000000 -#define NV50TCL_VP_ATTR_EN_0_7_NNZN 0x40000000 -#define NV50TCL_VP_ATTR_EN_0_7_XNZN 0x50000000 -#define NV50TCL_VP_ATTR_EN_0_7_NYZN 0x60000000 -#define NV50TCL_VP_ATTR_EN_0_7_XYZN 0x70000000 -#define NV50TCL_VP_ATTR_EN_0_7_NNNW 0x80000000 -#define NV50TCL_VP_ATTR_EN_0_7_XNNW 0x90000000 -#define NV50TCL_VP_ATTR_EN_0_7_NYNW 0xa0000000 -#define NV50TCL_VP_ATTR_EN_0_7_XYNW 0xb0000000 -#define NV50TCL_VP_ATTR_EN_0_7_NNZW 0xc0000000 -#define NV50TCL_VP_ATTR_EN_0_7_XNZW 0xd0000000 -#define NV50TCL_VP_ATTR_EN_0_7_NYZW 0xe0000000 -#define NV50TCL_VP_ATTR_EN_0_7_XYZW 0xf0000000 -#define NV50TCL_VP_ATTR_EN_0_6_SHIFT 24 -#define NV50TCL_VP_ATTR_EN_0_6_MASK 0x0f000000 -#define NV50TCL_VP_ATTR_EN_0_6_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_6_XNNN 0x01000000 -#define NV50TCL_VP_ATTR_EN_0_6_NYNN 0x02000000 -#define NV50TCL_VP_ATTR_EN_0_6_XYNN 0x03000000 -#define NV50TCL_VP_ATTR_EN_0_6_NNZN 0x04000000 -#define NV50TCL_VP_ATTR_EN_0_6_XNZN 0x05000000 -#define NV50TCL_VP_ATTR_EN_0_6_NYZN 0x06000000 -#define NV50TCL_VP_ATTR_EN_0_6_XYZN 0x07000000 -#define NV50TCL_VP_ATTR_EN_0_6_NNNW 0x08000000 -#define NV50TCL_VP_ATTR_EN_0_6_XNNW 0x09000000 -#define NV50TCL_VP_ATTR_EN_0_6_NYNW 0x0a000000 -#define NV50TCL_VP_ATTR_EN_0_6_XYNW 0x0b000000 -#define NV50TCL_VP_ATTR_EN_0_6_NNZW 0x0c000000 -#define NV50TCL_VP_ATTR_EN_0_6_XNZW 0x0d000000 -#define NV50TCL_VP_ATTR_EN_0_6_NYZW 0x0e000000 -#define NV50TCL_VP_ATTR_EN_0_6_XYZW 0x0f000000 -#define NV50TCL_VP_ATTR_EN_0_5_SHIFT 20 -#define NV50TCL_VP_ATTR_EN_0_5_MASK 0x00f00000 -#define NV50TCL_VP_ATTR_EN_0_5_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_5_XNNN 0x00100000 -#define NV50TCL_VP_ATTR_EN_0_5_NYNN 0x00200000 -#define NV50TCL_VP_ATTR_EN_0_5_XYNN 0x00300000 -#define NV50TCL_VP_ATTR_EN_0_5_NNZN 0x00400000 -#define NV50TCL_VP_ATTR_EN_0_5_XNZN 0x00500000 -#define NV50TCL_VP_ATTR_EN_0_5_NYZN 0x00600000 -#define NV50TCL_VP_ATTR_EN_0_5_XYZN 0x00700000 -#define NV50TCL_VP_ATTR_EN_0_5_NNNW 0x00800000 -#define NV50TCL_VP_ATTR_EN_0_5_XNNW 0x00900000 -#define NV50TCL_VP_ATTR_EN_0_5_NYNW 0x00a00000 -#define NV50TCL_VP_ATTR_EN_0_5_XYNW 0x00b00000 -#define NV50TCL_VP_ATTR_EN_0_5_NNZW 0x00c00000 -#define NV50TCL_VP_ATTR_EN_0_5_XNZW 0x00d00000 -#define NV50TCL_VP_ATTR_EN_0_5_NYZW 0x00e00000 -#define NV50TCL_VP_ATTR_EN_0_5_XYZW 0x00f00000 -#define NV50TCL_VP_ATTR_EN_0_4_SHIFT 16 -#define NV50TCL_VP_ATTR_EN_0_4_MASK 0x000f0000 -#define NV50TCL_VP_ATTR_EN_0_4_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_4_XNNN 0x00010000 -#define NV50TCL_VP_ATTR_EN_0_4_NYNN 0x00020000 -#define NV50TCL_VP_ATTR_EN_0_4_XYNN 0x00030000 -#define NV50TCL_VP_ATTR_EN_0_4_NNZN 0x00040000 -#define NV50TCL_VP_ATTR_EN_0_4_XNZN 0x00050000 -#define NV50TCL_VP_ATTR_EN_0_4_NYZN 0x00060000 -#define NV50TCL_VP_ATTR_EN_0_4_XYZN 0x00070000 -#define NV50TCL_VP_ATTR_EN_0_4_NNNW 0x00080000 -#define NV50TCL_VP_ATTR_EN_0_4_XNNW 0x00090000 -#define NV50TCL_VP_ATTR_EN_0_4_NYNW 0x000a0000 -#define NV50TCL_VP_ATTR_EN_0_4_XYNW 0x000b0000 -#define NV50TCL_VP_ATTR_EN_0_4_NNZW 0x000c0000 -#define NV50TCL_VP_ATTR_EN_0_4_XNZW 0x000d0000 -#define NV50TCL_VP_ATTR_EN_0_4_NYZW 0x000e0000 -#define NV50TCL_VP_ATTR_EN_0_4_XYZW 0x000f0000 -#define NV50TCL_VP_ATTR_EN_0_3_SHIFT 12 -#define NV50TCL_VP_ATTR_EN_0_3_MASK 0x0000f000 -#define NV50TCL_VP_ATTR_EN_0_3_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_3_XNNN 0x00001000 -#define NV50TCL_VP_ATTR_EN_0_3_NYNN 0x00002000 -#define NV50TCL_VP_ATTR_EN_0_3_XYNN 0x00003000 -#define NV50TCL_VP_ATTR_EN_0_3_NNZN 0x00004000 -#define NV50TCL_VP_ATTR_EN_0_3_XNZN 0x00005000 -#define NV50TCL_VP_ATTR_EN_0_3_NYZN 0x00006000 -#define NV50TCL_VP_ATTR_EN_0_3_XYZN 0x00007000 -#define NV50TCL_VP_ATTR_EN_0_3_NNNW 0x00008000 -#define NV50TCL_VP_ATTR_EN_0_3_XNNW 0x00009000 -#define NV50TCL_VP_ATTR_EN_0_3_NYNW 0x0000a000 -#define NV50TCL_VP_ATTR_EN_0_3_XYNW 0x0000b000 -#define NV50TCL_VP_ATTR_EN_0_3_NNZW 0x0000c000 -#define NV50TCL_VP_ATTR_EN_0_3_XNZW 0x0000d000 -#define NV50TCL_VP_ATTR_EN_0_3_NYZW 0x0000e000 -#define NV50TCL_VP_ATTR_EN_0_3_XYZW 0x0000f000 -#define NV50TCL_VP_ATTR_EN_0_2_SHIFT 8 -#define NV50TCL_VP_ATTR_EN_0_2_MASK 0x00000f00 -#define NV50TCL_VP_ATTR_EN_0_2_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_2_XNNN 0x00000100 -#define NV50TCL_VP_ATTR_EN_0_2_NYNN 0x00000200 -#define NV50TCL_VP_ATTR_EN_0_2_XYNN 0x00000300 -#define NV50TCL_VP_ATTR_EN_0_2_NNZN 0x00000400 -#define NV50TCL_VP_ATTR_EN_0_2_XNZN 0x00000500 -#define NV50TCL_VP_ATTR_EN_0_2_NYZN 0x00000600 -#define NV50TCL_VP_ATTR_EN_0_2_XYZN 0x00000700 -#define NV50TCL_VP_ATTR_EN_0_2_NNNW 0x00000800 -#define NV50TCL_VP_ATTR_EN_0_2_XNNW 0x00000900 -#define NV50TCL_VP_ATTR_EN_0_2_NYNW 0x00000a00 -#define NV50TCL_VP_ATTR_EN_0_2_XYNW 0x00000b00 -#define NV50TCL_VP_ATTR_EN_0_2_NNZW 0x00000c00 -#define NV50TCL_VP_ATTR_EN_0_2_XNZW 0x00000d00 -#define NV50TCL_VP_ATTR_EN_0_2_NYZW 0x00000e00 -#define NV50TCL_VP_ATTR_EN_0_2_XYZW 0x00000f00 -#define NV50TCL_VP_ATTR_EN_0_1_SHIFT 4 -#define NV50TCL_VP_ATTR_EN_0_1_MASK 0x000000f0 -#define NV50TCL_VP_ATTR_EN_0_1_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_1_XNNN 0x00000010 -#define NV50TCL_VP_ATTR_EN_0_1_NYNN 0x00000020 -#define NV50TCL_VP_ATTR_EN_0_1_XYNN 0x00000030 -#define NV50TCL_VP_ATTR_EN_0_1_NNZN 0x00000040 -#define NV50TCL_VP_ATTR_EN_0_1_XNZN 0x00000050 -#define NV50TCL_VP_ATTR_EN_0_1_NYZN 0x00000060 -#define NV50TCL_VP_ATTR_EN_0_1_XYZN 0x00000070 -#define NV50TCL_VP_ATTR_EN_0_1_NNNW 0x00000080 -#define NV50TCL_VP_ATTR_EN_0_1_XNNW 0x00000090 -#define NV50TCL_VP_ATTR_EN_0_1_NYNW 0x000000a0 -#define NV50TCL_VP_ATTR_EN_0_1_XYNW 0x000000b0 -#define NV50TCL_VP_ATTR_EN_0_1_NNZW 0x000000c0 -#define NV50TCL_VP_ATTR_EN_0_1_XNZW 0x000000d0 -#define NV50TCL_VP_ATTR_EN_0_1_NYZW 0x000000e0 -#define NV50TCL_VP_ATTR_EN_0_1_XYZW 0x000000f0 -#define NV50TCL_VP_ATTR_EN_0_0_SHIFT 0 -#define NV50TCL_VP_ATTR_EN_0_0_MASK 0x0000000f -#define NV50TCL_VP_ATTR_EN_0_0_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_0_0_XNNN 0x00000001 -#define NV50TCL_VP_ATTR_EN_0_0_NYNN 0x00000002 -#define NV50TCL_VP_ATTR_EN_0_0_XYNN 0x00000003 -#define NV50TCL_VP_ATTR_EN_0_0_NNZN 0x00000004 -#define NV50TCL_VP_ATTR_EN_0_0_XNZN 0x00000005 -#define NV50TCL_VP_ATTR_EN_0_0_NYZN 0x00000006 -#define NV50TCL_VP_ATTR_EN_0_0_XYZN 0x00000007 -#define NV50TCL_VP_ATTR_EN_0_0_NNNW 0x00000008 -#define NV50TCL_VP_ATTR_EN_0_0_XNNW 0x00000009 -#define NV50TCL_VP_ATTR_EN_0_0_NYNW 0x0000000a -#define NV50TCL_VP_ATTR_EN_0_0_XYNW 0x0000000b -#define NV50TCL_VP_ATTR_EN_0_0_NNZW 0x0000000c -#define NV50TCL_VP_ATTR_EN_0_0_XNZW 0x0000000d -#define NV50TCL_VP_ATTR_EN_0_0_NYZW 0x0000000e -#define NV50TCL_VP_ATTR_EN_0_0_XYZW 0x0000000f -#define NV50TCL_VP_ATTR_EN_1 0x00001654 -#define NV50TCL_VP_ATTR_EN_1_15_SHIFT 28 -#define NV50TCL_VP_ATTR_EN_1_15_MASK 0xf0000000 -#define NV50TCL_VP_ATTR_EN_1_15_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_15_XNNN 0x10000000 -#define NV50TCL_VP_ATTR_EN_1_15_NYNN 0x20000000 -#define NV50TCL_VP_ATTR_EN_1_15_XYNN 0x30000000 -#define NV50TCL_VP_ATTR_EN_1_15_NNZN 0x40000000 -#define NV50TCL_VP_ATTR_EN_1_15_XNZN 0x50000000 -#define NV50TCL_VP_ATTR_EN_1_15_NYZN 0x60000000 -#define NV50TCL_VP_ATTR_EN_1_15_XYZN 0x70000000 -#define NV50TCL_VP_ATTR_EN_1_15_NNNW 0x80000000 -#define NV50TCL_VP_ATTR_EN_1_15_XNNW 0x90000000 -#define NV50TCL_VP_ATTR_EN_1_15_NYNW 0xa0000000 -#define NV50TCL_VP_ATTR_EN_1_15_XYNW 0xb0000000 -#define NV50TCL_VP_ATTR_EN_1_15_NNZW 0xc0000000 -#define NV50TCL_VP_ATTR_EN_1_15_XNZW 0xd0000000 -#define NV50TCL_VP_ATTR_EN_1_15_NYZW 0xe0000000 -#define NV50TCL_VP_ATTR_EN_1_15_XYZW 0xf0000000 -#define NV50TCL_VP_ATTR_EN_1_14_SHIFT 24 -#define NV50TCL_VP_ATTR_EN_1_14_MASK 0x0f000000 -#define NV50TCL_VP_ATTR_EN_1_14_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_14_XNNN 0x01000000 -#define NV50TCL_VP_ATTR_EN_1_14_NYNN 0x02000000 -#define NV50TCL_VP_ATTR_EN_1_14_XYNN 0x03000000 -#define NV50TCL_VP_ATTR_EN_1_14_NNZN 0x04000000 -#define NV50TCL_VP_ATTR_EN_1_14_XNZN 0x05000000 -#define NV50TCL_VP_ATTR_EN_1_14_NYZN 0x06000000 -#define NV50TCL_VP_ATTR_EN_1_14_XYZN 0x07000000 -#define NV50TCL_VP_ATTR_EN_1_14_NNNW 0x08000000 -#define NV50TCL_VP_ATTR_EN_1_14_XNNW 0x09000000 -#define NV50TCL_VP_ATTR_EN_1_14_NYNW 0x0a000000 -#define NV50TCL_VP_ATTR_EN_1_14_XYNW 0x0b000000 -#define NV50TCL_VP_ATTR_EN_1_14_NNZW 0x0c000000 -#define NV50TCL_VP_ATTR_EN_1_14_XNZW 0x0d000000 -#define NV50TCL_VP_ATTR_EN_1_14_NYZW 0x0e000000 -#define NV50TCL_VP_ATTR_EN_1_14_XYZW 0x0f000000 -#define NV50TCL_VP_ATTR_EN_1_13_SHIFT 20 -#define NV50TCL_VP_ATTR_EN_1_13_MASK 0x00f00000 -#define NV50TCL_VP_ATTR_EN_1_13_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_13_XNNN 0x00100000 -#define NV50TCL_VP_ATTR_EN_1_13_NYNN 0x00200000 -#define NV50TCL_VP_ATTR_EN_1_13_XYNN 0x00300000 -#define NV50TCL_VP_ATTR_EN_1_13_NNZN 0x00400000 -#define NV50TCL_VP_ATTR_EN_1_13_XNZN 0x00500000 -#define NV50TCL_VP_ATTR_EN_1_13_NYZN 0x00600000 -#define NV50TCL_VP_ATTR_EN_1_13_XYZN 0x00700000 -#define NV50TCL_VP_ATTR_EN_1_13_NNNW 0x00800000 -#define NV50TCL_VP_ATTR_EN_1_13_XNNW 0x00900000 -#define NV50TCL_VP_ATTR_EN_1_13_NYNW 0x00a00000 -#define NV50TCL_VP_ATTR_EN_1_13_XYNW 0x00b00000 -#define NV50TCL_VP_ATTR_EN_1_13_NNZW 0x00c00000 -#define NV50TCL_VP_ATTR_EN_1_13_XNZW 0x00d00000 -#define NV50TCL_VP_ATTR_EN_1_13_NYZW 0x00e00000 -#define NV50TCL_VP_ATTR_EN_1_13_XYZW 0x00f00000 -#define NV50TCL_VP_ATTR_EN_1_12_SHIFT 16 -#define NV50TCL_VP_ATTR_EN_1_12_MASK 0x000f0000 -#define NV50TCL_VP_ATTR_EN_1_12_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_12_XNNN 0x00010000 -#define NV50TCL_VP_ATTR_EN_1_12_NYNN 0x00020000 -#define NV50TCL_VP_ATTR_EN_1_12_XYNN 0x00030000 -#define NV50TCL_VP_ATTR_EN_1_12_NNZN 0x00040000 -#define NV50TCL_VP_ATTR_EN_1_12_XNZN 0x00050000 -#define NV50TCL_VP_ATTR_EN_1_12_NYZN 0x00060000 -#define NV50TCL_VP_ATTR_EN_1_12_XYZN 0x00070000 -#define NV50TCL_VP_ATTR_EN_1_12_NNNW 0x00080000 -#define NV50TCL_VP_ATTR_EN_1_12_XNNW 0x00090000 -#define NV50TCL_VP_ATTR_EN_1_12_NYNW 0x000a0000 -#define NV50TCL_VP_ATTR_EN_1_12_XYNW 0x000b0000 -#define NV50TCL_VP_ATTR_EN_1_12_NNZW 0x000c0000 -#define NV50TCL_VP_ATTR_EN_1_12_XNZW 0x000d0000 -#define NV50TCL_VP_ATTR_EN_1_12_NYZW 0x000e0000 -#define NV50TCL_VP_ATTR_EN_1_12_XYZW 0x000f0000 -#define NV50TCL_VP_ATTR_EN_1_11_SHIFT 12 -#define NV50TCL_VP_ATTR_EN_1_11_MASK 0x0000f000 -#define NV50TCL_VP_ATTR_EN_1_11_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_11_XNNN 0x00001000 -#define NV50TCL_VP_ATTR_EN_1_11_NYNN 0x00002000 -#define NV50TCL_VP_ATTR_EN_1_11_XYNN 0x00003000 -#define NV50TCL_VP_ATTR_EN_1_11_NNZN 0x00004000 -#define NV50TCL_VP_ATTR_EN_1_11_XNZN 0x00005000 -#define NV50TCL_VP_ATTR_EN_1_11_NYZN 0x00006000 -#define NV50TCL_VP_ATTR_EN_1_11_XYZN 0x00007000 -#define NV50TCL_VP_ATTR_EN_1_11_NNNW 0x00008000 -#define NV50TCL_VP_ATTR_EN_1_11_XNNW 0x00009000 -#define NV50TCL_VP_ATTR_EN_1_11_NYNW 0x0000a000 -#define NV50TCL_VP_ATTR_EN_1_11_XYNW 0x0000b000 -#define NV50TCL_VP_ATTR_EN_1_11_NNZW 0x0000c000 -#define NV50TCL_VP_ATTR_EN_1_11_XNZW 0x0000d000 -#define NV50TCL_VP_ATTR_EN_1_11_NYZW 0x0000e000 -#define NV50TCL_VP_ATTR_EN_1_11_XYZW 0x0000f000 -#define NV50TCL_VP_ATTR_EN_1_10_SHIFT 8 -#define NV50TCL_VP_ATTR_EN_1_10_MASK 0x00000f00 -#define NV50TCL_VP_ATTR_EN_1_10_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_10_XNNN 0x00000100 -#define NV50TCL_VP_ATTR_EN_1_10_NYNN 0x00000200 -#define NV50TCL_VP_ATTR_EN_1_10_XYNN 0x00000300 -#define NV50TCL_VP_ATTR_EN_1_10_NNZN 0x00000400 -#define NV50TCL_VP_ATTR_EN_1_10_XNZN 0x00000500 -#define NV50TCL_VP_ATTR_EN_1_10_NYZN 0x00000600 -#define NV50TCL_VP_ATTR_EN_1_10_XYZN 0x00000700 -#define NV50TCL_VP_ATTR_EN_1_10_NNNW 0x00000800 -#define NV50TCL_VP_ATTR_EN_1_10_XNNW 0x00000900 -#define NV50TCL_VP_ATTR_EN_1_10_NYNW 0x00000a00 -#define NV50TCL_VP_ATTR_EN_1_10_XYNW 0x00000b00 -#define NV50TCL_VP_ATTR_EN_1_10_NNZW 0x00000c00 -#define NV50TCL_VP_ATTR_EN_1_10_XNZW 0x00000d00 -#define NV50TCL_VP_ATTR_EN_1_10_NYZW 0x00000e00 -#define NV50TCL_VP_ATTR_EN_1_10_XYZW 0x00000f00 -#define NV50TCL_VP_ATTR_EN_1_9_SHIFT 4 -#define NV50TCL_VP_ATTR_EN_1_9_MASK 0x000000f0 -#define NV50TCL_VP_ATTR_EN_1_9_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_9_XNNN 0x00000010 -#define NV50TCL_VP_ATTR_EN_1_9_NYNN 0x00000020 -#define NV50TCL_VP_ATTR_EN_1_9_XYNN 0x00000030 -#define NV50TCL_VP_ATTR_EN_1_9_NNZN 0x00000040 -#define NV50TCL_VP_ATTR_EN_1_9_XNZN 0x00000050 -#define NV50TCL_VP_ATTR_EN_1_9_NYZN 0x00000060 -#define NV50TCL_VP_ATTR_EN_1_9_XYZN 0x00000070 -#define NV50TCL_VP_ATTR_EN_1_9_NNNW 0x00000080 -#define NV50TCL_VP_ATTR_EN_1_9_XNNW 0x00000090 -#define NV50TCL_VP_ATTR_EN_1_9_NYNW 0x000000a0 -#define NV50TCL_VP_ATTR_EN_1_9_XYNW 0x000000b0 -#define NV50TCL_VP_ATTR_EN_1_9_NNZW 0x000000c0 -#define NV50TCL_VP_ATTR_EN_1_9_XNZW 0x000000d0 -#define NV50TCL_VP_ATTR_EN_1_9_NYZW 0x000000e0 -#define NV50TCL_VP_ATTR_EN_1_9_XYZW 0x000000f0 -#define NV50TCL_VP_ATTR_EN_1_8_SHIFT 0 -#define NV50TCL_VP_ATTR_EN_1_8_MASK 0x0000000f -#define NV50TCL_VP_ATTR_EN_1_8_NONE 0x00000000 -#define NV50TCL_VP_ATTR_EN_1_8_XNNN 0x00000001 -#define NV50TCL_VP_ATTR_EN_1_8_NYNN 0x00000002 -#define NV50TCL_VP_ATTR_EN_1_8_XYNN 0x00000003 -#define NV50TCL_VP_ATTR_EN_1_8_NNZN 0x00000004 -#define NV50TCL_VP_ATTR_EN_1_8_XNZN 0x00000005 -#define NV50TCL_VP_ATTR_EN_1_8_NYZN 0x00000006 -#define NV50TCL_VP_ATTR_EN_1_8_XYZN 0x00000007 -#define NV50TCL_VP_ATTR_EN_1_8_NNNW 0x00000008 -#define NV50TCL_VP_ATTR_EN_1_8_XNNW 0x00000009 -#define NV50TCL_VP_ATTR_EN_1_8_NYNW 0x0000000a -#define NV50TCL_VP_ATTR_EN_1_8_XYNW 0x0000000b -#define NV50TCL_VP_ATTR_EN_1_8_NNZW 0x0000000c -#define NV50TCL_VP_ATTR_EN_1_8_XNZW 0x0000000d -#define NV50TCL_VP_ATTR_EN_1_8_NYZW 0x0000000e -#define NV50TCL_VP_ATTR_EN_1_8_XYZW 0x0000000f -#define NV50TCL_LINE_STIPPLE_ENABLE 0x0000166c -#define NV50TCL_LINE_STIPPLE_PATTERN 0x00001680 -#define NV50TCL_POLYGON_STIPPLE_ENABLE 0x0000168c -#define NV50TCL_VP_REG_HPOS 0x000016bc -#define NV50TCL_VP_REG_HPOS_X_SHIFT 0 -#define NV50TCL_VP_REG_HPOS_X_MASK 0x000000ff -#define NV50TCL_VP_REG_HPOS_Y_SHIFT 8 -#define NV50TCL_VP_REG_HPOS_Y_MASK 0x0000ff00 -#define NV50TCL_VP_REG_HPOS_Z_SHIFT 16 -#define NV50TCL_VP_REG_HPOS_Z_MASK 0x00ff0000 -#define NV50TCL_VP_REG_HPOS_W_SHIFT 24 -#define NV50TCL_VP_REG_HPOS_W_MASK 0xff000000 -#define NV50TCL_VP_REG_COL0 0x000016c0 -#define NV50TCL_VP_REG_COL0_X_SHIFT 0 -#define NV50TCL_VP_REG_COL0_X_MASK 0x000000ff -#define NV50TCL_VP_REG_COL0_Y_SHIFT 8 -#define NV50TCL_VP_REG_COL0_Y_MASK 0x0000ff00 -#define NV50TCL_VP_REG_COL0_Z_SHIFT 16 -#define NV50TCL_VP_REG_COL0_Z_MASK 0x00ff0000 -#define NV50TCL_VP_REG_COL0_W_SHIFT 24 -#define NV50TCL_VP_REG_COL0_W_MASK 0xff000000 -#define NV50TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001700+((x)*4)) -#define NV50TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 -#define NV50TCL_CULL_FACE_ENABLE 0x00001918 -#define NV50TCL_FRONT_FACE 0x0000191c -#define NV50TCL_FRONT_FACE_CW 0x00000900 -#define NV50TCL_FRONT_FACE_CCW 0x00000901 -#define NV50TCL_CULL_FACE 0x00001920 -#define NV50TCL_CULL_FACE_FRONT 0x00000404 -#define NV50TCL_CULL_FACE_BACK 0x00000405 -#define NV50TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 -#define NV50TCL_LOGIC_OP_ENABLE 0x000019c4 -#define NV50TCL_LOGIC_OP 0x000019c8 -#define NV50TCL_LOGIC_OP_CLEAR 0x00001500 -#define NV50TCL_LOGIC_OP_AND 0x00001501 -#define NV50TCL_LOGIC_OP_AND_REVERSE 0x00001502 -#define NV50TCL_LOGIC_OP_COPY 0x00001503 -#define NV50TCL_LOGIC_OP_AND_INVERTED 0x00001504 -#define NV50TCL_LOGIC_OP_NOOP 0x00001505 -#define NV50TCL_LOGIC_OP_XOR 0x00001506 -#define NV50TCL_LOGIC_OP_OR 0x00001507 -#define NV50TCL_LOGIC_OP_NOR 0x00001508 -#define NV50TCL_LOGIC_OP_EQUIV 0x00001509 -#define NV50TCL_LOGIC_OP_INVERT 0x0000150a -#define NV50TCL_LOGIC_OP_OR_REVERSE 0x0000150b -#define NV50TCL_LOGIC_OP_COPY_INVERTED 0x0000150c -#define NV50TCL_LOGIC_OP_OR_INVERTED 0x0000150d -#define NV50TCL_LOGIC_OP_NAND 0x0000150e -#define NV50TCL_LOGIC_OP_SET 0x0000150f -#define NV50TCL_CLEAR_BUFFERS 0x000019d0 -#define NV50TCL_COLOR_MASK(x) (0x00001a00+((x)*4)) -#define NV50TCL_COLOR_MASK__SIZE 0x00000008 -#define NV50TCL_COLOR_MASK_R_SHIFT 0 -#define NV50TCL_COLOR_MASK_R_MASK 0x0000000f -#define NV50TCL_COLOR_MASK_G_SHIFT 4 -#define NV50TCL_COLOR_MASK_G_MASK 0x000000f0 -#define NV50TCL_COLOR_MASK_B_SHIFT 8 -#define NV50TCL_COLOR_MASK_B_MASK 0x00000f00 -#define NV50TCL_COLOR_MASK_A_SHIFT 12 -#define NV50TCL_COLOR_MASK_A_MASK 0x0000f000 - - -#define NV50_COMPUTE 0x000050c0 - -#define NV50_COMPUTE_DMA_UNK0 0x000001a0 -#define NV50_COMPUTE_DMA_STATUS 0x000001a4 -#define NV50_COMPUTE_DMA_UNK1 0x000001b8 -#define NV50_COMPUTE_DMA_UNK2 0x000001bc -#define NV50_COMPUTE_DMA_UNK3 0x000001c0 -#define NV50_COMPUTE_UNK4_HIGH 0x00000210 -#define NV50_COMPUTE_UNK4_LOW 0x00000214 -#define NV50_COMPUTE_UNK5_HIGH 0x00000218 -#define NV50_COMPUTE_UNK5_LOW 0x0000021c -#define NV50_COMPUTE_UNK6_HIGH 0x00000294 -#define NV50_COMPUTE_UNK6_LOW 0x00000298 -#define NV50_COMPUTE_CONST_BASE_HIGH 0x000002a4 -#define NV50_COMPUTE_CONST_BASE_LO 0x000002a8 -#define NV50_COMPUTE_CONST_SIZE_SEG 0x000002ac -#define NV50_COMPUTE_REG_COUNT 0x000002c0 -#define NV50_COMPUTE_STATUS_HIGH 0x00000310 -#define NV50_COMPUTE_STATUS_LOW 0x00000314 -#define NV50_COMPUTE_EXECUTE 0x0000031c -#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374 -#define NV50_COMPUTE_GRIDDIM_YX 0x000003a4 -#define NV50_COMPUTE_SHARED_SIZE 0x000003a8 -#define NV50_COMPUTE_BLOCKDIM_YX 0x000003ac -#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0 -#define NV50_COMPUTE_CALL_ADDRESS 0x000003b4 -#define NV50_COMPUTE_GLOBAL_BASE_HIGH(x) (0x00000400+((x)*32)) -#define NV50_COMPUTE_GLOBAL_BASE_HIGH__SIZE 0x00000010 -#define NV50_COMPUTE_GLOBAL_BASE_LOW(x) (0x00000404+((x)*32)) -#define NV50_COMPUTE_GLOBAL_BASE_LOW__SIZE 0x00000010 -#define NV50_COMPUTE_GLOBAL_LIMIT_HIGH(x) (0x00000408+((x)*32)) -#define NV50_COMPUTE_GLOBAL_LIMIT_HIGH__SIZE 0x00000010 -#define NV50_COMPUTE_GLOBAL_LIMIT_LOW(x) (0x0000040c+((x)*32)) -#define NV50_COMPUTE_GLOBAL_LIMIT_LOW__SIZE 0x00000010 -#define NV50_COMPUTE_GLOBAL_UNK(x) (0x00000410+((x)*32)) -#define NV50_COMPUTE_GLOBAL_UNK__SIZE 0x00000010 -#define NV50_COMPUTE_USER_PARAM(x) (0x00000600+((x)*4)) -#define NV50_COMPUTE_USER_PARAM__SIZE 0x00000040 - - -#define NV54TCL 0x00008297 - - - -#endif /* NOUVEAU_REG_H */ diff --git a/src/mesa/pipe/nouveau/nouveau_gldefs.h b/src/mesa/pipe/nouveau/nouveau_gldefs.h deleted file mode 100644 index e1015c93a27..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_gldefs.h +++ /dev/null @@ -1,196 +0,0 @@ -#ifndef __NOUVEAU_GLDEFS_H__ -#define __NOUVEAU_GLDEFS_H__ - -static INLINE unsigned -nvgl_blend_func(unsigned factor) -{ - switch (factor) { - case PIPE_BLENDFACTOR_ZERO: - return 0x0000; - case PIPE_BLENDFACTOR_ONE: - return 0x0001; - case PIPE_BLENDFACTOR_SRC_COLOR: - return 0x0300; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return 0x0301; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return 0x0302; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return 0x0303; - case PIPE_BLENDFACTOR_DST_ALPHA: - return 0x0304; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return 0x0305; - case PIPE_BLENDFACTOR_DST_COLOR: - return 0x0306; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return 0x0307; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return 0x0308; - case PIPE_BLENDFACTOR_CONST_COLOR: - return 0x8001; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return 0x8002; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return 0x8003; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return 0x8004; - default: - return 0x0000; - } -} - -static INLINE unsigned -nvgl_blend_eqn(unsigned func) -{ - switch (func) { - case PIPE_BLEND_ADD: - return 0x8006; - case PIPE_BLEND_MIN: - return 0x8007; - case PIPE_BLEND_MAX: - return 0x8008; - case PIPE_BLEND_SUBTRACT: - return 0x800a; - case PIPE_BLEND_REVERSE_SUBTRACT: - return 0x800b; - default: - return 0x8006; - } -} - -static INLINE unsigned -nvgl_logicop_func(unsigned func) -{ - switch (func) { - case PIPE_LOGICOP_CLEAR: - return 0x1500; - case PIPE_LOGICOP_NOR: - return 0x1508; - case PIPE_LOGICOP_AND_INVERTED: - return 0x1504; - case PIPE_LOGICOP_COPY_INVERTED: - return 0x150c; - case PIPE_LOGICOP_AND_REVERSE: - return 0x1502; - case PIPE_LOGICOP_INVERT: - return 0x150a; - case PIPE_LOGICOP_XOR: - return 0x1506; - case PIPE_LOGICOP_NAND: - return 0x150e; - case PIPE_LOGICOP_AND: - return 0x1501; - case PIPE_LOGICOP_EQUIV: - return 0x1509; - case PIPE_LOGICOP_NOOP: - return 0x1505; - case PIPE_LOGICOP_OR_INVERTED: - return 0x150d; - case PIPE_LOGICOP_COPY: - return 0x1503; - case PIPE_LOGICOP_OR_REVERSE: - return 0x150b; - case PIPE_LOGICOP_OR: - return 0x1507; - case PIPE_LOGICOP_SET: - return 0x150f; - default: - return 0x1505; - } -} - -static INLINE unsigned -nvgl_comparison_op(unsigned op) -{ - switch (op) { - case PIPE_FUNC_NEVER: - return 0x0200; - case PIPE_FUNC_LESS: - return 0x0201; - case PIPE_FUNC_EQUAL: - return 0x0202; - case PIPE_FUNC_LEQUAL: - return 0x0203; - case PIPE_FUNC_GREATER: - return 0x0204; - case PIPE_FUNC_NOTEQUAL: - return 0x0205; - case PIPE_FUNC_GEQUAL: - return 0x0206; - case PIPE_FUNC_ALWAYS: - return 0x0207; - default: - return 0x0207; - } -} - -static INLINE unsigned -nvgl_polygon_mode(unsigned mode) -{ - switch (mode) { - case PIPE_POLYGON_MODE_POINT: - return 0x1b00; - case PIPE_POLYGON_MODE_LINE: - return 0x1b01; - case PIPE_POLYGON_MODE_FILL: - return 0x1b02; - default: - return 0x1b02; - } -} - -static INLINE unsigned -nvgl_stencil_op(unsigned op) -{ - switch (op) { - case PIPE_STENCIL_OP_ZERO: - return 0x0000; - case PIPE_STENCIL_OP_INVERT: - return 0x150a; - case PIPE_STENCIL_OP_KEEP: - return 0x1e00; - case PIPE_STENCIL_OP_REPLACE: - return 0x1e01; - case PIPE_STENCIL_OP_INCR: - return 0x1e02; - case PIPE_STENCIL_OP_DECR: - return 0x1e03; - case PIPE_STENCIL_OP_INCR_WRAP: - return 0x8507; - case PIPE_STENCIL_OP_DECR_WRAP: - return 0x8508; - default: - return 0x1e00; - } -} - -static INLINE unsigned -nvgl_primitive(unsigned prim) { - switch (prim) { - case PIPE_PRIM_POINTS: - return 0x0001; - case PIPE_PRIM_LINES: - return 0x0002; - case PIPE_PRIM_LINE_LOOP: - return 0x0003; - case PIPE_PRIM_LINE_STRIP: - return 0x0004; - case PIPE_PRIM_TRIANGLES: - return 0x0005; - case PIPE_PRIM_TRIANGLE_STRIP: - return 0x0006; - case PIPE_PRIM_TRIANGLE_FAN: - return 0x0007; - case PIPE_PRIM_QUADS: - return 0x0008; - case PIPE_PRIM_QUAD_STRIP: - return 0x0009; - case PIPE_PRIM_POLYGON: - return 0x000a; - default: - return 0x0001; - } -} - -#endif diff --git a/src/mesa/pipe/nouveau/nouveau_grobj.h b/src/mesa/pipe/nouveau/nouveau_grobj.h deleted file mode 100644 index 8f5abf90514..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_grobj.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_GROBJ_H__ -#define __NOUVEAU_GROBJ_H__ - -#include "nouveau_channel.h" - -struct nouveau_grobj { - struct nouveau_channel *channel; - int grclass; - uint32_t handle; - int subc; -}; - -#endif diff --git a/src/mesa/pipe/nouveau/nouveau_notifier.h b/src/mesa/pipe/nouveau/nouveau_notifier.h deleted file mode 100644 index 35adde1e324..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_notifier.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_NOTIFIER_H__ -#define __NOUVEAU_NOTIFIER_H__ - -#define NV_NOTIFIER_SIZE 32 -#define NV_NOTIFY_TIME_0 0x00000000 -#define NV_NOTIFY_TIME_1 0x00000004 -#define NV_NOTIFY_RETURN_VALUE 0x00000008 -#define NV_NOTIFY_STATE 0x0000000C -#define NV_NOTIFY_STATE_STATUS_MASK 0xFF000000 -#define NV_NOTIFY_STATE_STATUS_SHIFT 24 -#define NV_NOTIFY_STATE_STATUS_COMPLETED 0x00 -#define NV_NOTIFY_STATE_STATUS_IN_PROCESS 0x01 -#define NV_NOTIFY_STATE_ERROR_CODE_MASK 0x0000FFFF -#define NV_NOTIFY_STATE_ERROR_CODE_SHIFT 0 - -struct nouveau_notifier { - struct nouveau_channel *channel; - uint32_t handle; -}; - -#endif diff --git a/src/mesa/pipe/nouveau/nouveau_push.h b/src/mesa/pipe/nouveau/nouveau_push.h deleted file mode 100644 index 679472669b9..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_push.h +++ /dev/null @@ -1,83 +0,0 @@ -#ifndef __NOUVEAU_PUSH_H__ -#define __NOUVEAU_PUSH_H__ - -#include "pipe/nouveau/nouveau_winsys.h" - -#ifndef NOUVEAU_PUSH_CONTEXT -#error undefined push context -#endif - -#define OUT_RING(data) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - (*pc->nvws->channel->pushbuf->cur++) = (data); \ -} while(0) - -#define OUT_RINGp(src,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - memcpy(pc->nvws->channel->pushbuf->cur, (src), (size) * 4); \ - pc->nvws->channel->pushbuf->cur += (size); \ -} while(0) - -#define OUT_RINGf(data) do { \ - union { float v; uint32_t u; } c; \ - c.v = (data); \ - OUT_RING(c.u); \ -} while(0) - -#define BEGIN_RING(obj,mthd,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \ - pc->nvws->push_flush(pc->nvws->channel, ((size) + 1)); \ - OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ - pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#define BEGIN_RING_NI(obj,mthd,size) do { \ - BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ -} while(0) - -#define FIRE_RING() do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - pc->nvws->push_flush(pc->nvws->channel, 0); \ -} while(0) - -#define OUT_RELOC(bo,data,flags,vor,tor) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - pc->nvws->push_reloc(pc->nvws->channel, \ - pc->nvws->channel->pushbuf->cur++, \ - (bo), (data), (flags), (vor), (tor)); \ -} while(0) - -/* Raw data + flags depending on FB/TT buffer */ -#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ - OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ -} while(0) - -/* FB/TT object handle */ -#define OUT_RELOCo(bo,flags) do { \ - OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ - pc->nvws->channel->vram->handle, \ - pc->nvws->channel->gart->handle); \ -} while(0) - -/* Low 32-bits of offset */ -#define OUT_RELOCl(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ -} while(0) - -/* High 32-bits of offset */ -#define OUT_RELOCh(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ -} while(0) - -/* A reloc which'll recombine into a NV_DMA_METHOD packet header */ -#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \ - pc->nvws->push_flush(pc->nvws->channel, ((size) + 1)); \ - OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \ - (flags), 0, 0); \ - pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#endif diff --git a/src/mesa/pipe/nouveau/nouveau_pushbuf.h b/src/mesa/pipe/nouveau/nouveau_pushbuf.h deleted file mode 100644 index 19097650982..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_pushbuf.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_PUSHBUF_H__ -#define __NOUVEAU_PUSHBUF_H__ - -struct nouveau_pushbuf { - struct nouveau_channel *channel; - unsigned remaining; - uint32_t *cur; -}; - -#endif diff --git a/src/mesa/pipe/nouveau/nouveau_resource.h b/src/mesa/pipe/nouveau/nouveau_resource.h deleted file mode 100644 index 1af7961d301..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_resource.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_RESOURCE_H__ -#define __NOUVEAU_RESOURCE_H__ - -struct nouveau_resource { - struct nouveau_resource *prev; - struct nouveau_resource *next; - - int in_use; - void *priv; - - unsigned int start; - unsigned int size; -}; - -#endif diff --git a/src/mesa/pipe/nouveau/nouveau_stateobj.h b/src/mesa/pipe/nouveau/nouveau_stateobj.h deleted file mode 100644 index 07c31b014a5..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_stateobj.h +++ /dev/null @@ -1,141 +0,0 @@ -#ifndef __NOUVEAU_STATEOBJ_H__ -#define __NOUVEAU_STATEOBJ_H__ - -#include "pipe/p_util.h" - -struct nouveau_stateobj_reloc { - struct pipe_buffer *bo; - - unsigned offset; - unsigned packet; - - unsigned data; - unsigned flags; - unsigned vor; - unsigned tor; -}; - -struct nouveau_stateobj { - int refcount; - - unsigned *push; - struct nouveau_stateobj_reloc *reloc; - - unsigned *cur; - unsigned cur_packet; - unsigned cur_reloc; -}; - -static INLINE struct nouveau_stateobj * -so_new(unsigned push, unsigned reloc) -{ - struct nouveau_stateobj *so; - - so = MALLOC(sizeof(struct nouveau_stateobj)); - so->refcount = 1; - so->push = MALLOC(sizeof(unsigned) * push); - so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); - - so->cur = so->push; - so->cur_reloc = so->cur_packet = 0; - - return so; -} - -static INLINE void -so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) -{ - struct nouveau_stateobj *so; - - so = *pso; - if (so) { - if (--so->refcount <= 0) { - free(so->push); - free(so->reloc); - free(so); - } - *pso = NULL; - } - - if (ref) { - ref->refcount++; - *pso = ref; - } -} - -static INLINE void -so_data(struct nouveau_stateobj *so, unsigned data) -{ - (*so->cur++) = (data); - so->cur_packet += 4; -} - -static INLINE void -so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, - unsigned mthd, unsigned size) -{ - so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4); - so_data(so, (gr->subc << 13) | (size << 18) | mthd); -} - -static INLINE void -so_reloc(struct nouveau_stateobj *so, struct pipe_buffer *bo, - unsigned data, unsigned flags, unsigned vor, unsigned tor) -{ - struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++]; - - r->bo = bo; - r->offset = so->cur - so->push; - r->packet = so->cur_packet; - r->data = data; - r->flags = flags; - r->vor = vor; - r->tor = tor; - so_data(so, data); -} - -static INLINE void -so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) -{ - struct nouveau_pushbuf *pb = nvws->channel->pushbuf; - unsigned nr, i; - - nr = so->cur - so->push; - if (pb->remaining < nr) - nvws->push_flush(nvws->channel, nr); - pb->remaining -= nr; - - memcpy(pb->cur, so->push, nr * 4); - for (i = 0; i < so->cur_reloc; i++) { - struct nouveau_stateobj_reloc *r = &so->reloc[i]; - - nvws->push_reloc(nvws->channel, pb->cur + r->offset, r->bo, - r->data, r->flags, r->vor, r->tor); - } - pb->cur += nr; -} - -static INLINE void -so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) -{ - struct nouveau_pushbuf *pb = nvws->channel->pushbuf; - unsigned i; - - i = so->cur_reloc << 1; - if (nvws->channel->pushbuf->remaining < i) - nvws->push_flush(nvws->channel, i); - nvws->channel->pushbuf->remaining -= i; - - for (i = 0; i < so->cur_reloc; i++) { - struct nouveau_stateobj_reloc *r = &so->reloc[i]; - - nvws->push_reloc(nvws->channel, pb->cur++, r->bo, r->packet, - (r->flags & - (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART)) | - NOUVEAU_BO_DUMMY, 0, 0); - nvws->push_reloc(nvws->channel, pb->cur++, r->bo, r->data, - r->flags | NOUVEAU_BO_DUMMY, r->vor, r->tor); - } -} - -#endif diff --git a/src/mesa/pipe/nouveau/nouveau_winsys.h b/src/mesa/pipe/nouveau/nouveau_winsys.h deleted file mode 100644 index 818ae9afae2..00000000000 --- a/src/mesa/pipe/nouveau/nouveau_winsys.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef NOUVEAU_WINSYS_H -#define NOUVEAU_WINSYS_H - -#include <stdint.h> -#include "pipe/p_winsys.h" -#include "pipe/p_defines.h" - -#include "pipe/nouveau/nouveau_bo.h" -#include "pipe/nouveau/nouveau_channel.h" -#include "pipe/nouveau/nouveau_class.h" -#include "pipe/nouveau/nouveau_grobj.h" -#include "pipe/nouveau/nouveau_notifier.h" -#include "pipe/nouveau/nouveau_resource.h" -#include "pipe/nouveau/nouveau_pushbuf.h" - -struct nouveau_winsys { - struct nouveau_context *nv; - - struct nouveau_channel *channel; - - int (*res_init)(struct nouveau_resource **heap, unsigned start, - unsigned size); - int (*res_alloc)(struct nouveau_resource *heap, int size, void *priv, - struct nouveau_resource **); - void (*res_free)(struct nouveau_resource **); - - int (*push_reloc)(struct nouveau_channel *, void *ptr, - struct pipe_buffer *, uint32_t data, - uint32_t flags, uint32_t vor, uint32_t tor); - int (*push_flush)(struct nouveau_channel *, unsigned size); - - int (*grobj_alloc)(struct nouveau_winsys *, int grclass, - struct nouveau_grobj **); - void (*grobj_free)(struct nouveau_grobj **); - - int (*notifier_alloc)(struct nouveau_winsys *, int count, - struct nouveau_notifier **); - void (*notifier_free)(struct nouveau_notifier **); - void (*notifier_reset)(struct nouveau_notifier *, int id); - uint32_t (*notifier_status)(struct nouveau_notifier *, int id); - uint32_t (*notifier_retval)(struct nouveau_notifier *, int id); - int (*notifier_wait)(struct nouveau_notifier *, int id, - int status, int timeout); - - int (*surface_copy)(struct nouveau_winsys *, struct pipe_surface *, - unsigned, unsigned, struct pipe_surface *, - unsigned, unsigned, unsigned, unsigned); - int (*surface_fill)(struct nouveau_winsys *, struct pipe_surface *, - unsigned, unsigned, unsigned, unsigned, unsigned); -}; - -extern struct pipe_context * -nv30_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); - -extern struct pipe_context * -nv40_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); - -extern struct pipe_context * -nv50_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); - -#endif diff --git a/src/mesa/pipe/nv30/Makefile b/src/mesa/pipe/nv30/Makefile deleted file mode 100644 index dd4b7e73cda..00000000000 --- a/src/mesa/pipe/nv30/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv30 - -DRIVER_SOURCES = \ - nv30_clear.c \ - nv30_context.c \ - nv30_draw.c \ - nv30_fragprog.c \ - nv30_fragtex.c \ - nv30_miptree.c \ - nv30_query.c \ - nv30_state.c \ - nv30_state_emit.c \ - nv30_surface.c \ - nv30_vbo.c \ - nv30_vertprog.c - -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -include ../Makefile.template - -symlinks: - diff --git a/src/mesa/pipe/nv30/nv30_clear.c b/src/mesa/pipe/nv30/nv30_clear.c deleted file mode 100644 index 71f413588ee..00000000000 --- a/src/mesa/pipe/nv30/nv30_clear.c +++ /dev/null @@ -1,12 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "nv30_context.h" - -void -nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue) -{ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); -} diff --git a/src/mesa/pipe/nv30/nv30_context.c b/src/mesa/pipe/nv30/nv30_context.c deleted file mode 100644 index d12aab85d8c..00000000000 --- a/src/mesa/pipe/nv30/nv30_context.c +++ /dev/null @@ -1,431 +0,0 @@ -#include "pipe/draw/draw_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_winsys.h" -#include "pipe/p_util.h" - -#include "nv30_context.h" - -static const char * -nv30_get_name(struct pipe_context *pipe) -{ - struct nv30_context *nv30 = nv30_context(pipe); - static char buffer[128]; - - snprintf(buffer, sizeof(buffer), "NV%02X", nv30->chipset); - return buffer; -} - -static const char * -nv30_get_vendor(struct pipe_context *pipe) -{ - return "nouveau"; -} - -static int -nv30_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 16; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 2; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv30_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - case PIPE_CAP_BITMAP_TEXCOORD_BIAS: - return 0.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static void -nv30_flush(struct pipe_context *pipe, unsigned flags) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_winsys *nvws = nv30->nvws; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (1); - } - - if (flags & PIPE_FLUSH_WAIT) { - nvws->notifier_reset(nv30->sync, 0); - BEGIN_RING(rankine, 0x104, 1); - OUT_RING (0); - BEGIN_RING(rankine, 0x100, 1); - OUT_RING (0); - } - - FIRE_RING(); - - if (flags & PIPE_FLUSH_WAIT) - nvws->notifier_wait(nv30->sync, 0, 0, 2000); -} - -static void -nv30_destroy(struct pipe_context *pipe) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_winsys *nvws = nv30->nvws; - - if (nv30->draw) - draw_destroy(nv30->draw); - - nvws->res_free(&nv30->vertprog.exec_heap); - nvws->res_free(&nv30->vertprog.data_heap); - - nvws->res_free(&nv30->query_heap); - nvws->notifier_free(&nv30->query); - - nvws->notifier_free(&nv30->sync); - - nvws->grobj_free(&nv30->rankine); - - free(nv30); -} - -static boolean -nv30_init_hwctx(struct nv30_context *nv30, int rankine_class) -{ - struct nouveau_winsys *nvws = nv30->nvws; - int ret; - int i; - - ret = nvws->grobj_alloc(nvws, rankine_class, &nv30->rankine); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - BEGIN_RING(rankine, NV34TCL_DMA_NOTIFY, 1); - OUT_RING (nv30->sync->handle); - BEGIN_RING(rankine, NV34TCL_DMA_TEXTURE0, 2); - OUT_RING (nvws->channel->vram->handle); - OUT_RING (nvws->channel->gart->handle); - BEGIN_RING(rankine, NV34TCL_DMA_COLOR1, 1); - OUT_RING (nvws->channel->vram->handle); - BEGIN_RING(rankine, NV34TCL_DMA_COLOR0, 2); - OUT_RING (nvws->channel->vram->handle); - OUT_RING (nvws->channel->vram->handle); - BEGIN_RING(rankine, NV34TCL_DMA_VTXBUF0, 2); - OUT_RING (nvws->channel->vram->handle); - OUT_RING (nvws->channel->gart->handle); -/* BEGIN_RING(rankine, NV34TCL_DMA_FENCE, 2); - OUT_RING (0); - OUT_RING (nv30->query->handle);*/ - BEGIN_RING(rankine, NV34TCL_DMA_IN_MEMORY7, 1); - OUT_RING (nvws->channel->vram->handle); - BEGIN_RING(rankine, NV34TCL_DMA_IN_MEMORY8, 1); - OUT_RING (nvws->channel->vram->handle); - - for (i=1; i<8; i++) { - BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); - } - - BEGIN_RING(rankine, 0x220, 1); - OUT_RING (1); - - BEGIN_RING(rankine, 0x03b0, 1); - OUT_RING (0x00100000); - BEGIN_RING(rankine, 0x1454, 1); - OUT_RING (0); - BEGIN_RING(rankine, 0x1d80, 1); - OUT_RING (3); - BEGIN_RING(rankine, 0x1450, 1); - OUT_RING (0x00030004); - - /* NEW */ - BEGIN_RING(rankine, 0x1e98, 1); - OUT_RING (0); - BEGIN_RING(rankine, 0x17e0, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x3f800000); - BEGIN_RING(rankine, 0x1f80, 16); - OUT_RING (0); OUT_RING (0); OUT_RING (0); OUT_RING (0); - OUT_RING (0); OUT_RING (0); OUT_RING (0); OUT_RING (0); - OUT_RING (0x0000ffff); - OUT_RING (0); OUT_RING (0); OUT_RING (0); OUT_RING (0); - OUT_RING (0); OUT_RING (0); OUT_RING (0); - - BEGIN_RING(rankine, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); - - BEGIN_RING(rankine, 0x1d88, 1); - OUT_RING (0x00001200); - - BEGIN_RING(rankine, NV34TCL_RC_ENABLE, 1); - OUT_RING (0); - - /* Attempt to setup a known state.. Probably missing a heap of - * stuff here.. - */ - BEGIN_RING(rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(rankine, NV34TCL_STENCIL_BACK_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(rankine, NV34TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(rankine, NV34TCL_DEPTH_WRITE_ENABLE, 2); - OUT_RING (0); /* wr disable */ - OUT_RING (0); /* test disable */ - BEGIN_RING(rankine, NV34TCL_COLOR_MASK, 1); - OUT_RING (0x01010101); /* TR,TR,TR,TR */ - BEGIN_RING(rankine, NV34TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_ENABLE, 5); - OUT_RING (0); /* Blend enable */ - OUT_RING (0); /* Blend src */ - OUT_RING (0); /* Blend dst */ - OUT_RING (0x00000000); /* Blend colour */ - OUT_RING (0x8006); /* FUNC_ADD */ - BEGIN_RING(rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); - OUT_RING (0); - OUT_RING (0x1503 /*GL_COPY*/); - BEGIN_RING(rankine, NV34TCL_DITHER_ENABLE, 1); - OUT_RING (1); - BEGIN_RING(rankine, NV34TCL_SHADE_MODEL, 1); - OUT_RING (0x1d01 /*GL_SMOOTH*/); - BEGIN_RING(rankine, NV34TCL_POLYGON_OFFSET_FACTOR,2); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - BEGIN_RING(rankine, NV34TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (0x1b02 /*GL_FILL*/); - OUT_RING (0x1b02 /*GL_FILL*/); - /* - Disable texture units - * - Set fragprog to MOVR result.color, fragment.color */ - for (i=0;i<16;i++) { - BEGIN_RING(rankine, - NV34TCL_TX_ENABLE(i), 1); - OUT_RING (0); - } - /* Polygon stipple */ - BEGIN_RING(rankine, - NV34TCL_POLYGON_STIPPLE_PATTERN(0), 0x20); - for (i=0;i<0x20;i++) - OUT_RING (0xFFFFFFFF); - - int w=4096; - int h=4096; - int pitch=4096*4; - BEGIN_RING(rankine, NV34TCL_RT_HORIZ, 5); - OUT_RING (w<<16); - OUT_RING (h<<16); - OUT_RING (0x148); /* format */ - OUT_RING (pitch << 16 | pitch); - OUT_RING (0x0); - BEGIN_RING(rankine, 0x0a00, 2); - OUT_RING ((w<<16) | 0); - OUT_RING ((h<<16) | 0); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING ((w-1)<<16); - OUT_RING ((h-1)<<16); - BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2); - OUT_RING (w<<16); - OUT_RING (h<<16); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_HORIZ, 2); - OUT_RING (w<<16); - OUT_RING (h<<16); - - BEGIN_RING(rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - - BEGIN_RING(rankine, NV34TCL_MODELVIEW_MATRIX(0), 16); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - - BEGIN_RING(rankine, NV34TCL_PROJECTION_MATRIX(0), 16); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - - BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2); - OUT_RING (4096<<16); - OUT_RING (4096<<16); - - BEGIN_RING(rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); - OUT_RING (0xffff0000); - - FIRE_RING (); - return TRUE; -} - -#define NV30TCL_CHIPSET_3X_MASK 0x00000003 -#define NV34TCL_CHIPSET_3X_MASK 0x00000010 -#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 - -struct pipe_context * -nv30_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, - unsigned chipset) -{ - struct nv30_context *nv30; - int rankine_class = 0, ret; - - if ((chipset & 0xf0) != 0x30) { - NOUVEAU_ERR("Not a NV3X chipset\n"); - return NULL; - } - - if (NV30TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) { - rankine_class = 0x0397; - } else if (NV34TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) { - rankine_class = 0x0697; - } else if (NV35TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) { - rankine_class = 0x0497; - } else { - NOUVEAU_ERR("Unknown NV3X chipset: NV%02x\n", chipset); - return NULL; - } - - nv30 = CALLOC_STRUCT(nv30_context); - if (!nv30) - return NULL; - nv30->chipset = chipset; - nv30->nvws = nvws; - - /* Notifier for sync purposes */ - ret = nvws->notifier_alloc(nvws, 1, &nv30->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv30_destroy(&nv30->pipe); - return NULL; - } - - /* Query objects */ - ret = nvws->notifier_alloc(nvws, 32, &nv30->query); - if (ret) { - NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv30_destroy(&nv30->pipe); - return NULL; - } - - ret = nvws->res_init(&nv30->query_heap, 0, 32); - if (ret) { - NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv30_destroy(&nv30->pipe); - return NULL; - } - - /* Vtxprog resources */ - if (nvws->res_init(&nv30->vertprog.exec_heap, 0, 512) || - nvws->res_init(&nv30->vertprog.data_heap, 0, 256)) { - nv30_destroy(&nv30->pipe); - return NULL; - } - - /* Static rankine initialisation */ - if (!nv30_init_hwctx(nv30, rankine_class)) { - nv30_destroy(&nv30->pipe); - return NULL; - } - - /* Pipe context setup */ - nv30->pipe.winsys = pipe_winsys; - - nv30->pipe.destroy = nv30_destroy; - nv30->pipe.get_name = nv30_get_name; - nv30->pipe.get_vendor = nv30_get_vendor; - nv30->pipe.get_param = nv30_get_param; - nv30->pipe.get_paramf = nv30_get_paramf; - - nv30->pipe.draw_arrays = nv30_draw_arrays; - nv30->pipe.draw_elements = nv30_draw_elements; - nv30->pipe.clear = nv30_clear; - - nv30->pipe.flush = nv30_flush; - - nv30_init_query_functions(nv30); - nv30_init_surface_functions(nv30); - nv30_init_state_functions(nv30); - nv30_init_miptree_functions(nv30); - - nv30->draw = draw_create(); - assert(nv30->draw); - draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30)); - - return &nv30->pipe; -} - diff --git a/src/mesa/pipe/nv30/nv30_context.h b/src/mesa/pipe/nv30/nv30_context.h deleted file mode 100644 index f6c69545991..00000000000 --- a/src/mesa/pipe/nv30/nv30_context.h +++ /dev/null @@ -1,136 +0,0 @@ -#ifndef __NV30_CONTEXT_H__ -#define __NV30_CONTEXT_H__ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "pipe/draw/draw_vertex.h" - -#include "pipe/nouveau/nouveau_winsys.h" -#include "pipe/nouveau/nouveau_gldefs.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv30_context *ctx = nv30 -#include "pipe/nouveau/nouveau_push.h" - -#include "nv30_state.h" - -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) \ - fprintf(stderr, "nouveau: "fmt, ##args); - -#define NV30_NEW_VERTPROG (1 << 1) -#define NV30_NEW_FRAGPROG (1 << 2) -#define NV30_NEW_ARRAYS (1 << 3) - -struct nv30_context { - struct pipe_context pipe; - struct nouveau_winsys *nvws; - - struct draw_context *draw; - - int chipset; - struct nouveau_grobj *rankine; - struct nouveau_notifier *sync; - - /* query objects */ - struct nouveau_notifier *query; - struct nouveau_resource *query_heap; - - uint32_t dirty; - - struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; - struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; - unsigned dirty_samplers; - unsigned fp_samplers; - unsigned vp_samplers; - - uint32_t rt_enable; - struct pipe_buffer *rt[4]; - struct pipe_buffer *zeta; - - struct { - struct pipe_buffer *buffer; - uint32_t format; - } tex[16]; - - unsigned vb_enable; - struct { - struct pipe_buffer *buffer; - unsigned delta; - } vb[16]; - - struct { - struct nouveau_resource *exec_heap; - struct nouveau_resource *data_heap; - - struct nv30_vertex_program *active; - - struct nv30_vertex_program *current; - struct pipe_buffer *constant_buf; - } vertprog; - - struct { - struct nv30_fragment_program *active; - - struct nv30_fragment_program *current; - struct pipe_buffer *constant_buf; - } fragprog; - - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; -}; - -static INLINE struct nv30_context * -nv30_context(struct pipe_context *pipe) -{ - return (struct nv30_context *)pipe; -} - -extern void nv30_init_state_functions(struct nv30_context *nv30); -extern void nv30_init_surface_functions(struct nv30_context *nv30); -extern void nv30_init_miptree_functions(struct nv30_context *nv30); -extern void nv30_init_query_functions(struct nv30_context *nv30); - -/* nv30_draw.c */ -extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30); - -/* nv30_vertprog.c */ -extern void nv30_vertprog_translate(struct nv30_context *, - struct nv30_vertex_program *); -extern void nv30_vertprog_bind(struct nv30_context *, - struct nv30_vertex_program *); -extern void nv30_vertprog_destroy(struct nv30_context *, - struct nv30_vertex_program *); - -/* nv30_fragprog.c */ -extern void nv30_fragprog_translate(struct nv30_context *, - struct nv30_fragment_program *); -extern void nv30_fragprog_bind(struct nv30_context *, - struct nv30_fragment_program *); -extern void nv30_fragprog_destroy(struct nv30_context *, - struct nv30_fragment_program *); - -/* nv30_fragtex.c */ -extern void nv30_fragtex_bind(struct nv30_context *); - -/* nv30_state.c and friends */ -extern void nv30_emit_hw_state(struct nv30_context *nv30); -extern void nv30_state_tex_update(struct nv30_context *nv30); - -/* nv30_vbo.c */ -extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern boolean nv30_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, - unsigned count); - -/* nv30_clear.c */ -extern void nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue); - -#endif diff --git a/src/mesa/pipe/nv30/nv30_draw.c b/src/mesa/pipe/nv30/nv30_draw.c deleted file mode 100644 index bdeb975ca1e..00000000000 --- a/src/mesa/pipe/nv30/nv30_draw.c +++ /dev/null @@ -1,62 +0,0 @@ -#include "pipe/draw/draw_private.h" -#include "pipe/p_util.h" - -#include "nv30_context.h" - -struct nv30_draw_stage { - struct draw_stage draw; - struct nv30_context *nv30; -}; - -static void -nv30_draw_point(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_line(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_flush(struct draw_stage *draw, unsigned flags) -{ -} - -static void -nv30_draw_reset_stipple_counter(struct draw_stage *draw) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_destroy(struct draw_stage *draw) -{ - free(draw); -} - -struct draw_stage * -nv30_draw_render_stage(struct nv30_context *nv30) -{ - struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage); - - nv30draw->nv30 = nv30; - nv30draw->draw.draw = nv30->draw; - nv30draw->draw.point = nv30_draw_point; - nv30draw->draw.line = nv30_draw_line; - nv30draw->draw.tri = nv30_draw_tri; - nv30draw->draw.flush = nv30_draw_flush; - nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter; - nv30draw->draw.destroy = nv30_draw_destroy; - - return &nv30draw->draw; -} - diff --git a/src/mesa/pipe/nv30/nv30_fragprog.c b/src/mesa/pipe/nv30/nv30_fragprog.c deleted file mode 100644 index 0db1ac868c7..00000000000 --- a/src/mesa/pipe/nv30/nv30_fragprog.c +++ /dev/null @@ -1,835 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_util.h" - -#include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" - -#include "nv30_context.h" - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 1 -#define MASK_Y 2 -#define MASK_Z 4 -#define MASK_W 8 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV30_FP_OP_COND_TR -#include "nv30_shader.h" - -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) -#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) - -#define MAX_CONSTS 128 -#define MAX_IMM 32 -struct nv30_fpc { - struct nv30_fragment_program *fp; - - uint attrib_map[PIPE_MAX_SHADER_INPUTS]; - - int high_temp; - int temp_temp_count; - int num_regs; - - uint depth_id; - uint colour_id; - - unsigned inst_offset; - - struct { - int pipe; - float vals[4]; - } consts[MAX_CONSTS]; - int nr_consts; - - struct nv30_sreg imm[MAX_IMM]; - unsigned nr_imm; -}; - -static INLINE struct nv30_sreg -temp(struct nv30_fpc *fpc) -{ - int idx; - - idx = fpc->temp_temp_count++; - idx += fpc->high_temp + 1; - return nv30_sr(NV30SR_TEMP, idx); -} - -static INLINE struct nv30_sreg -constant(struct nv30_fpc *fpc, int pipe, float vals[4]) -{ - int idx; - - if (fpc->nr_consts == MAX_CONSTS) - assert(0); - idx = fpc->nr_consts++; - - fpc->consts[idx].pipe = pipe; - if (pipe == -1) - memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nv30_sr(NV30SR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ - (d), (m), (s0), (s1), (s2)) -#define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ - (d), (m), (s0), none, none) - -static void -grow_insns(struct nv30_fpc *fpc, int size) -{ - struct nv30_fragment_program *fp = fpc->fp; - - fp->insn_len += size; - fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); -} - -static void -emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) -{ - struct nv30_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - uint32_t sr = 0; - - switch (src.type) { - case NV30SR_INPUT: - sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); - break; - case NV30SR_OUTPUT: - sr |= NV30_FP_REG_SRC_HALF; - /* fall-through */ - case NV30SR_TEMP: - sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); - sr |= (src.index << NV30_FP_REG_SRC_SHIFT); - break; - case NV30SR_CONST: - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - if (fpc->consts[src.index].pipe >= 0) { - struct nv30_fragment_program_data *fpd; - - fp->consts = realloc(fp->consts, ++fp->nr_consts * - sizeof(*fpd)); - fpd = &fp->consts[fp->nr_consts - 1]; - fpd->offset = fpc->inst_offset + 4; - fpd->index = fpc->consts[src.index].pipe; - memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); - } else { - memcpy(&fp->insn[fpc->inst_offset + 4], - fpc->consts[src.index].vals, - sizeof(uint32_t) * 4); - } - - sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); - break; - case NV30SR_NONE: - sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV30_FP_REG_NEGATE; - - if (src.abs) - hw[1] |= (1 << (29 + pos)); - - sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); - - hw[pos + 1] |= sr; -} - -static void -emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) -{ - struct nv30_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - - switch (dst.type) { - case NV30SR_TEMP: - if (fpc->num_regs < (dst.index + 1)) - fpc->num_regs = dst.index + 1; - break; - case NV30SR_OUTPUT: - if (dst.index == 1) { - fp->fp_control |= 0xe; - } else { - hw[0] |= NV30_FP_OP_OUT_REG_HALF; - } - break; - case NV30SR_NONE: - hw[0] |= (1 << 30); - break; - default: - assert(0); - } - - hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); -} - -static void -nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) -{ - struct nv30_fragment_program *fp = fpc->fp; - uint32_t *hw; - - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - memset(hw, 0, sizeof(uint32_t) * 4); - - if (op == NV30_FP_OP_OPCODE_KIL) - fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; - hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); - - if (sat) - hw[0] |= NV30_FP_OP_OUT_SAT; - - if (dst.cc_update) - hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); - - emit_dst(fpc, dst); - emit_src(fpc, 0, s0); - emit_src(fpc, 1, s1); - emit_src(fpc, 2, s2); -} - -static void -nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) -{ - struct nv30_fragment_program *fp = fpc->fp; - - nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - - fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); - fp->samplers |= (1 << unit); -} - -static INLINE struct nv30_sreg -tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) -{ - struct nv30_sreg src; - - switch (fsrc->SrcRegister.File) { - case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, - fpc->attrib_map[fsrc->SrcRegister.Index]); - break; - case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->SrcRegister.Index, NULL); - break; - case TGSI_FILE_IMMEDIATE: - assert(fsrc->SrcRegister.Index < fpc->nr_imm); - src = fpc->imm[fsrc->SrcRegister.Index]; - break; - case TGSI_FILE_TEMPORARY: - src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1); - if (fpc->high_temp < src.index) - fpc->high_temp = src.index; - break; - /* This is clearly insane, but gallium hands us shaders like this. - * Luckily fragprog results are just temp regs.. - */ - case TGSI_FILE_OUTPUT: - if (fsrc->SrcRegister.Index == fpc->colour_id) - return nv30_sr(NV30SR_OUTPUT, 0); - else - return nv30_sr(NV30SR_OUTPUT, 1); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; - return src; -} - -static INLINE struct nv30_sreg -tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { - int idx; - - switch (fdst->DstRegister.File) { - case TGSI_FILE_OUTPUT: - if (fdst->DstRegister.Index == fpc->colour_id) - return nv30_sr(NV30SR_OUTPUT, 0); - else - return nv30_sr(NV30SR_OUTPUT, 1); - break; - case TGSI_FILE_TEMPORARY: - idx = fdst->DstRegister.Index + 1; - if (fpc->high_temp < idx) - fpc->high_temp = idx; - return nv30_sr(NV30SR_TEMP, idx); - case TGSI_FILE_NULL: - return nv30_sr(NV30SR_NONE, 0); - default: - NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); - return nv30_sr(NV30SR_NONE, 0); - } -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nv30_sreg *src) -{ - const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; - uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, - fsrc->SrcRegisterExtSwz.NegateY, - fsrc->SrcRegisterExtSwz.NegateZ, - fsrc->SrcRegisterExtSwz.NegateW }; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - mask |= (1 << c); - break; - case TGSI_EXTSWIZZLE_ZERO: - zero_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; - case TGSI_EXTSWIZZLE_ONE: - one_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; - default: - assert(0); - } - - if (!tgsi.negate && neg[c]) - neg_mask |= (1 << c); - } - - if (mask == MASK_ALL && !neg_mask) - return TRUE; - - *src = temp(fpc); - - if (mask) - arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - - if (zero_mask) - arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); - - if (one_mask) - arith(fpc, 0, STR, *src, one_mask, *src, none, none); - - if (neg_mask) { - struct nv30_sreg one = temp(fpc); - arith(fpc, 0, STR, one, neg_mask, one, none, none); - arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); - } - - return FALSE; -} - -static boolean -nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, - const struct tgsi_full_instruction *finst) -{ - const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - struct nv30_sreg src[3], dst, tmp; - int mask, sat, unit; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - fpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(fpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->FullSrcRegisters[i]; - - switch (fsrc->SrcRegister.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(fpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->SrcRegister.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - NOUVEAU_MSG("extra src attr %d\n", - fsrc->SrcRegister.Index); - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - case TGSI_FILE_SAMPLER: - unit = fsrc->SrcRegister.Index; - break; - case TGSI_FILE_OUTPUT: - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); - sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); - tmp.cc_update = 1; - arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NV30_VP_INST_COND_LT; - arith(fpc, sat, MOV, dst, mask, src[1], none, none); - break; - case TGSI_OPCODE_COS: - arith(fpc, sat, COS, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); - arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), - swz(src[1], W, W, W, W), none); - break; - case TGSI_OPCODE_DST: - arith(fpc, sat, DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(fpc, sat, EX2, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FLR: - arith(fpc, sat, FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(fpc, sat, FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_KIL: - arith(fpc, 0, KIL, none, 0, none, none, none); - break; - case TGSI_OPCODE_KILP: - dst = nv30_sr(NV30SR_NONE, 0); - dst.cc_update = 1; - arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; - arith(fpc, 0, KIL, dst, 0, none, none, none); - break; - case TGSI_OPCODE_LG2: - arith(fpc, sat, LG2, dst, mask, src[0], none, none); - break; -// case TGSI_OPCODE_LIT: - case TGSI_OPCODE_LRP: - tmp = temp(fpc); - arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); - arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); - break; - case TGSI_OPCODE_MAD: - arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(fpc, sat, MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(fpc); - arith(fpc, 0, LG2, tmp, MASK_X, - swz(src[0], X, X, X, X), none, none); - arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(fpc, sat, EX2, dst, mask, - swz(tmp, X, X, X, X), none, none); - break; - case TGSI_OPCODE_RCP: - arith(fpc, sat, RCP, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_RET: - assert(0); - break; - case TGSI_OPCODE_RFL: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); - arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); - arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, - swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); - arith(fpc, sat, MAD, dst, mask, - swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); - break; - case TGSI_OPCODE_RSQ: - tmp = temp(fpc); - arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, - abs(swz(src[0], X, X, X, X)), none, none); - arith(fpc, sat, EX2, dst, mask, - neg(swz(tmp, X, X, X, X)), none, none); - break; - case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); - } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - break; - case TGSI_OPCODE_SIN: - arith(fpc, sat, SIN, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_SGE: - arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); - break; - case TGSI_OPCODE_TEX: - if (finst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide == - TGSI_EXTSWIZZLE_W) { - tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); - } else - tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_TXB: - tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_XPD: - tmp = temp(fpc); - arith(fpc, 0, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(fpc, sat, MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - hw = NV30_FP_OP_INPUT_SRC_POSITION; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { - hw = NV30_FP_OP_INPUT_SRC_COL0; - } else - if (fdec->Semantic.SemanticIndex == 1) { - hw = NV30_FP_OP_INPUT_SRC_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV30_FP_OP_INPUT_SRC_FOGC; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. - SemanticIndex); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad input semantic\n"); - return FALSE; - } - - fpc->attrib_map[fdec->u.DeclarationRange.First] = hw; - return TRUE; -} - -static boolean -nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - switch (fdec->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->u.DeclarationRange.First; - break; - case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->u.DeclarationRange.First; - break; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - return TRUE; -} - -void -nv30_fragprog_translate(struct nv30_context *nv30, - struct nv30_fragment_program *fp) -{ - struct tgsi_parse_context parse; - struct nv30_fpc *fpc = NULL; - - fpc = CALLOC(1, sizeof(struct nv30_fpc)); - if (!fpc) - return; - fpc->fp = fp; - fpc->high_temp = -1; - fpc->num_regs = 2; - - tgsi_parse_init(&parse, fp->pipe->tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_INPUT: - if (!nv30_fragprog_parse_decl_attrib(fpc, fdec)) - goto out_err; - break; - case TGSI_FILE_OUTPUT: - if (!nv30_fragprog_parse_decl_output(fpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - struct tgsi_full_immediate *imm; - float vals[4]; - int i; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(fpc->nr_imm < MAX_IMM); - - for (i = 0; i < 4; i++) - vals[i] = imm->u.ImmediateFloat32[i].Float; - fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - - finst = &parse.FullToken.FullInstruction; - if (!nv30_fragprog_parse_instruction(fpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - fp->fp_control |= (fpc->num_regs-1)/2; - fp->fp_reg_control = (1<<16)|0x4; - - /* Terminate final instruction */ - fp->insn[fpc->inst_offset] |= 0x00000001; - - /* Append NOP + END instruction, may or may not be necessary. */ - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - fp->insn[fpc->inst_offset + 0] = 0x00000001; - fp->insn[fpc->inst_offset + 1] = 0x00000000; - fp->insn[fpc->inst_offset + 2] = 0x00000000; - fp->insn[fpc->inst_offset + 3] = 0x00000000; - - fp->translated = TRUE; - fp->on_hw = FALSE; -out_err: - tgsi_parse_free(&parse); - free(fpc); -} - -void -nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp) -{ - struct pipe_winsys *ws = nv30->pipe.winsys; - int i; - - if (!fp->translated) { - nv30_fragprog_translate(nv30, fp); - if (!fp->translated) - assert(0); - } - - if (fp->nr_consts) { - float *map = ws->buffer_map(ws, nv30->fragprog.constant_buf, - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < fp->nr_consts; i++) { - struct nv30_fragment_program_data *fpd = &fp->consts[i]; - uint32_t *p = &fp->insn[fpd->offset]; - uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; - - if (!memcmp(p, cb, 4 * sizeof(float))) - continue; - memcpy(p, cb, 4 * sizeof(float)); - fp->on_hw = 0; - } - ws->buffer_unmap(ws, nv30->fragprog.constant_buf); - } - - if (!fp->on_hw) { - const uint32_t le = 1; - uint32_t *map; - - if (!fp->buffer) - fp->buffer = ws->buffer_create(ws, 0x100, 0, - fp->insn_len * 4); - map = ws->buffer_map(ws, fp->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - -#if 0 - for (i = 0; i < fp->insn_len; i++) { - NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); - } -#endif - - if ((*(const uint8_t *)&le)) { - for (i = 0; i < fp->insn_len; i++) { - map[i] = fp->insn[i]; - } - } else { - /* Weird swapping for big-endian chips */ - for (i = 0; i < fp->insn_len; i++) { - map[i] = ((fp->insn[i] & 0xffff) << 16) | - ((fp->insn[i] >> 16) & 0xffff); - } - } - - ws->buffer_unmap(ws, fp->buffer); - fp->on_hw = TRUE; - } - - BEGIN_RING(rankine, NV34TCL_FP_CONTROL, 1); - OUT_RING (fp->fp_control); - BEGIN_RING(rankine, NV34TCL_FP_REG_CONTROL, 1); - OUT_RING (fp->fp_reg_control); - - nv30->fragprog.active = fp; -} - -void -nv30_fragprog_destroy(struct nv30_context *nv30, - struct nv30_fragment_program *fp) -{ - if (fp->insn_len) - free(fp->insn); -} - diff --git a/src/mesa/pipe/nv30/nv30_fragtex.c b/src/mesa/pipe/nv30/nv30_fragtex.c deleted file mode 100644 index 45ee6db8d6a..00000000000 --- a/src/mesa/pipe/nv30/nv30_fragtex.c +++ /dev/null @@ -1,160 +0,0 @@ -#include "nv30_context.h" - -static INLINE int log2i(int i) -{ - int r = 0; - - if (i & 0xffff0000) { - i >>= 16; - r += 16; - } - if (i & 0x0000ff00) { - i >>= 8; - r += 8; - } - if (i & 0x000000f0) { - i >>= 4; - r += 4; - } - if (i & 0x0000000c) { - i >>= 2; - r += 2; - } - if (i & 0x00000002) { - r += 1; - } - return r; -} - -#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ -{ \ - TRUE, \ - PIPE_FORMAT_##m, \ - NV34TCL_TX_FORMAT_FORMAT_##tf, \ - (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ - NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ - NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ - NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \ -} - -struct nv30_texture_format { - boolean defined; - uint pipe; - int format; - int swizzle; -}; - -static struct nv30_texture_format -nv30_texture_formats[] = { - _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), - _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), - _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), -// _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), - _(U_L8 , L8 , S1, S1, S1, ONE, X, X, X, X), - _(U_A8 , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), - _(U_I8 , L8 , S1, S1, S1, S1, X, X, X, X), - _(U_A8_L8 , A8L8 , S1, S1, S1, S1, X, X, X, Y), -// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), -// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), -// _(RGB_DXT1 , 0x86, S1, S1, S1, ONE, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT1 , 0x86, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT3 , 0x87, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT5 , 0x88, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), - {}, -}; - -static struct nv30_texture_format * -nv30_fragtex_format(uint pipe_format) -{ - struct nv30_texture_format *tf = nv30_texture_formats; - - while (tf->defined) { - if (tf->pipe == pipe_format) - return tf; - tf++; - } - - return NULL; -} - - -static void -nv30_fragtex_build(struct nv30_context *nv30, int unit) -{ - struct nv30_sampler_state *ps = nv30->tex_sampler[unit]; - struct nv30_miptree *nv30mt = nv30->tex_miptree[unit]; - struct pipe_texture *pt = &nv30mt->base; - struct nv30_texture_format *tf; - uint32_t txf, txs, txp; - int swizzled = 0; /*XXX: implement in region code? */ - - tf = nv30_fragtex_format(pt->format); - if (!tf || !tf->defined) { - NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); - return; - } - - txf = tf->format << 8; - txf |= (pt->last_level + 1) << 16; - txf |= log2i(pt->width[0]) << 20; - txf |= log2i(pt->height[0]) << 24; - txf |= log2i(pt->depth[0]) << 28; - txf |= 8; - - switch (pt->target) { -/* case PIPE_TEXTURE_CUBE: - txf |= NV34TCL_TEX_FORMAT_CUBIC;*/ - /* fall-through */ - case PIPE_TEXTURE_2D: - txf |= (2<<4); - break; - case PIPE_TEXTURE_3D: - txf |= (3<<4); - break; - case PIPE_TEXTURE_1D: - txf |= (1<<4); - break; - default: - NOUVEAU_ERR("Unknown target %d\n", pt->target); - return; - } - - txs = tf->swizzle; - - BEGIN_RING(rankine, NV34TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv30mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv30mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width[0] << 16) | pt->height[0]); - OUT_RING (ps->bcol); -} - -void -nv30_fragtex_bind(struct nv30_context *nv30) -{ - struct nv30_fragment_program *fp = nv30->fragprog.active; - unsigned samplers, unit; - - samplers = nv30->fp_samplers & ~fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - BEGIN_RING(rankine, NV34TCL_TX_ENABLE(unit), 1); - OUT_RING (0); - } - - samplers = nv30->dirty_samplers & fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - nv30_fragtex_build(nv30, unit); - } - - nv30->fp_samplers = fp->samplers; -} - diff --git a/src/mesa/pipe/nv30/nv30_miptree.c b/src/mesa/pipe/nv30/nv30_miptree.c deleted file mode 100644 index 5fb89f4cfdc..00000000000 --- a/src/mesa/pipe/nv30/nv30_miptree.c +++ /dev/null @@ -1,105 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" - -#include "nv30_context.h" - -static void -nv30_miptree_layout(struct nv30_miptree *nv30mt) -{ - struct pipe_texture *pt = &nv30mt->base; - boolean swizzled = FALSE; - uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; - uint offset = 0; - int nr_faces, l, f; - - if (pt->target == PIPE_TEXTURE_CUBE) { - nr_faces = 6; - } else - if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth[0]; - } else { - nr_faces = 1; - } - - for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; - - if (swizzled) - nv30mt->level[l].pitch = pt->width[l] * pt->cpp; - else - nv30mt->level[l].pitch = pt->width[0] * pt->cpp; - nv30mt->level[l].pitch = (nv30mt->level[l].pitch + 63) & ~63; - - nv30mt->level[l].image_offset = - CALLOC(nr_faces, sizeof(unsigned)); - - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); - - } - - for (f = 0; f < nr_faces; f++) { - for (l = 0; l <= pt->last_level; l++) { - nv30mt->level[l].image_offset[f] = offset; - offset += nv30mt->level[l].pitch * pt->height[l]; - } - } - - nv30mt->total_size = offset; -} - -static void -nv30_miptree_create(struct pipe_context *pipe, struct pipe_texture **pt) -{ - struct pipe_winsys *ws = pipe->winsys; - struct nv30_miptree *nv30mt; - - nv30mt = realloc(*pt, sizeof(struct nv30_miptree)); - if (!nv30mt) - return; - *pt = NULL; - - nv30_miptree_layout(nv30mt); - - nv30mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, - nv30mt->total_size); - if (!nv30mt->buffer) { - free(nv30mt); - return; - } - - *pt = &nv30mt->base; -} - -static void -nv30_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - struct pipe_winsys *ws = pipe->winsys; - struct pipe_texture *mt = *pt; - - *pt = NULL; - if (--mt->refcount <= 0) { - struct nv30_miptree *nv30mt = (struct nv30_miptree *)mt; - int l; - - pipe_buffer_reference(ws, &nv30mt->buffer, NULL); - for (l = 0; l <= mt->last_level; l++) { - if (nv30mt->level[l].image_offset) - free(nv30mt->level[l].image_offset); - } - free(nv30mt); - } -} - -void -nv30_init_miptree_functions(struct nv30_context *nv30) -{ - nv30->pipe.texture_create = nv30_miptree_create; - nv30->pipe.texture_release = nv30_miptree_release; -} - diff --git a/src/mesa/pipe/nv30/nv30_query.c b/src/mesa/pipe/nv30/nv30_query.c deleted file mode 100644 index 71fdcfa24df..00000000000 --- a/src/mesa/pipe/nv30/nv30_query.c +++ /dev/null @@ -1,113 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_util.h" - -#include "nv30_context.h" - -struct nv30_query { - struct nouveau_resource *object; - unsigned type; - boolean ready; - uint64_t result; -}; - -static inline struct nv30_query * -nv30_query(struct pipe_query *pipe) -{ - return (struct nv30_query *)pipe; -} - -static struct pipe_query * -nv30_query_create(struct pipe_context *pipe, unsigned query_type) -{ - struct nv30_query *q; - - q = CALLOC(1, sizeof(struct nv30_query)); - q->type = query_type; - - return (struct pipe_query *)q; -} - -static void -nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_query *q = nv30_query(pq); - - if (q->object) - nv30->nvws->res_free(&q->object); - free(q); -} - -static void -nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_query *q = nv30_query(pq); - - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - if (nv30->nvws->res_alloc(nv30->query_heap, 1, NULL, &q->object)) - assert(0); - nv30->nvws->notifier_reset(nv30->query, q->object->start); - - BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (1); - - q->ready = FALSE; -} - -static void -nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_query *q = nv30_query(pq); - - BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | - ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(); -} - -static boolean -nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64 *result) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_query *q = nv30_query(pq); - struct nouveau_winsys *nvws = nv30->nvws; - - assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - if (!q->ready) { - unsigned status; - - status = nvws->notifier_status(nv30->query, q->object->start); - if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { - if (wait == FALSE) - return FALSE; - nvws->notifier_wait(nv30->query, q->object->start, - NV_NOTIFY_STATE_STATUS_COMPLETED, - 0); - } - - q->result = nvws->notifier_retval(nv30->query, - q->object->start); - q->ready = TRUE; - nvws->res_free(&q->object); - } - - *result = q->result; - return TRUE; -} - -void -nv30_init_query_functions(struct nv30_context *nv30) -{ - nv30->pipe.create_query = nv30_query_create; - nv30->pipe.destroy_query = nv30_query_destroy; - nv30->pipe.begin_query = nv30_query_begin; - nv30->pipe.end_query = nv30_query_end; - nv30->pipe.get_query_result = nv30_query_result; -} diff --git a/src/mesa/pipe/nv30/nv30_shader.h b/src/mesa/pipe/nv30/nv30_shader.h deleted file mode 100644 index dd3a36f78f3..00000000000 --- a/src/mesa/pipe/nv30/nv30_shader.h +++ /dev/null @@ -1,490 +0,0 @@ -#ifndef __NV30_SHADER_H__ -#define __NV30_SHADER_H__ - -/* Vertex programs instruction set - * - * 128bit opcodes, split into 4 32-bit ones for ease of use. - * - * Non-native instructions - * ABS - MOV + NV40_VP_INST0_DEST_ABS - * POW - EX2 + MUL + LG2 - * SUB - ADD, second source negated - * SWZ - MOV - * XPD - - * - * Register access - * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) - * - Only one CONST can be accessed per-instruction (move extras into TEMPs) - * - * Relative Addressing - * According to the value returned for - * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB - * - * there are only two address registers available. The destination in the - * ARL instruction is set to TEMP <n> (The temp isn't actually written). - * - * When using vanilla ARB_v_p, the proprietary driver will squish both the - * available ADDRESS regs into the first hardware reg in the X and Y - * components. - * - * To use an address reg as an index into consts, the CONST_SRC is set to - * (const_base + offset) and INDEX_CONST is set. - * - * To access the second address reg use ADDR_REG_SELECT_1. A particular - * component of the address regs is selected with ADDR_SWZ. - * - * Only one address register can be accessed per instruction. - * - * Conditional execution (see NV_vertex_program{2,3} for details) Conditional - * execution of an instruction is enabled by setting COND_TEST_ENABLE, and - * selecting the condition which will allow the test to pass with - * COND_{FL,LT,...}. It is possible to swizzle the values in the condition - * register, which allows for testing against an individual component. - * - * Branching: - * - * The BRA/CAL instructions seem to follow a slightly different opcode - * layout. The destination instruction ID (IADDR) overlaps a source field. - * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO - * command, and is incremented automatically on each UPLOAD_INST FIFO - * command. - * - * Conditional branching is achieved by using the condition tests described - * above. There doesn't appear to be dedicated looping instructions, but - * this can be done using a temp reg + conditional branching. - * - * Subroutines may be uploaded before the main program itself, but the first - * executed instruction is determined by the PROGRAM_START_ID FIFO command. - * - */ - -/* DWORD 0 */ - -#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) -#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ -#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ -#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ -#define NV30_VP_INST_VEC_RESULT (1 << 20) -#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 -#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) -#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) -#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) -#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) -#define NV30_VP_INST_COND_SHIFT 11 -#define NV30_VP_INST_COND_MASK (0x07 << 11) -# define NV30_VP_INST_COND_FL 0 /* guess */ -# define NV30_VP_INST_COND_LT 1 -# define NV30_VP_INST_COND_EQ 2 -# define NV30_VP_INST_COND_LE 3 -# define NV30_VP_INST_COND_GT 4 -# define NV30_VP_INST_COND_NE 5 -# define NV30_VP_INST_COND_GE 6 -# define NV30_VP_INST_COND_TR 7 /* guess */ -#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 -#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) -#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 -#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) -#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 -#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) -#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 -#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) -#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 -#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) -#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 -#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) -#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 -#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) - -/* DWORD 1 */ -#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 -#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) -# define NV30_VP_INST_OP_NOP 0x00 -# define NV30_VP_INST_OP_RCP 0x02 -# define NV30_VP_INST_OP_RCC 0x03 -# define NV30_VP_INST_OP_RSQ 0x04 -# define NV30_VP_INST_OP_EXP 0x05 -# define NV30_VP_INST_OP_LOG 0x06 -# define NV30_VP_INST_OP_LIT 0x07 -# define NV30_VP_INST_OP_BRA 0x09 -# define NV30_VP_INST_OP_CAL 0x0B -# define NV30_VP_INST_OP_RET 0x0C -# define NV30_VP_INST_OP_LG2 0x0D -# define NV30_VP_INST_OP_EX2 0x0E -# define NV30_VP_INST_OP_SIN 0x0F -# define NV30_VP_INST_OP_COS 0x10 -#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 -#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) -# define NV30_VP_INST_OP_NOPV 0x00 -# define NV30_VP_INST_OP_MOV 0x01 -# define NV30_VP_INST_OP_MUL 0x02 -# define NV30_VP_INST_OP_ADD 0x03 -# define NV30_VP_INST_OP_MAD 0x04 -# define NV30_VP_INST_OP_DP3 0x05 -# define NV30_VP_INST_OP_DP4 0x07 -# define NV30_VP_INST_OP_DPH 0x06 -# define NV30_VP_INST_OP_DST 0x08 -# define NV30_VP_INST_OP_MIN 0x09 -# define NV30_VP_INST_OP_MAX 0x0A -# define NV30_VP_INST_OP_SLT 0x0B -# define NV30_VP_INST_OP_SGE 0x0C -# define NV30_VP_INST_OP_ARL 0x0D -# define NV30_VP_INST_OP_FRC 0x0E -# define NV30_VP_INST_OP_FLR 0x0F -# define NV30_VP_INST_OP_SEQ 0x10 -# define NV30_VP_INST_OP_SFL 0x11 -# define NV30_VP_INST_OP_SGT 0x12 -# define NV30_VP_INST_OP_SLE 0x13 -# define NV30_VP_INST_OP_SNE 0x14 -# define NV30_VP_INST_OP_STR 0x15 -# define NV30_VP_INST_OP_SSG 0x16 -# define NV30_VP_INST_OP_ARR 0x17 -# define NV30_VP_INST_OP_ARA 0x18 -#define NV30_VP_INST_CONST_SRC_SHIFT 14 -#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) -#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ -#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ -# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ -# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ -# define NV30_VP_INST_IN_NORMAL 2 -# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ -# define NV30_VP_INST_IN_COL1 4 -# define NV30_VP_INST_IN_FOGC 5 -# define NV30_VP_INST_IN_TC0 8 -# define NV30_VP_INST_IN_TC(n) (8+n) -#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ -#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ - -/* Please note: the IADDR fields overlap other fields because they are used - * only for branch instructions. See Branching: label above - * - * DWORD 2 - */ -#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ -#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ -#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ -#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ -#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ -#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ -#define NV30_VP_INST_IADDR_SHIFT 2 -#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ - -/* DWORD 3 */ -#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ -#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ -#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 -#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) -#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 -#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) -#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 -#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) -#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ -#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ -#define NV30_VP_INST_DEST_SHIFT 2 -#define NV30_VP_INST_DEST_MASK (0x0F << 2) -# define NV30_VP_INST_DEST_POS 0 -# define NV30_VP_INST_DEST_BFC0 1 -# define NV30_VP_INST_DEST_BFC1 2 -# define NV30_VP_INST_DEST_COL0 3 -# define NV30_VP_INST_DEST_COL1 4 -# define NV30_VP_INST_DEST_FOGC 5 -# define NV30_VP_INST_DEST_PSZ 6 -# define NV30_VP_INST_DEST_TC(n) (8+n) - -#define NV30_VP_INST_LAST (1 << 0) - -/* Useful to split the source selection regs into their pieces */ -#define NV30_VP_SRC0_HIGH_SHIFT 6 -#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 -#define NV30_VP_SRC0_LOW_MASK 0x0000003F -#define NV30_VP_SRC2_HIGH_SHIFT 4 -#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 -#define NV30_VP_SRC2_LOW_MASK 0x0000000F - - -/* Source-register definition - matches NV20 exactly */ -#define NV30_VP_SRC_NEGATE (1<<14) -#define NV30_VP_SRC_SWZ_X_SHIFT 12 -#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) -#define NV30_VP_SRC_SWZ_Y_SHIFT 10 -#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) -#define NV30_VP_SRC_SWZ_Z_SHIFT 8 -#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) -#define NV30_VP_SRC_SWZ_W_SHIFT 6 -#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) -#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 -#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) -#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 -#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) -#define NV30_VP_SRC_REG_TYPE_SHIFT 0 -#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) -#define NV30_VP_SRC_REG_TYPE_TEMP 1 -#define NV30_VP_SRC_REG_TYPE_INPUT 2 -#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ - -/* - * Each fragment program opcode appears to be comprised of 4 32-bit values. - * - * 0 - Opcode, output reg/mask, ATTRIB source - * 1 - Source 0 - * 2 - Source 1 - * 3 - Source 2 - * - * There appears to be no special difference between result regs and temp regs. - * result.color == R0.xyzw - * result.depth == R1.z - * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 - * otherwise it is set to 1. - * - * Constants are inserted directly after the instruction that uses them. - * - * It appears that it's not possible to use two input registers in one - * instruction as the input sourcing is done in the instruction dword - * and not the source selection dwords. As such instructions such as: - * - * ADD result.color, fragment.color, fragment.texcoord[0]; - * - * must be split into two MOV's and then an ADD (nvidia does this) but - * I'm not sure why it's not just one MOV and then source the second input - * in the ADD instruction.. - * - * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary - * negation requires multiplication with a const. - * - * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE - * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO - * is implemented simply by not writing to the relevant components of the destination. - * - * Conditional execution - * TODO - * - * Non-native instructions: - * LIT - * LRP - MAD+MAD - * SUB - ADD, negate second source - * RSQ - LG2 + EX2 - * POW - LG2 + MUL + EX2 - * SCS - COS + SIN - * XPD - */ - -//== Opcode / Destination selection == -#define NV30_FP_OP_PROGRAM_END (1 << 0) -#define NV30_FP_OP_OUT_REG_SHIFT 1 -#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ -/* Needs to be set when writing outputs to get expected result.. */ -#define NV30_FP_OP_OUT_REG_HALF (1 << 7) -#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8) -#define NV30_FP_OP_OUTMASK_SHIFT 9 -#define NV30_FP_OP_OUTMASK_MASK (0xF << 9) -# define NV30_FP_OP_OUT_X (1<<9) -# define NV30_FP_OP_OUT_Y (1<<10) -# define NV30_FP_OP_OUT_Z (1<<11) -# define NV30_FP_OP_OUT_W (1<<12) -/* Uncertain about these, especially the input_src values.. it's possible that - * they can be dynamically changed. - */ -#define NV30_FP_OP_INPUT_SRC_SHIFT 13 -#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13) -# define NV30_FP_OP_INPUT_SRC_POSITION 0x0 -# define NV30_FP_OP_INPUT_SRC_COL0 0x1 -# define NV30_FP_OP_INPUT_SRC_COL1 0x2 -# define NV30_FP_OP_INPUT_SRC_FOGC 0x3 -# define NV30_FP_OP_INPUT_SRC_TC0 0x4 -# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n) -#define NV30_FP_OP_TEX_UNIT_SHIFT 17 -#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ -#define NV30_FP_OP_PRECISION_SHIFT 22 -#define NV30_FP_OP_PRECISION_MASK (3 << 22) -# define NV30_FP_PRECISION_FP32 0 -# define NV30_FP_PRECISION_FP16 1 -# define NV30_FP_PRECISION_FX12 2 -#define NV30_FP_OP_OPCODE_SHIFT 24 -#define NV30_FP_OP_OPCODE_MASK (0x3F << 24) -# define NV30_FP_OP_OPCODE_NOP 0x00 -# define NV30_FP_OP_OPCODE_MOV 0x01 -# define NV30_FP_OP_OPCODE_MUL 0x02 -# define NV30_FP_OP_OPCODE_ADD 0x03 -# define NV30_FP_OP_OPCODE_MAD 0x04 -# define NV30_FP_OP_OPCODE_DP3 0x05 -# define NV30_FP_OP_OPCODE_DP4 0x06 -# define NV30_FP_OP_OPCODE_DST 0x07 -# define NV30_FP_OP_OPCODE_MIN 0x08 -# define NV30_FP_OP_OPCODE_MAX 0x09 -# define NV30_FP_OP_OPCODE_SLT 0x0A -# define NV30_FP_OP_OPCODE_SGE 0x0B -# define NV30_FP_OP_OPCODE_SLE 0x0C -# define NV30_FP_OP_OPCODE_SGT 0x0D -# define NV30_FP_OP_OPCODE_SNE 0x0E -# define NV30_FP_OP_OPCODE_SEQ 0x0F -# define NV30_FP_OP_OPCODE_FRC 0x10 -# define NV30_FP_OP_OPCODE_FLR 0x11 -# define NV30_FP_OP_OPCODE_KIL 0x12 -# define NV30_FP_OP_OPCODE_PK4B 0x13 -# define NV30_FP_OP_OPCODE_UP4B 0x14 -# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ -# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ -# define NV30_FP_OP_OPCODE_TEX 0x17 -# define NV30_FP_OP_OPCODE_TXP 0x18 -# define NV30_FP_OP_OPCODE_TXD 0x19 -# define NV30_FP_OP_OPCODE_RCP 0x1A -# define NV30_FP_OP_OPCODE_RSQ 0x1B -# define NV30_FP_OP_OPCODE_EX2 0x1C -# define NV30_FP_OP_OPCODE_LG2 0x1D -# define NV30_FP_OP_OPCODE_LIT 0x1E -# define NV30_FP_OP_OPCODE_LRP 0x1F -# define NV30_FP_OP_OPCODE_STR 0x20 -# define NV30_FP_OP_OPCODE_SFL 0x21 -# define NV30_FP_OP_OPCODE_COS 0x22 -# define NV30_FP_OP_OPCODE_SIN 0x23 -# define NV30_FP_OP_OPCODE_PK2H 0x24 -# define NV30_FP_OP_OPCODE_UP2H 0x25 -# define NV30_FP_OP_OPCODE_POW 0x26 -# define NV30_FP_OP_OPCODE_PK4UB 0x27 -# define NV30_FP_OP_OPCODE_UP4UB 0x28 -# define NV30_FP_OP_OPCODE_PK2US 0x29 -# define NV30_FP_OP_OPCODE_UP2US 0x2A -# define NV30_FP_OP_OPCODE_DP2A 0x2E -# define NV30_FP_OP_OPCODE_TXB 0x31 -# define NV30_FP_OP_OPCODE_RFL 0x36 -# define NV30_FP_OP_OPCODE_DIV 0x3A -#define NV30_FP_OP_OUT_SAT (1 << 31) - -/* high order bits of SRC0 */ -#define NV30_FP_OP_OUT_ABS (1 << 29) -#define NV30_FP_OP_COND_SWZ_W_SHIFT 27 -#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27) -#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25 -#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25) -#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23 -#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23) -#define NV30_FP_OP_COND_SWZ_X_SHIFT 21 -#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21) -#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21 -#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) -#define NV30_FP_OP_COND_SHIFT 18 -#define NV30_FP_OP_COND_MASK (0x07 << 18) -# define NV30_FP_OP_COND_FL 0 -# define NV30_FP_OP_COND_LT 1 -# define NV30_FP_OP_COND_EQ 2 -# define NV30_FP_OP_COND_LE 3 -# define NV30_FP_OP_COND_GT 4 -# define NV30_FP_OP_COND_NE 5 -# define NV30_FP_OP_COND_GE 6 -# define NV30_FP_OP_COND_TR 7 - -/* high order bits of SRC1 */ -#define NV30_FP_OP_DST_SCALE_SHIFT 28 -#define NV30_FP_OP_DST_SCALE_MASK (3 << 28) -#define NV30_FP_OP_DST_SCALE_1X 0 -#define NV30_FP_OP_DST_SCALE_2X 1 -#define NV30_FP_OP_DST_SCALE_4X 2 -#define NV30_FP_OP_DST_SCALE_8X 3 -#define NV30_FP_OP_DST_SCALE_INV_2X 5 -#define NV30_FP_OP_DST_SCALE_INV_4X 6 -#define NV30_FP_OP_DST_SCALE_INV_8X 7 - - -/* high order bits of SRC2 */ -#define NV30_FP_OP_INDEX_INPUT (1 << 30) - -//== Register selection == -#define NV30_FP_REG_TYPE_SHIFT 0 -#define NV30_FP_REG_TYPE_MASK (3 << 0) -# define NV30_FP_REG_TYPE_TEMP 0 -# define NV30_FP_REG_TYPE_INPUT 1 -# define NV30_FP_REG_TYPE_CONST 2 -#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */ -#define NV30_FP_REG_SRC_MASK (31 << 2) -#define NV30_FP_REG_SRC_HALF (1 << 8) -#define NV30_FP_REG_SWZ_ALL_SHIFT 9 -#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9) -#define NV30_FP_REG_SWZ_X_SHIFT 9 -#define NV30_FP_REG_SWZ_X_MASK (3 << 9) -#define NV30_FP_REG_SWZ_Y_SHIFT 11 -#define NV30_FP_REG_SWZ_Y_MASK (3 << 11) -#define NV30_FP_REG_SWZ_Z_SHIFT 13 -#define NV30_FP_REG_SWZ_Z_MASK (3 << 13) -#define NV30_FP_REG_SWZ_W_SHIFT 15 -#define NV30_FP_REG_SWZ_W_MASK (3 << 15) -# define NV30_FP_SWIZZLE_X 0 -# define NV30_FP_SWIZZLE_Y 1 -# define NV30_FP_SWIZZLE_Z 2 -# define NV30_FP_SWIZZLE_W 3 -#define NV30_FP_REG_NEGATE (1 << 17) - -#define NV30SR_NONE 0 -#define NV30SR_OUTPUT 1 -#define NV30SR_INPUT 2 -#define NV30SR_TEMP 3 -#define NV30SR_CONST 4 - -struct nv30_sreg { - int type; - int index; - - int dst_scale; - - int negate; - int abs; - int swz[4]; - - int cc_update; - int cc_update_reg; - int cc_test; - int cc_test_reg; - int cc_swz[4]; -}; - -static INLINE struct nv30_sreg -nv30_sr(int type, int index) -{ - struct nv30_sreg temp = { - .type = type, - .index = index, - .dst_scale = DEF_SCALE, - .abs = 0, - .negate = 0, - .swz = { 0, 1, 2, 3 }, - .cc_update = 0, - .cc_update_reg = 0, - .cc_test = DEF_CTEST, - .cc_test_reg = 0, - .cc_swz = { 0, 1, 2, 3 }, - }; - return temp; -} - -static INLINE struct nv30_sreg -nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w) -{ - struct nv30_sreg dst = src; - - dst.swz[SWZ_X] = src.swz[x]; - dst.swz[SWZ_Y] = src.swz[y]; - dst.swz[SWZ_Z] = src.swz[z]; - dst.swz[SWZ_W] = src.swz[w]; - return dst; -} - -static INLINE struct nv30_sreg -nv30_sr_neg(struct nv30_sreg src) -{ - src.negate = !src.negate; - return src; -} - -static INLINE struct nv30_sreg -nv30_sr_abs(struct nv30_sreg src) -{ - src.abs = 1; - return src; -} - -static INLINE struct nv30_sreg -nv30_sr_scale(struct nv30_sreg src, int scale) -{ - src.dst_scale = scale; - return src; -} - -#endif diff --git a/src/mesa/pipe/nv30/nv30_state.c b/src/mesa/pipe/nv30/nv30_state.c deleted file mode 100644 index 53368561e07..00000000000 --- a/src/mesa/pipe/nv30/nv30_state.c +++ /dev/null @@ -1,739 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" - -#include "nv30_context.h" -#include "nv30_state.h" - -static void * -nv30_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - struct nv30_blend_state *cb; - - cb = malloc(sizeof(struct nv30_blend_state)); - - cb->b_enable = cso->blend_enable ? 1 : 0; - cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | - (nvgl_blend_func(cso->rgb_src_factor))); - cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | - (nvgl_blend_func(cso->rgb_dst_factor))); - cb->b_eqn = ((nvgl_blend_eqn(cso->alpha_func) << 16) | - (nvgl_blend_eqn(cso->rgb_func))); - - cb->l_enable = cso->logicop_enable ? 1 : 0; - cb->l_op = nvgl_logicop_func(cso->logicop_func); - - cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | - ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | - ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | - ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); - - cb->d_enable = cso->dither ? 1 : 0; - - return (void *)cb; -} - -static void -nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_blend_state *cb = hwcso; - - BEGIN_RING(rankine, NV34TCL_DITHER_ENABLE, 1); - OUT_RING (cb->d_enable); - - BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); - OUT_RING (cb->b_enable); - OUT_RING (cb->b_srcfunc); - OUT_RING (cb->b_dstfunc); - BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_EQUATION, 1); - OUT_RING (cb->b_eqn); - - BEGIN_RING(rankine, NV34TCL_COLOR_MASK, 1); - OUT_RING (cb->c_mask); - - BEGIN_RING(rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); - OUT_RING (cb->l_enable); - OUT_RING (cb->l_op); -} - -static void -nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso) -{ - free(hwcso); -} - - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV34TCL_TX_WRAP_S_CLAMP; - break; -/* case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP; - break;*/ - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - } - - return ret >> NV34TCL_TX_WRAP_S_SHIFT; -} - -static void * -nv30_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nv30_sampler_state *ps; - uint32_t filter = 0; - - ps = malloc(sizeof(struct nv30_sampler_state)); - - ps->fmt = 0; - if (!cso->normalized_coords) - ps->fmt |= NV34TCL_TX_FORMAT_RECT; - - ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | - (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | - (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); - - ps->en = 0; - if (cso->max_anisotropy >= 2.0) { - /* no idea, binary driver sets it, works without it.. meh.. */ - ps->wrap |= (1 << 5); - -/* if (cso->max_anisotropy >= 16.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_16X; - } else - if (cso->max_anisotropy >= 12.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_12X; - } else - if (cso->max_anisotropy >= 10.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_10X; - } else - if (cso->max_anisotropy >= 8.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 6.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_6X; - } else - if (cso->max_anisotropy >= 4.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; - } else { - ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; - }*/ - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; - break; - } - break; - } - - ps->filt = filter; - -/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; - break; - case PIPE_FUNC_GREATER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; - break; - case PIPE_FUNC_EQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; - break; - case PIPE_FUNC_GEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; - break; - case PIPE_FUNC_LESS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; - break; - case PIPE_FUNC_NOTEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; - break; - case PIPE_FUNC_LEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; - break; - case PIPE_FUNC_ALWAYS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; - break; - default: - break; - } - }*/ - - ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | - (float_to_ubyte(cso->border_color[0]) << 16) | - (float_to_ubyte(cso->border_color[1]) << 8) | - (float_to_ubyte(cso->border_color[2]) << 0)); - - return (void *)ps; -} - -static void -nv30_sampler_state_bind(struct pipe_context *pipe, unsigned unit, - void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_sampler_state *ps = hwcso; - - nv30->tex_sampler[unit] = ps; - nv30->dirty_samplers |= (1 << unit); -} - -static void -nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ - free(hwcso); -} - -static void -nv30_set_sampler_texture(struct pipe_context *pipe, unsigned unit, - struct pipe_texture *miptree) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->tex_miptree[unit] = (struct nv30_miptree *)miptree; - nv30->dirty_samplers |= (1 << unit); -} - -static void * -nv30_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - struct nv30_rasterizer_state *rs; - int i; - - /*XXX: ignored: - * light_twoside - * offset_cw/ccw -nohw - * scissor - * point_smooth -nohw - * multisample - * offset_units / offset_scale - */ - rs = malloc(sizeof(struct nv30_rasterizer_state)); - - rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01; - - rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; - rs->line_smooth_en = cso->line_smooth ? 1 : 0; - rs->line_stipple_en = cso->line_stipple_enable ? 1 : 0; - rs->line_stipple = (cso->line_stipple_pattern << 16) | - cso->line_stipple_factor; - - rs->point_size = *(uint32_t*)&cso->point_size; - - rs->poly_smooth_en = cso->poly_smooth ? 1 : 0; - rs->poly_stipple_en = cso->poly_stipple_enable ? 1 : 0; - - if (cso->front_winding == PIPE_WINDING_CCW) { - rs->front_face = NV34TCL_FRONT_FACE_CCW; - rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); - rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw); - } else { - rs->front_face = NV34TCL_FRONT_FACE_CW; - rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); - rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw); - } - - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - rs->cull_face_en = 1; - if (cso->front_winding == PIPE_WINDING_CCW) - rs->cull_face = NV34TCL_CULL_FACE_FRONT; - else - rs->cull_face = NV34TCL_CULL_FACE_BACK; - break; - case PIPE_WINDING_CW: - rs->cull_face_en = 1; - if (cso->front_winding == PIPE_WINDING_CW) - rs->cull_face = NV34TCL_CULL_FACE_FRONT; - else - rs->cull_face = NV34TCL_CULL_FACE_BACK; - break; - case PIPE_WINDING_BOTH: - rs->cull_face_en = 1; - rs->cull_face = NV34TCL_CULL_FACE_FRONT_AND_BACK; - break; - case PIPE_WINDING_NONE: - default: - rs->cull_face_en = 0; - rs->cull_face = 0; - break; - } - - if (cso->point_sprite) { - rs->point_sprite = (1 << 0); - for (i = 0; i < 8; i++) { - if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) - rs->point_sprite |= (1 << (8 + i)); - } - } else { - rs->point_sprite = 0; - } - - return (void *)rs; -} - -static void -nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_rasterizer_state *rs = hwcso; - - BEGIN_RING(rankine, NV34TCL_SHADE_MODEL, 1); - OUT_RING (rs->shade_model); - - BEGIN_RING(rankine, NV34TCL_LINE_WIDTH, 2); - OUT_RING (rs->line_width); - OUT_RING (rs->line_smooth_en); - BEGIN_RING(rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2); - OUT_RING (rs->line_stipple_en); - OUT_RING (rs->line_stipple); - - BEGIN_RING(rankine, NV34TCL_POINT_SIZE, 1); - OUT_RING (rs->point_size); - - BEGIN_RING(rankine, NV34TCL_POLYGON_MODE_FRONT, 6); - OUT_RING (rs->poly_mode_front); - OUT_RING (rs->poly_mode_back); - OUT_RING (rs->cull_face); - OUT_RING (rs->front_face); - OUT_RING (rs->poly_smooth_en); - OUT_RING (rs->cull_face_en); - - BEGIN_RING(rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - OUT_RING (rs->poly_stipple_en); - - BEGIN_RING(rankine, NV34TCL_POINT_SPRITE, 1); - OUT_RING (rs->point_sprite); -} - -static void -nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) -{ - free(hwcso); -} - -static void -nv30_translate_stencil(const struct pipe_depth_stencil_alpha_state *cso, - unsigned idx, struct nv30_stencil_push *hw) -{ - hw->enable = cso->stencil[idx].enabled ? 1 : 0; - hw->wmask = cso->stencil[idx].write_mask; - hw->func = nvgl_comparison_op(cso->stencil[idx].func); - hw->ref = cso->stencil[idx].ref_value; - hw->vmask = cso->stencil[idx].value_mask; - hw->fail = nvgl_stencil_op(cso->stencil[idx].fail_op); - hw->zfail = nvgl_stencil_op(cso->stencil[idx].zfail_op); - hw->zpass = nvgl_stencil_op(cso->stencil[idx].zpass_op); -} - -static void * -nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct nv30_depth_stencil_alpha_state *hw; - - hw = malloc(sizeof(struct nv30_depth_stencil_alpha_state)); - - hw->depth.func = nvgl_comparison_op(cso->depth.func); - hw->depth.write_enable = cso->depth.writemask ? 1 : 0; - hw->depth.test_enable = cso->depth.enabled ? 1 : 0; - - nv30_translate_stencil(cso, 0, &hw->stencil.front); - nv30_translate_stencil(cso, 1, &hw->stencil.back); - - hw->alpha.enabled = cso->alpha.enabled ? 1 : 0; - hw->alpha.func = nvgl_comparison_op(cso->alpha.func); - hw->alpha.ref = float_to_ubyte(cso->alpha.ref); - - return (void *)hw; -} - -static void -nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_depth_stencil_alpha_state *hw = hwcso; - - BEGIN_RING(rankine, NV34TCL_DEPTH_FUNC, 3); - OUT_RINGp ((uint32_t *)&hw->depth, 3); - BEGIN_RING(rankine, NV34TCL_STENCIL_BACK_ENABLE, 16); - OUT_RINGp ((uint32_t *)&hw->stencil.back, 8); - OUT_RINGp ((uint32_t *)&hw->stencil.front, 8); - BEGIN_RING(rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3); - OUT_RINGp ((uint32_t *)&hw->alpha.enabled, 3); -} - -static void -nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) -{ - free(hwcso); -} - -static void * -nv30_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv30_vertex_program *vp; - - vp = CALLOC(1, sizeof(struct nv30_vertex_program)); - vp->pipe = cso; - - return (void *)vp; -} - -static void -nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_vertex_program *vp = hwcso; - - nv30->vertprog.current = vp; - nv30->dirty |= NV30_NEW_VERTPROG; -} - -static void -nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_vertex_program *vp = hwcso; - - nv30_vertprog_destroy(nv30, vp); - free(vp); -} - -static void * -nv30_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv30_fragment_program *fp; - - fp = CALLOC(1, sizeof(struct nv30_fragment_program)); - fp->pipe = cso; - - return (void *)fp; -} - -static void -nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_fragment_program *fp = hwcso; - - nv30->fragprog.current = fp; - nv30->dirty |= NV30_NEW_FRAGPROG; -} - -static void -nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_fragment_program *fp = hwcso; - - nv30_fragprog_destroy(nv30, fp); - free(fp); -} - -static void -nv30_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_COLOR, 1); - OUT_RING ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0)); -} - -static void -nv30_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ -} - -static void -nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - if (shader == PIPE_SHADER_VERTEX) { - nv30->vertprog.constant_buf = buf->buffer; - nv30->dirty |= NV30_NEW_VERTPROG; - } else - if (shader == PIPE_SHADER_FRAGMENT) { - nv30->fragprog.constant_buf = buf->buffer; - nv30->dirty |= NV30_NEW_FRAGPROG; - } -} - -static void -nv30_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct pipe_surface *rt[4], *zeta; - uint32_t rt_enable, rt_format, w, h; - int i, colour_format = 0, zeta_format = 0; - - rt_enable = 0; - for (i = 0; i < 4; i++) { - if (!fb->cbufs[i]) - continue; - - if (colour_format) { - assert(w == fb->cbufs[i]->width); - assert(h == fb->cbufs[i]->height); - assert(colour_format == fb->cbufs[i]->format); - } else { - w = fb->cbufs[i]->width; - h = fb->cbufs[i]->height; - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); - rt[i] = fb->cbufs[i]; - } - } - - if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | NV34TCL_RT_ENABLE_COLOR2 | - NV34TCL_RT_ENABLE_COLOR3)) - rt_enable |= NV34TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - if (colour_format) { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } else { - w = fb->zsbuf->width; - h = fb->zsbuf->height; - } - - zeta_format = fb->zsbuf->format; - zeta = fb->zsbuf; - } - - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - - switch (colour_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { - BEGIN_RING(rankine, NV34TCL_COLOR0_PITCH, 1); - OUT_RING ( (rt[0]->pitch * rt[0]->cpp) | ( (zeta->pitch * zeta->cpp) << 16) ); - nv30->rt[0] = rt[0]->buffer; - } - - if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - BEGIN_RING(rankine, NV34TCL_COLOR1_PITCH, 2); - OUT_RING (rt[1]->pitch * rt[1]->cpp); - nv30->rt[1] = rt[1]->buffer; - } - - if (zeta_format) - { - nv30->zeta = zeta->buffer; - } - - nv30->rt_enable = rt_enable; - BEGIN_RING(rankine, NV34TCL_RT_ENABLE, 1); - OUT_RING (rt_enable); - BEGIN_RING(rankine, NV34TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); - OUT_RING (rt_format); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_HORIZ, 2); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0); - OUT_RING (((h - 1) << 16) | 0); -} - -static void -nv30_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - BEGIN_RING(rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); - OUT_RINGp ((uint32_t *)stipple->stipple, 32); -} - -static void -nv30_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2); - OUT_RING (((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (((s->maxy - s->miny) << 16) | s->miny); -} - -static void -nv30_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *vpt) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - BEGIN_RING(rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); - OUT_RINGf (vpt->translate[0]); - OUT_RINGf (vpt->translate[1]); - OUT_RINGf (vpt->translate[2]); - OUT_RINGf (vpt->translate[3]); - OUT_RINGf (vpt->scale[0]); - OUT_RINGf (vpt->scale[1]); - OUT_RINGf (vpt->scale[2]); - OUT_RINGf (vpt->scale[3]); -} - -static void -nv30_set_vertex_buffer(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_buffer *vb) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->vtxbuf[index] = *vb; - - nv30->dirty |= NV30_NEW_ARRAYS; -} - -static void -nv30_set_vertex_element(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_element *ve) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->vtxelt[index] = *ve; - - nv30->dirty |= NV30_NEW_ARRAYS; -} - -void -nv30_init_state_functions(struct nv30_context *nv30) -{ - nv30->pipe.create_blend_state = nv30_blend_state_create; - nv30->pipe.bind_blend_state = nv30_blend_state_bind; - nv30->pipe.delete_blend_state = nv30_blend_state_delete; - - nv30->pipe.create_sampler_state = nv30_sampler_state_create; - nv30->pipe.bind_sampler_state = nv30_sampler_state_bind; - nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; - nv30->pipe.set_sampler_texture = nv30_set_sampler_texture; - - nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; - nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; - nv30->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete; - - nv30->pipe.create_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_create; - nv30->pipe.bind_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_bind; - nv30->pipe.delete_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_delete; - - nv30->pipe.create_vs_state = nv30_vp_state_create; - nv30->pipe.bind_vs_state = nv30_vp_state_bind; - nv30->pipe.delete_vs_state = nv30_vp_state_delete; - - nv30->pipe.create_fs_state = nv30_fp_state_create; - nv30->pipe.bind_fs_state = nv30_fp_state_bind; - nv30->pipe.delete_fs_state = nv30_fp_state_delete; - - nv30->pipe.set_blend_color = nv30_set_blend_color; - nv30->pipe.set_clip_state = nv30_set_clip_state; - nv30->pipe.set_constant_buffer = nv30_set_constant_buffer; - nv30->pipe.set_framebuffer_state = nv30_set_framebuffer_state; - nv30->pipe.set_polygon_stipple = nv30_set_polygon_stipple; - nv30->pipe.set_scissor_state = nv30_set_scissor_state; - nv30->pipe.set_viewport_state = nv30_set_viewport_state; - - nv30->pipe.set_vertex_buffer = nv30_set_vertex_buffer; - nv30->pipe.set_vertex_element = nv30_set_vertex_element; -} - diff --git a/src/mesa/pipe/nv30/nv30_state.h b/src/mesa/pipe/nv30/nv30_state.h deleted file mode 100644 index 233600f69ab..00000000000 --- a/src/mesa/pipe/nv30/nv30_state.h +++ /dev/null @@ -1,147 +0,0 @@ -#ifndef __NV30_STATE_H__ -#define __NV30_STATE_H__ - -#include "pipe/p_state.h" - -struct nv30_blend_state { - uint32_t b_enable; - uint32_t b_srcfunc; - uint32_t b_dstfunc; - uint32_t b_eqn; - - uint32_t l_enable; - uint32_t l_op; - - uint32_t c_mask; - - uint32_t d_enable; -}; - -struct nv30_sampler_state { - uint32_t fmt; - uint32_t wrap; - uint32_t en; - uint32_t filt; - uint32_t bcol; -}; - -struct nv30_rasterizer_state { - uint32_t shade_model; - - uint32_t line_width; - uint32_t line_smooth_en; - uint32_t line_stipple_en; - uint32_t line_stipple; - - uint32_t point_size; - - uint32_t poly_smooth_en; - uint32_t poly_stipple_en; - - uint32_t poly_mode_front; - uint32_t poly_mode_back; - - uint32_t front_face; - uint32_t cull_face; - uint32_t cull_face_en; - - uint32_t point_sprite; -}; - -struct nv30_vertex_program_exec { - uint32_t data[4]; - boolean has_branch_offset; - int const_index; -}; - -struct nv30_vertex_program_data { - int index; /* immediates == -1 */ - float value[4]; -}; - -struct nv30_vertex_program { - const struct pipe_shader_state *pipe; - - boolean translated; - struct nv30_vertex_program_exec *insns; - unsigned nr_insns; - struct nv30_vertex_program_data *consts; - unsigned nr_consts; - - struct nouveau_resource *exec; - unsigned exec_start; - struct nouveau_resource *data; - unsigned data_start; - unsigned data_start_min; - - uint32_t ir; - uint32_t or; -}; - -struct nv30_fragment_program_data { - unsigned offset; - unsigned index; -}; - -struct nv30_fragment_program { - const struct pipe_shader_state *pipe; - - boolean translated; - boolean on_hw; - unsigned samplers; - - uint32_t *insn; - int insn_len; - - struct nv30_fragment_program_data *consts; - unsigned nr_consts; - - struct pipe_buffer *buffer; - - uint32_t fp_control; - uint32_t fp_reg_control; -}; - -struct nv30_stencil_push { - uint32_t enable; - uint32_t wmask; - uint32_t func; - uint32_t ref; - uint32_t vmask; - uint32_t fail; - uint32_t zfail; - uint32_t zpass; -}; - -struct nv30_depth_stencil_alpha_state { - struct { - uint32_t func; - uint32_t write_enable; - uint32_t test_enable; - } depth; - - struct { - struct nv30_stencil_push back; - struct nv30_stencil_push front; - } stencil; - - struct { - uint32_t enabled; - uint32_t func; - uint32_t ref; - } alpha; -}; - -struct nv30_miptree { - struct pipe_texture base; - - struct pipe_buffer *buffer; - uint total_size; - - struct { - uint pitch; - uint *image_offset; - } level[PIPE_MAX_TEXTURE_LEVELS]; -}; - -#endif diff --git a/src/mesa/pipe/nv30/nv30_state_emit.c b/src/mesa/pipe/nv30/nv30_state_emit.c deleted file mode 100644 index 70b98836f0f..00000000000 --- a/src/mesa/pipe/nv30/nv30_state_emit.c +++ /dev/null @@ -1,83 +0,0 @@ -#include "nv30_context.h" -#include "nv30_state.h" - -void -nv30_emit_hw_state(struct nv30_context *nv30) -{ - int i; - - if (nv30->dirty & NV30_NEW_FRAGPROG) { - nv30_fragprog_bind(nv30, nv30->fragprog.current); - /*XXX: clear NV30_NEW_FRAGPROG if no new program uploaded */ - } - - if (nv30->dirty_samplers || (nv30->dirty & NV30_NEW_FRAGPROG)) { - nv30_fragtex_bind(nv30); -/* - BEGIN_RING(rankine, NV34TCL_TX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(rankine, NV34TCL_TX_CACHE_CTL, 1); - OUT_RING (1);*/ - nv30->dirty &= ~NV30_NEW_FRAGPROG; - } - - if (nv30->dirty & NV30_NEW_VERTPROG) { - nv30_vertprog_bind(nv30, nv30->vertprog.current); - nv30->dirty &= ~NV30_NEW_VERTPROG; - } - - nv30->dirty_samplers = 0; - - /* Emit relocs for every referenced buffer. - * This is to ensure the bufmgr has an accurate idea of how - * the buffer is used. This isn't very efficient, but we don't - * seem to take a significant performance hit. Will be improved - * at some point. Vertex arrays are emitted by nv30_vbo.c - */ - - /* Render targets */ - if (nv30->rt_enable & NV34TCL_RT_ENABLE_COLOR0) { - BEGIN_RING(rankine, NV34TCL_DMA_COLOR0, 1); - OUT_RELOCo(nv30->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(rankine, NV34TCL_COLOR0_OFFSET, 1); - OUT_RELOCl(nv30->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - } - - if (nv30->rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - BEGIN_RING(rankine, NV34TCL_DMA_COLOR1, 1); - OUT_RELOCo(nv30->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(rankine, NV34TCL_COLOR1_OFFSET, 1); - OUT_RELOCl(nv30->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - } - - if (nv30->zeta) { - BEGIN_RING(rankine, NV34TCL_DMA_ZETA, 1); - OUT_RELOCo(nv30->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(rankine, NV34TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv30->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - /* XXX allocate LMA */ -/* BEGIN_RING(rankine, NV34TCL_LMA_DEPTH_OFFSET, 1); - OUT_RING(0);*/ - } - - /* Texture images */ - for (i = 0; i < 16; i++) { - if (!(nv30->fp_samplers & (1 << i))) - continue; - BEGIN_RING(rankine, NV34TCL_TX_OFFSET(i), 2); - OUT_RELOCl(nv30->tex[i].buffer, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv30->tex[i].buffer, nv30->tex[i].format, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | - NOUVEAU_BO_OR, NV34TCL_TX_FORMAT_DMA0, - NV34TCL_TX_FORMAT_DMA1); - } - - /* Fragment program */ - BEGIN_RING(rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); - OUT_RELOC (nv30->fragprog.active->buffer, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, - NV34TCL_FP_ACTIVE_PROGRAM_DMA1); -} - diff --git a/src/mesa/pipe/nv30/nv30_surface.c b/src/mesa/pipe/nv30/nv30_surface.c deleted file mode 100644 index 31745e3d6e8..00000000000 --- a/src/mesa/pipe/nv30/nv30_surface.c +++ /dev/null @@ -1,136 +0,0 @@ - -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "nv30_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "pipe/p_winsys.h" -#include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" - -static boolean -nv30_surface_format_supported(struct pipe_context *pipe, - enum pipe_format format, uint type) -{ - switch (type) { - case PIPE_SURFACE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - break; - case PIPE_TEXTURE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: - case PIPE_FORMAT_U_A8_L8: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return TRUE; - default: - break; - } - break; - default: - assert(0); - }; - - return FALSE; -} - -static struct pipe_surface * -nv30_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct pipe_winsys *ws = pipe->winsys; - struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; - struct pipe_surface *ps; - - ps = ws->surface_alloc(ws); - if (!ps) - return NULL; - pipe_buffer_reference(ws, &ps->buffer, nv30mt->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = nv30mt->level[level].pitch / ps->cpp; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset = nv30mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ps->offset = nv30mt->level[level].image_offset[zslice]; - } else { - ps->offset = nv30mt->level[level].image_offset[0]; - } - - return ps; -} - -static void -nv30_surface_copy(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, struct pipe_surface *src, - unsigned srcx, unsigned srcy, unsigned width, unsigned height) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_winsys *nvws = nv30->nvws; - - nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, - width, height); -} - -static void -nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_winsys *nvws = nv30->nvws; - - nvws->surface_fill(nvws, dest, destx, desty, width, height, value); -} - -void -nv30_init_surface_functions(struct nv30_context *nv30) -{ - nv30->pipe.is_format_supported = nv30_surface_format_supported; - nv30->pipe.get_tex_surface = nv30_get_tex_surface; - nv30->pipe.surface_copy = nv30_surface_copy; - nv30->pipe.surface_fill = nv30_surface_fill; -} diff --git a/src/mesa/pipe/nv30/nv30_vbo.c b/src/mesa/pipe/nv30/nv30_vbo.c deleted file mode 100644 index 57fb9bc8a57..00000000000 --- a/src/mesa/pipe/nv30/nv30_vbo.c +++ /dev/null @@ -1,425 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "pipe/p_util.h" - -#include "nv30_context.h" -#include "nv30_state.h" - -#include "pipe/nouveau/nouveau_channel.h" -#include "pipe/nouveau/nouveau_pushbuf.h" - -static INLINE int -nv30_vbo_ncomp(uint format) -{ - int ncomp = 0; - - if (pf_size_x(format)) ncomp++; - if (pf_size_y(format)) ncomp++; - if (pf_size_z(format)) ncomp++; - if (pf_size_w(format)) ncomp++; - - return ncomp; -} - -static INLINE int -nv30_vbo_type(uint format) -{ - switch (pf_type(format)) { - case PIPE_FORMAT_TYPE_FLOAT: - return NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT; - case PIPE_FORMAT_TYPE_UNORM: - return NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_UBYTE; - default: - NOUVEAU_ERR("Unknown format 0x%08x\n", format); - return NV40TCL_VTXFMT_TYPE_FLOAT; - } -} - -static boolean -nv30_vbo_static_attrib(struct nv30_context *nv30, int attrib, - struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb) -{ - struct pipe_winsys *ws = nv30->pipe.winsys; - int type, ncomp; - void *map; - - type = nv30_vbo_type(ve->src_format); - ncomp = nv30_vbo_ncomp(ve->src_format); - - map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); - map += vb->buffer_offset + ve->src_offset; - - switch (type) { - case NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT: - { - float *v = map; - - BEGIN_RING(rankine, NV34TCL_VERTEX_ATTR_4F_X(attrib), 4); - switch (ncomp) { - case 4: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(v[2]); - OUT_RINGf(v[3]); - break; - case 3: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(v[2]); - OUT_RINGf(1.0); - break; - case 2: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(0.0); - OUT_RINGf(1.0); - break; - case 1: - OUT_RINGf(v[0]); - OUT_RINGf(0.0); - OUT_RINGf(0.0); - OUT_RINGf(1.0); - break; - default: - ws->buffer_unmap(ws, vb->buffer); - return FALSE; - } - } - break; - default: - ws->buffer_unmap(ws, vb->buffer); - return FALSE; - } - - ws->buffer_unmap(ws, vb->buffer); - - return TRUE; -} - -static void -nv30_vbo_arrays_update(struct nv30_context *nv30) -{ - struct nv30_vertex_program *vp = nv30->vertprog.active; - uint32_t inputs, vtxfmt[16]; - int hw, num_hw; - - nv30->vb_enable = 0; - - inputs = vp->ir; - for (hw = 0; hw < 16 && inputs; hw++) { - if (inputs & (1 << hw)) { - num_hw = hw; - inputs &= ~(1 << hw); - } - } - num_hw++; - - inputs = vp->ir; - for (hw = 0; hw < num_hw; hw++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - - if (!(inputs & (1 << hw))) { - vtxfmt[hw] = NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT; - continue; - } - - ve = &nv30->vtxelt[hw]; - vb = &nv30->vtxbuf[ve->vertex_buffer_index]; - - if (vb->pitch == 0) { - vtxfmt[hw] = NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT; - if (nv30_vbo_static_attrib(nv30, hw, ve, vb) == TRUE) - continue; - } - - nv30->vb_enable |= (1 << hw); - nv30->vb[hw].delta = vb->buffer_offset + ve->src_offset; - nv30->vb[hw].buffer = vb->buffer; - - vtxfmt[hw] = ((vb->pitch << NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT) | - (nv30_vbo_ncomp(ve->src_format) << - NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_SHIFT) | - nv30_vbo_type(ve->src_format)); - } - - BEGIN_RING(rankine, NV34TCL_VERTEX_ARRAY_FORMAT(0), num_hw); - OUT_RINGp (vtxfmt, num_hw); -} - -static boolean -nv30_vbo_validate_state(struct nv30_context *nv30, - struct pipe_buffer *ib, unsigned ib_format) -{ - unsigned inputs; - - nv30_emit_hw_state(nv30); - - if (nv30->dirty & NV30_NEW_ARRAYS) { - nv30_vbo_arrays_update(nv30); - nv30->dirty &= ~NV30_NEW_ARRAYS; - } - - inputs = nv30->vb_enable; - while (inputs) { - unsigned a = ffs(inputs) - 1; - - inputs &= ~(1 << a); - - BEGIN_RING(rankine, NV34TCL_VERTEX_BUFFER_ADDRESS(a), 1); - OUT_RELOC (nv30->vb[a].buffer, nv30->vb[a].delta, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0, - NV34TCL_VERTEX_BUFFER_ADDRESS_DMA1); - } - - if (ib) { - BEGIN_RING(rankine, NV40TCL_IDXBUF_ADDRESS, 2); - OUT_RELOCl(ib, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | - NOUVEAU_BO_RD); - OUT_RELOCd(ib, ib_format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | - NOUVEAU_BO_RD | NOUVEAU_BO_OR, - 0, NV40TCL_IDXBUF_FORMAT_DMA1); - } - - BEGIN_RING(rankine, 0x1710, 1); - OUT_RING (0); /* vtx cache flush */ - - return TRUE; -} - -boolean -nv30_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, - unsigned count) -{ - struct nv30_context *nv30 = nv30_context(pipe); - unsigned nr; - boolean ret; - - ret = nv30_vbo_validate_state(nv30, NULL, 0); - if (!ret) { - NOUVEAU_ERR("state validate failed\n"); - return FALSE; - } - - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); - - nr = (count & 0xff); - if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); - start += nr; - } - - nr = count >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); - while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); - - pipe->flush(pipe, 0); - return TRUE; -} - -static INLINE void -nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, - unsigned start, unsigned count) -{ - uint8_t *elts = (uint8_t *)ib + start; - int push, i; - - if (count & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); - elts++; count--; - } - - while (count) { - push = MIN2(count, 2047 * 2); - - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); - - count -= push; - elts += push; - } -} - -static INLINE void -nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, - unsigned start, unsigned count) -{ - uint16_t *elts = (uint16_t *)ib + start; - int push, i; - - if (count & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); - elts++; count--; - } - - while (count) { - push = MIN2(count, 2047 * 2); - - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); - - count -= push; - elts += push; - } -} - -static INLINE void -nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, - unsigned start, unsigned count) -{ - uint32_t *elts = (uint32_t *)ib + start; - int push; - - while (count) { - push = MIN2(count, 2047); - - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); - - count -= push; - elts += push; - } -} - -static boolean -nv30_draw_elements_inline(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct pipe_winsys *ws = pipe->winsys; - boolean ret; - void *map; - - ret = nv30_vbo_validate_state(nv30, NULL, 0); - if (!ret) { - NOUVEAU_ERR("state validate failed\n"); - return FALSE; - } - - map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); - if (!ib) { - NOUVEAU_ERR("failed mapping ib\n"); - return FALSE; - } - - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); - - switch (ib_size) { - case 1: - nv30_draw_elements_u08(nv30, map, start, count); - break; - case 2: - nv30_draw_elements_u16(nv30, map, start, count); - break; - case 4: - nv30_draw_elements_u32(nv30, map, start, count); - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - break; - } - - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); - - ws->buffer_unmap(ws, ib); - - return TRUE; -} - -static boolean -nv30_draw_elements_vbo(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nv30_context *nv30 = nv30_context(pipe); - unsigned nr, type; - boolean ret; - - switch (ib_size) { - case 2: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - return FALSE; - } - - ret = nv30_vbo_validate_state(nv30, ib, type); - if (!ret) { - NOUVEAU_ERR("failed state validation\n"); - return FALSE; - } - - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); - - nr = (count & 0xff); - if (nr) { - BEGIN_RING(rankine, NV40TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); - start += nr; - } - - nr = count >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(rankine, NV40TCL_VB_INDEX_BATCH, push); - while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); - - return TRUE; -} - -boolean -nv30_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ -/* if (indexSize != 1) { - nv30_draw_elements_vbo(pipe, indexBuffer, indexSize, - mode, start, count); - } else */{ - nv30_draw_elements_inline(pipe, indexBuffer, indexSize, - mode, start, count); - } - - pipe->flush(pipe, 0); - return TRUE; -} - - diff --git a/src/mesa/pipe/nv30/nv30_vertprog.c b/src/mesa/pipe/nv30/nv30_vertprog.c deleted file mode 100644 index c96210d3fa7..00000000000 --- a/src/mesa/pipe/nv30/nv30_vertprog.c +++ /dev/null @@ -1,777 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_util.h" - -#include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" - -#include "nv30_context.h" -#include "nv30_state.h" - -/* TODO (at least...): - * 1. Indexed consts + ARL - * 2. Arb. swz/negation - * 3. NV_vp11, NV_vp2, NV_vp3 features - * - extra arith opcodes - * - branching - * - texture sampling - * - indexed attribs - * - indexed results - * 4. bugs - */ - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv30_shader.h" - -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) - -struct nv30_vpc { - struct nv30_vertex_program *vp; - - struct nv30_vertex_program_exec *vpi; - - unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; - - int high_temp; - int temp_temp_count; - - struct nv30_sreg *imm; - unsigned nr_imm; -}; - -static struct nv30_sreg -temp(struct nv30_vpc *vpc) -{ - int idx; - - idx = vpc->temp_temp_count++; - idx += vpc->high_temp + 1; - return nv30_sr(NV30SR_TEMP, idx); -} - -static struct nv30_sreg -constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) -{ - struct nv30_vertex_program *vp = vpc->vp; - struct nv30_vertex_program_data *vpd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < vp->nr_consts; idx++) { - if (vp->consts[idx].index == pipe) - return nv30_sr(NV30SR_CONST, idx); - } - } - - idx = vp->nr_consts++; - vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); - vpd = &vp->consts[idx]; - - vpd->index = pipe; - vpd->value[0] = x; - vpd->value[1] = y; - vpd->value[2] = z; - vpd->value[3] = w; - return nv30_sr(NV30SR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) - -static void -emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) -{ - struct nv30_vertex_program *vp = vpc->vp; - uint32_t sr = 0; - - switch (src.type) { - case NV30SR_TEMP: - sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); - break; - case NV30SR_INPUT: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - vp->ir |= (1 << src.index); - hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); - break; - case NV30SR_CONST: - sr |= (NV30_VP_SRC_REG_TYPE_CONST << - NV30_VP_SRC_REG_TYPE_SHIFT); - assert(vpc->vpi->const_index == -1 || - vpc->vpi->const_index == src.index); - vpc->vpi->const_index = src.index; - break; - case NV30SR_NONE: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV30_VP_SRC_NEGATE; - - if (src.abs) - hw[0] |= (1 << (21 + pos)); - - sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); - -/* - * |VVV| - * d�.�b - * \u/ - * - */ - - switch (pos) { - case 0: - hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> - NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << - NV30_VP_INST_SRC0L_SHIFT; - break; - case 1: - hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; - break; - case 2: - hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> - NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << - NV30_VP_INST_SRC2L_SHIFT; - break; - default: - assert(0); - } -} - -static void -emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) -{ - struct nv30_vertex_program *vp = vpc->vp; - - switch (dst.type) { - case NV30SR_TEMP: - hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); - break; - case NV30SR_OUTPUT: - switch (dst.index) { - case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - default: - break; - } - - hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); - hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); - break; - default: - assert(0); - } -} - -static void -nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, - struct nv30_sreg s2) -{ - struct nv30_vertex_program *vp = vpc->vp; - uint32_t *hw; - - vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); - vpc->vpi = &vp->insns[vp->nr_insns - 1]; - memset(vpc->vpi, 0, sizeof(*vpc->vpi)); - vpc->vpi->const_index = -1; - - hw = vpc->vpi->data; - - hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); - - hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); -// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; -// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); - - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); -} - -static INLINE struct nv30_sreg -tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nv30_sreg src; - - switch (fsrc->SrcRegister.File) { - case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); - break; - case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); - break; - case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; - break; - case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->SrcRegister.Index) - vpc->high_temp = fsrc->SrcRegister.Index; - src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; - return src; -} - -static INLINE struct nv30_sreg -tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nv30_sreg dst; - - switch (fdst->DstRegister.File) { - case TGSI_FILE_OUTPUT: - dst = nv30_sr(NV30SR_OUTPUT, - vpc->output_map[fdst->DstRegister.Index]); - - break; - case TGSI_FILE_TEMPORARY: - dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index); - if (vpc->high_temp < dst.index) - vpc->high_temp = dst.index; - break; - default: - NOUVEAU_ERR("bad dst file\n"); - break; - } - - return dst; -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, - const struct tgsi_full_instruction *finst) -{ - struct nv30_sreg src[3], dst, tmp; - struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - int mask; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - vpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(vpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->FullSrcRegisters[i]; - switch (fsrc->SrcRegister.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - /*XXX: index comparison is broken now that consts come from - * two different register files. - */ - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); - break; - case TGSI_OPCODE_ARL: - arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DST: - arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_EXP: - arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_FLR: - arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_LG2: - arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LIT: - arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LOG: - arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_MAD: - arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(vpc); - arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, - swz(src[0], X, X, X, X)); - arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(vpc, 1, OP_EX2, dst, mask, none, none, - swz(tmp, X, X, X, X)); - break; - case TGSI_OPCODE_RCP: - arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_RET: - break; - case TGSI_OPCODE_RSQ: - arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_SGE: - arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); - break; - case TGSI_OPCODE_XPD: - tmp = temp(vpc); - arith(vpc, 0, OP_MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - hw = NV30_VP_INST_DEST_POS; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { - hw = NV30_VP_INST_DEST_COL0; - } else - if (fdec->Semantic.SemanticIndex == 1) { - hw = NV30_VP_INST_DEST_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { - hw = NV30_VP_INST_DEST_BFC0; - } else - if (fdec->Semantic.SemanticIndex == 1) { - hw = NV30_VP_INST_DEST_BFC1; - } else { - NOUVEAU_ERR("bad bcolour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV30_VP_INST_DEST_FOGC; - break; - case TGSI_SEMANTIC_PSIZE: - hw = NV30_VP_INST_DEST_PSZ; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - vpc->output_map[fdec->u.DeclarationRange.First] = hw; - return TRUE; -} - -static boolean -nv30_vertprog_prepare(struct nv30_vpc *vpc) -{ - struct tgsi_parse_context p; - int nr_imm = 0; - - tgsi_parse_init(&p, vpc->vp->pipe->tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - nr_imm++; - break; - default: - break; - } - } - tgsi_parse_free(&p); - - if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg)); - assert(vpc->imm); - } - - return TRUE; -} - -void -nv30_vertprog_translate(struct nv30_context *nv30, - struct nv30_vertex_program *vp) -{ - struct tgsi_parse_context parse; - struct nv30_vpc *vpc = NULL; - - vpc = CALLOC(1, sizeof(struct nv30_vpc)); - if (!vpc) - return; - vpc->vp = vp; - vpc->high_temp = -1; - - if (!nv30_vertprog_prepare(vpc)) { - free(vpc); - return; - } - - tgsi_parse_init(&parse, vp->pipe->tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_OUTPUT: - if (!nv30_vertprog_parse_decl_output(vpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); -// assert(imm->Immediate.Size == 4); - vpc->imm[vpc->nr_imm++] = - constant(vpc, -1, - imm->u.ImmediateFloat32[0].Float, - imm->u.ImmediateFloat32[1].Float, - imm->u.ImmediateFloat32[2].Float, - imm->u.ImmediateFloat32[3].Float); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - finst = &parse.FullToken.FullInstruction; - if (!nv30_vertprog_parse_instruction(vpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; - vp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - free(vpc); -} - -void -nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) -{ - struct nouveau_winsys *nvws = nv30->nvws; - struct pipe_winsys *ws = nv30->pipe.winsys; - boolean upload_code = FALSE, upload_data = FALSE; - int i; - - /* Translate TGSI shader into hw bytecode */ - if (!vp->translated) { - nv30_vertprog_translate(nv30, vp); - if (!vp->translated) - assert(0); - } - - /* Allocate hw vtxprog exec slots */ - if (!vp->exec) { - struct nouveau_resource *heap = nv30->vertprog.exec_heap; - uint vplen = vp->nr_insns; - - if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { - while (heap->next && heap->size < vplen) { - struct nv30_vertex_program *evict; - - evict = heap->next->priv; - nvws->res_free(&evict->exec); - } - - if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) - assert(0); - } - - upload_code = TRUE; - } - - /* Allocate hw vtxprog const slots */ - if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nv30->vertprog.data_heap; - - if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { - while (heap->next && heap->size < vp->nr_consts) { - struct nv30_vertex_program *evict; - - evict = heap->next->priv; - nvws->res_free(&evict->data); - } - - if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) - assert(0); - } - - /*XXX: handle this some day */ - assert(vp->data->start >= vp->data_start_min); - - upload_data = TRUE; - if (vp->data_start != vp->data->start) - upload_code = TRUE; - } - - /* If exec or data segments moved we need to patch the program to - * fixup offsets and register IDs. - */ - if (vp->exec_start != vp->exec->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nv30_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->has_branch_offset) { - assert(0); - } - } - - vp->exec_start = vp->exec->start; - } - - if (vp->nr_consts && vp->data_start != vp->data->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nv30_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; - vpi->data[1] |= - (vpi->const_index + vp->data->start) << - NV30_VP_INST_CONST_SRC_SHIFT; - - } - } - - vp->data_start = vp->data->start; - } - - /* Update + Upload constant values */ - if (vp->nr_consts) { - float *map = NULL; - - if (nv30->vertprog.constant_buf) { - map = ws->buffer_map(ws, nv30->vertprog.constant_buf, - PIPE_BUFFER_USAGE_CPU_READ); - } - - for (i = 0; i < vp->nr_consts; i++) { - struct nv30_vertex_program_data *vpd = &vp->consts[i]; - - if (vpd->index >= 0) { - if (!upload_data && - !memcmp(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float))) - continue; - memcpy(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float)); - } - - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); - } - - if (map) { - ws->buffer_unmap(ws, nv30->vertprog.constant_buf); - } - } - - /* Upload vtxprog */ - if (upload_code) { -#if 0 - for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); - } -#endif - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (/*vp->exec->start*/0); - for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); - } - } - - BEGIN_RING(rankine, NV34TCL_VP_START_FROM_ID, 1); -// OUT_RING (vp->exec->start); - OUT_RING (0); - - nv30->vertprog.active = vp; -} - -void -nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp) -{ - if (vp->nr_consts) - free(vp->consts); - if (vp->nr_insns) - free(vp->insns); -} - diff --git a/src/mesa/pipe/nv40/Makefile b/src/mesa/pipe/nv40/Makefile deleted file mode 100644 index c9c3a8032e5..00000000000 --- a/src/mesa/pipe/nv40/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv40 - -DRIVER_SOURCES = \ - nv40_clear.c \ - nv40_context.c \ - nv40_draw.c \ - nv40_fragprog.c \ - nv40_fragtex.c \ - nv40_miptree.c \ - nv40_query.c \ - nv40_state.c \ - nv40_state_emit.c \ - nv40_surface.c \ - nv40_vbo.c \ - nv40_vertprog.c - -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -include ../Makefile.template - -symlinks: - diff --git a/src/mesa/pipe/nv40/nv40_clear.c b/src/mesa/pipe/nv40/nv40_clear.c deleted file mode 100644 index 2c4e8f01fda..00000000000 --- a/src/mesa/pipe/nv40/nv40_clear.c +++ /dev/null @@ -1,12 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "nv40_context.h" - -void -nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue) -{ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); -} diff --git a/src/mesa/pipe/nv40/nv40_context.c b/src/mesa/pipe/nv40/nv40_context.c deleted file mode 100644 index 6e86ca00816..00000000000 --- a/src/mesa/pipe/nv40/nv40_context.c +++ /dev/null @@ -1,312 +0,0 @@ -#include "pipe/draw/draw_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_winsys.h" -#include "pipe/p_util.h" - -#include "nv40_context.h" - -#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf -#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 -#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 - -static const char * -nv40_get_name(struct pipe_context *pipe) -{ - struct nv40_context *nv40 = nv40_context(pipe); - static char buffer[128]; - - snprintf(buffer, sizeof(buffer), "NV%02X", nv40->chipset); - return buffer; -} - -static const char * -nv40_get_vendor(struct pipe_context *pipe) -{ - return "nouveau"; -} - -static int -nv40_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 16; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 4; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv40_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - case PIPE_CAP_BITMAP_TEXCOORD_BIAS: - return 0.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static void -nv40_flush(struct pipe_context *pipe, unsigned flags) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_winsys *nvws = nv40->nvws; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (1); - } - - if (flags & PIPE_FLUSH_WAIT) { - nvws->notifier_reset(nv40->hw->sync, 0); - BEGIN_RING(curie, 0x104, 1); - OUT_RING (0); - BEGIN_RING(curie, 0x100, 1); - OUT_RING (0); - } - - FIRE_RING(); - - if (flags & PIPE_FLUSH_WAIT) - nvws->notifier_wait(nv40->hw->sync, 0, 0, 2000); -} - -static void -nv40_channel_takedown(struct nv40_channel_context *cnv40) -{ - struct nouveau_winsys *nvws = cnv40->nvws; - - nvws->res_free(&cnv40->vp_exec_heap); - nvws->res_free(&cnv40->vp_data_heap); - nvws->res_free(&cnv40->query_heap); - nvws->notifier_free(&cnv40->query); - nvws->notifier_free(&cnv40->sync); - nvws->grobj_free(&cnv40->curie); - free(cnv40); -} - -static struct nv40_channel_context * -nv40_channel_init(struct pipe_winsys *ws, struct nouveau_winsys *nvws, - unsigned chipset) -{ - struct nv40_channel_context *cnv40 = NULL; - struct nouveau_stateobj *so; - unsigned curie_class = 0; - int ret; - - switch (chipset & 0xf0) { - case 0x40: - if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) - curie_class = NV40TCL; - else - if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) - curie_class = NV44TCL; - break; - case 0x60: - if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) - curie_class = NV44TCL; - break; - default: - break; - } - - if (!curie_class) { - NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset); - return NULL; - } - - cnv40 = CALLOC(1, sizeof(struct nv40_channel_context)); - if (!cnv40) - return NULL; - cnv40->chipset = chipset; - cnv40->nvws = nvws; - - /* Notifier for sync purposes */ - ret = nvws->notifier_alloc(nvws, 1, &cnv40->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv40_channel_takedown(cnv40); - return NULL; - } - - /* Query objects */ - ret = nvws->notifier_alloc(nvws, 32, &cnv40->query); - if (ret) { - NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv40_channel_takedown(cnv40); - return NULL; - } - - ret = nvws->res_init(&cnv40->query_heap, 0, 32); - if (ret) { - NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv40_channel_takedown(cnv40); - return NULL; - } - - /* Vtxprog resources */ - if (nvws->res_init(&cnv40->vp_exec_heap, 0, 512) || - nvws->res_init(&cnv40->vp_data_heap, 0, 256)) { - nv40_channel_takedown(cnv40); - return NULL; - } - - /* 3D object */ - ret = nvws->grobj_alloc(nvws, curie_class, &cnv40->curie); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - /* Static curie initialisation */ - so = so_new(128, 0); - so_method(so, cnv40->curie, NV40TCL_DMA_NOTIFY, 1); - so_data (so, cnv40->sync->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_TEXTURE0, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->gart->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_COLOR1, 1); - so_data (so, nvws->channel->vram->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_COLOR0, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->vram->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_VTXBUF0, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->gart->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_FENCE, 2); - so_data (so, 0); - so_data (so, cnv40->query->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_UNK01AC, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->vram->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_COLOR2, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->vram->handle); - - so_method(so, cnv40->curie, 0x1ea4, 3); - so_data (so, 0x00000010); - so_data (so, 0x01000100); - so_data (so, 0xff800006); - - /* vtxprog output routing */ - so_method(so, cnv40->curie, 0x1fc4, 1); - so_data (so, 0x06144321); - so_method(so, cnv40->curie, 0x1fc8, 2); - so_data (so, 0xedcba987); - so_data (so, 0x00000021); - so_method(so, cnv40->curie, 0x1fd0, 1); - so_data (so, 0x00171615); - so_method(so, cnv40->curie, 0x1fd4, 1); - so_data (so, 0x001b1a19); - - so_method(so, cnv40->curie, 0x1ef8, 1); - so_data (so, 0x0020ffff); - so_method(so, cnv40->curie, 0x1d64, 1); - so_data (so, 0x00d30000); - so_method(so, cnv40->curie, 0x1e94, 1); - so_data (so, 0x00000001); - - so_emit(nvws, so); - so_ref(NULL, &so); - nvws->push_flush(nvws->channel, 0); - - return cnv40; -} - -static void -nv40_destroy(struct pipe_context *pipe) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - if (nv40->draw) - draw_destroy(nv40->draw); - - if (nv40->hw) { - if (--nv40->hw->refcount == 0) - nv40_channel_takedown(nv40->hw); - } - - free(nv40); -} - -struct pipe_context * -nv40_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, - unsigned chipset) -{ - struct nv40_context *nv40; - - nv40 = CALLOC(1, sizeof(struct nv40_context)); - if (!nv40) - return NULL; - - nv40->hw = nv40_channel_init(ws, nvws, chipset); - if (!nv40->hw) { - nv40_destroy(&nv40->pipe); - return NULL; - } - - nv40->chipset = chipset; - nv40->nvws = nvws; - - nv40->pipe.winsys = ws; - nv40->pipe.destroy = nv40_destroy; - nv40->pipe.get_name = nv40_get_name; - nv40->pipe.get_vendor = nv40_get_vendor; - nv40->pipe.get_param = nv40_get_param; - nv40->pipe.get_paramf = nv40_get_paramf; - nv40->pipe.draw_arrays = nv40_draw_arrays; - nv40->pipe.draw_elements = nv40_draw_elements; - nv40->pipe.clear = nv40_clear; - nv40->pipe.flush = nv40_flush; - - nv40_init_query_functions(nv40); - nv40_init_surface_functions(nv40); - nv40_init_state_functions(nv40); - nv40_init_miptree_functions(nv40); - - nv40->draw = draw_create(); - assert(nv40->draw); - draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); - - return &nv40->pipe; -} - diff --git a/src/mesa/pipe/nv40/nv40_context.h b/src/mesa/pipe/nv40/nv40_context.h deleted file mode 100644 index cf2a14405a4..00000000000 --- a/src/mesa/pipe/nv40/nv40_context.h +++ /dev/null @@ -1,153 +0,0 @@ -#ifndef __NV40_CONTEXT_H__ -#define __NV40_CONTEXT_H__ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "pipe/draw/draw_vertex.h" - -#include "pipe/nouveau/nouveau_winsys.h" -#include "pipe/nouveau/nouveau_gldefs.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv40_channel_context *ctx = nv40->hw -#include "pipe/nouveau/nouveau_push.h" -#include "pipe/nouveau/nouveau_stateobj.h" - -#include "nv40_state.h" - -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) \ - fprintf(stderr, "nouveau: "fmt, ##args); - -#define NV40_NEW_BLEND (1 << 0) -#define NV40_NEW_RAST (1 << 1) -#define NV40_NEW_ZSA (1 << 2) -#define NV40_NEW_SAMPLER (1 << 3) -#define NV40_NEW_FB (1 << 4) -#define NV40_NEW_STIPPLE (1 << 5) -#define NV40_NEW_SCISSOR (1 << 6) -#define NV40_NEW_VIEWPORT (1 << 7) -#define NV40_NEW_BCOL (1 << 8) -#define NV40_NEW_VERTPROG (1 << 9) -#define NV40_NEW_FRAGPROG (1 << 10) -#define NV40_NEW_ARRAYS (1 << 11) - -struct nv40_channel_context { - struct nouveau_winsys *nvws; - unsigned refcount; - - unsigned chipset; - - /* HW graphics objects */ - struct nouveau_grobj *curie; - struct nouveau_notifier *sync; - - /* Query object resources */ - struct nouveau_notifier *query; - struct nouveau_resource *query_heap; - - /* Vtxprog resources */ - struct nouveau_resource *vp_exec_heap; - struct nouveau_resource *vp_data_heap; -}; - -struct nv40_context { - struct pipe_context pipe; - struct nouveau_winsys *nvws; - - struct nv40_channel_context *hw; - struct draw_context *draw; - - int chipset; - - uint32_t dirty; - - struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; - struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; - unsigned dirty_samplers; - unsigned fp_samplers; - unsigned vp_samplers; - - struct nouveau_stateobj *so_framebuffer; - struct nouveau_stateobj *so_fragtex[16]; - struct nouveau_stateobj *so_vtxbuf; - struct nouveau_stateobj *so_blend; - struct nouveau_stateobj *so_rast; - struct nouveau_stateobj *so_zsa; - struct nouveau_stateobj *so_bcol; - struct nouveau_stateobj *so_scissor; - struct nouveau_stateobj *so_viewport; - struct nouveau_stateobj *so_stipple; - - struct { - struct nv40_vertex_program *active; - - struct nv40_vertex_program *current; - struct pipe_buffer *constant_buf; - } vertprog; - - struct { - struct nv40_fragment_program *active; - - struct nv40_fragment_program *current; - struct pipe_buffer *constant_buf; - } fragprog; - - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; -}; - -static INLINE struct nv40_context * -nv40_context(struct pipe_context *pipe) -{ - return (struct nv40_context *)pipe; -} - -extern void nv40_init_state_functions(struct nv40_context *nv40); -extern void nv40_init_surface_functions(struct nv40_context *nv40); -extern void nv40_init_miptree_functions(struct nv40_context *nv40); -extern void nv40_init_query_functions(struct nv40_context *nv40); - -/* nv40_draw.c */ -extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); - -/* nv40_vertprog.c */ -extern void nv40_vertprog_translate(struct nv40_context *, - struct nv40_vertex_program *); -extern void nv40_vertprog_bind(struct nv40_context *, - struct nv40_vertex_program *); -extern void nv40_vertprog_destroy(struct nv40_context *, - struct nv40_vertex_program *); - -/* nv40_fragprog.c */ -extern void nv40_fragprog_translate(struct nv40_context *, - struct nv40_fragment_program *); -extern void nv40_fragprog_bind(struct nv40_context *, - struct nv40_fragment_program *); -extern void nv40_fragprog_destroy(struct nv40_context *, - struct nv40_fragment_program *); - -/* nv40_fragtex.c */ -extern void nv40_fragtex_bind(struct nv40_context *); - -/* nv40_state.c and friends */ -extern void nv40_emit_hw_state(struct nv40_context *nv40); -extern void nv40_state_tex_update(struct nv40_context *nv40); - -/* nv40_vbo.c */ -extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern boolean nv40_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, - unsigned count); - -/* nv40_clear.c */ -extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue); - -#endif diff --git a/src/mesa/pipe/nv40/nv40_dma.h b/src/mesa/pipe/nv40/nv40_dma.h deleted file mode 100644 index 1fb82677689..00000000000 --- a/src/mesa/pipe/nv40/nv40_dma.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef __NV40_DMA_H__ -#define __NV40_DMA_H__ - -#include "pipe/nouveau/nouveau_winsys.h" - -#define OUT_RING(data) do { \ - (*nv40->nvws->channel->pushbuf->cur++) = (data); \ -} while(0) - -#define OUT_RINGp(src,size) do { \ - memcpy(nv40->nvws->channel->pushbuf->cur, (src), (size) * 4); \ - nv40->nvws->channel->pushbuf->cur += (size); \ -} while(0) - -#define OUT_RINGf(data) do { \ - union { float v; uint32_t u; } c; \ - c.v = (data); \ - OUT_RING(c.u); \ -} while(0) - -#define BEGIN_RING(obj,mthd,size) do { \ - if (nv40->nvws->channel->pushbuf->remaining < ((size) + 1)) \ - nv40->nvws->push_flush(nv40->nvws->channel, ((size) + 1)); \ - OUT_RING((nv40->obj->subc << 13) | ((size) << 18) | (mthd)); \ - nv40->nvws->channel->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#define BEGIN_RING_NI(obj,mthd,size) do { \ - BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ -} while(0) - -#define FIRE_RING() do { \ - nv40->nvws->push_flush(nv40->nvws->channel, 0); \ -} while(0) - -#define OUT_RELOC(bo,data,flags,vor,tor) do { \ - nv40->nvws->push_reloc(nv40->nvws->channel, \ - nv40->nvws->channel->pushbuf->cur, \ - (struct nouveau_bo *)(bo), \ - (data), (flags), (vor), (tor)); \ - OUT_RING(0); \ -} while(0) - -/* Raw data + flags depending on FB/TT buffer */ -#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ - OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ -} while(0) - -/* FB/TT object handle */ -#define OUT_RELOCo(bo,flags) do { \ - OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ - nv40->nvws->channel->vram->handle, \ - nv40->nvws->channel->gart->handle); \ -} while(0) - -/* Low 32-bits of offset */ -#define OUT_RELOCl(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ -} while(0) - -/* High 32-bits of offset */ -#define OUT_RELOCh(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ -} while(0) - -#endif diff --git a/src/mesa/pipe/nv40/nv40_draw.c b/src/mesa/pipe/nv40/nv40_draw.c deleted file mode 100644 index d361d5f07d9..00000000000 --- a/src/mesa/pipe/nv40/nv40_draw.c +++ /dev/null @@ -1,62 +0,0 @@ -#include "pipe/draw/draw_private.h" -#include "pipe/p_util.h" - -#include "nv40_context.h" - -struct nv40_draw_stage { - struct draw_stage draw; - struct nv40_context *nv40; -}; - -static void -nv40_draw_point(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv40_draw_line(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv40_draw_tri(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv40_draw_flush(struct draw_stage *draw, unsigned flags) -{ -} - -static void -nv40_draw_reset_stipple_counter(struct draw_stage *draw) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv40_draw_destroy(struct draw_stage *draw) -{ - free(draw); -} - -struct draw_stage * -nv40_draw_render_stage(struct nv40_context *nv40) -{ - struct nv40_draw_stage *nv40draw = CALLOC_STRUCT(nv40_draw_stage); - - nv40draw->nv40 = nv40; - nv40draw->draw.draw = nv40->draw; - nv40draw->draw.point = nv40_draw_point; - nv40draw->draw.line = nv40_draw_line; - nv40draw->draw.tri = nv40_draw_tri; - nv40draw->draw.flush = nv40_draw_flush; - nv40draw->draw.reset_stipple_counter = nv40_draw_reset_stipple_counter; - nv40draw->draw.destroy = nv40_draw_destroy; - - return &nv40draw->draw; -} - diff --git a/src/mesa/pipe/nv40/nv40_fragprog.c b/src/mesa/pipe/nv40/nv40_fragprog.c deleted file mode 100644 index 7487fb896f3..00000000000 --- a/src/mesa/pipe/nv40/nv40_fragprog.c +++ /dev/null @@ -1,842 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" - -#include "nv40_context.h" - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 1 -#define MASK_Y 2 -#define MASK_Z 4 -#define MASK_W 8 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV40_FP_OP_COND_TR -#include "nv40_shader.h" - -#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv40_sr_neg((s)) -#define abs(s) nv40_sr_abs((s)) -#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) - -#define MAX_CONSTS 128 -#define MAX_IMM 32 -struct nv40_fpc { - struct nv40_fragment_program *fp; - - uint attrib_map[PIPE_MAX_SHADER_INPUTS]; - - int high_temp; - int temp_temp_count; - int num_regs; - - uint depth_id; - uint colour_id; - - unsigned inst_offset; - - struct { - int pipe; - float vals[4]; - } consts[MAX_CONSTS]; - int nr_consts; - - struct nv40_sreg imm[MAX_IMM]; - unsigned nr_imm; -}; - -static INLINE struct nv40_sreg -temp(struct nv40_fpc *fpc) -{ - int idx; - - idx = fpc->temp_temp_count++; - idx += fpc->high_temp + 1; - return nv40_sr(NV40SR_TEMP, idx); -} - -static INLINE struct nv40_sreg -constant(struct nv40_fpc *fpc, int pipe, float vals[4]) -{ - int idx; - - if (fpc->nr_consts == MAX_CONSTS) - assert(0); - idx = fpc->nr_consts++; - - fpc->consts[idx].pipe = pipe; - if (pipe == -1) - memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nv40_sr(NV40SR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ - (d), (m), (s0), (s1), (s2)) -#define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ - (d), (m), (s0), none, none) - -static void -grow_insns(struct nv40_fpc *fpc, int size) -{ - struct nv40_fragment_program *fp = fpc->fp; - - fp->insn_len += size; - fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); -} - -static void -emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) -{ - struct nv40_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - uint32_t sr = 0; - - switch (src.type) { - case NV40SR_INPUT: - sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); - break; - case NV40SR_OUTPUT: - sr |= NV40_FP_REG_SRC_HALF; - /* fall-through */ - case NV40SR_TEMP: - sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); - sr |= (src.index << NV40_FP_REG_SRC_SHIFT); - break; - case NV40SR_CONST: - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - if (fpc->consts[src.index].pipe >= 0) { - struct nv40_fragment_program_data *fpd; - - fp->consts = realloc(fp->consts, ++fp->nr_consts * - sizeof(*fpd)); - fpd = &fp->consts[fp->nr_consts - 1]; - fpd->offset = fpc->inst_offset + 4; - fpd->index = fpc->consts[src.index].pipe; - memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); - } else { - memcpy(&fp->insn[fpc->inst_offset + 4], - fpc->consts[src.index].vals, - sizeof(uint32_t) * 4); - } - - sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); - break; - case NV40SR_NONE: - sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV40_FP_REG_NEGATE; - - if (src.abs) - hw[1] |= (1 << (29 + pos)); - - sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); - - hw[pos + 1] |= sr; -} - -static void -emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst) -{ - struct nv40_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - - switch (dst.type) { - case NV40SR_TEMP: - if (fpc->num_regs < (dst.index + 1)) - fpc->num_regs = dst.index + 1; - break; - case NV40SR_OUTPUT: - if (dst.index == 1) { - fp->fp_control |= 0xe; - } else { - hw[0] |= NV40_FP_OP_OUT_REG_HALF; - } - break; - case NV40SR_NONE: - hw[0] |= (1 << 30); - break; - default: - assert(0); - } - - hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); -} - -static void -nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) -{ - struct nv40_fragment_program *fp = fpc->fp; - uint32_t *hw; - - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - memset(hw, 0, sizeof(uint32_t) * 4); - - if (op == NV40_FP_OP_OPCODE_KIL) - fp->fp_control |= NV40TCL_FP_CONTROL_KIL; - hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); - - if (sat) - hw[0] |= NV40_FP_OP_OUT_SAT; - - if (dst.cc_update) - hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); - - emit_dst(fpc, dst); - emit_src(fpc, 0, s0); - emit_src(fpc, 1, s1); - emit_src(fpc, 2, s2); -} - -static void -nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) -{ - struct nv40_fragment_program *fp = fpc->fp; - - nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - - fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); - fp->samplers |= (1 << unit); -} - -static INLINE struct nv40_sreg -tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) -{ - struct nv40_sreg src; - - switch (fsrc->SrcRegister.File) { - case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, - fpc->attrib_map[fsrc->SrcRegister.Index]); - break; - case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->SrcRegister.Index, NULL); - break; - case TGSI_FILE_IMMEDIATE: - assert(fsrc->SrcRegister.Index < fpc->nr_imm); - src = fpc->imm[fsrc->SrcRegister.Index]; - break; - case TGSI_FILE_TEMPORARY: - src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index + 1); - if (fpc->high_temp < src.index) - fpc->high_temp = src.index; - break; - /* This is clearly insane, but gallium hands us shaders like this. - * Luckily fragprog results are just temp regs.. - */ - case TGSI_FILE_OUTPUT: - if (fsrc->SrcRegister.Index == fpc->colour_id) - return nv40_sr(NV40SR_OUTPUT, 0); - else - return nv40_sr(NV40SR_OUTPUT, 1); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; - return src; -} - -static INLINE struct nv40_sreg -tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { - int idx; - - switch (fdst->DstRegister.File) { - case TGSI_FILE_OUTPUT: - if (fdst->DstRegister.Index == fpc->colour_id) - return nv40_sr(NV40SR_OUTPUT, 0); - else - return nv40_sr(NV40SR_OUTPUT, 1); - break; - case TGSI_FILE_TEMPORARY: - idx = fdst->DstRegister.Index + 1; - if (fpc->high_temp < idx) - fpc->high_temp = idx; - return nv40_sr(NV40SR_TEMP, idx); - case TGSI_FILE_NULL: - return nv40_sr(NV40SR_NONE, 0); - default: - NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); - return nv40_sr(NV40SR_NONE, 0); - } -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nv40_sreg *src) -{ - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; - uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, - fsrc->SrcRegisterExtSwz.NegateY, - fsrc->SrcRegisterExtSwz.NegateZ, - fsrc->SrcRegisterExtSwz.NegateW }; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - mask |= (1 << c); - break; - case TGSI_EXTSWIZZLE_ZERO: - zero_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; - case TGSI_EXTSWIZZLE_ONE: - one_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; - default: - assert(0); - } - - if (!tgsi.negate && neg[c]) - neg_mask |= (1 << c); - } - - if (mask == MASK_ALL && !neg_mask) - return TRUE; - - *src = temp(fpc); - - if (mask) - arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - - if (zero_mask) - arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); - - if (one_mask) - arith(fpc, 0, STR, *src, one_mask, *src, none, none); - - if (neg_mask) { - struct nv40_sreg one = temp(fpc); - arith(fpc, 0, STR, one, neg_mask, one, none, none); - arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); - } - - return FALSE; -} - -static boolean -nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, - const struct tgsi_full_instruction *finst) -{ - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg src[3], dst, tmp; - int mask, sat, unit; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - fpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(fpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->FullSrcRegisters[i]; - - switch (fsrc->SrcRegister.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(fpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->SrcRegister.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - NOUVEAU_MSG("extra src attr %d\n", - fsrc->SrcRegister.Index); - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - case TGSI_FILE_SAMPLER: - unit = fsrc->SrcRegister.Index; - break; - case TGSI_FILE_OUTPUT: - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); - sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); - tmp.cc_update = 1; - arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NV40_VP_INST_COND_LT; - arith(fpc, sat, MOV, dst, mask, src[1], none, none); - break; - case TGSI_OPCODE_COS: - arith(fpc, sat, COS, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); - arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), - swz(src[1], W, W, W, W), none); - break; - case TGSI_OPCODE_DST: - arith(fpc, sat, DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(fpc, sat, EX2, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FLR: - arith(fpc, sat, FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(fpc, sat, FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_KIL: - arith(fpc, 0, KIL, none, 0, none, none, none); - break; - case TGSI_OPCODE_KILP: - dst = nv40_sr(NV40SR_NONE, 0); - dst.cc_update = 1; - arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; - arith(fpc, 0, KIL, dst, 0, none, none, none); - break; - case TGSI_OPCODE_LG2: - arith(fpc, sat, LG2, dst, mask, src[0], none, none); - break; -// case TGSI_OPCODE_LIT: - case TGSI_OPCODE_LRP: - tmp = temp(fpc); - arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); - arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); - break; - case TGSI_OPCODE_MAD: - arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(fpc, sat, MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(fpc); - arith(fpc, 0, LG2, tmp, MASK_X, - swz(src[0], X, X, X, X), none, none); - arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(fpc, sat, EX2, dst, mask, - swz(tmp, X, X, X, X), none, none); - break; - case TGSI_OPCODE_RCP: - arith(fpc, sat, RCP, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_RET: - assert(0); - break; - case TGSI_OPCODE_RFL: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); - arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); - arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, - swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); - arith(fpc, sat, MAD, dst, mask, - swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); - break; - case TGSI_OPCODE_RSQ: - tmp = temp(fpc); - arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, - abs(swz(src[0], X, X, X, X)), none, none); - arith(fpc, sat, EX2, dst, mask, - neg(swz(tmp, X, X, X, X)), none, none); - break; - case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); - } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - break; - case TGSI_OPCODE_SIN: - arith(fpc, sat, SIN, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_SGE: - arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); - break; - case TGSI_OPCODE_TEX: - if (finst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide == - TGSI_EXTSWIZZLE_W) { - tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); - } else - tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_TXB: - tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_XPD: - tmp = temp(fpc); - arith(fpc, 0, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(fpc, sat, MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - hw = NV40_FP_OP_INPUT_SRC_POSITION; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { - hw = NV40_FP_OP_INPUT_SRC_COL0; - } else - if (fdec->Semantic.SemanticIndex == 1) { - hw = NV40_FP_OP_INPUT_SRC_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV40_FP_OP_INPUT_SRC_FOGC; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. - SemanticIndex); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad input semantic\n"); - return FALSE; - } - - fpc->attrib_map[fdec->u.DeclarationRange.First] = hw; - return TRUE; -} - -static boolean -nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - switch (fdec->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->u.DeclarationRange.First; - break; - case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->u.DeclarationRange.First; - break; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - return TRUE; -} - -void -nv40_fragprog_translate(struct nv40_context *nv40, - struct nv40_fragment_program *fp) -{ - struct tgsi_parse_context parse; - struct nv40_fpc *fpc = NULL; - - fpc = CALLOC(1, sizeof(struct nv40_fpc)); - if (!fpc) - return; - fpc->fp = fp; - fpc->high_temp = -1; - fpc->num_regs = 2; - - tgsi_parse_init(&parse, fp->pipe->tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_INPUT: - if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) - goto out_err; - break; - case TGSI_FILE_OUTPUT: - if (!nv40_fragprog_parse_decl_output(fpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - struct tgsi_full_immediate *imm; - float vals[4]; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(fpc->nr_imm < MAX_IMM); - - vals[0] = imm->u.ImmediateFloat32[0].Float; - vals[1] = imm->u.ImmediateFloat32[1].Float; - vals[2] = imm->u.ImmediateFloat32[2].Float; - vals[3] = imm->u.ImmediateFloat32[3].Float; - fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - - finst = &parse.FullToken.FullInstruction; - if (!nv40_fragprog_parse_instruction(fpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; - - /* Terminate final instruction */ - fp->insn[fpc->inst_offset] |= 0x00000001; - - /* Append NOP + END instruction, may or may not be necessary. */ - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - fp->insn[fpc->inst_offset + 0] = 0x00000001; - fp->insn[fpc->inst_offset + 1] = 0x00000000; - fp->insn[fpc->inst_offset + 2] = 0x00000000; - fp->insn[fpc->inst_offset + 3] = 0x00000000; - - fp->translated = TRUE; - fp->on_hw = FALSE; -out_err: - tgsi_parse_free(&parse); - free(fpc); -} - -void -nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp) -{ - struct pipe_winsys *ws = nv40->pipe.winsys; - struct nouveau_stateobj *so; - int i; - - if (!fp->translated) { - nv40_fragprog_translate(nv40, fp); - if (!fp->translated) - assert(0); - } - - if (fp->nr_consts) { - float *map = ws->buffer_map(ws, nv40->fragprog.constant_buf, - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < fp->nr_consts; i++) { - struct nv40_fragment_program_data *fpd = &fp->consts[i]; - uint32_t *p = &fp->insn[fpd->offset]; - uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; - - if (!memcmp(p, cb, 4 * sizeof(float))) - continue; - memcpy(p, cb, 4 * sizeof(float)); - fp->on_hw = 0; - } - ws->buffer_unmap(ws, nv40->fragprog.constant_buf); - } - - if (!fp->on_hw) { - const uint32_t le = 1; - uint32_t *map; - - if (!fp->buffer) - fp->buffer = ws->buffer_create(ws, 0x100, 0, - fp->insn_len * 4); - map = ws->buffer_map(ws, fp->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - -#if 0 - for (i = 0; i < fp->insn_len; i++) { - NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); - } -#endif - - if ((*(const uint8_t *)&le)) { - for (i = 0; i < fp->insn_len; i++) { - map[i] = fp->insn[i]; - } - } else { - /* Weird swapping for big-endian chips */ - for (i = 0; i < fp->insn_len; i++) { - map[i] = ((fp->insn[i] & 0xffff) << 16) | - ((fp->insn[i] >> 16) & 0xffff); - } - } - - ws->buffer_unmap(ws, fp->buffer); - fp->on_hw = TRUE; - } - - so = so_new(4, 1); - so_method(so, nv40->hw->curie, NV40TCL_FP_ADDRESS, 1); - so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); - so_method(so, nv40->hw->curie, NV40TCL_FP_CONTROL, 1); - so_data (so, fp->fp_control); - - so_emit(nv40->nvws, so); - so_ref(so, &fp->so); - so_ref(NULL, &so); - - nv40->fragprog.active = fp; -} - -void -nv40_fragprog_destroy(struct nv40_context *nv40, - struct nv40_fragment_program *fp) -{ - if (fp->insn_len) - free(fp->insn); -} - diff --git a/src/mesa/pipe/nv40/nv40_fragtex.c b/src/mesa/pipe/nv40/nv40_fragtex.c deleted file mode 100644 index 5af5fbe7465..00000000000 --- a/src/mesa/pipe/nv40/nv40_fragtex.c +++ /dev/null @@ -1,151 +0,0 @@ -#include "nv40_context.h" - -#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ -{ \ - TRUE, \ - PIPE_FORMAT_##m, \ - NV40TCL_TEX_FORMAT_FORMAT_##tf, \ - (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \ - NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \ - NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \ - NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \ -} - -struct nv40_texture_format { - boolean defined; - uint pipe; - int format; - int swizzle; -}; - -static struct nv40_texture_format -nv40_texture_formats[] = { - _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), - _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), - _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), - _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), - _(U_L8 , L8 , S1, S1, S1, ONE, X, X, X, X), - _(U_A8 , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), - _(U_I8 , L8 , S1, S1, S1, S1, X, X, X, X), - _(U_A8_L8 , A8L8 , S1, S1, S1, S1, X, X, X, Y), - _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), - _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), -// _(RGB_DXT1 , 0x86, S1, S1, S1, ONE, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT1 , 0x86, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT3 , 0x87, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT5 , 0x88, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), - {}, -}; - -static struct nv40_texture_format * -nv40_fragtex_format(uint pipe_format) -{ - struct nv40_texture_format *tf = nv40_texture_formats; - - while (tf->defined) { - if (tf->pipe == pipe_format) - return tf; - tf++; - } - - return NULL; -} - - -static void -nv40_fragtex_build(struct nv40_context *nv40, int unit) -{ - struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; - struct nv40_miptree *nv40mt = nv40->tex_miptree[unit]; - struct pipe_texture *pt = &nv40mt->base; - struct nv40_texture_format *tf; - struct nouveau_stateobj *so; - uint32_t txf, txs, txp; - int swizzled = 0; /*XXX: implement in region code? */ - unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - - tf = nv40_fragtex_format(pt->format); - if (!tf) - assert(0); - - txf = ps->fmt; - txf |= tf->format | 0x8000; - txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); - - if (1) /* XXX */ - txf |= NV40TCL_TEX_FORMAT_NO_BORDER; - - switch (pt->target) { - case PIPE_TEXTURE_CUBE: - txf |= NV40TCL_TEX_FORMAT_CUBIC; - /* fall-through */ - case PIPE_TEXTURE_2D: - txf |= NV40TCL_TEX_FORMAT_DIMS_2D; - break; - case PIPE_TEXTURE_3D: - txf |= NV40TCL_TEX_FORMAT_DIMS_3D; - break; - case PIPE_TEXTURE_1D: - txf |= NV40TCL_TEX_FORMAT_DIMS_1D; - break; - default: - NOUVEAU_ERR("Unknown target %d\n", pt->target); - return; - } - - if (swizzled) { - txp = 0; - } else { - txp = nv40mt->level[0].pitch; - txf |= NV40TCL_TEX_FORMAT_LINEAR; - } - - txs = tf->swizzle; - - so = so_new(16, 2); - so_method(so, nv40->hw->curie, NV40TCL_TEX_OFFSET(unit), 8); - so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, - NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); - so_data (so, ps->wrap); - so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); - so_data (so, txs); - so_data (so, ps->filt | 0x2000 /*voodoo*/); - so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | - pt->height[0]); - so_data (so, ps->bcol); - so_method(so, nv40->hw->curie, NV40TCL_TEX_SIZE1(unit), 1); - so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); - - so_emit(nv40->nvws, so); - so_ref (so, &nv40->so_fragtex[unit]); - so_ref (NULL, &so); -} - -void -nv40_fragtex_bind(struct nv40_context *nv40) -{ - struct nv40_fragment_program *fp = nv40->fragprog.active; - unsigned samplers, unit; - - samplers = nv40->fp_samplers & ~fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so_ref(NULL, &nv40->so_fragtex[unit]); - BEGIN_RING(curie, NV40TCL_TEX_ENABLE(unit), 1); - OUT_RING (0); - } - - samplers = nv40->dirty_samplers & fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - nv40_fragtex_build(nv40, unit); - } - - nv40->fp_samplers = fp->samplers; -} - diff --git a/src/mesa/pipe/nv40/nv40_miptree.c b/src/mesa/pipe/nv40/nv40_miptree.c deleted file mode 100644 index 92e6b3a43df..00000000000 --- a/src/mesa/pipe/nv40/nv40_miptree.c +++ /dev/null @@ -1,104 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" - -#include "nv40_context.h" - -static void -nv40_miptree_layout(struct nv40_miptree *nv40mt) -{ - struct pipe_texture *pt = &nv40mt->base; - boolean swizzled = FALSE; - uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; - uint offset = 0; - int nr_faces, l, f; - - if (pt->target == PIPE_TEXTURE_CUBE) { - nr_faces = 6; - } else - if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth[0]; - } else { - nr_faces = 1; - } - - for (l = 0; l <= pt->last_level; l++) { - pt->width[l] = width; - pt->height[l] = height; - pt->depth[l] = depth; - - if (swizzled) - nv40mt->level[l].pitch = pt->width[l] * pt->cpp; - else - nv40mt->level[l].pitch = pt->width[0] * pt->cpp; - nv40mt->level[l].pitch = (nv40mt->level[l].pitch + 63) & ~63; - - nv40mt->level[l].image_offset = - CALLOC(nr_faces, sizeof(unsigned)); - - width = MAX2(1, width >> 1); - height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); - - } - - for (f = 0; f < nr_faces; f++) { - for (l = 0; l <= pt->last_level; l++) { - nv40mt->level[l].image_offset[f] = offset; - offset += nv40mt->level[l].pitch * pt->height[l]; - } - } - - nv40mt->total_size = offset; -} - -static struct pipe_texture * -nv40_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) -{ - struct pipe_winsys *ws = pipe->winsys; - struct nv40_miptree *mt; - - mt = MALLOC(sizeof(struct nv40_miptree)); - if (!mt) - return NULL; - mt->base = *pt; - nv40_miptree_layout(mt); - - mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, - mt->total_size); - if (!mt->buffer) { - free(mt); - return NULL; - } - - return &mt->base; -} - -static void -nv40_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - struct pipe_winsys *ws = pipe->winsys; - struct pipe_texture *mt = *pt; - - *pt = NULL; - if (--mt->refcount <= 0) { - struct nv40_miptree *nv40mt = (struct nv40_miptree *)mt; - int l; - - pipe_buffer_reference(ws, &nv40mt->buffer, NULL); - for (l = 0; l <= mt->last_level; l++) { - if (nv40mt->level[l].image_offset) - free(nv40mt->level[l].image_offset); - } - free(nv40mt); - } -} - -void -nv40_init_miptree_functions(struct nv40_context *nv40) -{ - nv40->pipe.texture_create = nv40_miptree_create; - nv40->pipe.texture_release = nv40_miptree_release; -} - diff --git a/src/mesa/pipe/nv40/nv40_query.c b/src/mesa/pipe/nv40/nv40_query.c deleted file mode 100644 index 8bca2788b93..00000000000 --- a/src/mesa/pipe/nv40/nv40_query.c +++ /dev/null @@ -1,113 +0,0 @@ -#include "pipe/p_context.h" - -#include "nv40_context.h" - -struct nv40_query { - struct nouveau_resource *object; - unsigned type; - boolean ready; - uint64_t result; -}; - -static INLINE struct nv40_query * -nv40_query(struct pipe_query *pipe) -{ - return (struct nv40_query *)pipe; -} - -static struct pipe_query * -nv40_query_create(struct pipe_context *pipe, unsigned query_type) -{ - struct nv40_query *q; - - q = CALLOC(1, sizeof(struct nv40_query)); - q->type = query_type; - - return (struct pipe_query *)q; -} - -static void -nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_query *q = nv40_query(pq); - - if (q->object) - nv40->nvws->res_free(&q->object); - free(q); -} - -static void -nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_query *q = nv40_query(pq); - - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - if (nv40->nvws->res_alloc(nv40->hw->query_heap, 1, NULL, &q->object)) - assert(0); - nv40->nvws->notifier_reset(nv40->hw->query, q->object->start); - - BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); - OUT_RING (1); - - q->ready = FALSE; -} - -static void -nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_query *q = nv40_query(pq); - - BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | - ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(); -} - -static boolean -nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64 *result) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_query *q = nv40_query(pq); - struct nouveau_winsys *nvws = nv40->nvws; - - assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - if (!q->ready) { - unsigned status; - - status = nvws->notifier_status(nv40->hw->query, - q->object->start); - if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { - if (wait == FALSE) - return FALSE; - nvws->notifier_wait(nv40->hw->query, q->object->start, - NV_NOTIFY_STATE_STATUS_COMPLETED, - 0); - } - - q->result = nvws->notifier_retval(nv40->hw->query, - q->object->start); - q->ready = TRUE; - nvws->res_free(&q->object); - } - - *result = q->result; - return TRUE; -} - -void -nv40_init_query_functions(struct nv40_context *nv40) -{ - nv40->pipe.create_query = nv40_query_create; - nv40->pipe.destroy_query = nv40_query_destroy; - nv40->pipe.begin_query = nv40_query_begin; - nv40->pipe.end_query = nv40_query_end; - nv40->pipe.get_query_result = nv40_query_result; -} diff --git a/src/mesa/pipe/nv40/nv40_shader.h b/src/mesa/pipe/nv40/nv40_shader.h deleted file mode 100644 index 5909c70713c..00000000000 --- a/src/mesa/pipe/nv40/nv40_shader.h +++ /dev/null @@ -1,554 +0,0 @@ -#ifndef __NV40_SHADER_H__ -#define __NV40_SHADER_H__ - -/* Vertex programs instruction set - * - * The NV40 instruction set is very similar to NV30. Most fields are in - * a slightly different position in the instruction however. - * - * Merged instructions - * In some cases it is possible to put two instructions into one opcode - * slot. The rules for when this is OK is not entirely clear to me yet. - * - * There are separate writemasks and dest temp register fields for each - * grouping of instructions. There is however only one field with the - * ID of a result register. Writing to temp/result regs is selected by - * setting VEC_RESULT/SCA_RESULT. - * - * Temporary registers - * The source/dest temp register fields have been extended by 1 bit, to - * give a total of 32 temporary registers. - * - * Relative Addressing - * NV40 can use an address register to index into vertex attribute regs. - * This is done by putting the offset value into INPUT_SRC and setting - * the INDEX_INPUT flag. - * - * Conditional execution (see NV_vertex_program{2,3} for details) - * There is a second condition code register on NV40, it's use is enabled - * by setting the COND_REG_SELECT_1 flag. - * - * Texture lookup - * TODO - */ - -/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ -#define NV40_VP_INST_VEC_RESULT (1 << 30) -/* uncertain.. */ -#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) -/* use address reg as index into attribs */ -#define NV40_VP_INST_INDEX_INPUT (1 << 27) -#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) -#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) -#define NV40_VP_INST_SRC2_ABS (1 << 23) -#define NV40_VP_INST_SRC1_ABS (1 << 22) -#define NV40_VP_INST_SRC0_ABS (1 << 21) -#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 -#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) -#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) -#define NV40_VP_INST_COND_SHIFT 10 -#define NV40_VP_INST_COND_MASK (0x7 << 10) -# define NV40_VP_INST_COND_FL 0 -# define NV40_VP_INST_COND_LT 1 -# define NV40_VP_INST_COND_EQ 2 -# define NV40_VP_INST_COND_LE 3 -# define NV40_VP_INST_COND_GT 4 -# define NV40_VP_INST_COND_NE 5 -# define NV40_VP_INST_COND_GE 6 -# define NV40_VP_INST_COND_TR 7 -#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 -#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) -#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 -#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) -#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 -#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) -#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 -#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) -#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 -#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) -#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 -#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) -#define NV40_VP_INST0_KNOWN ( \ - NV40_VP_INST_INDEX_INPUT | \ - NV40_VP_INST_COND_REG_SELECT_1 | \ - NV40_VP_INST_ADDR_REG_SELECT_1 | \ - NV40_VP_INST_SRC2_ABS | \ - NV40_VP_INST_SRC1_ABS | \ - NV40_VP_INST_SRC0_ABS | \ - NV40_VP_INST_VEC_DEST_TEMP_MASK | \ - NV40_VP_INST_COND_TEST_ENABLE | \ - NV40_VP_INST_COND_MASK | \ - NV40_VP_INST_COND_SWZ_ALL_MASK | \ - NV40_VP_INST_ADDR_SWZ_MASK) - -/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ -#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 -#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) -# define NV40_VP_INST_OP_NOP 0x00 -# define NV40_VP_INST_OP_MOV 0x01 -# define NV40_VP_INST_OP_MUL 0x02 -# define NV40_VP_INST_OP_ADD 0x03 -# define NV40_VP_INST_OP_MAD 0x04 -# define NV40_VP_INST_OP_DP3 0x05 -# define NV40_VP_INST_OP_DPH 0x06 -# define NV40_VP_INST_OP_DP4 0x07 -# define NV40_VP_INST_OP_DST 0x08 -# define NV40_VP_INST_OP_MIN 0x09 -# define NV40_VP_INST_OP_MAX 0x0A -# define NV40_VP_INST_OP_SLT 0x0B -# define NV40_VP_INST_OP_SGE 0x0C -# define NV40_VP_INST_OP_ARL 0x0D -# define NV40_VP_INST_OP_FRC 0x0E -# define NV40_VP_INST_OP_FLR 0x0F -# define NV40_VP_INST_OP_SEQ 0x10 -# define NV40_VP_INST_OP_SFL 0x11 -# define NV40_VP_INST_OP_SGT 0x12 -# define NV40_VP_INST_OP_SLE 0x13 -# define NV40_VP_INST_OP_SNE 0x14 -# define NV40_VP_INST_OP_STR 0x15 -# define NV40_VP_INST_OP_SSG 0x16 -# define NV40_VP_INST_OP_ARR 0x17 -# define NV40_VP_INST_OP_ARA 0x18 -# define NV40_VP_INST_OP_TXL 0x19 -#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 -#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) -# define NV40_VP_INST_OP_NOP 0x00 -# define NV40_VP_INST_OP_MOV 0x01 -# define NV40_VP_INST_OP_RCP 0x02 -# define NV40_VP_INST_OP_RCC 0x03 -# define NV40_VP_INST_OP_RSQ 0x04 -# define NV40_VP_INST_OP_EXP 0x05 -# define NV40_VP_INST_OP_LOG 0x06 -# define NV40_VP_INST_OP_LIT 0x07 -# define NV40_VP_INST_OP_BRA 0x09 -# define NV40_VP_INST_OP_CAL 0x0B -# define NV40_VP_INST_OP_RET 0x0C -# define NV40_VP_INST_OP_LG2 0x0D -# define NV40_VP_INST_OP_EX2 0x0E -# define NV40_VP_INST_OP_SIN 0x0F -# define NV40_VP_INST_OP_COS 0x10 -# define NV40_VP_INST_OP_PUSHA 0x13 -# define NV40_VP_INST_OP_POPA 0x14 -#define NV40_VP_INST_CONST_SRC_SHIFT 12 -#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) -#define NV40_VP_INST_INPUT_SRC_SHIFT 8 -#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) -# define NV40_VP_INST_IN_POS 0 -# define NV40_VP_INST_IN_WEIGHT 1 -# define NV40_VP_INST_IN_NORMAL 2 -# define NV40_VP_INST_IN_COL0 3 -# define NV40_VP_INST_IN_COL1 4 -# define NV40_VP_INST_IN_FOGC 5 -# define NV40_VP_INST_IN_TC0 8 -# define NV40_VP_INST_IN_TC(n) (8+n) -#define NV40_VP_INST_SRC0H_SHIFT 0 -#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) -#define NV40_VP_INST1_KNOWN ( \ - NV40_VP_INST_VEC_OPCODE_MASK | \ - NV40_VP_INST_SCA_OPCODE_MASK | \ - NV40_VP_INST_CONST_SRC_MASK | \ - NV40_VP_INST_INPUT_SRC_MASK | \ - NV40_VP_INST_SRC0H_MASK \ - ) - -/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ -#define NV40_VP_INST_SRC0L_SHIFT 23 -#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) -#define NV40_VP_INST_SRC1_SHIFT 6 -#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) -#define NV40_VP_INST_SRC2H_SHIFT 0 -#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) -#define NV40_VP_INST_IADDRH_SHIFT 0 -#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) - -/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ -#define NV40_VP_INST_IADDRL_SHIFT 29 -#define NV40_VP_INST_IADDRL_MASK (7 << 29) -#define NV40_VP_INST_SRC2L_SHIFT 21 -#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) -#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 -#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) -# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) -# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) -# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) -# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) -#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 -#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) -# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) -# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) -# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) -# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) -#define NV40_VP_INST_SCA_RESULT (1 << 12) -#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 -#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) -#define NV40_VP_INST_DEST_SHIFT 2 -#define NV40_VP_INST_DEST_MASK (31 << 2) -# define NV40_VP_INST_DEST_POS 0 -# define NV40_VP_INST_DEST_COL0 1 -# define NV40_VP_INST_DEST_COL1 2 -# define NV40_VP_INST_DEST_BFC0 3 -# define NV40_VP_INST_DEST_BFC1 4 -# define NV40_VP_INST_DEST_FOGC 5 -# define NV40_VP_INST_DEST_PSZ 6 -# define NV40_VP_INST_DEST_TC0 7 -# define NV40_VP_INST_DEST_TC(n) (7+n) -# define NV40_VP_INST_DEST_TEMP 0x1F -#define NV40_VP_INST_INDEX_CONST (1 << 1) -#define NV40_VP_INST_LAST (1 << 0) -#define NV40_VP_INST3_KNOWN ( \ - NV40_VP_INST_SRC2L_MASK |\ - NV40_VP_INST_SCA_WRITEMASK_MASK |\ - NV40_VP_INST_VEC_WRITEMASK_MASK |\ - NV40_VP_INST_SCA_DEST_TEMP_MASK |\ - NV40_VP_INST_DEST_MASK |\ - NV40_VP_INST_INDEX_CONST) - -/* Useful to split the source selection regs into their pieces */ -#define NV40_VP_SRC0_HIGH_SHIFT 9 -#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 -#define NV40_VP_SRC0_LOW_MASK 0x000001FF -#define NV40_VP_SRC2_HIGH_SHIFT 11 -#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 -#define NV40_VP_SRC2_LOW_MASK 0x000007FF - -/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ -#define NV40_VP_SRC_NEGATE (1 << 16) -#define NV40_VP_SRC_SWZ_X_SHIFT 14 -#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) -#define NV40_VP_SRC_SWZ_Y_SHIFT 12 -#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) -#define NV40_VP_SRC_SWZ_Z_SHIFT 10 -#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) -#define NV40_VP_SRC_SWZ_W_SHIFT 8 -#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) -#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 -#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) -#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 -#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) -#define NV40_VP_SRC_REG_TYPE_SHIFT 0 -#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) -# define NV40_VP_SRC_REG_TYPE_UNK0 0 -# define NV40_VP_SRC_REG_TYPE_TEMP 1 -# define NV40_VP_SRC_REG_TYPE_INPUT 2 -# define NV40_VP_SRC_REG_TYPE_CONST 3 - - -/* - * Each fragment program opcode appears to be comprised of 4 32-bit values. - * - * 0 - Opcode, output reg/mask, ATTRIB source - * 1 - Source 0 - * 2 - Source 1 - * 3 - Source 2 - * - * There appears to be no special difference between result regs and temp regs. - * result.color == R0.xyzw - * result.depth == R1.z - * When the fragprog contains instructions to write depth, - * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. - * - * Constants are inserted directly after the instruction that uses them. - * - * It appears that it's not possible to use two input registers in one - * instruction as the input sourcing is done in the instruction dword - * and not the source selection dwords. As such instructions such as: - * - * ADD result.color, fragment.color, fragment.texcoord[0]; - * - * must be split into two MOV's and then an ADD (nvidia does this) but - * I'm not sure why it's not just one MOV and then source the second input - * in the ADD instruction.. - * - * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary - * negation requires multiplication with a const. - * - * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and - * SWIZZLE_ONE. - * - * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as - * SWIZZLE_ZERO is implemented simply by not writing to the relevant components - * of the destination. - * - * Looping - * Loops appear to be fairly expensive on NV40 at least, the proprietary - * driver goes to a lot of effort to avoid using the native looping - * instructions. If the total number of *executed* instructions between - * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. - * The maximum loop count is 255. - * - * Conditional execution - * TODO - * - * Non-native instructions: - * LIT - * LRP - MAD+MAD - * SUB - ADD, negate second source - * RSQ - LG2 + EX2 - * POW - LG2 + MUL + EX2 - * SCS - COS + SIN - * XPD - * DP2 - MUL + ADD - * NRM - */ - -//== Opcode / Destination selection == -#define NV40_FP_OP_PROGRAM_END (1 << 0) -#define NV40_FP_OP_OUT_REG_SHIFT 1 -#define NV40_FP_OP_OUT_REG_MASK (63 << 1) -/* Needs to be set when writing outputs to get expected result.. */ -#define NV40_FP_OP_OUT_REG_HALF (1 << 7) -#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8) -#define NV40_FP_OP_OUTMASK_SHIFT 9 -#define NV40_FP_OP_OUTMASK_MASK (0xF << 9) -# define NV40_FP_OP_OUT_X (1 << 9) -# define NV40_FP_OP_OUT_Y (1 <<10) -# define NV40_FP_OP_OUT_Z (1 <<11) -# define NV40_FP_OP_OUT_W (1 <<12) -/* Uncertain about these, especially the input_src values.. it's possible that - * they can be dynamically changed. - */ -#define NV40_FP_OP_INPUT_SRC_SHIFT 13 -#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13) -# define NV40_FP_OP_INPUT_SRC_POSITION 0x0 -# define NV40_FP_OP_INPUT_SRC_COL0 0x1 -# define NV40_FP_OP_INPUT_SRC_COL1 0x2 -# define NV40_FP_OP_INPUT_SRC_FOGC 0x3 -# define NV40_FP_OP_INPUT_SRC_TC0 0x4 -# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n) -# define NV40_FP_OP_INPUT_SRC_FACING 0xE -#define NV40_FP_OP_TEX_UNIT_SHIFT 17 -#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17) -#define NV40_FP_OP_PRECISION_SHIFT 22 -#define NV40_FP_OP_PRECISION_MASK (3 << 22) -# define NV40_FP_PRECISION_FP32 0 -# define NV40_FP_PRECISION_FP16 1 -# define NV40_FP_PRECISION_FX12 2 -#define NV40_FP_OP_OPCODE_SHIFT 24 -#define NV40_FP_OP_OPCODE_MASK (0x3F << 24) -# define NV40_FP_OP_OPCODE_NOP 0x00 -# define NV40_FP_OP_OPCODE_MOV 0x01 -# define NV40_FP_OP_OPCODE_MUL 0x02 -# define NV40_FP_OP_OPCODE_ADD 0x03 -# define NV40_FP_OP_OPCODE_MAD 0x04 -# define NV40_FP_OP_OPCODE_DP3 0x05 -# define NV40_FP_OP_OPCODE_DP4 0x06 -# define NV40_FP_OP_OPCODE_DST 0x07 -# define NV40_FP_OP_OPCODE_MIN 0x08 -# define NV40_FP_OP_OPCODE_MAX 0x09 -# define NV40_FP_OP_OPCODE_SLT 0x0A -# define NV40_FP_OP_OPCODE_SGE 0x0B -# define NV40_FP_OP_OPCODE_SLE 0x0C -# define NV40_FP_OP_OPCODE_SGT 0x0D -# define NV40_FP_OP_OPCODE_SNE 0x0E -# define NV40_FP_OP_OPCODE_SEQ 0x0F -# define NV40_FP_OP_OPCODE_FRC 0x10 -# define NV40_FP_OP_OPCODE_FLR 0x11 -# define NV40_FP_OP_OPCODE_KIL 0x12 -# define NV40_FP_OP_OPCODE_PK4B 0x13 -# define NV40_FP_OP_OPCODE_UP4B 0x14 -/* DDX/DDY can only write to XY */ -# define NV40_FP_OP_OPCODE_DDX 0x15 -# define NV40_FP_OP_OPCODE_DDY 0x16 -# define NV40_FP_OP_OPCODE_TEX 0x17 -# define NV40_FP_OP_OPCODE_TXP 0x18 -# define NV40_FP_OP_OPCODE_TXD 0x19 -# define NV40_FP_OP_OPCODE_RCP 0x1A -# define NV40_FP_OP_OPCODE_EX2 0x1C -# define NV40_FP_OP_OPCODE_LG2 0x1D -# define NV40_FP_OP_OPCODE_STR 0x20 -# define NV40_FP_OP_OPCODE_SFL 0x21 -# define NV40_FP_OP_OPCODE_COS 0x22 -# define NV40_FP_OP_OPCODE_SIN 0x23 -# define NV40_FP_OP_OPCODE_PK2H 0x24 -# define NV40_FP_OP_OPCODE_UP2H 0x25 -# define NV40_FP_OP_OPCODE_PK4UB 0x27 -# define NV40_FP_OP_OPCODE_UP4UB 0x28 -# define NV40_FP_OP_OPCODE_PK2US 0x29 -# define NV40_FP_OP_OPCODE_UP2US 0x2A -# define NV40_FP_OP_OPCODE_DP2A 0x2E -# define NV40_FP_OP_OPCODE_TXL 0x2F -# define NV40_FP_OP_OPCODE_TXB 0x31 -# define NV40_FP_OP_OPCODE_DIV 0x3A -# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C -/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ -# define NV40_FP_OP_BRA_OPCODE_BRK 0x0 -# define NV40_FP_OP_BRA_OPCODE_CAL 0x1 -# define NV40_FP_OP_BRA_OPCODE_IF 0x2 -# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 -# define NV40_FP_OP_BRA_OPCODE_REP 0x4 -# define NV40_FP_OP_BRA_OPCODE_RET 0x5 -#define NV40_FP_OP_OUT_SAT (1 << 31) - -/* high order bits of SRC0 */ -#define NV40_FP_OP_OUT_ABS (1 << 29) -#define NV40_FP_OP_COND_SWZ_W_SHIFT 27 -#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27) -#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25 -#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25) -#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23 -#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23) -#define NV40_FP_OP_COND_SWZ_X_SHIFT 21 -#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21) -#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21 -#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) -#define NV40_FP_OP_COND_SHIFT 18 -#define NV40_FP_OP_COND_MASK (0x07 << 18) -# define NV40_FP_OP_COND_FL 0 -# define NV40_FP_OP_COND_LT 1 -# define NV40_FP_OP_COND_EQ 2 -# define NV40_FP_OP_COND_LE 3 -# define NV40_FP_OP_COND_GT 4 -# define NV40_FP_OP_COND_NE 5 -# define NV40_FP_OP_COND_GE 6 -# define NV40_FP_OP_COND_TR 7 - -/* high order bits of SRC1 */ -#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) -#define NV40_FP_OP_DST_SCALE_SHIFT 28 -#define NV40_FP_OP_DST_SCALE_MASK (3 << 28) -#define NV40_FP_OP_DST_SCALE_1X 0 -#define NV40_FP_OP_DST_SCALE_2X 1 -#define NV40_FP_OP_DST_SCALE_4X 2 -#define NV40_FP_OP_DST_SCALE_8X 3 -#define NV40_FP_OP_DST_SCALE_INV_2X 5 -#define NV40_FP_OP_DST_SCALE_INV_4X 6 -#define NV40_FP_OP_DST_SCALE_INV_8X 7 - -/* SRC1 LOOP */ -#define NV40_FP_OP_LOOP_INCR_SHIFT 19 -#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) -#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 -#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) -#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 -#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) - -/* SRC1 IF */ -#define NV40_FP_OP_ELSE_ID_SHIFT 2 -#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) - -/* SRC1 CAL */ -#define NV40_FP_OP_IADDR_SHIFT 2 -#define NV40_FP_OP_IADDR_MASK (0xFF << 2) - -/* SRC1 REP - * I have no idea why there are 3 count values here.. but they - * have always been filled with the same value in my tests so - * far.. - */ -#define NV40_FP_OP_REP_COUNT1_SHIFT 2 -#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) -#define NV40_FP_OP_REP_COUNT2_SHIFT 10 -#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) -#define NV40_FP_OP_REP_COUNT3_SHIFT 19 -#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) - -/* SRC2 REP/IF */ -#define NV40_FP_OP_END_ID_SHIFT 2 -#define NV40_FP_OP_END_ID_MASK (0xFF << 2) - -// SRC2 high-order -#define NV40_FP_OP_INDEX_INPUT (1 << 30) -#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 -#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) - -//== Register selection == -#define NV40_FP_REG_TYPE_SHIFT 0 -#define NV40_FP_REG_TYPE_MASK (3 << 0) -# define NV40_FP_REG_TYPE_TEMP 0 -# define NV40_FP_REG_TYPE_INPUT 1 -# define NV40_FP_REG_TYPE_CONST 2 -#define NV40_FP_REG_SRC_SHIFT 2 -#define NV40_FP_REG_SRC_MASK (63 << 2) -#define NV40_FP_REG_SRC_HALF (1 << 8) -#define NV40_FP_REG_SWZ_ALL_SHIFT 9 -#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9) -#define NV40_FP_REG_SWZ_X_SHIFT 9 -#define NV40_FP_REG_SWZ_X_MASK (3 << 9) -#define NV40_FP_REG_SWZ_Y_SHIFT 11 -#define NV40_FP_REG_SWZ_Y_MASK (3 << 11) -#define NV40_FP_REG_SWZ_Z_SHIFT 13 -#define NV40_FP_REG_SWZ_Z_MASK (3 << 13) -#define NV40_FP_REG_SWZ_W_SHIFT 15 -#define NV40_FP_REG_SWZ_W_MASK (3 << 15) -# define NV40_FP_SWIZZLE_X 0 -# define NV40_FP_SWIZZLE_Y 1 -# define NV40_FP_SWIZZLE_Z 2 -# define NV40_FP_SWIZZLE_W 3 -#define NV40_FP_REG_NEGATE (1 << 17) - -#define NV40SR_NONE 0 -#define NV40SR_OUTPUT 1 -#define NV40SR_INPUT 2 -#define NV40SR_TEMP 3 -#define NV40SR_CONST 4 - -struct nv40_sreg { - int type; - int index; - - int dst_scale; - - int negate; - int abs; - int swz[4]; - - int cc_update; - int cc_update_reg; - int cc_test; - int cc_test_reg; - int cc_swz[4]; -}; - -static INLINE struct nv40_sreg -nv40_sr(int type, int index) -{ - struct nv40_sreg temp = { - .type = type, - .index = index, - .dst_scale = DEF_SCALE, - .abs = 0, - .negate = 0, - .swz = { 0, 1, 2, 3 }, - .cc_update = 0, - .cc_update_reg = 0, - .cc_test = DEF_CTEST, - .cc_test_reg = 0, - .cc_swz = { 0, 1, 2, 3 }, - }; - return temp; -} - -static INLINE struct nv40_sreg -nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) -{ - struct nv40_sreg dst = src; - - dst.swz[SWZ_X] = src.swz[x]; - dst.swz[SWZ_Y] = src.swz[y]; - dst.swz[SWZ_Z] = src.swz[z]; - dst.swz[SWZ_W] = src.swz[w]; - return dst; -} - -static INLINE struct nv40_sreg -nv40_sr_neg(struct nv40_sreg src) -{ - src.negate = !src.negate; - return src; -} - -static INLINE struct nv40_sreg -nv40_sr_abs(struct nv40_sreg src) -{ - src.abs = 1; - return src; -} - -static INLINE struct nv40_sreg -nv40_sr_scale(struct nv40_sreg src, int scale) -{ - src.dst_scale = scale; - return src; -} - -#endif diff --git a/src/mesa/pipe/nv40/nv40_state.c b/src/mesa/pipe/nv40/nv40_state.c deleted file mode 100644 index 713f31dbb12..00000000000 --- a/src/mesa/pipe/nv40/nv40_state.c +++ /dev/null @@ -1,823 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" - -#include "nv40_context.h" -#include "nv40_state.h" - -static void * -nv40_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_grobj *curie = nv40->hw->curie; - struct nouveau_stateobj *so = so_new(16, 0); - - if (cso->blend_enable) { - so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); - so_data (so, 1); - so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | - nvgl_blend_func(cso->rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | - nvgl_blend_func(cso->rgb_dst_factor)); - so_method(so, curie, NV40TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 | - nvgl_blend_eqn(cso->rgb_func)); - } else { - so_method(so, curie, NV40TCL_BLEND_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, curie, NV40TCL_COLOR_MASK, 1); - so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | - ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | - ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | - ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); - - if (cso->logicop_enable) { - so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); - so_data (so, 1); - so_data (so, nvgl_logicop_func(cso->logicop_func)); - } else { - so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); - so_data (so, cso->dither ? 1 : 0); - - return (void *)so; -} - -static void -nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - so_ref(hwcso, &nv40->so_blend); - nv40->dirty |= NV40_NEW_BLEND; -} - -static void -nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nouveau_stateobj *so = hwcso; - - so_ref(NULL, &so); -} - - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV40TCL_TEX_WRAP_S_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV40TCL_TEX_WRAP_S_CLAMP; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; - break; - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV40TCL_TEX_WRAP_S_REPEAT; - break; - } - - return ret >> NV40TCL_TEX_WRAP_S_SHIFT; -} - -static void * -nv40_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nv40_sampler_state *ps; - uint32_t filter = 0; - - ps = MALLOC(sizeof(struct nv40_sampler_state)); - - ps->fmt = 0; - if (!cso->normalized_coords) - ps->fmt |= NV40TCL_TEX_FORMAT_RECT; - - ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) | - (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) | - (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT)); - - ps->en = 0; - if (cso->max_anisotropy >= 2.0) { - /* no idea, binary driver sets it, works without it.. meh.. */ - ps->wrap |= (1 << 5); - - if (cso->max_anisotropy >= 16.0) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; - } else - if (cso->max_anisotropy >= 12.0) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; - } else - if (cso->max_anisotropy >= 10.0) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; - } else - if (cso->max_anisotropy >= 8.0) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 6.0) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; - } else - if (cso->max_anisotropy >= 4.0) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; - } else { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; - } - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV40TCL_TEX_FILTER_MAG_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV40TCL_TEX_FILTER_MAG_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV40TCL_TEX_FILTER_MIN_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV40TCL_TEX_FILTER_MIN_NEAREST; - break; - } - break; - } - - ps->filt = filter; - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 7; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 19; - } - - - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER; - break; - case PIPE_FUNC_GREATER: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER; - break; - case PIPE_FUNC_EQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL; - break; - case PIPE_FUNC_GEQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL; - break; - case PIPE_FUNC_LESS: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS; - break; - case PIPE_FUNC_NOTEQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL; - break; - case PIPE_FUNC_LEQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL; - break; - case PIPE_FUNC_ALWAYS: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS; - break; - default: - break; - } - } - - ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | - (float_to_ubyte(cso->border_color[0]) << 16) | - (float_to_ubyte(cso->border_color[1]) << 8) | - (float_to_ubyte(cso->border_color[2]) << 0)); - - return (void *)ps; -} - -static void -nv40_sampler_state_bind(struct pipe_context *pipe, unsigned unit, - void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_sampler_state *ps = hwcso; - - nv40->tex_sampler[unit] = ps; - nv40->dirty_samplers |= (1 << unit); -} - -static void -nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ - free(hwcso); -} - -static void -nv40_set_sampler_texture(struct pipe_context *pipe, unsigned unit, - struct pipe_texture *miptree) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->tex_miptree[unit] = (struct nv40_miptree *)miptree; - nv40->dirty_samplers |= (1 << unit); -} - -static void * -nv40_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(32, 0); - - /*XXX: ignored: - * light_twoside - * offset_cw/ccw -nohw - * scissor - * point_smooth -nohw - * multisample - * offset_units / offset_scale - */ - - so_method(so, nv40->hw->curie, NV40TCL_SHADE_MODEL, 1); - so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : - NV40TCL_SHADE_MODEL_SMOOTH); - - so_method(so, nv40->hw->curie, NV40TCL_LINE_WIDTH, 2); - so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); - so_data (so, cso->line_smooth ? 1 : 0); - so_method(so, nv40->hw->curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); - so_data (so, cso->line_stipple_enable ? 1 : 0); - so_data (so, (cso->line_stipple_pattern << 16) | - cso->line_stipple_factor); - - so_method(so, nv40->hw->curie, NV40TCL_POINT_SIZE, 1); - so_data (so, fui(cso->point_size)); - - so_method(so, nv40->hw->curie, NV40TCL_POLYGON_MODE_FRONT, 6); - if (cso->front_winding == PIPE_WINDING_CCW) { - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV40TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_CW: - so_data(so, NV40TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, 0); - break; - } - so_data(so, NV40TCL_FRONT_FACE_CCW); - } else { - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV40TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_CW: - so_data(so, NV40TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, 0); - break; - } - so_data(so, NV40TCL_FRONT_FACE_CW); - } - so_data(so, cso->poly_smooth ? 1 : 0); - so_data(so, cso->cull_mode != PIPE_WINDING_NONE ? 1 : 0); - - so_method(so, nv40->hw->curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, cso->poly_stipple_enable ? 1 : 0); - - so_method(so, nv40->hw->curie, NV40TCL_POINT_SPRITE, 1); - if (cso->point_sprite) { - unsigned psctl = (1 << 0), i; - - for (i = 0; i < 8; i++) { - if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) - psctl |= (1 << (8 + i)); - } - - so_data(so, psctl); - } else { - so_data(so, 0); - } - - return (void *)so; -} - -static void -nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - so_ref(hwcso, &nv40->so_rast); - nv40->dirty |= NV40_NEW_RAST; -} - -static void -nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nouveau_stateobj *so = hwcso; - - so_ref(NULL, &so); -} - -static void * -nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(32, 0); - - so_method(so, nv40->hw->curie, NV40TCL_DEPTH_FUNC, 3); - so_data (so, nvgl_comparison_op(cso->depth.func)); - so_data (so, cso->depth.writemask ? 1 : 0); - so_data (so, cso->depth.enabled ? 1 : 0); - - so_method(so, nv40->hw->curie, NV40TCL_ALPHA_TEST_ENABLE, 3); - so_data (so, cso->alpha.enabled ? 1 : 0); - so_data (so, nvgl_comparison_op(cso->alpha.func)); - so_data (so, float_to_ubyte(cso->alpha.ref)); - - if (cso->stencil[0].enabled) { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); - so_data (so, cso->stencil[0].enabled ? 1 : 0); - so_data (so, cso->stencil[0].write_mask); - so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_data (so, cso->stencil[0].ref_value); - so_data (so, cso->stencil[0].value_mask); - so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); - } else { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); - so_data (so, 0); - } - - if (cso->stencil[1].enabled) { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_BACK_ENABLE, 8); - so_data (so, cso->stencil[1].enabled ? 1 : 0); - so_data (so, cso->stencil[1].write_mask); - so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_data (so, cso->stencil[1].ref_value); - so_data (so, cso->stencil[1].value_mask); - so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); - } else { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_BACK_ENABLE, 1); - so_data (so, 0); - } - - return (void *)so; -} - -static void -nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - so_ref(hwcso, &nv40->so_zsa); - nv40->dirty |= NV40_NEW_ZSA; -} - -static void -nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nouveau_stateobj *so = hwcso; - - so_ref(NULL, &so); -} - -static void * -nv40_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv40_vertex_program *vp; - - vp = CALLOC(1, sizeof(struct nv40_vertex_program)); - vp->pipe = cso; - - return (void *)vp; -} - -static void -nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_vertex_program *vp = hwcso; - - nv40->vertprog.current = vp; - nv40->dirty |= NV40_NEW_VERTPROG; -} - -static void -nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_vertex_program *vp = hwcso; - - nv40_vertprog_destroy(nv40, vp); - free(vp); -} - -static void * -nv40_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv40_fragment_program *fp; - - fp = CALLOC(1, sizeof(struct nv40_fragment_program)); - fp->pipe = cso; - - return (void *)fp; -} - -static void -nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_fragment_program *fp = hwcso; - - nv40->fragprog.current = fp; - nv40->dirty |= NV40_NEW_FRAGPROG; -} - -static void -nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_fragment_program *fp = hwcso; - - nv40_fragprog_destroy(nv40, fp); - free(fp); -} - -static void -nv40_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(2, 0); - - so_method(so, nv40->hw->curie, NV40TCL_BLEND_COLOR, 1); - so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0))); - - so_ref(so, &nv40->so_bcol); - so_ref(NULL, &so); - nv40->dirty |= NV40_NEW_BCOL; -} - -static void -nv40_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ -} - -static void -nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - if (shader == PIPE_SHADER_VERTEX) { - nv40->vertprog.constant_buf = buf->buffer; - nv40->dirty |= NV40_NEW_VERTPROG; - } else - if (shader == PIPE_SHADER_FRAGMENT) { - nv40->fragprog.constant_buf = buf->buffer; - nv40->dirty |= NV40_NEW_FRAGPROG; - } -} - -static void -nv40_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct pipe_surface *rt[4], *zeta; - uint32_t rt_enable, rt_format, w, h; - int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(64, 10); - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - - rt_enable = 0; - for (i = 0; i < 4; i++) { - if (!fb->cbufs[i]) - continue; - - if (colour_format) { - assert(w == fb->cbufs[i]->width); - assert(h == fb->cbufs[i]->height); - assert(colour_format == fb->cbufs[i]->format); - } else { - w = fb->cbufs[i]->width; - h = fb->cbufs[i]->height; - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); - rt[i] = fb->cbufs[i]; - } - } - - if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | - NV40TCL_RT_ENABLE_COLOR3)) - rt_enable |= NV40TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - if (colour_format) { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } else { - w = fb->zsbuf->width; - h = fb->zsbuf->height; - } - - zeta_format = fb->zsbuf->format; - zeta = fb->zsbuf; - } - - rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; - - switch (colour_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case 0: - rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR0, 1); - so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR0_PITCH, 2); - so_data (so, rt[0]->pitch * rt[0]->cpp); - so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR1, 1); - so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR1_OFFSET, 2); - so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_data (so, rt[1]->pitch * rt[1]->cpp); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR2, 1); - so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR2_OFFSET, 1); - so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_COLOR2_PITCH, 1); - so_data (so, rt[2]->pitch * rt[2]->cpp); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR3, 1); - so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR3_OFFSET, 1); - so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_COLOR3_PITCH, 1); - so_data (so, rt[3]->pitch * rt[3]->cpp); - } - - if (zeta_format) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_ZETA, 1); - so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_ZETA_OFFSET, 1); - so_reloc (so, zeta->buffer, zeta->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_ZETA_PITCH, 1); - so_data (so, zeta->pitch * zeta->cpp); - } - - so_method(so, nv40->hw->curie, NV40TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, nv40->hw->curie, NV40TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - - so_ref(so, &nv40->so_framebuffer); - so_ref(NULL, &so); - nv40->dirty |= NV40_NEW_FB; -} - -static void -nv40_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(33, 0); - unsigned i; - - so_method(so, nv40->hw->curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, stipple->stipple[i]); - - so_ref(so, &nv40->so_stipple); - so_ref(NULL, &so); - nv40->dirty |= NV40_NEW_STIPPLE; -} - -static void -nv40_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(3, 0); - - so_method(so, nv40->hw->curie, NV40TCL_SCISSOR_HORIZ, 2); - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); - - so_ref(so, &nv40->so_scissor); - so_ref(NULL, &so); - nv40->dirty |= NV40_NEW_SCISSOR; -} - -static void -nv40_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *vpt) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(9, 0); - - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); - - so_ref(so, &nv40->so_viewport); - so_ref(NULL, &so); - nv40->dirty |= NV40_NEW_VIEWPORT; -} - -static void -nv40_set_vertex_buffer(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_buffer *vb) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->vtxbuf[index] = *vb; - - nv40->dirty |= NV40_NEW_ARRAYS; -} - -static void -nv40_set_vertex_element(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_element *ve) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->vtxelt[index] = *ve; - - nv40->dirty |= NV40_NEW_ARRAYS; -} - -void -nv40_init_state_functions(struct nv40_context *nv40) -{ - nv40->pipe.create_blend_state = nv40_blend_state_create; - nv40->pipe.bind_blend_state = nv40_blend_state_bind; - nv40->pipe.delete_blend_state = nv40_blend_state_delete; - - nv40->pipe.create_sampler_state = nv40_sampler_state_create; - nv40->pipe.bind_sampler_state = nv40_sampler_state_bind; - nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; - nv40->pipe.set_sampler_texture = nv40_set_sampler_texture; - - nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; - nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; - nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; - - nv40->pipe.create_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_create; - nv40->pipe.bind_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_bind; - nv40->pipe.delete_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_delete; - - nv40->pipe.create_vs_state = nv40_vp_state_create; - nv40->pipe.bind_vs_state = nv40_vp_state_bind; - nv40->pipe.delete_vs_state = nv40_vp_state_delete; - - nv40->pipe.create_fs_state = nv40_fp_state_create; - nv40->pipe.bind_fs_state = nv40_fp_state_bind; - nv40->pipe.delete_fs_state = nv40_fp_state_delete; - - nv40->pipe.set_blend_color = nv40_set_blend_color; - nv40->pipe.set_clip_state = nv40_set_clip_state; - nv40->pipe.set_constant_buffer = nv40_set_constant_buffer; - nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state; - nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple; - nv40->pipe.set_scissor_state = nv40_set_scissor_state; - nv40->pipe.set_viewport_state = nv40_set_viewport_state; - - nv40->pipe.set_vertex_buffer = nv40_set_vertex_buffer; - nv40->pipe.set_vertex_element = nv40_set_vertex_element; -} - diff --git a/src/mesa/pipe/nv40/nv40_state.h b/src/mesa/pipe/nv40/nv40_state.h deleted file mode 100644 index e82ab9de98a..00000000000 --- a/src/mesa/pipe/nv40/nv40_state.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef __NV40_STATE_H__ -#define __NV40_STATE_H__ - -#include "pipe/p_state.h" - -struct nv40_sampler_state { - uint32_t fmt; - uint32_t wrap; - uint32_t en; - uint32_t filt; - uint32_t bcol; -}; - -struct nv40_vertex_program_exec { - uint32_t data[4]; - boolean has_branch_offset; - int const_index; -}; - -struct nv40_vertex_program_data { - int index; /* immediates == -1 */ - float value[4]; -}; - -struct nv40_vertex_program { - const struct pipe_shader_state *pipe; - - boolean translated; - struct nv40_vertex_program_exec *insns; - unsigned nr_insns; - struct nv40_vertex_program_data *consts; - unsigned nr_consts; - - struct nouveau_resource *exec; - unsigned exec_start; - struct nouveau_resource *data; - unsigned data_start; - unsigned data_start_min; - - uint32_t ir; - uint32_t or; -}; - -struct nv40_fragment_program_data { - unsigned offset; - unsigned index; -}; - -struct nv40_fragment_program { - const struct pipe_shader_state *pipe; - - boolean translated; - boolean on_hw; - unsigned samplers; - - uint32_t *insn; - int insn_len; - - struct nv40_fragment_program_data *consts; - unsigned nr_consts; - - struct pipe_buffer *buffer; - - uint32_t fp_control; - struct nouveau_stateobj *so; -}; - -struct nv40_miptree { - struct pipe_texture base; - - struct pipe_buffer *buffer; - uint total_size; - - struct { - uint pitch; - uint *image_offset; - } level[PIPE_MAX_TEXTURE_LEVELS]; -}; - -#endif diff --git a/src/mesa/pipe/nv40/nv40_state_emit.c b/src/mesa/pipe/nv40/nv40_state_emit.c deleted file mode 100644 index a10c9955480..00000000000 --- a/src/mesa/pipe/nv40/nv40_state_emit.c +++ /dev/null @@ -1,77 +0,0 @@ -#include "nv40_context.h" -#include "nv40_state.h" - -/* Emit relocs for every referenced buffer. - * - * This is to ensure the bufmgr has an accurate idea of how - * the buffer is used. These relocs appear in the push buffer as - * NOPs, and will only be turned into state changes if a buffer - * actually moves. - */ -static void -nv40_state_emit_dummy_relocs(struct nv40_context *nv40) -{ - unsigned i; - - so_emit_reloc_markers(nv40->nvws, nv40->so_framebuffer); - for (i = 0; i < 16; i++) { - if (!(nv40->fp_samplers & (1 << i))) - continue; - so_emit_reloc_markers(nv40->nvws, nv40->so_fragtex[i]); - } - so_emit_reloc_markers(nv40->nvws, nv40->fragprog.active->so); -} - -void -nv40_emit_hw_state(struct nv40_context *nv40) -{ - if (nv40->dirty & NV40_NEW_FB) - so_emit(nv40->nvws, nv40->so_framebuffer); - - if (nv40->dirty & NV40_NEW_BLEND) - so_emit(nv40->nvws, nv40->so_blend); - - if (nv40->dirty & NV40_NEW_RAST) - so_emit(nv40->nvws, nv40->so_rast); - - if (nv40->dirty & NV40_NEW_ZSA) - so_emit(nv40->nvws, nv40->so_zsa); - - if (nv40->dirty & NV40_NEW_BCOL) - so_emit(nv40->nvws, nv40->so_bcol); - - if (nv40->dirty & NV40_NEW_SCISSOR) - so_emit(nv40->nvws, nv40->so_scissor); - - if (nv40->dirty & NV40_NEW_VIEWPORT) - so_emit(nv40->nvws, nv40->so_viewport); - - if (nv40->dirty & NV40_NEW_STIPPLE) - so_emit(nv40->nvws, nv40->so_stipple); - - if (nv40->dirty & NV40_NEW_FRAGPROG) { - nv40_fragprog_bind(nv40, nv40->fragprog.current); - /*XXX: clear NV40_NEW_FRAGPROG if no new program uploaded */ - } - - if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) { - nv40_fragtex_bind(nv40); - - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (1); - nv40->dirty &= ~NV40_NEW_FRAGPROG; - } - - if (nv40->dirty & NV40_NEW_VERTPROG) { - nv40_vertprog_bind(nv40, nv40->vertprog.current); - nv40->dirty &= ~NV40_NEW_VERTPROG; - } - - nv40->dirty_samplers = 0; - nv40->dirty = 0; - - nv40_state_emit_dummy_relocs(nv40); -} - diff --git a/src/mesa/pipe/nv40/nv40_surface.c b/src/mesa/pipe/nv40/nv40_surface.c deleted file mode 100644 index d8f87d9adc2..00000000000 --- a/src/mesa/pipe/nv40/nv40_surface.c +++ /dev/null @@ -1,136 +0,0 @@ - -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "nv40_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "pipe/p_winsys.h" -#include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" - -static boolean -nv40_surface_format_supported(struct pipe_context *pipe, - enum pipe_format format, uint type) -{ - switch (type) { - case PIPE_SURFACE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - break; - case PIPE_TEXTURE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: - case PIPE_FORMAT_U_A8_L8: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return TRUE; - default: - break; - } - break; - default: - assert(0); - }; - - return FALSE; -} - -static struct pipe_surface * -nv40_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct pipe_winsys *ws = pipe->winsys; - struct nv40_miptree *nv40mt = (struct nv40_miptree *)pt; - struct pipe_surface *ps; - - ps = ws->surface_alloc(ws); - if (!ps) - return NULL; - pipe_buffer_reference(ws, &ps->buffer, nv40mt->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = nv40mt->level[level].pitch / ps->cpp; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset = nv40mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ps->offset = nv40mt->level[level].image_offset[zslice]; - } else { - ps->offset = nv40mt->level[level].image_offset[0]; - } - - return ps; -} - -static void -nv40_surface_copy(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, struct pipe_surface *src, - unsigned srcx, unsigned srcy, unsigned width, unsigned height) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_winsys *nvws = nv40->nvws; - - nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, - width, height); -} - -static void -nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_winsys *nvws = nv40->nvws; - - nvws->surface_fill(nvws, dest, destx, desty, width, height, value); -} - -void -nv40_init_surface_functions(struct nv40_context *nv40) -{ - nv40->pipe.is_format_supported = nv40_surface_format_supported; - nv40->pipe.get_tex_surface = nv40_get_tex_surface; - nv40->pipe.surface_copy = nv40_surface_copy; - nv40->pipe.surface_fill = nv40_surface_fill; -} diff --git a/src/mesa/pipe/nv40/nv40_vbo.c b/src/mesa/pipe/nv40/nv40_vbo.c deleted file mode 100644 index 6b1ac65b49e..00000000000 --- a/src/mesa/pipe/nv40/nv40_vbo.c +++ /dev/null @@ -1,424 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "pipe/p_util.h" - -#include "nv40_context.h" -#include "nv40_state.h" - -#include "pipe/nouveau/nouveau_channel.h" -#include "pipe/nouveau/nouveau_pushbuf.h" - -static INLINE int -nv40_vbo_ncomp(uint format) -{ - int ncomp = 0; - - if (pf_size_x(format)) ncomp++; - if (pf_size_y(format)) ncomp++; - if (pf_size_z(format)) ncomp++; - if (pf_size_w(format)) ncomp++; - - return ncomp; -} - -static INLINE int -nv40_vbo_type(uint format) -{ - switch (pf_type(format)) { - case PIPE_FORMAT_TYPE_FLOAT: - return NV40TCL_VTXFMT_TYPE_FLOAT; - case PIPE_FORMAT_TYPE_UNORM: - return NV40TCL_VTXFMT_TYPE_UBYTE; - default: - NOUVEAU_ERR("Unknown format 0x%08x\n", format); - return NV40TCL_VTXFMT_TYPE_FLOAT; - } -} - -static boolean -nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib, - struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb) -{ - struct pipe_winsys *ws = nv40->pipe.winsys; - int type, ncomp; - void *map; - - type = nv40_vbo_type(ve->src_format); - ncomp = nv40_vbo_ncomp(ve->src_format); - - map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); - map += vb->buffer_offset + ve->src_offset; - - switch (type) { - case NV40TCL_VTXFMT_TYPE_FLOAT: - { - float *v = map; - - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); - switch (ncomp) { - case 4: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(v[2]); - OUT_RINGf(v[3]); - break; - case 3: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(v[2]); - OUT_RINGf(1.0); - break; - case 2: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(0.0); - OUT_RINGf(1.0); - break; - case 1: - OUT_RINGf(v[0]); - OUT_RINGf(0.0); - OUT_RINGf(0.0); - OUT_RINGf(1.0); - break; - default: - ws->buffer_unmap(ws, vb->buffer); - return FALSE; - } - } - break; - default: - ws->buffer_unmap(ws, vb->buffer); - return FALSE; - } - - ws->buffer_unmap(ws, vb->buffer); - - return TRUE; -} - -static void -nv40_vbo_arrays_update(struct nv40_context *nv40, struct pipe_buffer *ib, - unsigned ib_format) -{ - struct nv40_vertex_program *vp = nv40->vertprog.active; - struct nouveau_stateobj *vtxbuf, *vtxfmt; - unsigned inputs, hw, num_hw; - unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - - inputs = vp->ir; - for (hw = 0; hw < 16 && inputs; hw++) { - if (inputs & (1 << hw)) { - num_hw = hw; - inputs &= ~(1 << hw); - } - } - num_hw++; - - vtxbuf = so_new(20, 18); - so_method(vtxbuf, nv40->hw->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); - vtxfmt = so_new(17, 0); - so_method(vtxfmt, nv40->hw->curie, NV40TCL_VTXFMT(0), num_hw); - - inputs = vp->ir; - for (hw = 0; hw < num_hw; hw++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - - if (!(inputs & (1 << hw))) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); - continue; - } - - ve = &nv40->vtxelt[hw]; - vb = &nv40->vtxbuf[ve->vertex_buffer_index]; - - if (!vb->pitch && nv40_vbo_static_attrib(nv40, hw, ve, vb)) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); - continue; - } - - so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV40TCL_VTXBUF_ADDRESS_DMA1); - so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) | - (nv40_vbo_ncomp(ve->src_format) << - NV40TCL_VTXFMT_SIZE_SHIFT) | - nv40_vbo_type(ve->src_format))); - } - - if (ib) { - so_method(vtxbuf, nv40->hw->curie, NV40TCL_IDXBUF_ADDRESS, 2); - so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, - 0, NV40TCL_IDXBUF_FORMAT_DMA1); - } - - so_emit(nv40->nvws, vtxfmt); - so_emit(nv40->nvws, vtxbuf); - so_ref (vtxbuf, &nv40->so_vtxbuf); - so_ref (NULL, &vtxbuf); - so_ref (NULL, &vtxfmt); -} - -static boolean -nv40_vbo_validate_state(struct nv40_context *nv40, - struct pipe_buffer *ib, unsigned ib_format) -{ - unsigned vdn = nv40->dirty & NV40_NEW_ARRAYS; - - nv40_emit_hw_state(nv40); - if (vdn || ib) { - nv40_vbo_arrays_update(nv40, ib, ib_format); - nv40->dirty &= ~NV40_NEW_ARRAYS; - } - - so_emit_reloc_markers(nv40->nvws, nv40->so_vtxbuf); - - BEGIN_RING(curie, 0x1710, 1); - OUT_RING (0); /* vtx cache flush */ - - return TRUE; -} - -boolean -nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, - unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - unsigned nr; - boolean ret; - - ret = nv40_vbo_validate_state(nv40, NULL, 0); - if (!ret) { - NOUVEAU_ERR("state validate failed\n"); - return FALSE; - } - - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); - - nr = (count & 0xff); - if (nr) { - BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); - start += nr; - } - - nr = count >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); - while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); - - pipe->flush(pipe, 0); - return TRUE; -} - -static INLINE void -nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, - unsigned start, unsigned count) -{ - uint8_t *elts = (uint8_t *)ib + start; - int push, i; - - if (count & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); - elts++; count--; - } - - while (count) { - push = MIN2(count, 2047 * 2); - - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); - - count -= push; - elts += push; - } -} - -static INLINE void -nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, - unsigned start, unsigned count) -{ - uint16_t *elts = (uint16_t *)ib + start; - int push, i; - - if (count & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); - elts++; count--; - } - - while (count) { - push = MIN2(count, 2047 * 2); - - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); - - count -= push; - elts += push; - } -} - -static INLINE void -nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, - unsigned start, unsigned count) -{ - uint32_t *elts = (uint32_t *)ib + start; - int push; - - while (count) { - push = MIN2(count, 2047); - - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); - - count -= push; - elts += push; - } -} - -static boolean -nv40_draw_elements_inline(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct pipe_winsys *ws = pipe->winsys; - boolean ret; - void *map; - - ret = nv40_vbo_validate_state(nv40, NULL, 0); - if (!ret) { - NOUVEAU_ERR("state validate failed\n"); - return FALSE; - } - - map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); - if (!ib) { - NOUVEAU_ERR("failed mapping ib\n"); - return FALSE; - } - - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); - - switch (ib_size) { - case 1: - nv40_draw_elements_u08(nv40, map, start, count); - break; - case 2: - nv40_draw_elements_u16(nv40, map, start, count); - break; - case 4: - nv40_draw_elements_u32(nv40, map, start, count); - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - break; - } - - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); - - ws->buffer_unmap(ws, ib); - - return TRUE; -} - -static boolean -nv40_draw_elements_vbo(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - unsigned nr, type; - boolean ret; - - switch (ib_size) { - case 2: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - return FALSE; - } - - ret = nv40_vbo_validate_state(nv40, ib, type); - if (!ret) { - NOUVEAU_ERR("failed state validation\n"); - return FALSE; - } - - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); - - nr = (count & 0xff); - if (nr) { - BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); - start += nr; - } - - nr = count >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); - while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); - - return TRUE; -} - -boolean -nv40_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - /* 0x4497 doesn't support real index buffers, and there doesn't appear - * to be support on any chipset for 8-bit indices. - */ - if (nv40->hw->curie->grclass == NV44TCL || indexSize == 1) { - nv40_draw_elements_inline(pipe, indexBuffer, indexSize, - mode, start, count); - } else { - nv40_draw_elements_vbo(pipe, indexBuffer, indexSize, - mode, start, count); - } - - pipe->flush(pipe, 0); - return TRUE; -} - - diff --git a/src/mesa/pipe/nv40/nv40_vertprog.c b/src/mesa/pipe/nv40/nv40_vertprog.c deleted file mode 100644 index d57e3ca350b..00000000000 --- a/src/mesa/pipe/nv40/nv40_vertprog.c +++ /dev/null @@ -1,790 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" - -#include "nv40_context.h" -#include "nv40_state.h" - -/* TODO (at least...): - * 1. Indexed consts + ARL - * 2. Arb. swz/negation - * 3. NV_vp11, NV_vp2, NV_vp3 features - * - extra arith opcodes - * - branching - * - texture sampling - * - indexed attribs - * - indexed results - * 4. bugs - */ - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv40_shader.h" - -#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv40_sr_neg((s)) -#define abs(s) nv40_sr_abs((s)) - -struct nv40_vpc { - struct nv40_vertex_program *vp; - - struct nv40_vertex_program_exec *vpi; - - unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; - - int high_temp; - int temp_temp_count; - - struct nv40_sreg *imm; - unsigned nr_imm; -}; - -static struct nv40_sreg -temp(struct nv40_vpc *vpc) -{ - int idx; - - idx = vpc->temp_temp_count++; - idx += vpc->high_temp + 1; - return nv40_sr(NV40SR_TEMP, idx); -} - -static struct nv40_sreg -constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) -{ - struct nv40_vertex_program *vp = vpc->vp; - struct nv40_vertex_program_data *vpd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < vp->nr_consts; idx++) { - if (vp->consts[idx].index == pipe) - return nv40_sr(NV40SR_CONST, idx); - } - } - - idx = vp->nr_consts++; - vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); - vpd = &vp->consts[idx]; - - vpd->index = pipe; - vpd->value[0] = x; - vpd->value[1] = y; - vpd->value[2] = z; - vpd->value[3] = w; - return nv40_sr(NV40SR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) - -static void -emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) -{ - struct nv40_vertex_program *vp = vpc->vp; - uint32_t sr = 0; - - switch (src.type) { - case NV40SR_TEMP: - sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); - break; - case NV40SR_INPUT: - sr |= (NV40_VP_SRC_REG_TYPE_INPUT << - NV40_VP_SRC_REG_TYPE_SHIFT); - vp->ir |= (1 << src.index); - hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); - break; - case NV40SR_CONST: - sr |= (NV40_VP_SRC_REG_TYPE_CONST << - NV40_VP_SRC_REG_TYPE_SHIFT); - assert(vpc->vpi->const_index == -1 || - vpc->vpi->const_index == src.index); - vpc->vpi->const_index = src.index; - break; - case NV40SR_NONE: - sr |= (NV40_VP_SRC_REG_TYPE_INPUT << - NV40_VP_SRC_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV40_VP_SRC_NEGATE; - - if (src.abs) - hw[0] |= (1 << (21 + pos)); - - sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); - - switch (pos) { - case 0: - hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> - NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << - NV40_VP_INST_SRC0L_SHIFT; - break; - case 1: - hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; - break; - case 2: - hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> - NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << - NV40_VP_INST_SRC2L_SHIFT; - break; - default: - assert(0); - } -} - -static void -emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) -{ - struct nv40_vertex_program *vp = vpc->vp; - - switch (dst.type) { - case NV40SR_TEMP: - hw[3] |= NV40_VP_INST_DEST_MASK; - if (slot == 0) { - hw[0] |= (dst.index << - NV40_VP_INST_VEC_DEST_TEMP_SHIFT); - } else { - hw[3] |= (dst.index << - NV40_VP_INST_SCA_DEST_TEMP_SHIFT); - } - break; - case NV40SR_OUTPUT: - switch (dst.index) { - case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - default: - break; - } - - hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); - if (slot == 0) { - hw[0] |= NV40_VP_INST_VEC_RESULT; - hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); - } else { - hw[3] |= NV40_VP_INST_SCA_RESULT; - hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; - } - break; - default: - assert(0); - } -} - -static void -nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, - struct nv40_sreg s2) -{ - struct nv40_vertex_program *vp = vpc->vp; - uint32_t *hw; - - vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); - vpc->vpi = &vp->insns[vp->nr_insns - 1]; - memset(vpc->vpi, 0, sizeof(*vpc->vpi)); - vpc->vpi->const_index = -1; - - hw = vpc->vpi->data; - - hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); - - if (slot == 0) { - hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); - hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; - hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); - } else { - hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); - hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); - hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); - } - - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); -} - -static INLINE struct nv40_sreg -tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nv40_sreg src; - - switch (fsrc->SrcRegister.File) { - case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index); - break; - case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); - break; - case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->SrcRegister.Index]; - break; - case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->SrcRegister.Index) - vpc->high_temp = fsrc->SrcRegister.Index; - src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; - return src; -} - -static INLINE struct nv40_sreg -tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nv40_sreg dst; - - switch (fdst->DstRegister.File) { - case TGSI_FILE_OUTPUT: - dst = nv40_sr(NV40SR_OUTPUT, - vpc->output_map[fdst->DstRegister.Index]); - - break; - case TGSI_FILE_TEMPORARY: - dst = nv40_sr(NV40SR_TEMP, fdst->DstRegister.Index); - if (vpc->high_temp < dst.index) - vpc->high_temp = dst.index; - break; - default: - NOUVEAU_ERR("bad dst file\n"); - break; - } - - return dst; -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, - const struct tgsi_full_instruction *finst) -{ - struct nv40_sreg src[3], dst, tmp; - struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - int mask; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - vpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(vpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->FullSrcRegisters[i]; - switch (fsrc->SrcRegister.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - /*XXX: index comparison is broken now that consts come from - * two different register files. - */ - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); - break; - case TGSI_OPCODE_ARL: - arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DST: - arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_EXP: - arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_FLR: - arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_LG2: - arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LIT: - arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LOG: - arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_MAD: - arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(vpc); - arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, - swz(src[0], X, X, X, X)); - arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(vpc, 1, OP_EX2, dst, mask, none, none, - swz(tmp, X, X, X, X)); - break; - case TGSI_OPCODE_RCP: - arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_RET: - break; - case TGSI_OPCODE_RSQ: - arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_SGE: - arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); - break; - case TGSI_OPCODE_XPD: - tmp = temp(vpc); - arith(vpc, 0, OP_MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - hw = NV40_VP_INST_DEST_POS; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { - hw = NV40_VP_INST_DEST_COL0; - } else - if (fdec->Semantic.SemanticIndex == 1) { - hw = NV40_VP_INST_DEST_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.SemanticIndex == 0) { - hw = NV40_VP_INST_DEST_BFC0; - } else - if (fdec->Semantic.SemanticIndex == 1) { - hw = NV40_VP_INST_DEST_BFC1; - } else { - NOUVEAU_ERR("bad bcolour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV40_VP_INST_DEST_FOGC; - break; - case TGSI_SEMANTIC_PSIZE: - hw = NV40_VP_INST_DEST_PSZ; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - vpc->output_map[fdec->u.DeclarationRange.First] = hw; - return TRUE; -} - -static boolean -nv40_vertprog_prepare(struct nv40_vpc *vpc) -{ - struct tgsi_parse_context p; - int nr_imm = 0; - - tgsi_parse_init(&p, vpc->vp->pipe->tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - nr_imm++; - break; - default: - break; - } - } - tgsi_parse_free(&p); - - if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg)); - assert(vpc->imm); - } - - return TRUE; -} - -void -nv40_vertprog_translate(struct nv40_context *nv40, - struct nv40_vertex_program *vp) -{ - struct tgsi_parse_context parse; - struct nv40_vpc *vpc = NULL; - - vpc = CALLOC(1, sizeof(struct nv40_vpc)); - if (!vpc) - return; - vpc->vp = vp; - vpc->high_temp = -1; - - if (!nv40_vertprog_prepare(vpc)) { - free(vpc); - return; - } - - tgsi_parse_init(&parse, vp->pipe->tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_OUTPUT: - if (!nv40_vertprog_parse_decl_output(vpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); -// assert(imm->Immediate.Size == 4); - vpc->imm[vpc->nr_imm++] = - constant(vpc, -1, - imm->u.ImmediateFloat32[0].Float, - imm->u.ImmediateFloat32[1].Float, - imm->u.ImmediateFloat32[2].Float, - imm->u.ImmediateFloat32[3].Float); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - finst = &parse.FullToken.FullInstruction; - if (!nv40_vertprog_parse_instruction(vpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; - vp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - free(vpc); -} - -void -nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) -{ - struct nouveau_winsys *nvws = nv40->nvws; - struct pipe_winsys *ws = nv40->pipe.winsys; - boolean upload_code = FALSE, upload_data = FALSE; - int i; - - /* Translate TGSI shader into hw bytecode */ - if (!vp->translated) { - nv40_vertprog_translate(nv40, vp); - if (!vp->translated) - assert(0); - } - - /* Allocate hw vtxprog exec slots */ - if (!vp->exec) { - struct nouveau_resource *heap = nv40->hw->vp_exec_heap; - uint vplen = vp->nr_insns; - - if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { - while (heap->next && heap->size < vplen) { - struct nv40_vertex_program *evict; - - evict = heap->next->priv; - nvws->res_free(&evict->exec); - } - - if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) - assert(0); - } - - upload_code = TRUE; - } - - /* Allocate hw vtxprog const slots */ - if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nv40->hw->vp_data_heap; - - if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { - while (heap->next && heap->size < vp->nr_consts) { - struct nv40_vertex_program *evict; - - evict = heap->next->priv; - nvws->res_free(&evict->data); - } - - if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) - assert(0); - } - - /*XXX: handle this some day */ - assert(vp->data->start >= vp->data_start_min); - - upload_data = TRUE; - if (vp->data_start != vp->data->start) - upload_code = TRUE; - } - - /* If exec or data segments moved we need to patch the program to - * fixup offsets and register IDs. - */ - if (vp->exec_start != vp->exec->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nv40_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->has_branch_offset) { - assert(0); - } - } - - vp->exec_start = vp->exec->start; - } - - if (vp->nr_consts && vp->data_start != vp->data->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nv40_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; - vpi->data[1] |= - (vpi->const_index + vp->data->start) << - NV40_VP_INST_CONST_SRC_SHIFT; - - } - } - - vp->data_start = vp->data->start; - } - - /* Update + Upload constant values */ - if (vp->nr_consts) { - float *map = NULL; - - if (nv40->vertprog.constant_buf) { - map = ws->buffer_map(ws, nv40->vertprog.constant_buf, - PIPE_BUFFER_USAGE_CPU_READ); - } - - for (i = 0; i < vp->nr_consts; i++) { - struct nv40_vertex_program_data *vpd = &vp->consts[i]; - - if (vpd->index >= 0) { - if (!upload_data && - !memcmp(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float))) - continue; - memcpy(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float)); - } - - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); - } - - if (map) { - ws->buffer_unmap(ws, nv40->vertprog.constant_buf); - } - } - - /* Upload vtxprog */ - if (upload_code) { -#if 0 - for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); - } -#endif - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); - for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); - } - } - - BEGIN_RING(curie, NV40TCL_VP_START_FROM_ID, 1); - OUT_RING (vp->exec->start); - BEGIN_RING(curie, NV40TCL_VP_ATTRIB_EN, 2); - OUT_RING (vp->ir); - OUT_RING (vp->or); - - nv40->vertprog.active = vp; -} - -void -nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) -{ - if (vp->nr_consts) - free(vp->consts); - if (vp->nr_insns) - free(vp->insns); -} - diff --git a/src/mesa/pipe/nv50/Makefile b/src/mesa/pipe/nv50/Makefile deleted file mode 100644 index d3d011b14b7..00000000000 --- a/src/mesa/pipe/nv50/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv50 - -DRIVER_SOURCES = \ - nv50_clear.c \ - nv50_context.c \ - nv50_draw.c \ - nv50_miptree.c \ - nv50_query.c \ - nv50_state.c \ - nv50_surface.c \ - nv50_vbo.c - -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -include ../Makefile.template - -symlinks: - diff --git a/src/mesa/pipe/nv50/nv50_clear.c b/src/mesa/pipe/nv50/nv50_clear.c deleted file mode 100644 index 552b92f72e2..00000000000 --- a/src/mesa/pipe/nv50/nv50_clear.c +++ /dev/null @@ -1,12 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "nv50_context.h" - -void -nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue) -{ - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); -} diff --git a/src/mesa/pipe/nv50/nv50_context.c b/src/mesa/pipe/nv50/nv50_context.c deleted file mode 100644 index 33c8eebb0bc..00000000000 --- a/src/mesa/pipe/nv50/nv50_context.c +++ /dev/null @@ -1,202 +0,0 @@ -#include "pipe/draw/draw_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_winsys.h" -#include "pipe/p_util.h" - -#include "nv50_context.h" - -static boolean -nv50_is_format_supported(struct pipe_context *pipe, enum pipe_format format, - uint type) -{ - return FALSE; -} - -static const char * -nv50_get_name(struct pipe_context *pipe) -{ - struct nv50_context *nv50 = (struct nv50_context *)pipe; - static char buffer[128]; - - snprintf(buffer, sizeof(buffer), "NV%02X", nv50->chipset); - return buffer; -} - -static const char * -nv50_get_vendor(struct pipe_context *pipe) -{ - return "nouveau"; -} - -static int -nv50_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 32; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 8; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 0; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv50_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static void -nv50_flush(struct pipe_context *pipe, unsigned flags) -{ - struct nv50_context *nv50 = (struct nv50_context *)pipe; - struct nouveau_winsys *nvws = nv50->nvws; - - if (flags & PIPE_FLUSH_WAIT) { - nvws->notifier_reset(nv50->sync, 0); - BEGIN_RING(tesla, 0x104, 1); - OUT_RING (0); - BEGIN_RING(tesla, 0x100, 1); - OUT_RING (0); - } - - FIRE_RING(); - - if (flags & PIPE_FLUSH_WAIT) - nvws->notifier_wait(nv50->sync, 0, 0, 2000); -} - -static void -nv50_destroy(struct pipe_context *pipe) -{ - struct nv50_context *nv50 = (struct nv50_context *)pipe; - - draw_destroy(nv50->draw); - free(nv50); -} - -static boolean -nv50_init_hwctx(struct nv50_context *nv50, int tesla_class) -{ - struct nouveau_winsys *nvws = nv50->nvws; - int ret; - - if ((ret = nvws->grobj_alloc(nvws, tesla_class, &nv50->tesla))) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - BEGIN_RING(tesla, NV50TCL_DMA_NOTIFY, 1); - OUT_RING (nv50->sync->handle); - - FIRE_RING (); - return TRUE; -} - -#define GRCLASS5097_CHIPSETS 0x00000000 -#define GRCLASS8297_CHIPSETS 0x00000010 -struct pipe_context * -nv50_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, - unsigned chipset) -{ - struct nv50_context *nv50; - int tesla_class, ret; - - if ((chipset & 0xf0) != 0x50 && (chipset & 0xf0) != 0x80) { - NOUVEAU_ERR("Not a G8x chipset\n"); - return NULL; - } - - if (GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f))) { - tesla_class = 0x5097; - } else - if (GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) { - tesla_class = 0x8297; - } else { - NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); - return NULL; - } - - nv50 = CALLOC_STRUCT(nv50_context); - if (!nv50) - return NULL; - nv50->chipset = chipset; - nv50->nvws = nvws; - - if ((ret = nvws->notifier_alloc(nvws, 1, &nv50->sync))) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - free(nv50); - return NULL; - } - - if (!nv50_init_hwctx(nv50, tesla_class)) { - free(nv50); - return NULL; - } - - nv50->pipe.winsys = pipe_winsys; - - nv50->pipe.destroy = nv50_destroy; - nv50->pipe.is_format_supported = nv50_is_format_supported; - nv50->pipe.get_name = nv50_get_name; - nv50->pipe.get_vendor = nv50_get_vendor; - nv50->pipe.get_param = nv50_get_param; - nv50->pipe.get_paramf = nv50_get_paramf; - - nv50->pipe.draw_arrays = nv50_draw_arrays; - nv50->pipe.draw_elements = nv50_draw_elements; - nv50->pipe.clear = nv50_clear; - - nv50->pipe.flush = nv50_flush; - - nv50_init_miptree_functions(nv50); - nv50_init_surface_functions(nv50); - nv50_init_state_functions(nv50); - nv50_init_query_functions(nv50); - - nv50->draw = draw_create(); - assert(nv50->draw); - draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); - - return &nv50->pipe; -} - - diff --git a/src/mesa/pipe/nv50/nv50_context.h b/src/mesa/pipe/nv50/nv50_context.h deleted file mode 100644 index 5491c0cbb5d..00000000000 --- a/src/mesa/pipe/nv50/nv50_context.h +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef __NV50_CONTEXT_H__ -#define __NV50_CONTEXT_H__ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "pipe/draw/draw_vertex.h" - -#include "pipe/nouveau/nouveau_winsys.h" -#include "pipe/nouveau/nouveau_gldefs.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv50_context *ctx = nv50 -#include "pipe/nouveau/nouveau_push.h" - -#include "nv50_state.h" - -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) \ - fprintf(stderr, "nouveau: "fmt, ##args); - -struct nv50_context { - struct pipe_context pipe; - struct nouveau_winsys *nvws; - - struct draw_context *draw; - - int chipset; - struct nouveau_grobj *tesla; - struct nouveau_notifier *sync; -}; - - -extern void nv50_init_miptree_functions(struct nv50_context *nv50); -extern void nv50_init_surface_functions(struct nv50_context *nv50); -extern void nv50_init_state_functions(struct nv50_context *nv50); -extern void nv50_init_query_functions(struct nv50_context *nv50); - -/* nv50_draw.c */ -extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); - -/* nv50_vbo.c */ -extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern boolean nv50_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, - unsigned count); - -/* nv50_clear.c */ -extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue); - -#endif diff --git a/src/mesa/pipe/nv50/nv50_draw.c b/src/mesa/pipe/nv50/nv50_draw.c deleted file mode 100644 index 85d347f5e00..00000000000 --- a/src/mesa/pipe/nv50/nv50_draw.c +++ /dev/null @@ -1,55 +0,0 @@ -#include "pipe/draw/draw_private.h" -#include "pipe/p_util.h" - -#include "nv50_context.h" - -struct nv50_draw_stage { - struct draw_stage draw; - struct nv50_context *nv50; -}; - -static void -nv50_draw_point(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv50_draw_line(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv50_draw_tri(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv50_draw_flush(struct draw_stage *draw, unsigned flags) -{ -} - -static void -nv50_draw_reset_stipple_counter(struct draw_stage *draw) -{ - NOUVEAU_ERR("\n"); -} - -struct draw_stage * -nv50_draw_render_stage(struct nv50_context *nv50) -{ - struct nv50_draw_stage *nv50draw = CALLOC_STRUCT(nv50_draw_stage); - - nv50draw->nv50 = nv50; - nv50draw->draw.draw = nv50->draw; - nv50draw->draw.point = nv50_draw_point; - nv50draw->draw.line = nv50_draw_line; - nv50draw->draw.tri = nv50_draw_tri; - nv50draw->draw.flush = nv50_draw_flush; - nv50draw->draw.reset_stipple_counter = nv50_draw_reset_stipple_counter; - - return &nv50draw->draw; -} - diff --git a/src/mesa/pipe/nv50/nv50_miptree.c b/src/mesa/pipe/nv50/nv50_miptree.c deleted file mode 100644 index 0c034ed4387..00000000000 --- a/src/mesa/pipe/nv50/nv50_miptree.c +++ /dev/null @@ -1,25 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" - -#include "nv50_context.h" - -static struct pipe_texture * -nv50_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) -{ - NOUVEAU_ERR("unimplemented\n"); - return NULL; -} - -static void -nv50_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - NOUVEAU_ERR("unimplemented\n"); -} - -void -nv50_init_miptree_functions(struct nv50_context *nv50) -{ - nv50->pipe.texture_create = nv50_miptree_create; - nv50->pipe.texture_release = nv50_miptree_release; -} diff --git a/src/mesa/pipe/nv50/nv50_query.c b/src/mesa/pipe/nv50/nv50_query.c deleted file mode 100644 index d8c3491c2c5..00000000000 --- a/src/mesa/pipe/nv50/nv50_query.c +++ /dev/null @@ -1,47 +0,0 @@ -#include "pipe/p_context.h" - -#include "nv50_context.h" - -static struct pipe_query * -nv50_query_create(struct pipe_context *pipe, unsigned type) -{ - NOUVEAU_ERR("unimplemented\n"); - return NULL; -} - -static void -nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *q) -{ - NOUVEAU_ERR("unimplemented\n"); -} - -static void -nv50_query_begin(struct pipe_context *pipe, struct pipe_query *q) -{ - NOUVEAU_ERR("unimplemented\n"); -} - -static void -nv50_query_end(struct pipe_context *pipe, struct pipe_query *q) -{ - NOUVEAU_ERR("unimplemented\n"); -} - -static boolean -nv50_query_result(struct pipe_context *pipe, struct pipe_query *q, - boolean wait, uint64 *result) -{ - NOUVEAU_ERR("unimplemented\n"); - *result = 0xdeadcafe; - return TRUE; -} - -void -nv50_init_query_functions(struct nv50_context *nv50) -{ - nv50->pipe.create_query = nv50_query_create; - nv50->pipe.destroy_query = nv50_query_destroy; - nv50->pipe.begin_query = nv50_query_begin; - nv50->pipe.end_query = nv50_query_end; - nv50->pipe.get_query_result = nv50_query_result; -} diff --git a/src/mesa/pipe/nv50/nv50_state.c b/src/mesa/pipe/nv50/nv50_state.c deleted file mode 100644 index 99dcab51b26..00000000000 --- a/src/mesa/pipe/nv50/nv50_state.c +++ /dev/null @@ -1,213 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" - -#include "nv50_context.h" -#include "nv50_state.h" - -static void * -nv50_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - return NULL; -} - -static void -nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) -{ -} - -static void -nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) -{ -} - -static void * -nv50_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - return NULL; -} - -static void -nv50_sampler_state_bind(struct pipe_context *pipe, unsigned unit, - void *hwcso) -{ -} - -static void -nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ -} - -static void -nv50_set_sampler_texture(struct pipe_context *pipe, unsigned unit, - struct pipe_texture *pt) -{ -} - -static void * -nv50_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - return NULL; -} - -static void -nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) -{ -} - -static void -nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) -{ -} - -static void * -nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - return NULL; -} - -static void -nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) -{ -} - -static void -nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) -{ -} - -static void * -nv50_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - return NULL; -} - -static void -nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) -{ -} - -static void -nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ -} - -static void * -nv50_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - return NULL; -} - -static void -nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ -} - -static void -nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ -} - -static void -nv50_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) -{ -} - -static void -nv50_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ -} - -static void -nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) -{ -} - -static void -nv50_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ -} - -static void -nv50_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ -} - -static void -nv50_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) -{ -} - -static void -nv50_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *vpt) -{ -} - -static void -nv50_set_vertex_buffer(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_buffer *vb) -{ -} - -static void -nv50_set_vertex_element(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_element *ve) -{ -} - -void -nv50_init_state_functions(struct nv50_context *nv50) -{ - nv50->pipe.create_blend_state = nv50_blend_state_create; - nv50->pipe.bind_blend_state = nv50_blend_state_bind; - nv50->pipe.delete_blend_state = nv50_blend_state_delete; - - nv50->pipe.create_sampler_state = nv50_sampler_state_create; - nv50->pipe.bind_sampler_state = nv50_sampler_state_bind; - nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; - nv50->pipe.set_sampler_texture = nv50_set_sampler_texture; - - nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; - nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; - nv50->pipe.delete_rasterizer_state = nv50_rasterizer_state_delete; - - nv50->pipe.create_depth_stencil_alpha_state = - nv50_depth_stencil_alpha_state_create; - nv50->pipe.bind_depth_stencil_alpha_state = - nv50_depth_stencil_alpha_state_bind; - nv50->pipe.delete_depth_stencil_alpha_state = - nv50_depth_stencil_alpha_state_delete; - - nv50->pipe.create_vs_state = nv50_vp_state_create; - nv50->pipe.bind_vs_state = nv50_vp_state_bind; - nv50->pipe.delete_vs_state = nv50_vp_state_delete; - - nv50->pipe.create_fs_state = nv50_fp_state_create; - nv50->pipe.bind_fs_state = nv50_fp_state_bind; - nv50->pipe.delete_fs_state = nv50_fp_state_delete; - - nv50->pipe.set_blend_color = nv50_set_blend_color; - nv50->pipe.set_clip_state = nv50_set_clip_state; - nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; - nv50->pipe.set_framebuffer_state = nv50_set_framebuffer_state; - nv50->pipe.set_polygon_stipple = nv50_set_polygon_stipple; - nv50->pipe.set_scissor_state = nv50_set_scissor_state; - nv50->pipe.set_viewport_state = nv50_set_viewport_state; - - nv50->pipe.set_vertex_buffer = nv50_set_vertex_buffer; - nv50->pipe.set_vertex_element = nv50_set_vertex_element; -} - diff --git a/src/mesa/pipe/nv50/nv50_state.h b/src/mesa/pipe/nv50/nv50_state.h deleted file mode 100644 index a3b781d4c61..00000000000 --- a/src/mesa/pipe/nv50/nv50_state.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __NV50_STATE_H__ -#define __NV50_STATE_H__ - -#include "pipe/p_state.h" - - -#endif diff --git a/src/mesa/pipe/nv50/nv50_surface.c b/src/mesa/pipe/nv50/nv50_surface.c deleted file mode 100644 index cfb370da77e..00000000000 --- a/src/mesa/pipe/nv50/nv50_surface.c +++ /dev/null @@ -1,74 +0,0 @@ - -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "nv50_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "pipe/p_winsys.h" -#include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" - -static struct pipe_surface * -nv50_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - NOUVEAU_ERR("unimplemented\n"); - return NULL; -} - -static void -nv50_surface_copy(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, struct pipe_surface *src, - unsigned srcx, unsigned srcy, unsigned width, unsigned height) -{ - struct nv50_context *nv50 = (struct nv50_context *)pipe; - struct nouveau_winsys *nvws = nv50->nvws; - - nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, - width, height); -} - -static void -nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) -{ - struct nv50_context *nv50 = (struct nv50_context *)pipe; - struct nouveau_winsys *nvws = nv50->nvws; - - nvws->surface_fill(nvws, dest, destx, desty, width, height, value); -} - -void -nv50_init_surface_functions(struct nv50_context *nv50) -{ - nv50->pipe.get_tex_surface = nv50_get_tex_surface; - nv50->pipe.surface_copy = nv50_surface_copy; - nv50->pipe.surface_fill = nv50_surface_fill; -} diff --git a/src/mesa/pipe/nv50/nv50_vbo.c b/src/mesa/pipe/nv50/nv50_vbo.c deleted file mode 100644 index 6c0dc23a439..00000000000 --- a/src/mesa/pipe/nv50/nv50_vbo.c +++ /dev/null @@ -1,24 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "pipe/p_util.h" - -#include "nv50_context.h" -#include "nv50_state.h" - -boolean -nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, - unsigned count) -{ - NOUVEAU_ERR("unimplemented\n"); - return TRUE; -} - -boolean -nv50_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - NOUVEAU_ERR("unimplemented\n"); - return TRUE; -} - -- cgit v1.2.3 From 08c9534107fcaf06f9b801551524ed5dc724db13 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Sat, 16 Feb 2008 10:05:01 -0700 Subject: gallium: implement min vs. mag filter determination for non-mipmapped textures Fixes tests/minmag.c --- src/gallium/drivers/softpipe/sp_tex_sample.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 2f82fd6abea..c0128f81d71 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -476,6 +476,19 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, /* no mipmap selection needed */ *imgFilter = sampler->state->mag_img_filter; *level0 = *level1 = (int) sampler->state->min_lod; + + if (sampler->state->min_img_filter != sampler->state->mag_img_filter) { + /* non-mipmapped texture, but still need to determine if doing + * minification or magnification. + */ + float lambda = compute_lambda(sampler, s, t, p, lodbias); + if (lambda < 0.5) { /* XXX this may need tweaking... */ + *imgFilter = sampler->state->mag_img_filter; + } + else { + *imgFilter = sampler->state->min_img_filter; + } + } } else { float lambda; -- cgit v1.2.3 From 3b2a291888d8e62787de03f8529806fb562bd186 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Sat, 16 Feb 2008 13:50:31 -0700 Subject: gallium: tweak texture filter min/mag thresholds --- src/gallium/drivers/softpipe/sp_tex_sample.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index c0128f81d71..a15bd43166d 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -474,7 +474,6 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, { if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* no mipmap selection needed */ - *imgFilter = sampler->state->mag_img_filter; *level0 = *level1 = (int) sampler->state->min_lod; if (sampler->state->min_img_filter != sampler->state->mag_img_filter) { @@ -482,13 +481,16 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, * minification or magnification. */ float lambda = compute_lambda(sampler, s, t, p, lodbias); - if (lambda < 0.5) { /* XXX this may need tweaking... */ + if (lambda <= 0.0) { *imgFilter = sampler->state->mag_img_filter; } else { *imgFilter = sampler->state->min_img_filter; } } + else { + *imgFilter = sampler->state->mag_img_filter; + } } else { float lambda; @@ -500,7 +502,7 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, /* vertex shader */ lambda = lodbias; /* not really a bias, but absolute LOD */ - if (lambda < 0.0) { /* XXX threshold depends on the filter */ + if (lambda <= 0.0) { /* XXX threshold depends on the filter */ /* magnifying */ *imgFilter = sampler->state->mag_img_filter; *level0 = *level1 = 0; -- cgit v1.2.3 From 0c6bbd41bd6dc1041eaca7c907d3768d107c1afa Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Sat, 16 Feb 2008 13:55:47 -0700 Subject: gallium: add missing mip level clamp --- src/gallium/drivers/softpipe/sp_tex_sample.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index a15bd43166d..c54e9d385c2 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -474,7 +474,8 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, { if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* no mipmap selection needed */ - *level0 = *level1 = (int) sampler->state->min_lod; + *level0 = *level1 = CLAMP((int) sampler->state->min_lod, + 0, (int) sampler->texture->last_level); if (sampler->state->min_img_filter != sampler->state->mag_img_filter) { /* non-mipmapped texture, but still need to determine if doing -- cgit v1.2.3 From 5e091b573aa0a0c45f8ff34429f2a9d4198bb80a Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 16 Feb 2008 21:27:53 +1100 Subject: nv40: ensure scissor gets disabled where necessary Fixes progs/demos/lodbias. Makes a complete mess of things, but now there's a motivation to finish this off :) --- src/gallium/drivers/nv40/nv40_context.h | 19 ++++++++- src/gallium/drivers/nv40/nv40_state.c | 23 +++++------ src/gallium/drivers/nv40/nv40_state_emit.c | 65 +++++++++++++++++++++++++++++- 3 files changed, 91 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index f511759e3be..c4523112dbc 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -54,6 +54,11 @@ struct nv40_channel_context { struct nouveau_resource *vp_data_heap; }; +struct nv40_rasterizer_state { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + struct nv40_context { struct pipe_context pipe; struct nouveau_winsys *nvws; @@ -63,7 +68,8 @@ struct nv40_context { int chipset; - uint32_t dirty; + unsigned dirty; + unsigned hw_dirty; struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; @@ -71,14 +77,23 @@ struct nv40_context { unsigned fp_samplers; unsigned vp_samplers; + struct { + struct pipe_scissor_state scissor; + } pipe_state; + + struct { + unsigned scissor_enabled; + struct nouveau_stateobj *scissor; + } state; + struct nouveau_stateobj *so_framebuffer; struct nouveau_stateobj *so_fragtex[16]; struct nouveau_stateobj *so_vtxbuf; struct nouveau_stateobj *so_blend; + struct nv40_rasterizer_state *rasterizer; struct nouveau_stateobj *so_rast; struct nouveau_stateobj *so_zsa; struct nouveau_stateobj *so_bcol; - struct nouveau_stateobj *so_scissor; struct nouveau_stateobj *so_viewport; struct nouveau_stateobj *so_stipple; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 713f31dbb12..bb9b6b139f0 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -278,12 +278,12 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_rasterizer_state *rsso = MALLOC(sizeof(*rsso)); struct nouveau_stateobj *so = so_new(32, 0); /*XXX: ignored: * light_twoside * offset_cw/ccw -nohw - * scissor * point_smooth -nohw * multisample * offset_units / offset_scale @@ -362,24 +362,29 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, so_data(so, 0); } - return (void *)so; + rsso->so = so; + rsso->pipe = *cso; + return (void *)rsso; } static void nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_rasterizer_state *rsso = hwcso; - so_ref(hwcso, &nv40->so_rast); + so_ref(rsso->so, &nv40->so_rast); + nv40->rasterizer = rsso; nv40->dirty |= NV40_NEW_RAST; } static void nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nouveau_stateobj *so = hwcso; + struct nv40_rasterizer_state *rsso = hwcso; - so_ref(NULL, &so); + so_ref(NULL, &rsso->so); + free(rsso); } static void * @@ -723,14 +728,8 @@ nv40_set_scissor_state(struct pipe_context *pipe, const struct pipe_scissor_state *s) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(3, 0); - so_method(so, nv40->hw->curie, NV40TCL_SCISSOR_HORIZ, 2); - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); - - so_ref(so, &nv40->so_scissor); - so_ref(NULL, &so); + nv40->pipe_state.scissor = *s; nv40->dirty |= NV40_NEW_SCISSOR; } diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index a10c9955480..74306fe22b3 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -22,9 +22,68 @@ nv40_state_emit_dummy_relocs(struct nv40_context *nv40) so_emit_reloc_markers(nv40->nvws, nv40->fragprog.active->so); } +static boolean +nv40_state_scissor_validate(struct nv40_context *nv40) +{ + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct pipe_scissor_state *s = &nv40->pipe_state.scissor; + struct nouveau_stateobj *so; + + if (nv40->state.scissor && + (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) + return FALSE; + + so = so_new(3, 0); + so_method(so, nv40->hw->curie, NV40TCL_SCISSOR_HORIZ, 2); + if (rast->scissor) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nv40->state.scissor); + so_ref(NULL, &so); + return TRUE; +} + +struct nv40_state_atom { + boolean (*validate)(struct nv40_context *nv40); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + +static struct nv40_state_atom states[] = { + { + .validate = nv40_state_scissor_validate, + .dirty = { + .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, + .hw = NV40_NEW_SCISSOR, + } + } +}; + +static void +nv40_state_validate(struct nv40_context *nv40) +{ + unsigned i; + + for (i = 0; i < sizeof(states) / sizeof(states[0]); i++) { + if (nv40->dirty & states[i].dirty.pipe) { + if (states[i].validate(nv40)) + nv40->hw_dirty |= states[i].dirty.hw; + } + } +} + void nv40_emit_hw_state(struct nv40_context *nv40) { + nv40_state_validate(nv40); + if (nv40->dirty & NV40_NEW_FB) so_emit(nv40->nvws, nv40->so_framebuffer); @@ -40,8 +99,10 @@ nv40_emit_hw_state(struct nv40_context *nv40) if (nv40->dirty & NV40_NEW_BCOL) so_emit(nv40->nvws, nv40->so_bcol); - if (nv40->dirty & NV40_NEW_SCISSOR) - so_emit(nv40->nvws, nv40->so_scissor); + if (nv40->hw_dirty & NV40_NEW_SCISSOR) { + so_emit(nv40->nvws, nv40->state.scissor); + nv40->hw_dirty &= ~NV40_NEW_SCISSOR; + } if (nv40->dirty & NV40_NEW_VIEWPORT) so_emit(nv40->nvws, nv40->so_viewport); -- cgit v1.2.3 From 12e0aa7b1d587b7c30897762d2f8f368a4a7d453 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 18 Feb 2008 14:12:58 +1100 Subject: nv40: similar changes to polygon stipple as were done for scissor --- src/gallium/drivers/nv40/nv40_context.h | 18 ++++++++--- src/gallium/drivers/nv40/nv40_state.c | 9 +----- src/gallium/drivers/nv40/nv40_state_emit.c | 52 ++++++++++++++++++++++++++---- 3 files changed, 61 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index c4523112dbc..7d5806f5f73 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -59,6 +59,18 @@ struct nv40_rasterizer_state { struct nouveau_stateobj *so; }; +struct nv40_state { + struct { + unsigned enabled; + struct nouveau_stateobj *so; + } scissor; + + struct { + unsigned enabled; + struct nouveau_stateobj *so; + } stipple; +}; + struct nv40_context { struct pipe_context pipe; struct nouveau_winsys *nvws; @@ -79,12 +91,10 @@ struct nv40_context { struct { struct pipe_scissor_state scissor; + unsigned stipple[32]; } pipe_state; - struct { - unsigned scissor_enabled; - struct nouveau_stateobj *scissor; - } state; + struct nv40_state state; struct nouveau_stateobj *so_framebuffer; struct nouveau_stateobj *so_fragtex[16]; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index bb9b6b139f0..a36efd37f6a 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -711,15 +711,8 @@ nv40_set_polygon_stipple(struct pipe_context *pipe, const struct pipe_poly_stipple *stipple) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(33, 0); - unsigned i; - so_method(so, nv40->hw->curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, stipple->stipple[i]); - - so_ref(so, &nv40->so_stipple); - so_ref(NULL, &so); + memcpy(nv40->pipe_state.stipple, stipple->stipple, 4 * 32); nv40->dirty |= NV40_NEW_STIPPLE; } diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 74306fe22b3..244c6838f34 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -29,8 +29,8 @@ nv40_state_scissor_validate(struct nv40_context *nv40) struct pipe_scissor_state *s = &nv40->pipe_state.scissor; struct nouveau_stateobj *so; - if (nv40->state.scissor && - (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) + if (nv40->state.scissor.so && + (rast->scissor == 0 && nv40->state.scissor.enabled == 0)) return FALSE; so = so_new(3, 0); @@ -43,7 +43,38 @@ nv40_state_scissor_validate(struct nv40_context *nv40) so_data (so, 4096 << 16); } - so_ref(so, &nv40->state.scissor); + so_ref(so, &nv40->state.scissor.so); + so_ref(NULL, &so); + return TRUE; +} + +static boolean +nv40_state_stipple_validate(struct nv40_context *nv40) +{ + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct nouveau_grobj *curie = nv40->hw->curie; + struct nouveau_stateobj *so; + + if (nv40->state.stipple.so && (rast->poly_stipple_enable == 0 && + nv40->state.stipple.enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(35, 0); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv40->pipe_state.stipple[i]); + } else { + so = so_new(2, 0); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nv40->state.stipple.so); so_ref(NULL, &so); return TRUE; } @@ -63,6 +94,13 @@ static struct nv40_state_atom states[] = { .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, .hw = NV40_NEW_SCISSOR, } + }, + { + .validate = nv40_state_stipple_validate, + .dirty = { + .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, + .hw = NV40_NEW_STIPPLE, + } } }; @@ -100,15 +138,17 @@ nv40_emit_hw_state(struct nv40_context *nv40) so_emit(nv40->nvws, nv40->so_bcol); if (nv40->hw_dirty & NV40_NEW_SCISSOR) { - so_emit(nv40->nvws, nv40->state.scissor); + so_emit(nv40->nvws, nv40->state.scissor.so); nv40->hw_dirty &= ~NV40_NEW_SCISSOR; } if (nv40->dirty & NV40_NEW_VIEWPORT) so_emit(nv40->nvws, nv40->so_viewport); - if (nv40->dirty & NV40_NEW_STIPPLE) - so_emit(nv40->nvws, nv40->so_stipple); + if (nv40->hw_dirty & NV40_NEW_STIPPLE) { + so_emit(nv40->nvws, nv40->state.stipple.so); + nv40->hw_dirty &= ~NV40_NEW_STIPPLE; + } if (nv40->dirty & NV40_NEW_FRAGPROG) { nv40_fragprog_bind(nv40, nv40->fragprog.current); -- cgit v1.2.3 From 037570fa5a6cf83d3aaaa6cdacc10eb0b5da45bf Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 18 Feb 2008 15:17:39 +1100 Subject: nv40: obey polygon offset state Almost sure this isn't entirely correct. However, I'm not sure what gallium expects yet, and this fixes some bugs, so it'll do for now. --- src/gallium/drivers/nv40/nv40_state.c | 39 +++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index a36efd37f6a..fec8f946c35 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -280,31 +280,30 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, struct nv40_context *nv40 = nv40_context(pipe); struct nv40_rasterizer_state *rsso = MALLOC(sizeof(*rsso)); struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *curie = nv40->hw->curie; /*XXX: ignored: * light_twoside - * offset_cw/ccw -nohw * point_smooth -nohw * multisample - * offset_units / offset_scale */ - so_method(so, nv40->hw->curie, NV40TCL_SHADE_MODEL, 1); + so_method(so, curie, NV40TCL_SHADE_MODEL, 1); so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : NV40TCL_SHADE_MODEL_SMOOTH); - so_method(so, nv40->hw->curie, NV40TCL_LINE_WIDTH, 2); + so_method(so, curie, NV40TCL_LINE_WIDTH, 2); so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); so_data (so, cso->line_smooth ? 1 : 0); - so_method(so, nv40->hw->curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); + so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); so_data (so, cso->line_stipple_enable ? 1 : 0); so_data (so, (cso->line_stipple_pattern << 16) | cso->line_stipple_factor); - so_method(so, nv40->hw->curie, NV40TCL_POINT_SIZE, 1); + so_method(so, curie, NV40TCL_POINT_SIZE, 1); so_data (so, fui(cso->point_size)); - so_method(so, nv40->hw->curie, NV40TCL_POLYGON_MODE_FRONT, 6); + so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6); if (cso->front_winding == PIPE_WINDING_CCW) { so_data(so, nvgl_polygon_mode(cso->fill_ccw)); so_data(so, nvgl_polygon_mode(cso->fill_cw)); @@ -345,10 +344,32 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, so_data(so, cso->poly_smooth ? 1 : 0); so_data(so, cso->cull_mode != PIPE_WINDING_NONE ? 1 : 0); - so_method(so, nv40->hw->curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, cso->poly_stipple_enable ? 1 : 0); - so_method(so, nv40->hw->curie, NV40TCL_POINT_SPRITE, 1); + so_method(so, curie, 0x0a60, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, curie, NV40TCL_POINT_SPRITE, 1); if (cso->point_sprite) { unsigned psctl = (1 << 0), i; -- cgit v1.2.3 From f911235f64d610e57da88487133d0483c7a094e7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 18 Feb 2008 15:31:55 +1100 Subject: nouveau: header update --- src/gallium/drivers/nouveau/nouveau_class.h | 4 ++-- src/gallium/drivers/nv40/nv40_state.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 5998945677b..c80461038b7 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -4926,9 +4926,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40TCL_VIEWPORT_SCALE_Y 0x00000a34 #define NV40TCL_VIEWPORT_SCALE_Z 0x00000a38 #define NV40TCL_VIEWPORT_SCALE_W 0x00000a3c -#define NV40TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a60 +#define NV40TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a60 #define NV40TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 -#define NV40TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a68 +#define NV40TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a68 #define NV40TCL_DEPTH_FUNC 0x00000a6c #define NV40TCL_DEPTH_FUNC_NEVER 0x00000200 #define NV40TCL_DEPTH_FUNC_LESS 0x00000201 diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index fec8f946c35..d7379e90905 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -347,7 +347,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, cso->poly_stipple_enable ? 1 : 0); - so_method(so, curie, 0x0a60, 3); + so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3); if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) so_data(so, 1); -- cgit v1.2.3 From bfd5916eafb9a97ad10f1d4a8738e7dcb02e04f4 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 18 Feb 2008 14:25:04 +0900 Subject: Code reorganization: split gallium and mesa makefiles. In other words, don't build src/gallium source code from within src/mesa/Makefile. Also, allow to customize which gallium auxiliary dirs, driver driver, winsys dirs get built from the config/* files. --- configs/default | 12 +++- configs/linux-cell | 3 + configs/linux-dri | 6 +- configs/linux-llvm | 2 + src/gallium/Makefile.template | 1 + src/gallium/auxiliary/Makefile | 6 +- src/gallium/auxiliary/cso_cache/Makefile | 18 +++++ src/gallium/auxiliary/draw/Makefile | 41 ++++++++++- src/gallium/auxiliary/tgsi/Makefile | 24 ++++++- src/gallium/auxiliary/tgsi/exec/Makefile | 3 - src/gallium/auxiliary/util/Makefile | 20 ++++++ src/gallium/drivers/Makefile | 6 +- src/gallium/winsys/Makefile | 20 ++++++ src/gallium/winsys/dri/Makefile.template | 6 +- src/gallium/winsys/xlib/Makefile | 113 +++++++++++++++++++++++++++++++ src/mesa/Makefile | 74 ++------------------ src/mesa/sources | 56 +-------------- 17 files changed, 264 insertions(+), 147 deletions(-) create mode 100644 src/gallium/auxiliary/cso_cache/Makefile delete mode 100644 src/gallium/auxiliary/tgsi/exec/Makefile create mode 100644 src/gallium/auxiliary/util/Makefile create mode 100644 src/gallium/winsys/Makefile create mode 100644 src/gallium/winsys/xlib/Makefile (limited to 'src/gallium/drivers') diff --git a/configs/default b/configs/default index 25a87e66a1b..7f659725cba 100644 --- a/configs/default +++ b/configs/default @@ -60,13 +60,21 @@ GLW_SOURCES = GLwDrawA.c # Directories to build LIB_DIR = lib -SRC_DIRS = gallium mesa glu glut/glx glw +SRC_DIRS = gallium mesa gallium/winsys glu glut/glx glw GLU_DIRS = sgi -DRIVER_DIRS = x11 osmesa +DRIVER_DIRS = # Which subdirs under $(TOP)/progs/ to enter: PROGRAM_DIRS = demos redbook samples glsl xdemos +# Gallium directories and +GALLIUM_AUXILIARY_DIRS = draw cso_cache pipebuffer tgsi util +GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) +GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple failover +GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVER_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) +GALLIUM_WINSYS_DIRS = xlib + + # Library/program dependencies #EXTRA_LIB_PATH ?= GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread diff --git a/configs/linux-cell b/configs/linux-cell index 09f62fc1ff0..807ede75a7e 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -5,6 +5,9 @@ include $(TOP)/configs/default CONFIG_NAME = linux-cell +GALLIUM_DRIVER_DIRS += cell + + # Compiler and flags CC = ppu32-gcc CXX = ppu32-g++ diff --git a/configs/linux-dri b/configs/linux-dri index e6135856fc1..68afcb1d28d 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -54,15 +54,13 @@ USING_EGL=0 # Directories ifeq ($(USING_EGL), 1) -SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw +SRC_DIRS := egl $(SRC_DIRS) PROGRAM_DIRS = egl -else -SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw -PROGRAM_DIRS = endif DRIVER_DIRS = dri WINDOW_SYSTEM=dri +GALLIUM_WINSYS_DIRS = dri # gamma are missing because they have not been converted to use the new # interface. diff --git a/configs/linux-llvm b/configs/linux-llvm index 915189f4625..a8889321a02 100644 --- a/configs/linux-llvm +++ b/configs/linux-llvm @@ -5,6 +5,8 @@ include $(TOP)/configs/linux CONFIG_NAME = linux-llvm +GALLIUM_AUXILIARY_DIRS += llvm + OPT_FLAGS = -g -ansi -pedantic DEFINES += -DDEBUG -DDEBUG_MATH -DMESA_LLVM=1 diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 83b25f9b47c..6ad58c205c2 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -20,6 +20,7 @@ INCLUDES = \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ -I$(TOP)/include \ $(DRIVER_INCLUDES) diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index da68498aa1f..eaa0f2fe4e8 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -2,11 +2,7 @@ TOP = ../../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_DIR = llvm -endif - -SUBDIRS = pipebuffer $(LLVM_DIR) +SUBDIRS = $(GALLIUM_AUXILIARY_DIRS) default: subdirs diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile new file mode 100644 index 00000000000..8248b097fde --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -0,0 +1,18 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = cso_cache + +DRIVER_SOURCES = \ + cso_cache.c \ + cso_hash.c + +C_SOURCES = \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index fe9b150f304..c56b63d85b8 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -1,2 +1,39 @@ -default: - cd ../../../mesa ; make +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = draw + +DRIVER_SOURCES = \ + draw_clip.c \ + draw_vs_exec.c \ + draw_vs_sse.c \ + draw_vs_llvm.c \ + draw_context.c\ + draw_cull.c \ + draw_debug.c \ + draw_flatshade.c \ + draw_offset.c \ + draw_prim.c \ + draw_stipple.c \ + draw_twoside.c \ + draw_unfilled.c \ + draw_validate.c \ + draw_vbuf.c \ + draw_vertex.c \ + draw_vertex_cache.c \ + draw_vertex_fetch.c \ + draw_vertex_shader.c \ + draw_vf.c \ + draw_vf_generic.c \ + draw_vf_sse.c \ + draw_wide_prims.c + +C_SOURCES = \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 12a8bd0409e..c10ab396d8d 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -1,3 +1,23 @@ -default: - cd ../.. ; make + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = tgsi + +DRIVER_SOURCES = \ + exec/tgsi_exec.c \ + exec/tgsi_sse2.c \ + util/tgsi_build.c \ + util/tgsi_dump.c \ + util/tgsi_parse.c \ + util/tgsi_util.c + +C_SOURCES = \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile deleted file mode 100644 index eb8b14e0e89..00000000000 --- a/src/gallium/auxiliary/tgsi/exec/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -default: - cd ../../.. ; make - diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile new file mode 100644 index 00000000000..b8cb148c4f5 --- /dev/null +++ b/src/gallium/auxiliary/util/Makefile @@ -0,0 +1,20 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = util + +DRIVER_SOURCES = \ + p_debug.c \ + p_tile.c \ + p_util.c + +C_SOURCES = \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile index c0345a9cb54..8dcfb18a16f 100644 --- a/src/gallium/drivers/Makefile +++ b/src/gallium/drivers/Makefile @@ -2,11 +2,7 @@ TOP = ../../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-cell) -CELL_DIR = cell -endif - -SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) +SUBDIRS = softpipe i915simple i965simple failover default: subdirs diff --git a/src/gallium/winsys/Makefile b/src/gallium/winsys/Makefile new file mode 100644 index 00000000000..3dc5ea57441 --- /dev/null +++ b/src/gallium/winsys/Makefile @@ -0,0 +1,20 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +SUBDIRS = $(GALLIUM_WINSYS_DIRS) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template index 2a261ed6694..65a93bd53e3 100644 --- a/src/gallium/winsys/dri/Makefile.template +++ b/src/gallium/winsys/dri/Makefile.template @@ -1,7 +1,9 @@ # -*-makefile-*- -MESA_MODULES = $(TOP)/src/mesa/libmesa.a - +MESA_MODULES = \ + $(TOP)/src/mesa/libmesa.a \ + $(GALLIUM_AUXILIARIES) + COMMON_GALLIUM_SOURCES = \ $(TOP)/src/mesa/drivers/dri/common/utils.c \ $(TOP)/src/mesa/drivers/dri/common/vblank.c \ diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile new file mode 100644 index 00000000000..2664ac47ceb --- /dev/null +++ b/src/gallium/winsys/xlib/Makefile @@ -0,0 +1,113 @@ +# src/mesa/Makefile + +TOP = ../../../.. +include $(TOP)/configs/current + + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary + +X11_DRIVER_SOURCES = \ + glxapi.c \ + fakeglx.c \ + xfonts.c \ + xm_api.c \ + xm_winsys.c \ + xm_winsys_aub.c \ + brw_aub.c + + +GL_MAJOR = 1 +GL_MINOR = 5 +GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) + + +PIPE_LIB = \ + $(GALLIUM_DRIVERS) \ + $(TOP)/src/mesa/libglapi.a \ + $(TOP)/src/mesa/libmesa.a \ + $(GALLIUM_AUXILIARIES) + +ifeq ($(CONFIG_NAME), linux-cell) +CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a +CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a +endif + +ifeq ($(CONFIG_NAME), linux-llvm) +LLVM_LIB = $(TOP)/src/gallium/auxiliary/llvm/libgallivm.a +endif + + +.SUFFIXES : .cpp + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDE_DIRS) $(CXXFLAGS) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + +default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) + + +###################################################################### +# Stand-alone Mesa libGL and libOSMesa +STAND_ALONE_DRIVER_SOURCES = \ + $(X11_DRIVER_SOURCES) + +STAND_ALONE_DRIVER_OBJECTS = $(STAND_ALONE_DRIVER_SOURCES:.c=.o) + +STAND_ALONE_OBJECTS = \ + $(STAND_ALONE_DRIVER_OBJECTS) + +# Make the GL library +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(LLVM_LIB) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) + $(TOP)/bin/mklib -o $(GL_LIB) \ + -linker "$(CC)" \ + -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ + -install $(TOP)/$(LIB_DIR) \ + $(MKLIB_OPTIONS) $(STAND_ALONE_OBJECTS) \ + --start-group $(PIPE_LIB) $(LLVM_LIB) --end-group $(CELL_LIB) $(CELL_LIB_SPU) $(GL_LIB_DEPS) + + +###################################################################### +# Generic stuff + +depend: $(ALL_SOURCES) + @ echo "running $(MKDEP)" + @ rm -f depend # workaround oops on gutsy?!? + @ touch depend + @ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \ + > /dev/null 2>/dev/null + + +install: default + $(INSTALL) -d $(INSTALL_DIR)/include/GL + $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR) + $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL + @if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \ + $(INSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \ + fi + +## NOT INSTALLED YET: +## $(INSTALL) -d $(INSTALL_DIR)/include/GLES +## $(INSTALL) -m 644 include/GLES/*.h $(INSTALL_DIR)/include/GLES + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h + +clean: + -rm -f *.o + + +include depend diff --git a/src/mesa/Makefile b/src/mesa/Makefile index c8cb2b592fe..86a9cf0b88c 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -11,19 +11,6 @@ GL_MINOR = 5 GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) -PIPE_LIB = \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/drivers/i965simple/libi965simple.a - -ifeq ($(CONFIG_NAME), linux-cell) -CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a -CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a -endif - -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_LIB = $(TOP)/src/gallium/auxiliary/llvm/libgallivm.a -endif - .SUFFIXES : .cpp .c.o: @@ -36,33 +23,14 @@ endif $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ -# Figure out what to make here -default: - @if [ "${DRIVER_DIRS}" = "dri" ] ; then \ - $(MAKE) linux-solo ; \ - elif [ "${DRIVER_DIRS}" = "osmesa" ] ; then \ - $(MAKE) osmesa-only ; \ - elif [ "$(DRIVER_DIRS)" = "beos" ]; then \ - $(MAKE) beos ; \ - elif [ "$(DRIVER_DIRS)" = "directfb" ]; then \ - $(MAKE) directfb ; \ - elif [ "$(DRIVER_DIRS)" = "fbdev osmesa" ]; then \ - $(MAKE) fbdev ; $(MAKE) osmesa-only ; \ - else \ - $(MAKE) stand-alone ; \ - fi - +default: depend subdirs libmesa.a -###################################################################### -# BeOS driver target - -beos: depend subdirs libmesa.a - cd drivers/beos; $(MAKE) +ifneq ($(DRIVER_DIRS),dri) +default: libglapi.a +endif ###################################################################### -# Linux DRI drivers - # Make archive of core object files libmesa.a: $(SOLO_OBJECTS) @ $(TOP)/bin/mklib -o mesa -static $(SOLO_OBJECTS); @@ -70,32 +38,8 @@ libmesa.a: $(SOLO_OBJECTS) mimeset -f "$@" ; \ fi -linux-solo: depend subdirs libmesa.a - cd $(TOP)/src/gallium/winsys/dri ; $(MAKE) - - -##################################################################### -# Stand-alone Mesa libGL, no built-in drivers (DirectFB) - -libgl-core: $(CORE_OBJECTS) - @ $(TOP)/bin/mklib -o $(GL_LIB) \ - -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) $(CORE_OBJECTS) \ - $(GL_LIB_DEPS) - -directfb: depend subdirs libgl-core - cd drivers/directfb ; $(MAKE) - - -##################################################################### -# fbdev Mesa driver (libGL.so) - -fbdev: $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) $(COMMON_DRIVER_OBJECTS) - @ $(TOP)/bin/mklib -o $(GL_LIB) \ - -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ - $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) \ - $(COMMON_DRIVER_OBJECTS) $(GL_LIB_DEPS) +libglapi.a: $(GLAPI_OBJECTS) + @ $(TOP)/bin/mklib -o glapi -static $(GLAPI_OBJECTS) ###################################################################### @@ -176,9 +120,6 @@ install: default @if [ -e $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) ]; then \ $(INSTALL) $(TOP)/$(LIB_DIR)/libOSMesa* $(INSTALL_DIR)/$(LIB_DIR); \ fi - @if [ "${DRIVER_DIRS}" = "dri" ] ; then \ - cd $(TOP)/gallium/winsys/dri ; $(MAKE) install ; \ - fi ## NOT INSTALLED YET: ## $(INSTALL) -d $(INSTALL_DIR)/include/GLES @@ -192,9 +133,8 @@ tags: clean: -rm -f */*.o -rm -f */*/*.o - -rm -f depend depend.bak libmesa.a + -rm -f depend depend.bak libmesa.a libglapi.a -rm -f drivers/*/*.o - (cd drivers/dri && $(MAKE) clean) (cd x86 && $(MAKE) clean) (cd x86-64 && $(MAKE) clean) diff --git a/src/mesa/sources b/src/mesa/sources index f83d247a1e2..0d185fd5f3f 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -156,51 +156,6 @@ VF_SOURCES = \ vf/vf_generic.c \ vf/vf_sse.c - -DRAW_SOURCES = \ - $(TOP)/src/gallium/auxiliary/draw/draw_clip.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vs_exec.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vs_sse.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vs_llvm.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_context.c\ - $(TOP)/src/gallium/auxiliary/draw/draw_cull.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_debug.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_flatshade.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_offset.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_prim.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_stipple.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_twoside.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_unfilled.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_validate.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vbuf.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vertex.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vertex_cache.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vertex_fetch.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vertex_shader.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vf.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vf_generic.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vf_sse.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_wide_prims.c - -TGSIEXEC_SOURCES = \ - $(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c \ - $(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c - -TGSIUTIL_SOURCES = \ - $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_build.c \ - $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_dump.c \ - $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_parse.c \ - $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_util.c - -STATECACHE_SOURCES = \ - $(TOP)/src/gallium/auxiliary/cso_cache/cso_hash.c \ - $(TOP)/src/gallium/auxiliary/cso_cache/cso_cache.c - -PIPEUTIL_SOURCES = \ - $(TOP)/src/gallium/auxiliary/util/p_debug.c \ - $(TOP)/src/gallium/auxiliary/util/p_tile.c \ - $(TOP)/src/gallium/auxiliary/util/p_util.c - STATETRACKER_SOURCES = \ state_tracker/st_atom.c \ state_tracker/st_atom_blend.c \ @@ -229,7 +184,7 @@ STATETRACKER_SOURCES = \ state_tracker/st_cb_readpixels.c \ state_tracker/st_cb_strings.c \ state_tracker/st_cb_texture.c \ - state_tracker/st_cache.c \ + state_tracker/st_cache.c \ state_tracker/st_context.c \ state_tracker/st_debug.c \ state_tracker/st_draw.c \ @@ -333,15 +288,6 @@ SPARC_API = \ __COMMON_DRIVER_SOURCES = \ drivers/common/driverfuncs.c -X11_DRIVER_SOURCES = \ - $(TOP)/src/gallium/winsys/xlib/glxapi.c \ - $(TOP)/src/gallium/winsys/xlib/fakeglx.c \ - $(TOP)/src/gallium/winsys/xlib/xfonts.c \ - $(TOP)/src/gallium/winsys/xlib/xm_api.c \ - $(TOP)/src/gallium/winsys/xlib/xm_winsys.c \ - $(TOP)/src/gallium/winsys/xlib/xm_winsys_aub.c \ - $(TOP)/src/gallium/winsys/xlib/brw_aub.c - OSMESA_DRIVER_SOURCES = \ drivers/osmesa/osmesa.c -- cgit v1.2.3 From 112ba3355a3fa53768efb9a9fb0eeb677bd28d47 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 18 Feb 2008 16:26:33 +1100 Subject: nv40: until gallium is fixed we'll need a fallback for user clip planes --- src/gallium/drivers/nv40/nv40_context.h | 13 +++++++++++ src/gallium/drivers/nv40/nv40_state.c | 4 ++++ src/gallium/drivers/nv40/nv40_state_emit.c | 37 ++++++++++++++++++++++-------- 3 files changed, 45 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 7d5806f5f73..e9ed7ea3fb4 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -34,6 +34,9 @@ #define NV40_NEW_VERTPROG (1 << 9) #define NV40_NEW_FRAGPROG (1 << 10) #define NV40_NEW_ARRAYS (1 << 11) +#define NV40_NEW_UCP (1 << 12) + +#define NV40_FALLBACK_TNL (1 << 0) struct nv40_channel_context { struct nouveau_winsys *nvws; @@ -92,9 +95,11 @@ struct nv40_context { struct { struct pipe_scissor_state scissor; unsigned stipple[32]; + struct pipe_clip_state clip; } pipe_state; struct nv40_state state; + unsigned fallback; struct nouveau_stateobj *so_framebuffer; struct nouveau_stateobj *so_fragtex[16]; @@ -131,6 +136,14 @@ nv40_context(struct pipe_context *pipe) return (struct nv40_context *)pipe; } +struct nv40_state_entry { + boolean (*validate)(struct nv40_context *nv40); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + extern void nv40_init_state_functions(struct nv40_context *nv40); extern void nv40_init_surface_functions(struct nv40_context *nv40); extern void nv40_init_miptree_functions(struct nv40_context *nv40); diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index d7379e90905..c203b002403 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -561,6 +561,10 @@ static void nv40_set_clip_state(struct pipe_context *pipe, const struct pipe_clip_state *clip) { + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->pipe_state.clip = *clip; + nv40->dirty |= NV40_NEW_UCP; } static void diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 244c6838f34..ce52a3863e7 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -79,15 +79,15 @@ nv40_state_stipple_validate(struct nv40_context *nv40) return TRUE; } -struct nv40_state_atom { - boolean (*validate)(struct nv40_context *nv40); - struct { - unsigned pipe; - unsigned hw; - } dirty; -}; +static boolean +nv40_state_clip_validate(struct nv40_context *nv40) +{ + if (nv40->pipe_state.clip.nr) + nv40->fallback |= NV40_FALLBACK_TNL; + return FALSE; +} -static struct nv40_state_atom states[] = { +static struct nv40_state_entry states[] = { { .validate = nv40_state_scissor_validate, .dirty = { @@ -101,13 +101,23 @@ static struct nv40_state_atom states[] = { .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, .hw = NV40_NEW_STIPPLE, } + }, + { + .validate = nv40_state_clip_validate, + .dirty = { + .pipe = NV40_NEW_UCP, + .hw = 0, + } } }; static void nv40_state_validate(struct nv40_context *nv40) { - unsigned i; + unsigned i, last_fallback; + + last_fallback = nv40->fallback; + nv40->fallback = 0; for (i = 0; i < sizeof(states) / sizeof(states[0]); i++) { if (nv40->dirty & states[i].dirty.pipe) { @@ -115,6 +125,15 @@ nv40_state_validate(struct nv40_context *nv40) nv40->hw_dirty |= states[i].dirty.hw; } } + + if (nv40->fallback & NV40_FALLBACK_TNL && + !(last_fallback & NV40_FALLBACK_TNL)) { + NOUVEAU_ERR("XXX: hwtnl->swtnl\n"); + } else + if (last_fallback & NV40_FALLBACK_TNL && + !(nv40->fallback & NV40_FALLBACK_TNL)) { + NOUVEAU_ERR("XXX: swtnl->hwtnl\n"); + } } void -- cgit v1.2.3 From 56045da083d6530a56a2a7585e3121df0b07bac4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 18 Feb 2008 16:38:27 +1100 Subject: nv40: move some things around --- src/gallium/drivers/nv40/Makefile | 3 + src/gallium/drivers/nv40/nv40_context.h | 3 + src/gallium/drivers/nv40/nv40_state_clip.c | 18 +++++ src/gallium/drivers/nv40/nv40_state_emit.c | 99 ++++----------------------- src/gallium/drivers/nv40/nv40_state_scissor.c | 35 ++++++++++ src/gallium/drivers/nv40/nv40_state_stipple.c | 40 +++++++++++ 6 files changed, 114 insertions(+), 84 deletions(-) create mode 100644 src/gallium/drivers/nv40/nv40_state_clip.c create mode 100644 src/gallium/drivers/nv40/nv40_state_scissor.c create mode 100644 src/gallium/drivers/nv40/nv40_state_stipple.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 2a9de4a2dcb..12b8eef2590 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -13,6 +13,9 @@ DRIVER_SOURCES = \ nv40_query.c \ nv40_state.c \ nv40_state_emit.c \ + nv40_state_clip.c \ + nv40_state_scissor.c \ + nv40_state_stipple.c \ nv40_surface.c \ nv40_vbo.c \ nv40_vertprog.c diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index e9ed7ea3fb4..432204b8252 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -174,6 +174,9 @@ extern void nv40_fragtex_bind(struct nv40_context *); /* nv40_state.c and friends */ extern void nv40_emit_hw_state(struct nv40_context *nv40); extern void nv40_state_tex_update(struct nv40_context *nv40); +extern struct nv40_state_entry nv40_state_clip; +extern struct nv40_state_entry nv40_state_scissor; +extern struct nv40_state_entry nv40_state_stipple; /* nv40_vbo.c */ extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_state_clip.c b/src/gallium/drivers/nv40/nv40_state_clip.c new file mode 100644 index 00000000000..19f1c3b36de --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_clip.c @@ -0,0 +1,18 @@ +#include "nv40_context.h" + +static boolean +nv40_state_clip_validate(struct nv40_context *nv40) +{ + if (nv40->pipe_state.clip.nr) + nv40->fallback |= NV40_FALLBACK_TNL; + + return FALSE; +} + +struct nv40_state_entry nv40_state_clip = { + .validate = nv40_state_clip_validate, + .dirty = { + .pipe = NV40_NEW_UCP, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index ce52a3863e7..b5d0d68d6b1 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -22,63 +22,6 @@ nv40_state_emit_dummy_relocs(struct nv40_context *nv40) so_emit_reloc_markers(nv40->nvws, nv40->fragprog.active->so); } -static boolean -nv40_state_scissor_validate(struct nv40_context *nv40) -{ - struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; - struct pipe_scissor_state *s = &nv40->pipe_state.scissor; - struct nouveau_stateobj *so; - - if (nv40->state.scissor.so && - (rast->scissor == 0 && nv40->state.scissor.enabled == 0)) - return FALSE; - - so = so_new(3, 0); - so_method(so, nv40->hw->curie, NV40TCL_SCISSOR_HORIZ, 2); - if (rast->scissor) { - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); - } else { - so_data (so, 4096 << 16); - so_data (so, 4096 << 16); - } - - so_ref(so, &nv40->state.scissor.so); - so_ref(NULL, &so); - return TRUE; -} - -static boolean -nv40_state_stipple_validate(struct nv40_context *nv40) -{ - struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; - struct nouveau_grobj *curie = nv40->hw->curie; - struct nouveau_stateobj *so; - - if (nv40->state.stipple.so && (rast->poly_stipple_enable == 0 && - nv40->state.stipple.enabled == 0)) - return FALSE; - - if (rast->poly_stipple_enable) { - unsigned i; - - so = so_new(35, 0); - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 1); - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, nv40->pipe_state.stipple[i]); - } else { - so = so_new(2, 0); - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &nv40->state.stipple.so); - so_ref(NULL, &so); - return TRUE; -} - static boolean nv40_state_clip_validate(struct nv40_context *nv40) { @@ -87,43 +30,31 @@ nv40_state_clip_validate(struct nv40_context *nv40) return FALSE; } -static struct nv40_state_entry states[] = { - { - .validate = nv40_state_scissor_validate, - .dirty = { - .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, - .hw = NV40_NEW_SCISSOR, - } - }, - { - .validate = nv40_state_stipple_validate, - .dirty = { - .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, - .hw = NV40_NEW_STIPPLE, - } - }, - { - .validate = nv40_state_clip_validate, - .dirty = { - .pipe = NV40_NEW_UCP, - .hw = 0, - } - } +static struct nv40_state_entry *render_states[] = { + &nv40_state_clip, + &nv40_state_scissor, + &nv40_state_stipple, + NULL }; static void nv40_state_validate(struct nv40_context *nv40) { - unsigned i, last_fallback; + struct nv40_state_entry **states = render_states; + unsigned last_fallback; last_fallback = nv40->fallback; nv40->fallback = 0; - for (i = 0; i < sizeof(states) / sizeof(states[0]); i++) { - if (nv40->dirty & states[i].dirty.pipe) { - if (states[i].validate(nv40)) - nv40->hw_dirty |= states[i].dirty.hw; + while (*states) { + struct nv40_state_entry *e = *states; + + if (nv40->dirty & e->dirty.pipe) { + if (e->validate(nv40)) + nv40->hw_dirty |= e->dirty.hw; } + + states++; } if (nv40->fallback & NV40_FALLBACK_TNL && diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c new file mode 100644 index 00000000000..556b820e581 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -0,0 +1,35 @@ +#include "nv40_context.h" + +static boolean +nv40_state_scissor_validate(struct nv40_context *nv40) +{ + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct pipe_scissor_state *s = &nv40->pipe_state.scissor; + struct nouveau_stateobj *so; + + if (nv40->state.scissor.so && + (rast->scissor == 0 && nv40->state.scissor.enabled == 0)) + return FALSE; + + so = so_new(3, 0); + so_method(so, nv40->hw->curie, NV40TCL_SCISSOR_HORIZ, 2); + if (rast->scissor) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nv40->state.scissor.so); + so_ref(NULL, &so); + return TRUE; +} + +struct nv40_state_entry nv40_state_scissor = { + .validate = nv40_state_scissor_validate, + .dirty = { + .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, + .hw = NV40_NEW_SCISSOR + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c new file mode 100644 index 00000000000..52462a0b502 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -0,0 +1,40 @@ +#include "nv40_context.h" + +static boolean +nv40_state_stipple_validate(struct nv40_context *nv40) +{ + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct nouveau_grobj *curie = nv40->hw->curie; + struct nouveau_stateobj *so; + + if (nv40->state.stipple.so && (rast->poly_stipple_enable == 0 && + nv40->state.stipple.enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(35, 0); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv40->pipe_state.stipple[i]); + } else { + so = so_new(2, 0); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nv40->state.stipple.so); + so_ref(NULL, &so); + return TRUE; +} + +struct nv40_state_entry nv40_state_stipple = { + .validate = nv40_state_stipple_validate, + .dirty = { + .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, + .hw = NV40_NEW_STIPPLE + } +}; -- cgit v1.2.3 From 39fe5851a57a7218eafce7dab971738bdb780166 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 18 Feb 2008 15:07:17 +0900 Subject: Actually use GALLIUM_DRIVER_DIRS. --- src/gallium/drivers/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile index 8dcfb18a16f..6161cb6ff8f 100644 --- a/src/gallium/drivers/Makefile +++ b/src/gallium/drivers/Makefile @@ -2,7 +2,7 @@ TOP = ../../.. include $(TOP)/configs/current -SUBDIRS = softpipe i915simple i965simple failover +SUBDIRS = $(GALLIUM_DRIVER_DIRS) default: subdirs -- cgit v1.2.3 From 4a9cb97bbf6961cc4106c4c54e59296a74e889e9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 18 Feb 2008 17:07:59 +1100 Subject: nv40: get fragprog onto new state mechanism --- src/gallium/drivers/nv40/nv40_context.h | 17 ++-- src/gallium/drivers/nv40/nv40_fragprog.c | 127 +++++++++++++++++------------ src/gallium/drivers/nv40/nv40_fragtex.c | 2 +- src/gallium/drivers/nv40/nv40_state.c | 5 +- src/gallium/drivers/nv40/nv40_state.h | 1 - src/gallium/drivers/nv40/nv40_state_emit.c | 26 +++--- 6 files changed, 97 insertions(+), 81 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 432204b8252..ce0933fea4a 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -37,6 +37,7 @@ #define NV40_NEW_UCP (1 << 12) #define NV40_FALLBACK_TNL (1 << 0) +#define NV40_FALLBACK_RAST (1 << 1) struct nv40_channel_context { struct nouveau_winsys *nvws; @@ -72,6 +73,8 @@ struct nv40_state { unsigned enabled; struct nouveau_stateobj *so; } stipple; + + struct nouveau_stateobj *fragprog; }; struct nv40_context { @@ -96,6 +99,8 @@ struct nv40_context { struct pipe_scissor_state scissor; unsigned stipple[32]; struct pipe_clip_state clip; + struct nv40_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; } pipe_state; struct nv40_state state; @@ -119,13 +124,6 @@ struct nv40_context { struct pipe_buffer *constant_buf; } vertprog; - struct { - struct nv40_fragment_program *active; - - struct nv40_fragment_program *current; - struct pipe_buffer *constant_buf; - } fragprog; - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; }; @@ -161,10 +159,6 @@ extern void nv40_vertprog_destroy(struct nv40_context *, struct nv40_vertex_program *); /* nv40_fragprog.c */ -extern void nv40_fragprog_translate(struct nv40_context *, - struct nv40_fragment_program *); -extern void nv40_fragprog_bind(struct nv40_context *, - struct nv40_fragment_program *); extern void nv40_fragprog_destroy(struct nv40_context *, struct nv40_fragment_program *); @@ -177,6 +171,7 @@ extern void nv40_state_tex_update(struct nv40_context *nv40); extern struct nv40_state_entry nv40_state_clip; extern struct nv40_state_entry nv40_state_scissor; extern struct nv40_state_entry nv40_state_stipple; +extern struct nv40_state_entry nv40_state_fragprog; /* nv40_vbo.c */ extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 07a418c1e9f..bfc75eb4620 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -668,7 +668,7 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, return TRUE; } -void +static void nv40_fragprog_translate(struct nv40_context *nv40, struct nv40_fragment_program *fp) { @@ -750,72 +750,66 @@ nv40_fragprog_translate(struct nv40_context *nv40, fp->insn[fpc->inst_offset + 3] = 0x00000000; fp->translated = TRUE; - fp->on_hw = FALSE; out_err: tgsi_parse_free(&parse); free(fpc); } -void -nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp) +static void +nv40_fragprog_upload(struct nv40_context *nv40, + struct nv40_fragment_program *fp) { struct pipe_winsys *ws = nv40->pipe.winsys; - struct nouveau_stateobj *so; + const uint32_t le = 1; + uint32_t *map; int i; - if (!fp->translated) { - nv40_fragprog_translate(nv40, fp); - if (!fp->translated) - assert(0); - } + map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - if (fp->nr_consts) { - float *map = ws->buffer_map(ws, nv40->fragprog.constant_buf, - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < fp->nr_consts; i++) { - struct nv40_fragment_program_data *fpd = &fp->consts[i]; - uint32_t *p = &fp->insn[fpd->offset]; - uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; +#if 0 + for (i = 0; i < fp->insn_len; i++) { + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + } +#endif - if (!memcmp(p, cb, 4 * sizeof(float))) - continue; - memcpy(p, cb, 4 * sizeof(float)); - fp->on_hw = 0; + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); } - ws->buffer_unmap(ws, nv40->fragprog.constant_buf); } - if (!fp->on_hw) { - const uint32_t le = 1; - uint32_t *map; - - if (!fp->buffer) - fp->buffer = ws->buffer_create(ws, 0x100, 0, - fp->insn_len * 4); - map = ws->buffer_map(ws, fp->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); + ws->buffer_unmap(ws, fp->buffer); +} -#if 0 - for (i = 0; i < fp->insn_len; i++) { - NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); - } -#endif +static boolean +nv40_fragprog_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->pipe_state.fragprog; + struct pipe_buffer *constbuf = + nv40->pipe_state.constbuf[PIPE_SHADER_FRAGMENT]; + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_stateobj *so; + unsigned new_program = FALSE; + int i; - if ((*(const uint8_t *)&le)) { - for (i = 0; i < fp->insn_len; i++) { - map[i] = fp->insn[i]; - } - } else { - /* Weird swapping for big-endian chips */ - for (i = 0; i < fp->insn_len; i++) { - map[i] = ((fp->insn[i] & 0xffff) << 16) | - ((fp->insn[i] >> 16) & 0xffff); - } - } + if (fp->translated) + goto update_constants; - ws->buffer_unmap(ws, fp->buffer); - fp->on_hw = TRUE; + nv40_fragprog_translate(nv40, fp); + if (!fp->translated) { + nv40->fallback |= NV40_FALLBACK_RAST; + return FALSE; } + new_program = TRUE; + + fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); + nv40_fragprog_upload(nv40, fp); so = so_new(4, 1); so_method(so, nv40->hw->curie, NV40TCL_FP_ADDRESS, 1); @@ -824,12 +818,33 @@ nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp) NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); so_method(so, nv40->hw->curie, NV40TCL_FP_CONTROL, 1); so_data (so, fp->fp_control); - - so_emit(nv40->nvws, so); so_ref(so, &fp->so); so_ref(NULL, &so); - nv40->fragprog.active = fp; +update_constants: + if (fp->nr_consts) { + boolean new_consts = FALSE; + float *map; + + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < fp->nr_consts; i++) { + struct nv40_fragment_program_data *fpd = &fp->consts[i]; + uint32_t *p = &fp->insn[fpd->offset]; + uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + + if (!memcmp(p, cb, 4 * sizeof(float))) + continue; + memcpy(p, cb, 4 * sizeof(float)); + new_consts = TRUE; + } + ws->buffer_unmap(ws, constbuf); + + if (new_consts) + nv40_fragprog_upload(nv40, fp); + } + + so_ref(fp->so, &nv40->state.fragprog); + return new_program; } void @@ -840,3 +855,11 @@ nv40_fragprog_destroy(struct nv40_context *nv40, free(fp->insn); } +struct nv40_state_entry nv40_state_fragprog = { + .validate = nv40_fragprog_validate, + .dirty = { + .pipe = NV40_NEW_FRAGPROG, + .hw = NV40_NEW_FRAGPROG + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 5af5fbe7465..811f3098ba7 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -125,7 +125,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) void nv40_fragtex_bind(struct nv40_context *nv40) { - struct nv40_fragment_program *fp = nv40->fragprog.active; + struct nv40_fragment_program *fp = nv40->pipe_state.fragprog; unsigned samplers, unit; samplers = nv40->fp_samplers & ~fp->samplers; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index c203b002403..2886c6b0dc4 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -523,9 +523,8 @@ static void nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_fragment_program *fp = hwcso; - nv40->fragprog.current = fp; + nv40->pipe_state.fragprog = hwcso; nv40->dirty |= NV40_NEW_FRAGPROG; } @@ -578,7 +577,7 @@ nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nv40->dirty |= NV40_NEW_VERTPROG; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv40->fragprog.constant_buf = buf->buffer; + nv40->pipe_state.constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; nv40->dirty |= NV40_NEW_FRAGPROG; } } diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index e82ab9de98a..2701294a077 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -50,7 +50,6 @@ struct nv40_fragment_program { const struct pipe_shader_state *pipe; boolean translated; - boolean on_hw; unsigned samplers; uint32_t *insn; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index b5d0d68d6b1..e10e178432e 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -19,21 +19,14 @@ nv40_state_emit_dummy_relocs(struct nv40_context *nv40) continue; so_emit_reloc_markers(nv40->nvws, nv40->so_fragtex[i]); } - so_emit_reloc_markers(nv40->nvws, nv40->fragprog.active->so); -} - -static boolean -nv40_state_clip_validate(struct nv40_context *nv40) -{ - if (nv40->pipe_state.clip.nr) - nv40->fallback |= NV40_FALLBACK_TNL; - return FALSE; + so_emit_reloc_markers(nv40->nvws, nv40->state.fragprog); } static struct nv40_state_entry *render_states[] = { &nv40_state_clip, &nv40_state_scissor, &nv40_state_stipple, + &nv40_state_fragprog, NULL }; @@ -65,6 +58,15 @@ nv40_state_validate(struct nv40_context *nv40) !(nv40->fallback & NV40_FALLBACK_TNL)) { NOUVEAU_ERR("XXX: swtnl->hwtnl\n"); } + + if (nv40->fallback & NV40_FALLBACK_RAST && + !(last_fallback & NV40_FALLBACK_RAST)) { + NOUVEAU_ERR("XXX: hwrast->swrast\n"); + } else + if (last_fallback & NV40_FALLBACK_RAST && + !(nv40->fallback & NV40_FALLBACK_RAST)) { + NOUVEAU_ERR("XXX: swrast->hwrast\n"); + } } void @@ -100,10 +102,8 @@ nv40_emit_hw_state(struct nv40_context *nv40) nv40->hw_dirty &= ~NV40_NEW_STIPPLE; } - if (nv40->dirty & NV40_NEW_FRAGPROG) { - nv40_fragprog_bind(nv40, nv40->fragprog.current); - /*XXX: clear NV40_NEW_FRAGPROG if no new program uploaded */ - } + if (nv40->hw_dirty & NV40_NEW_FRAGPROG) + so_emit(nv40->nvws, nv40->state.fragprog); if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) { nv40_fragtex_bind(nv40); -- cgit v1.2.3 From 9a5dd26fe22c37b85787130a2b724e6e8ef4f553 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 18 Feb 2008 17:36:24 +1100 Subject: nv40: and vertprog.. --- src/gallium/drivers/nv40/nv40_context.h | 14 ++------ src/gallium/drivers/nv40/nv40_fragprog.c | 10 +++--- src/gallium/drivers/nv40/nv40_state.c | 5 ++- src/gallium/drivers/nv40/nv40_state.h | 1 + src/gallium/drivers/nv40/nv40_state_emit.c | 3 +- src/gallium/drivers/nv40/nv40_vbo.c | 2 +- src/gallium/drivers/nv40/nv40_vertprog.c | 57 +++++++++++++++++++++--------- 7 files changed, 55 insertions(+), 37 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index ce0933fea4a..28a0274d771 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -75,6 +75,7 @@ struct nv40_state { } stipple; struct nouveau_stateobj *fragprog; + struct nouveau_stateobj *vertprog; }; struct nv40_context { @@ -99,6 +100,7 @@ struct nv40_context { struct pipe_scissor_state scissor; unsigned stipple[32]; struct pipe_clip_state clip; + struct nv40_vertex_program *vertprog; struct nv40_fragment_program *fragprog; struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; } pipe_state; @@ -117,13 +119,6 @@ struct nv40_context { struct nouveau_stateobj *so_viewport; struct nouveau_stateobj *so_stipple; - struct { - struct nv40_vertex_program *active; - - struct nv40_vertex_program *current; - struct pipe_buffer *constant_buf; - } vertprog; - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; }; @@ -151,10 +146,6 @@ extern void nv40_init_query_functions(struct nv40_context *nv40); extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); /* nv40_vertprog.c */ -extern void nv40_vertprog_translate(struct nv40_context *, - struct nv40_vertex_program *); -extern void nv40_vertprog_bind(struct nv40_context *, - struct nv40_vertex_program *); extern void nv40_vertprog_destroy(struct nv40_context *, struct nv40_vertex_program *); @@ -172,6 +163,7 @@ extern struct nv40_state_entry nv40_state_clip; extern struct nv40_state_entry nv40_state_scissor; extern struct nv40_state_entry nv40_state_stipple; extern struct nv40_state_entry nv40_state_fragprog; +extern struct nv40_state_entry nv40_state_vertprog; /* nv40_vbo.c */ extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index bfc75eb4620..77ac8ab2c61 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -795,7 +795,6 @@ nv40_fragprog_validate(struct nv40_context *nv40) nv40->pipe_state.constbuf[PIPE_SHADER_FRAGMENT]; struct pipe_winsys *ws = nv40->pipe.winsys; struct nouveau_stateobj *so; - unsigned new_program = FALSE; int i; if (fp->translated) @@ -806,7 +805,6 @@ nv40_fragprog_validate(struct nv40_context *nv40) nv40->fallback |= NV40_FALLBACK_RAST; return FALSE; } - new_program = TRUE; fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); nv40_fragprog_upload(nv40, fp); @@ -843,8 +841,12 @@ update_constants: nv40_fragprog_upload(nv40, fp); } - so_ref(fp->so, &nv40->state.fragprog); - return new_program; + if (fp->so != nv40->state.fragprog) { + so_ref(fp->so, &nv40->state.fragprog); + return TRUE; + } + + return FALSE; } void diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 2886c6b0dc4..8ffbb131f73 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -491,9 +491,8 @@ static void nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_vertex_program *vp = hwcso; - nv40->vertprog.current = vp; + nv40->pipe_state.vertprog = hwcso; nv40->dirty |= NV40_NEW_VERTPROG; } @@ -573,7 +572,7 @@ nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct nv40_context *nv40 = nv40_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv40->vertprog.constant_buf = buf->buffer; + nv40->pipe_state.constbuf[PIPE_SHADER_VERTEX] = buf->buffer; nv40->dirty |= NV40_NEW_VERTPROG; } else if (shader == PIPE_SHADER_FRAGMENT) { diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index 2701294a077..e5217fe91c4 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -39,6 +39,7 @@ struct nv40_vertex_program { uint32_t ir; uint32_t or; + struct nouveau_stateobj *so; }; struct nv40_fragment_program_data { diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index e10e178432e..e702b103236 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -27,6 +27,7 @@ static struct nv40_state_entry *render_states[] = { &nv40_state_scissor, &nv40_state_stipple, &nv40_state_fragprog, + &nv40_state_vertprog, NULL }; @@ -116,7 +117,7 @@ nv40_emit_hw_state(struct nv40_context *nv40) } if (nv40->dirty & NV40_NEW_VERTPROG) { - nv40_vertprog_bind(nv40, nv40->vertprog.current); + so_emit(nv40->nvws, nv40->state.vertprog); nv40->dirty &= ~NV40_NEW_VERTPROG; } diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index fa827ef0c5d..3bfcb264db1 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -101,7 +101,7 @@ static void nv40_vbo_arrays_update(struct nv40_context *nv40, struct pipe_buffer *ib, unsigned ib_format) { - struct nv40_vertex_program *vp = nv40->vertprog.active; + struct nv40_vertex_program *vp = nv40->pipe_state.vertprog; struct nouveau_stateobj *vtxbuf, *vtxfmt; unsigned inputs, hw, num_hw; unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 9f4738b8308..4a15e51eb33 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -558,7 +558,7 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) return TRUE; } -void +static void nv40_vertprog_translate(struct nv40_context *nv40, struct nv40_vertex_program *vp) { @@ -631,24 +631,32 @@ out_err: free(vpc); } -void -nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) +static boolean +nv40_vertprog_validate(struct nv40_context *nv40) { + struct nv40_vertex_program *vp = nv40->pipe_state.vertprog; + struct pipe_buffer *constbuf = + nv40->pipe_state.constbuf[PIPE_SHADER_VERTEX]; struct nouveau_winsys *nvws = nv40->nvws; struct pipe_winsys *ws = nv40->pipe.winsys; boolean upload_code = FALSE, upload_data = FALSE; int i; /* Translate TGSI shader into hw bytecode */ + if (vp->translated) + goto check_gpu_resources; + + nv40_vertprog_translate(nv40, vp); if (!vp->translated) { - nv40_vertprog_translate(nv40, vp); - if (!vp->translated) - assert(0); + nv40->fallback |= NV40_FALLBACK_TNL; + return FALSE; } +check_gpu_resources: /* Allocate hw vtxprog exec slots */ if (!vp->exec) { struct nouveau_resource *heap = nv40->hw->vp_exec_heap; + struct nouveau_stateobj *so; uint vplen = vp->nr_insns; if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { @@ -663,6 +671,15 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) assert(0); } + so = so_new(5, 0); + so_method(so, nv40->hw->curie, NV40TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + so_method(so, nv40->hw->curie, NV40TCL_VP_ATTRIB_EN, 2); + so_data (so, vp->ir); + so_data (so, vp->or); + so_ref(so, &vp->so); + so_ref(NULL, &so); + upload_code = TRUE; } @@ -725,8 +742,8 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) if (vp->nr_consts) { float *map = NULL; - if (nv40->vertprog.constant_buf) { - map = ws->buffer_map(ws, nv40->vertprog.constant_buf, + if (constbuf) { + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); } @@ -747,9 +764,8 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) OUT_RINGp ((uint32_t *)vpd->value, 4); } - if (map) { - ws->buffer_unmap(ws, nv40->vertprog.constant_buf); - } + if (constbuf) + ws->buffer_unmap(ws, constbuf); } /* Upload vtxprog */ @@ -770,13 +786,12 @@ nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) } } - BEGIN_RING(curie, NV40TCL_VP_START_FROM_ID, 1); - OUT_RING (vp->exec->start); - BEGIN_RING(curie, NV40TCL_VP_ATTRIB_EN, 2); - OUT_RING (vp->ir); - OUT_RING (vp->or); + if (vp->so != nv40->state.vertprog) { + so_ref(vp->so, &nv40->state.vertprog); + return TRUE; + } - nv40->vertprog.active = vp; + return FALSE; } void @@ -788,3 +803,11 @@ nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) free(vp->insns); } +struct nv40_state_entry nv40_state_vertprog = { + .validate = nv40_vertprog_validate, + .dirty = { + .pipe = NV40_NEW_VERTPROG, + .hw = NV40_NEW_VERTPROG + } +}; + -- cgit v1.2.3 From c0f9cab905f3f54cc01bf947665f8a731b8cb347 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 18 Feb 2008 18:23:12 +1100 Subject: nouveau: cleanups + fixes --- src/gallium/drivers/nouveau/nouveau_stateobj.h | 21 +++++++++------------ src/gallium/drivers/nv40/nv40_state_emit.c | 8 +++++--- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index 07c31b014a5..459cc7d77ae 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -45,22 +45,19 @@ so_new(unsigned push, unsigned reloc) static INLINE void so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) { - struct nouveau_stateobj *so; - - so = *pso; - if (so) { - if (--so->refcount <= 0) { - free(so->push); - free(so->reloc); - free(so); - } - *pso = NULL; - } + struct nouveau_stateobj *so = *pso; if (ref) { ref->refcount++; - *pso = ref; } + + if (so && --so->refcount <= 0) { + free(so->push); + free(so->reloc); + free(so); + } + + *pso = ref; } static INLINE void diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index e702b103236..e8230111bb4 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -103,8 +103,10 @@ nv40_emit_hw_state(struct nv40_context *nv40) nv40->hw_dirty &= ~NV40_NEW_STIPPLE; } - if (nv40->hw_dirty & NV40_NEW_FRAGPROG) + if (nv40->hw_dirty & NV40_NEW_FRAGPROG) { so_emit(nv40->nvws, nv40->state.fragprog); + nv40->hw_dirty &= ~NV40_NEW_FRAGPROG; + } if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) { nv40_fragtex_bind(nv40); @@ -116,9 +118,9 @@ nv40_emit_hw_state(struct nv40_context *nv40) nv40->dirty &= ~NV40_NEW_FRAGPROG; } - if (nv40->dirty & NV40_NEW_VERTPROG) { + if (nv40->hw_dirty & NV40_NEW_VERTPROG) { so_emit(nv40->nvws, nv40->state.vertprog); - nv40->dirty &= ~NV40_NEW_VERTPROG; + nv40->hw_dirty &= ~NV40_NEW_VERTPROG; } nv40->dirty_samplers = 0; -- cgit v1.2.3 From 33ceb6716a2166db75659fa66d85fb4cfb9633c7 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 18 Feb 2008 10:52:44 +0000 Subject: Update scons build for new code layout. --- SConstruct | 7 +- src/SConscript | 7 ++ src/gallium/SConscript | 23 +++- src/gallium/auxiliary/cso_cache/SConscript | 10 ++ src/gallium/auxiliary/draw/SConscript | 31 +++++ src/gallium/auxiliary/pipebuffer/SConscript | 14 +++ src/gallium/auxiliary/tgsi/SConscript | 14 +++ src/gallium/auxiliary/util/SConscript | 11 ++ src/gallium/drivers/failover/SConscript | 13 +++ src/gallium/winsys/SConscript | 10 ++ src/gallium/winsys/dri/SConscript | 51 +++++++++ src/gallium/winsys/dri/intel/SConscript | 14 +-- src/gallium/winsys/xlib/SConscript | 28 +++++ src/mesa/SConscript | 170 ++-------------------------- src/mesa/drivers/dri/SConscript | 48 -------- 15 files changed, 229 insertions(+), 222 deletions(-) create mode 100644 src/SConscript create mode 100644 src/gallium/auxiliary/cso_cache/SConscript create mode 100644 src/gallium/auxiliary/draw/SConscript create mode 100644 src/gallium/auxiliary/pipebuffer/SConscript create mode 100644 src/gallium/auxiliary/tgsi/SConscript create mode 100644 src/gallium/auxiliary/util/SConscript create mode 100644 src/gallium/drivers/failover/SConscript create mode 100644 src/gallium/winsys/SConscript create mode 100644 src/gallium/winsys/dri/SConscript create mode 100644 src/gallium/winsys/xlib/SConscript delete mode 100644 src/mesa/drivers/dri/SConscript (limited to 'src/gallium/drivers') diff --git a/SConstruct b/SConstruct index 22a4072c933..1126aff21b5 100644 --- a/SConstruct +++ b/SConstruct @@ -108,7 +108,10 @@ env.Append(CPPPATH = [ '#/include', '#/src/mesa', '#/src/mesa/main', - '#/src/mesa/pipe', + '#/src/gallium/include/pipe', + '#/src/gallium/include', + '#/src/gallium/auxiliary', + '#/src/gallium/drivers', ]) @@ -222,7 +225,7 @@ build_dir = os.path.join(build_topdir, build_subdir) # http://www.scons.org/wiki/SimultaneousVariantBuilds SConscript( - 'src/mesa/SConscript', + 'src/SConscript', build_dir = build_dir, duplicate = 0 # http://www.scons.org/doc/0.97/HTML/scons-user/x2261.html ) diff --git a/src/SConscript b/src/SConscript new file mode 100644 index 00000000000..5b099438940 --- /dev/null +++ b/src/SConscript @@ -0,0 +1,7 @@ +Import('*') + +SConscript([ + 'gallium/SConscript', + 'mesa/SConscript', + 'gallium/winsys/SConscript', +]) diff --git a/src/gallium/SConscript b/src/gallium/SConscript index d9c20e01007..a835f6d6614 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -1,9 +1,24 @@ Import('*') -#env = env.Clone() +env = env.Clone() + +auxiliaries = [] + +Export('auxiliaries') + SConscript([ - 'softpipe/SConscript', - 'i915simple/SConscript', - 'i965simple/SConscript', + # NOTE: order matters! + 'auxiliary/util/SConscript', + 'auxiliary/tgsi/SConscript', + 'auxiliary/cso_cache/SConscript', + 'auxiliary/draw/SConscript', + #'auxiliary/llvm/SConscript', + 'auxiliary/pipebuffer/SConscript', + + 'drivers/softpipe/SConscript', + 'drivers/i915simple/SConscript', + 'drivers/i965simple/SConscript', + 'drivers/failover/SConscript', + #'drivers/cell/SConscript', ]) diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript new file mode 100644 index 00000000000..9751881613e --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/SConscript @@ -0,0 +1,10 @@ +Import('*') + +cso_cache = env.ConvenienceLibrary( + target = 'cso_cache', + source = [ + 'cso_cache.c', + 'cso_hash.c', + ]) + +auxiliaries.insert(0, cso_cache) diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript new file mode 100644 index 00000000000..8e3a8caa74c --- /dev/null +++ b/src/gallium/auxiliary/draw/SConscript @@ -0,0 +1,31 @@ +Import('*') + +draw = env.ConvenienceLibrary( + target = 'draw', + source = [ + 'draw_clip.c', + 'draw_vs_exec.c', + 'draw_vs_sse.c', + 'draw_vs_llvm.c', + 'draw_context.c', + 'draw_cull.c', + 'draw_debug.c', + 'draw_flatshade.c', + 'draw_offset.c', + 'draw_prim.c', + 'draw_stipple.c', + 'draw_twoside.c', + 'draw_unfilled.c', + 'draw_validate.c', + 'draw_vbuf.c', + 'draw_vertex.c', + 'draw_vertex_cache.c', + 'draw_vertex_fetch.c', + 'draw_vertex_shader.c', + 'draw_vf.c', + 'draw_vf_generic.c', + 'draw_vf_sse.c', + 'draw_wide_prims.c', + ]) + +auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript new file mode 100644 index 00000000000..3d41fd84a6c --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -0,0 +1,14 @@ +Import('*') + +pipebuffer = env.ConvenienceLibrary( + target = 'pipebuffer', + source = [ + 'pb_buffer_fenced.c', + 'pb_buffer_malloc.c', + 'pb_bufmgr_fenced.c', + 'pb_bufmgr_mm.c', + 'pb_bufmgr_pool.c', + 'pb_winsys.c', + ]) + +auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript new file mode 100644 index 00000000000..8464bfe9444 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -0,0 +1,14 @@ +Import('*') + +tgsi = env.ConvenienceLibrary( + target = 'tgsi', + source = [ + 'exec/tgsi_exec.c', + 'exec/tgsi_sse2.c', + 'util/tgsi_build.c', + 'util/tgsi_dump.c', + 'util/tgsi_parse.c', + 'util/tgsi_util.c', + ]) + +auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript new file mode 100644 index 00000000000..b126cf44d6f --- /dev/null +++ b/src/gallium/auxiliary/util/SConscript @@ -0,0 +1,11 @@ +Import('*') + +util = env.ConvenienceLibrary( + target = 'util', + source = [ + 'p_debug.c', + 'p_tile.c', + 'p_util.c', + ]) + +auxiliaries.insert(0, util) diff --git a/src/gallium/drivers/failover/SConscript b/src/gallium/drivers/failover/SConscript new file mode 100644 index 00000000000..f8e9b1b4911 --- /dev/null +++ b/src/gallium/drivers/failover/SConscript @@ -0,0 +1,13 @@ +Import('*') + +env = env.Clone() + +failover = env.ConvenienceLibrary( + target = 'failover', + source = [ + 'fo_state.c', + 'fo_state_emit.c', + 'fo_context.c', + ]) + +Export('failover') diff --git a/src/gallium/winsys/SConscript b/src/gallium/winsys/SConscript new file mode 100644 index 00000000000..32215d8d586 --- /dev/null +++ b/src/gallium/winsys/SConscript @@ -0,0 +1,10 @@ +Import('*') + +if dri: + SConscript([ + 'dri/SConscript', + ]) +else: + SConscript([ + 'xlib/SConscript', + ]) diff --git a/src/gallium/winsys/dri/SConscript b/src/gallium/winsys/dri/SConscript new file mode 100644 index 00000000000..8c56ce917c7 --- /dev/null +++ b/src/gallium/winsys/dri/SConscript @@ -0,0 +1,51 @@ +Import('*') + +drienv = env.Clone() + +drienv.Replace(CPPPATH = [ + '#src/mesa/drivers/dri/common', + '#include', + '#include/GL/internal', + '#src/gallium/include', + '#src/gallium/auxiliary', + '#src/gallium/drivers', + '#src/mesa', + '#src/mesa/main', + '#src/mesa/glapi', + '#src/mesa/math', + '#src/mesa/transform', + '#src/mesa/shader', + '#src/mesa/swrast', + '#src/mesa/swrast_setup', + '#src/egl/main', + '#src/egl/drivers/dri', +]) + +drienv.ParseConfig('pkg-config --cflags --libs libdrm') + +COMMON_GALLIUM_SOURCES = [ + '#src/mesa/drivers/dri/common/utils.c', + '#src/mesa/drivers/dri/common/vblank.c', + '#src/mesa/drivers/dri/common/dri_util.c', + '#src/mesa/drivers/dri/common/xmlconfig.c', +] + +COMMON_BM_SOURCES = [ + '#src/mesa/drivers/dri/common/dri_bufmgr.c', + '#src/mesa/drivers/dri/common/dri_drmpool.c', +] + +Export([ + 'drienv', + 'COMMON_GALLIUM_SOURCES', + 'COMMON_BM_SOURCES', +]) + +# TODO: Installation +#install: $(LIBNAME) +# $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) +# $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + +SConscript([ + 'intel/SConscript', +]) diff --git a/src/gallium/winsys/dri/intel/SConscript b/src/gallium/winsys/dri/intel/SConscript index a7cc10450e3..525ba580e8e 100644 --- a/src/gallium/winsys/dri/intel/SConscript +++ b/src/gallium/winsys/dri/intel/SConscript @@ -9,11 +9,6 @@ env.Append(CPPPATH = [ #MINIGLX_SOURCES = server/intel_dri.c -pipe_drivers = [ - softpipe, - i915simple -] - DRIVER_SOURCES = [ 'intel_winsys_pipe.c', 'intel_winsys_softpipe.c', @@ -31,11 +26,14 @@ sources = \ COMMON_BM_SOURCES + \ DRIVER_SOURCES -# DRIVER_DEFINES = -I../intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ -# && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") +drivers = [ + softpipe, + i915simple +] +# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions env.SharedLibrary( target ='i915tex_dri.so', source = sources, - LIBS = pipe_drivers + env['LIBS'], + LIBS = mesa + drivers + auxiliaries + env['LIBS'], ) \ No newline at end of file diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript new file mode 100644 index 00000000000..f8aa5ef945d --- /dev/null +++ b/src/gallium/winsys/xlib/SConscript @@ -0,0 +1,28 @@ +####################################################################### +# SConscript for xlib winsys + +Import('*') + + +sources = [ + 'glxapi.c', + 'fakeglx.c', + 'xfonts.c', + 'xm_api.c', + 'xm_winsys.c', + 'xm_winsys_aub.c', + 'brw_aub.c', +] + +drivers = [ + softpipe, + i915simple, + i965simple, +] + +# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions +env.SharedLibrary( + target ='GL', + source = sources, + LIBS = glapi + mesa + drivers + auxiliaries + env['LIBS'], +) diff --git a/src/mesa/SConscript b/src/mesa/SConscript index faf8c84872c..a828133580a 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -1,7 +1,5 @@ ####################################################################### -# SConscript for mesa -# -# TODO: Split this into per-module SConscripts +# SConscript for Mesa Import('*') @@ -116,53 +114,6 @@ VF_SOURCES = [ 'vf/vf_sse.c', ] -DRAW_SOURCES = [ - 'pipe/draw/draw_clip.c', - 'pipe/draw/draw_context.c', - 'pipe/draw/draw_cull.c', - 'pipe/draw/draw_debug.c', - 'pipe/draw/draw_flatshade.c', - 'pipe/draw/draw_offset.c', - 'pipe/draw/draw_prim.c', - 'pipe/draw/draw_stipple.c', - 'pipe/draw/draw_twoside.c', - 'pipe/draw/draw_unfilled.c', - 'pipe/draw/draw_validate.c', - 'pipe/draw/draw_vbuf.c', - 'pipe/draw/draw_vertex.c', - 'pipe/draw/draw_vertex_cache.c', - 'pipe/draw/draw_vertex_fetch.c', - 'pipe/draw/draw_vertex_shader.c', - 'pipe/draw/draw_vertex_shader_llvm.c', - 'pipe/draw/draw_vf.c', - 'pipe/draw/draw_vf_generic.c', - 'pipe/draw/draw_vf_sse.c', - 'pipe/draw/draw_wide_prims.c', -] - -TGSIEXEC_SOURCES = [ - 'pipe/tgsi/exec/tgsi_exec.c', - 'pipe/tgsi/exec/tgsi_sse2.c', -] - -TGSIUTIL_SOURCES = [ - 'pipe/tgsi/util/tgsi_build.c', - 'pipe/tgsi/util/tgsi_dump.c', - 'pipe/tgsi/util/tgsi_parse.c', - 'pipe/tgsi/util/tgsi_util.c', -] - -STATECACHE_SOURCES = [ - 'pipe/cso_cache/cso_hash.c', - 'pipe/cso_cache/cso_cache.c', -] - -PIPEUTIL_SOURCES = [ - 'pipe/util/p_debug.c', - 'pipe/util/p_tile.c', - 'pipe/util/p_util.c', -] - STATETRACKER_SOURCES = [ 'state_tracker/st_atom.c', 'state_tracker/st_atom_blend.c', @@ -311,126 +262,25 @@ else: ASM_SOURCES = [] API_SOURCES = [] - -####################################################################### -# Driver sources - - -X11_DRIVER_SOURCES = [ - 'pipe/xlib/glxapi.c', - 'pipe/xlib/fakeglx.c', - 'pipe/xlib/xfonts.c', - 'pipe/xlib/xm_api.c', - 'pipe/xlib/xm_winsys.c', - 'pipe/xlib/xm_winsys_aub.c', - 'pipe/xlib/brw_aub.c', -] - -OSMESA_DRIVER_SOURCES = [ - 'drivers/osmesa/osmesa.c', -] - -GLIDE_DRIVER_SOURCES = [ - 'drivers/glide/fxapi.c', - 'drivers/glide/fxdd.c', - 'drivers/glide/fxddspan.c', - 'drivers/glide/fxddtex.c', - 'drivers/glide/fxsetup.c', - 'drivers/glide/fxtexman.c', - 'drivers/glide/fxtris.c', - 'drivers/glide/fxvb.c', - 'drivers/glide/fxglidew.c', - 'drivers/glide/fxg.c', -] - -SVGA_DRIVER_SOURCES = [ - 'drivers/svga/svgamesa.c', - 'drivers/svga/svgamesa8.c', - 'drivers/svga/svgamesa15.c', - 'drivers/svga/svgamesa16.c', - 'drivers/svga/svgamesa24.c', - 'drivers/svga/svgamesa32.c', -] - -FBDEV_DRIVER_SOURCES = [ - 'drivers/fbdev/glfbdev.c', -] - - -### All the core C sources - SOLO_SOURCES = \ MAIN_SOURCES + \ MATH_SOURCES + \ VBO_SOURCES + \ VF_SOURCES + \ - DRAW_SOURCES + \ - TGSIEXEC_SOURCES + \ - TGSIUTIL_SOURCES + \ - PIPEUTIL_SOURCES + \ - STATECACHE_SOURCES + \ STATETRACKER_SOURCES + \ SHADER_SOURCES + \ ASM_SOURCES + \ SLANG_SOURCES -CORE_SOURCES = \ - GLAPI_SOURCES + API_SOURCES + \ - SOLO_SOURCES - -ALL_SOURCES = \ - GLAPI_SOURCES + API_SOURCES + \ - SOLO_SOURCES + \ - ASM_SOURCES + \ - X11_DRIVER_SOURCES + \ - FBDEV_DRIVER_SOURCES + \ - OSMESA_DRIVER_SOURCES - - -###################################################################### -# Gallium sources - -SConscript([ - 'pipe/SConscript', -]) - - -###################################################################### -# libGL +mesa = env.ConvenienceLibrary( + target = 'mesa', + source = SOLO_SOURCES, +) +Export('mesa') if not dri: - STAND_ALONE_DRIVER_SOURCES = \ - CORE_SOURCES + \ - X11_DRIVER_SOURCES - - Import( - 'softpipe', - 'i915simple', - 'i965simple' + glapi = env.ConvenienceLibrary( + target = 'glapi', + source = GLAPI_SOURCES + API_SOURCES, ) - - pipe_drivers = [ - softpipe, - i965simple - ] - - env.SharedLibrary( - target ='GL', - source = STAND_ALONE_DRIVER_SOURCES, - LIBS = [softpipe, i965simple] + env['LIBS'], - ) - - -###################################################################### -# Driver sources - -if dri: - mesa = env.ConvenienceLibrary( - target = 'mesa', - source = SOLO_SOURCES, - ) - env.Prepend(LIBS = [mesa]) - - SConscript([ - 'drivers/dri/SConscript', - ]) + Export('glapi') diff --git a/src/mesa/drivers/dri/SConscript b/src/mesa/drivers/dri/SConscript deleted file mode 100644 index d32bd086696..00000000000 --- a/src/mesa/drivers/dri/SConscript +++ /dev/null @@ -1,48 +0,0 @@ -Import('*') - -drienv = env.Clone() - -drienv.Replace(CPPPATH = [ - '#src/mesa/drivers/dri/common', - '#include', - '#include/GL/internal', - '#src/mesa', - '#src/mesa/main', - '#src/mesa/glapi', - '#src/mesa/math', - '#src/mesa/transform', - '#src/mesa/shader', - '#src/mesa/swrast', - '#src/mesa/swrast_setup', - '#src/egl/main', - '#src/egl/drivers/dri', -]) - -drienv.ParseConfig('pkg-config --cflags --libs libdrm') - -COMMON_GALLIUM_SOURCES = [ - '../common/utils.c', - '../common/vblank.c', - '../common/dri_util.c', - '../common/xmlconfig.c', -] - -COMMON_BM_SOURCES = [ - '../common/dri_bufmgr.c', - '../common/dri_drmpool.c', -] - -Export([ - 'drienv', - 'COMMON_GALLIUM_SOURCES', - 'COMMON_BM_SOURCES', -]) - -# TODO: Installation -#install: $(LIBNAME) -# $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) -# $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) - -SConscript([ - 'intel_winsys/SConscript', -]) -- cgit v1.2.3 From 687a8b96ef13658bbe779d0011ce1144844f1972 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 18 Feb 2008 20:02:42 +0900 Subject: Standardize on using the pipe/ include prefix. --- SConstruct | 1 - src/gallium/Makefile.template | 1 - src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 10 +++++----- src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c | 4 ++-- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 8 ++++---- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 10 +++++----- src/gallium/drivers/i915simple/i915_state_immediate.c | 2 +- src/gallium/drivers/i915simple/i915_state_inlines.h | 4 ++-- src/gallium/drivers/softpipe/sp_state_surface.c | 2 +- 9 files changed, 20 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/SConstruct b/SConstruct index 1126aff21b5..4fb51f2e6df 100644 --- a/SConstruct +++ b/SConstruct @@ -108,7 +108,6 @@ env.Append(CPPPATH = [ '#/include', '#/src/mesa', '#/src/mesa/main', - '#/src/gallium/include/pipe', '#/src/gallium/include', '#/src/gallium/auxiliary', '#/src/gallium/drivers', diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 6ad58c205c2..6698212e77e 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -16,7 +16,6 @@ OBJECTS = $(C_SOURCES:.c=.o) \ INCLUDES = \ -I. \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/include/pipe \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index f4fc3f6d714..bc85c4b19fd 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -36,11 +36,11 @@ #include "linked_list.h" -#include "p_compiler.h" -#include "p_debug.h" -#include "p_winsys.h" -#include "p_thread.h" -#include "p_util.h" +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index c535d3276c9..bffca5b2449 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -34,8 +34,8 @@ */ -#include "p_debug.h" -#include "p_util.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 8b1b51c0e28..969aab51b57 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -36,10 +36,10 @@ #include "linked_list.h" -#include "p_defines.h" -#include "p_debug.h" -#include "p_thread.h" -#include "p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 04477a865a5..beb145b7cbe 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -37,11 +37,11 @@ #include "linked_list.h" -#include "p_compiler.h" -#include "p_debug.h" -#include "p_thread.h" -#include "p_defines.h" -#include "p_util.h" +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c index 07031fc6c5b..dfbbcab624a 100644 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -33,7 +33,7 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" -#include "p_util.h" +#include "pipe/p_util.h" /* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915simple/i915_state_inlines.h index 0934ac79a41..378de8f9c48 100644 --- a/src/gallium/drivers/i915simple/i915_state_inlines.h +++ b/src/gallium/drivers/i915simple/i915_state_inlines.h @@ -28,8 +28,8 @@ #ifndef I915_STATE_INLINES_H #define I915_STATE_INLINES_H -#include "p_compiler.h" -#include "p_defines.h" +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index e2c6893e9f5..124b18b7081 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -27,7 +27,7 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ -#include "p_inlines.h" +#include "pipe/p_inlines.h" #include "sp_context.h" #include "sp_state.h" -- cgit v1.2.3 From aceeb80d4f706980aaf71b8e098d4c6718d8ac90 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 18 Feb 2008 16:19:05 -0700 Subject: gallium: antialiased line drawing New draw/prim stage: draw_aaline. When installed, lines are replaced by textured quads to do antialiasing. The current user-defined fragment shader is modified to do a texture fetch and modulate fragment alpha. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_aaline.c | 832 ++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_context.c | 22 + src/gallium/auxiliary/draw/draw_context.h | 21 +- src/gallium/auxiliary/draw/draw_prim.c | 7 + src/gallium/auxiliary/draw/draw_private.h | 12 +- src/gallium/auxiliary/draw/draw_validate.c | 8 +- src/gallium/auxiliary/tgsi/Makefile | 1 + src/gallium/drivers/softpipe/sp_context.c | 3 + src/gallium/drivers/softpipe/sp_state_derived.c | 17 +- 10 files changed, 914 insertions(+), 10 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_aaline.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index c56b63d85b8..c8000cbe9cb 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = draw DRIVER_SOURCES = \ + draw_aaline.c \ draw_clip.c \ draw_vs_exec.c \ draw_vs_sse.c \ diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c new file mode 100644 index 00000000000..f1fee4f8ba0 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -0,0 +1,832 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA line stage: AA lines are converted to texture mapped triangles. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + +/** + * Max texture level for the alpha texture used for antialiasing + */ +#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */ + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aaline_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *aaline_fs; + void *aapoint_fs; /* not yet */ + void *sprite_fs; /* not yet */ +}; + + +/** + * Subclass of draw_stage + */ +struct aaline_stage +{ + struct draw_stage stage; + + float half_line_width; + + /** For AA lines, this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + + void *sampler_cso; + struct pipe_texture *texture; + uint sampler_unit; + + + /* + * Currently bound state + */ + struct aaline_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + + void (*driver_set_sampler_texture)(struct pipe_context *, + unsigned sampler, + struct pipe_texture *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + int maxSampler; /**< max sampler index found */ + int maxInput, maxGeneric; /**< max input index found */ + int colorTemp, texTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + if (decl->u.DeclarationRange.Last > aactx->maxSampler) + aactx->maxSampler = decl->u.DeclarationRange.Last + 1; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if (decl->u.DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->u.DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + uint i; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else if (aactx->texTemp < 0) + aactx->texTemp = i; + else + break; + } + } + assert(aactx->colorTemp >= 0); + assert(aactx->texTemp >= 0); + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Declaration.Interpolate = 1; + /* XXX this could be linear... */ + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->maxInput + 1; + ctx->emit_declaration(ctx, &decl); + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->maxSampler + 1; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->texTemp; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END && + aactx->colorOutput != -1) { + struct tgsi_full_instruction newInst; + + /* TEX */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->maxSampler + 1; + + ctx->emit_instruction(ctx, &newInst); + + /* MOV rgb */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL alpha */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp; + ctx->emit_instruction(ctx, &newInst); + + /* END */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_END; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + + ctx->emit_instruction(ctx, inst); + } +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. + */ +static void +generate_aaline_fs(struct aaline_stage *aaline) +{ + const struct pipe_shader_state *orig_fs = &aaline->fs->state; + struct draw_context *draw = aaline->stage.draw; + struct pipe_shader_state aaline_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aaline_fs = *orig_fs; /* copy to init */ + aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxSampler = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aaline_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(aaline_fs.tokens, 0); +#endif + + aaline_fs.input_semantic_name[aaline_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; + aaline_fs.input_semantic_index[aaline_fs.num_inputs] = transform.maxGeneric + 1; + aaline_fs.num_inputs++; + + aaline->fs->aaline_fs + = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); + + /* advertise the extra post-transform vertex attributes which will have + * the texcoords. + */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1; +} + + +/** + * Create the texture map we'll use for antialiasing the lines. + */ +static void +aaline_create_texture(struct aaline_stage *aaline) +{ + struct pipe_context *pipe = aaline->pipe; + struct pipe_texture texTemp; + uint level; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = MAX_TEXTURE_LEVEL; + texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + aaline->texture = pipe->texture_create(pipe, &texTemp); + + /* Fill in mipmap images. + * Basically each level is solid opaque, except for the outermost + * texels which are zero. Special case the 1x1 and 2x2 levels. + */ + for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { + struct pipe_surface *surface; + const uint size = aaline->texture->width[level]; + ubyte *data; + uint i, j; + + assert(aaline->texture->width[level] == aaline->texture->height[level]); + + surface = pipe->get_tex_surface(pipe, aaline->texture, 0, level, 0); + data = pipe_surface_map(surface); + + for (i = 0; i < size; i++) { + for (j = 0; j < size; j++) { + uint d; + if (size == 1) { + d = 255; + } + else if (size == 2) { + d = 200; /* tuneable */ + } + else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) { + d = 0; + } + else { + d = 255; + } + data[i * surface->pitch + j] = d; + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + } +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +aaline_create_sampler(struct aaline_stage *aaline) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = aaline->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = MAX_TEXTURE_LEVEL; + + aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_aaline_fragment_shader(struct aaline_stage *aaline) +{ + if (!aaline->fs->aaline_fs) { + generate_aaline_fs(aaline); + } + aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); +} + + + +static INLINE struct aaline_stage * +aaline_stage( struct draw_stage *stage ) +{ + return (struct aaline_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw a wide line by drawing a quad, using geometry which will + * fullfill GL's antialiased line requirements. + */ +static void +aaline_line(struct draw_stage *stage, struct prim_header *header) +{ + const struct aaline_stage *aaline = aaline_stage(stage); + const float half_width = aaline->half_line_width; + struct prim_header tri; + struct vertex_header *v[8]; + uint texPos = aaline->tex_slot; + float *pos, *tex; + float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; + float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; + float a = atan2(dy, dx); + float c_a = cos(a), s_a = sin(a); + uint i; + + /* XXX the ends of lines aren't quite perfect yet, but probably passable */ + dx = 0.5 * half_width; + dy = half_width; + + /* allocate/dup new verts */ + for (i = 0; i < 8; i++) { + v[i] = dup_vert(stage, header->v[i/4], i); + } + + /* + * Quad strip for line from v0 to v1 (*=endpoints): + * + * 1 3 5 7 + * +---+---------------------+---+ + * | | + * | *v0 v1* | + * | | + * +---+---------------------+---+ + * 0 2 4 6 + */ + + /* new verts */ + pos = v[0]->data[0]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[1]->data[0]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[2]->data[0]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[3]->data[0]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + pos = v[4]->data[0]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[5]->data[0]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[6]->data[0]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[7]->data[0]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, 0, 0, 0, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 0, 1, 0, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[4]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[5]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[6]->data[texPos]; + ASSIGN_4V(tex, 1, 0, 0, 1); + + tex = v[7]->data[texPos]; + ASSIGN_4V(tex, 1, 1, 0, 1); + + /* emit 6 tris for the quad strip */ + tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[4]; tri.v[1] = v[3]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[5]; tri.v[1] = v[3]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[6]; tri.v[1] = v[5]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[7]; tri.v[1] = v[5]; tri.v[2] = v[6]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aaline_first_line(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aaline_stage *aaline = aaline_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = aaline->pipe; + + assert(draw->rasterizer->line_smooth); + + if (draw->rasterizer->line_width <= 3.0) + aaline->half_line_width = 1.5f; + else + aaline->half_line_width = 0.5f * draw->rasterizer->line_width; + + aaline->tex_slot = draw->num_vs_outputs; + assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ + draw->extra_vp_outputs.slot = aaline->tex_slot; + + /* + * Bind our fragprog, sampler and texture + */ + bind_aaline_fragment_shader(aaline); + + aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, aaline->sampler_cso); + aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, aaline->texture); + + /* now really draw first line */ + stage->line = aaline_line; + stage->line(stage, header); +} + + +static void +aaline_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aaline_stage *aaline = aaline_stage(stage); + struct pipe_context *pipe = aaline->pipe; + + stage->line = aaline_first_line; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, + aaline->state.sampler[aaline->sampler_unit]); + aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, + aaline->state.texture[aaline->sampler_unit]); + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aaline_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aaline_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct aaline_stage * +draw_aaline_stage(struct draw_context *draw) +{ + struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage); + + draw_alloc_temp_verts( &aaline->stage, 8 ); + + aaline->stage.draw = draw; + aaline->stage.next = NULL; + aaline->stage.point = passthrough_point; + aaline->stage.line = aaline_first_line; + aaline->stage.tri = passthrough_tri; + aaline->stage.flush = aaline_flush; + aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; + aaline->stage.destroy = aaline_destroy; + + return aaline; +} + + +/* + * XXX temporary? solution to mapping a pipe_context to a aaline_stage. + */ + +#define MAX_CONTEXTS 10 + +static struct pipe_context *Pipe[MAX_CONTEXTS]; +static struct aaline_stage *Stage[MAX_CONTEXTS]; +static uint NumContexts; + +static void +add_aa_pipe_context(struct pipe_context *pipe, struct aaline_stage *aa) +{ + assert(NumContexts < MAX_CONTEXTS); + Pipe[NumContexts] = pipe; + Stage[NumContexts] = aa; + NumContexts++; +} + +static struct aaline_stage * +aaline_stage_from_pipe(struct pipe_context *pipe) +{ + uint i; + for (i = 0; i < NumContexts; i++) { + if (Pipe[i] == pipe) + return Stage[i]; + } + assert(0); + return NULL; +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aaline_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); + } + + return aafs; +} + + +static void +aaline_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* save current */ + aaline->fs = aafs; + /* pass-through */ + aaline->driver_bind_fs_state(aaline->pipe, aafs->driver_fs); +} + + +static void +aaline_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* pass-through */ + aaline->driver_delete_fs_state(aaline->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +aaline_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + /* save current */ + aaline->state.sampler[unit] = sampler; + /* pass-through */ + aaline->driver_bind_sampler_state(aaline->pipe, unit, sampler); +} + + +static void +aaline_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, struct pipe_texture *texture) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + /* save current */ + aaline->state.texture[sampler] = texture; + /* pass-through */ + aaline->driver_set_sampler_texture(aaline->pipe, sampler, texture); +} + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) +{ + struct aaline_stage *aaline; + + /* + * Create / install AA line drawing / prim stage + */ + aaline = draw_aaline_stage( draw ); + assert(aaline); + draw->pipeline.aaline = &aaline->stage; + + aaline->pipe = pipe; + + /* create special texture, sampler state */ + aaline_create_texture(aaline); + aaline_create_sampler(aaline); + + /* save original driver functions */ + aaline->driver_create_fs_state = pipe->create_fs_state; + aaline->driver_bind_fs_state = pipe->bind_fs_state; + aaline->driver_delete_fs_state = pipe->delete_fs_state; + + aaline->driver_bind_sampler_state = pipe->bind_sampler_state; + aaline->driver_set_sampler_texture = pipe->set_sampler_texture; + + /* override the driver's functions */ + pipe->create_fs_state = aaline_create_fs_state; + pipe->bind_fs_state = aaline_bind_fs_state; + pipe->delete_fs_state = aaline_delete_fs_state; + + pipe->bind_sampler_state = aaline_bind_sampler_state; + pipe->set_sampler_texture = aaline_set_sampler_texture; + + add_aa_pipe_context(pipe, aaline); +} diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 4be38303169..a7f3b4aecb2 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -103,6 +103,8 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); draw->pipeline.cull->destroy( draw->pipeline.cull ); draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (draw->pipeline.aaline) + draw->pipeline.aaline->destroy( draw->pipeline.aaline ); if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); @@ -239,6 +241,26 @@ draw_convert_wide_lines(struct draw_context *draw, boolean enable) } +/** + * The draw module may sometimes generate vertices with extra attributes + * (such as texcoords for AA lines). The driver can call this function + * to find those attributes. + */ +int +draw_find_vs_output(struct draw_context *draw, + uint semantic_name, uint semantic_index) +{ + /* XXX there may be more than one extra vertex attrib. + * For example, simulated gl_FragCoord and gl_PointCoord. + */ + if (draw->extra_vp_outputs.semantic_name == semantic_name && + draw->extra_vp_outputs.semantic_index == semantic_index) { + return draw->extra_vp_outputs.slot; + } + return 0; +} + + /** * Allocate space for temporary post-transform vertices, such as for clipping. */ diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index ddeb184497a..82035aa8ada 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -41,6 +41,7 @@ #include "pipe/p_state.h" +struct pipe_context; struct vertex_buffer; struct vertex_info; struct draw_context; @@ -93,6 +94,20 @@ void draw_convert_wide_points(struct draw_context *draw, boolean enable); void draw_convert_wide_lines(struct draw_context *draw, boolean enable); +boolean draw_use_sse(struct draw_context *draw); + +void +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); + + +int +draw_find_vs_output(struct draw_context *draw, + uint semantic_name, uint semantic_index); + + +/* + * Vertex shader functions + */ struct draw_vertex_shader * draw_create_vertex_shader(struct draw_context *draw, @@ -102,7 +117,11 @@ void draw_bind_vertex_shader(struct draw_context *draw, void draw_delete_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs); -boolean draw_use_sse(struct draw_context *draw); + + +/* + * Vertex data functions + */ void draw_set_vertex_buffer(struct draw_context *draw, unsigned attr, diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 51e2242719a..dd9a8488637 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -121,11 +121,15 @@ static void draw_prim_queue_flush( struct draw_context *draw ) void draw_do_flush( struct draw_context *draw, unsigned flags ) { + static boolean flushing = FALSE; + if (0) debug_printf("Flushing with %d verts, %d prims\n", draw->vs.queue_nr, draw->pq.queue_nr ); + if (!flushing) { + flushing = TRUE; if (flags >= DRAW_FLUSH_SHADER_QUEUE) { if (draw->vs.queue_nr) @@ -146,6 +150,9 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) } } } + + flushing = FALSE; + } } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index bc11259cb21..dd75f9aefc3 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -48,6 +48,7 @@ #include "tgsi/exec/tgsi_exec.h" +struct pipe_context; struct gallivm_prog; struct gallivm_cpu_engine; @@ -179,6 +180,7 @@ struct draw_context struct draw_stage *offset; struct draw_stage *unfilled; struct draw_stage *stipple; + struct draw_stage *aaline; struct draw_stage *wide; struct draw_stage *rasterize; } pipeline; @@ -212,9 +214,17 @@ struct draw_context unsigned nr_planes; boolean convert_wide_points; /**< convert wide points to tris? */ - boolean convert_wide_lines; /**< convert side lines to tris? */ + boolean convert_wide_lines; /**< convert wide lines to tris? */ boolean use_sse; + /* If a prim stage introduces new vertex attributes, they'll be stored here + */ + struct { + uint semantic_name; + uint semantic_index; + int slot; + } extra_vp_outputs; + unsigned reduced_prim; /** TGSI program interpreter runtime state */ diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 4375ebabbc4..97e40e372b8 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -58,7 +58,13 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) * shorter pipelines for lines & points. */ - if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines) || + if (draw->rasterizer->line_smooth && draw->pipeline.aaline) { + draw->pipeline.aaline->next = next; + next = draw->pipeline.aaline; + } + + if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines + && !draw->rasterizer->line_smooth) || (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) || draw->rasterizer->point_sprite) { draw->pipeline.wide->next = next; diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index c10ab396d8d..8bb62b2a0a5 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -10,6 +10,7 @@ DRIVER_SOURCES = \ util/tgsi_build.c \ util/tgsi_dump.c \ util/tgsi_parse.c \ + util/tgsi_transform.c \ util/tgsi_util.c C_SOURCES = \ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 5e98f190bbf..254c6adca44 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -327,6 +327,9 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, draw_set_rasterize_stage(softpipe->draw, softpipe->setup); } + /* enable aaline stage */ + draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); + sp_init_surface_functions(softpipe); return &softpipe->pipe; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 9d8fd8b750f..f9f2c5eaa8f 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -44,7 +44,8 @@ * condition that users shouldn't hit anyway. */ static int -find_vs_output(const struct pipe_shader_state *vs, +find_vs_output(struct softpipe_context *sp, + const struct pipe_shader_state *vs, uint semantic_name, uint semantic_index) { @@ -54,7 +55,9 @@ find_vs_output(const struct pipe_shader_state *vs, vs->output_semantic_index[i] == semantic_index) return i; } - return 0; + + /* See if the draw module is introducing a new attribute... */ + return draw_find_vs_output(sp->draw, semantic_name, semantic_index); } @@ -111,24 +114,24 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) int src; switch (fs->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); break; case TGSI_SEMANTIC_COLOR: - src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, + src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_COLOR, fs->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); + src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, + src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_GENERIC, fs->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; @@ -138,7 +141,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) } } - softpipe->psize_slot = find_vs_output(vs, TGSI_SEMANTIC_PSIZE, 0); + softpipe->psize_slot = find_vs_output(softpipe, vs, TGSI_SEMANTIC_PSIZE, 0); if (softpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, softpipe->psize_slot); -- cgit v1.2.3 From ae9931dad24703d99530b2761c759cef1cbc0fb5 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 18 Feb 2008 18:36:20 -0700 Subject: gallium: call draw_flush() for scissor/stipple state changes --- src/gallium/drivers/softpipe/sp_state_clip.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c index c797c0dd3b1..d8ea9799577 100644 --- a/src/gallium/drivers/softpipe/sp_state_clip.c +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -68,6 +68,8 @@ void softpipe_set_scissor_state( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + memcpy( &softpipe->scissor, scissor, sizeof(*scissor) ); softpipe->dirty |= SP_NEW_SCISSOR; } @@ -78,6 +80,8 @@ void softpipe_set_polygon_stipple( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + memcpy( &softpipe->poly_stipple, stipple, sizeof(*stipple) ); softpipe->dirty |= SP_NEW_STIPPLE; } -- cgit v1.2.3 From 6c7f663cb96cac4873129f835614181405bd3f6e Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 18 Feb 2008 18:39:55 -0700 Subject: gallium: move draw_set_viewport_state() call, plus code clean-up, remove obsolete comments --- src/gallium/drivers/softpipe/sp_state_clip.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c index d8ea9799577..4946c776e3e 100644 --- a/src/gallium/drivers/softpipe/sp_state_clip.c +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -42,24 +42,16 @@ void softpipe_set_clip_state( struct pipe_context *pipe, } - -/* Called when driver state tracker notices changes to the viewport - * matrix: - */ void softpipe_set_viewport_state( struct pipe_context *pipe, const struct pipe_viewport_state *viewport ) { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->viewport = *viewport; /* struct copy */ - softpipe->dirty |= SP_NEW_VIEWPORT; - /* pass the viewport info to the draw module */ draw_set_viewport_state(softpipe->draw, viewport); - /* Using tnl/ and vf/ modules is temporary while getting started. - * Full pipe will have vertex shader, vertex fetch of its own. - */ + softpipe->viewport = *viewport; /* struct copy */ + softpipe->dirty |= SP_NEW_VIEWPORT; } @@ -70,7 +62,7 @@ void softpipe_set_scissor_state( struct pipe_context *pipe, draw_flush(softpipe->draw); - memcpy( &softpipe->scissor, scissor, sizeof(*scissor) ); + softpipe->scissor = *scissor; /* struct copy */ softpipe->dirty |= SP_NEW_SCISSOR; } @@ -82,6 +74,6 @@ void softpipe_set_polygon_stipple( struct pipe_context *pipe, draw_flush(softpipe->draw); - memcpy( &softpipe->poly_stipple, stipple, sizeof(*stipple) ); + softpipe->poly_stipple = *stipple; /* struct copy */ softpipe->dirty |= SP_NEW_STIPPLE; } -- cgit v1.2.3 From e279b1c57ac8d17703d80f1b644fd1d4f115101b Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 18 Feb 2008 14:35:19 +0000 Subject: More llvm -> gallivm. Forgot this one on the last commit. --- src/gallium/drivers/cell/ppu/cell_state_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_fs.c index b2ed699a5be..f3958fa429e 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_fs.c +++ b/src/gallium/drivers/cell/ppu/cell_state_fs.c @@ -32,7 +32,7 @@ #include "draw/draw_context.h" #if 0 #include "pipe/p_shader_tokens.h" -#include "llvm/gallivm.h" +#include "gallivm/gallivm.h" #include "tgsi/util/tgsi_dump.h" #include "tgsi/exec/tgsi_sse2.h" #endif -- cgit v1.2.3 From f430d95a36d55141cd9ef911aab70364ce4a4108 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 19 Feb 2008 12:52:28 +0900 Subject: Use gallium's rtasm module. --- src/gallium/auxiliary/draw/draw_private.h | 2 +- src/gallium/auxiliary/draw/draw_vf.c | 10 +++------- src/gallium/auxiliary/draw/draw_vf_sse.c | 2 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/mesa/state_tracker/st_program.h | 1 - 7 files changed, 8 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index dd75f9aefc3..6c7e860861e 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -44,7 +44,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_exec.h" diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index dc3a5ecd219..901ff20a7e7 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -30,6 +30,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_util.h" +#include "rtasm/rtasm_execmem.h" #include "draw_vf.h" @@ -37,11 +38,6 @@ #define DRAW_VF_DBG 0 -/* TODO: remove this */ -extern void -_mesa_exec_free( void *addr ); - - static boolean match_fastpath( struct draw_vertex_fetch *vf, const struct draw_vf_fastpath *fp) { @@ -414,12 +410,12 @@ void draw_vf_destroy( struct draw_vertex_fetch *vf ) FREE(fp->attr); /* KW: At the moment, fp->func is constrained to be allocated by - * _mesa_exec_alloc(), as the hardwired fastpaths in + * rtasm_exec_alloc(), as the hardwired fastpaths in * t_vertex_generic.c are handled specially. It would be nice * to unify them, but this probably won't change until this * module gets another overhaul. */ - //_mesa_exec_free((void *) fp->func); + //rtasm_exec_free((void *) fp->func); FREE(fp); } diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c index 1ad2ae756dd..1e889deeea8 100644 --- a/src/gallium/auxiliary/draw/draw_vf_sse.c +++ b/src/gallium/auxiliary/draw/draw_vf_sse.c @@ -35,7 +35,7 @@ #if defined(USE_SSE_ASM) -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #include "x86/common_x86_asm.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 27bc66812c3..11ef0c503dc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -41,7 +41,7 @@ #include "draw_private.h" #include "draw_context.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_sse2.h" diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 62c6a69c63f..29a7f842ed8 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -32,7 +32,7 @@ #include "tgsi_exec.h" #include "tgsi_sse2.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #if defined(__i386__) || defined(__386__) diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index d90066e0257..b18772f4e6d 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -42,7 +42,7 @@ #if defined(__i386__) || defined(__386__) -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" /* Surely this should be defined somewhere in a tgsi header: */ diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index ea1dde4a7a3..25cf3e94a8f 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -36,7 +36,6 @@ #include "mtypes.h" #include "pipe/p_shader_tokens.h" -#include "x86/rtasm/x86sse.h" #define ST_MAX_SHADER_TOKENS 1024 -- cgit v1.2.3 From 90b2beb661f630966788a6e909dc759c99e38973 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 19 Feb 2008 13:27:13 +0900 Subject: Simplify makefile boilerplate code. Don't define ASM_SOURCES variable globally -- reserve that variable to be defined locally by makefiles, together with C_SOURCES and CPP_SOURCES. --- configs/beos | 4 ++-- configs/default | 2 +- configs/freebsd-dri | 2 +- configs/freebsd-dri-amd64 | 4 ++-- configs/freebsd-dri-x86 | 4 ++-- configs/linux-directfb | 4 ++-- configs/linux-dri | 2 +- configs/linux-dri-ppc | 2 +- configs/linux-dri-x86 | 4 ++-- configs/linux-dri-x86-64 | 4 ++-- configs/linux-dri-xcb | 2 +- configs/linux-icc | 4 ++-- configs/linux-icc-static | 4 ++-- configs/linux-indirect | 2 +- configs/linux-solo | 2 +- configs/linux-solo-x86 | 4 ++-- configs/linux-sparc | 4 ++-- configs/linux-x86 | 4 ++-- configs/linux-x86-64 | 4 ++-- configs/linux-x86-glide | 4 ++-- configs/sunos5-gcc | 4 ++-- src/gallium/auxiliary/cso_cache/Makefile | 7 +------ src/gallium/auxiliary/draw/Makefile | 7 +------ src/gallium/auxiliary/pipebuffer/Makefile | 8 +------- src/gallium/auxiliary/rtasm/Makefile | 8 +------- src/gallium/auxiliary/tgsi/Makefile | 8 +------- src/gallium/auxiliary/util/Makefile | 8 +------- src/gallium/drivers/failover/Makefile | 9 +-------- src/gallium/drivers/i915simple/Makefile | 9 +-------- src/gallium/drivers/i965simple/Makefile | 25 +++++++------------------ src/gallium/drivers/softpipe/Makefile | 9 +-------- src/gallium/winsys/dri/Makefile.template | 5 +++-- src/glx/x11/Makefile | 6 +++--- src/mesa/sources | 6 +++--- 34 files changed, 60 insertions(+), 125 deletions(-) (limited to 'src/gallium/drivers') diff --git a/configs/beos b/configs/beos index 2b74af739d0..c6e972789a5 100644 --- a/configs/beos +++ b/configs/beos @@ -26,8 +26,8 @@ ifeq ($(CPU), x86) -DUSE_3DNOW_ASM \ -DUSE_SSE_ASM - ASM_SOURCES = $(X86_SOURCES) - ASM_API = $(X86_API) + MESA_ASM_SOURCES = $(X86_SOURCES) + GLAPI_ASM_SOURCES = $(X86_API) CC = gcc CXX = g++ diff --git a/configs/default b/configs/default index c9be5ec3e33..48ddd29282b 100644 --- a/configs/default +++ b/configs/default @@ -51,7 +51,7 @@ OSMESA_LIB_NAME = lib$(OSMESA_LIB).so # Optional assembly language optimization files for libGL -ASM_SOURCES = +MESA_ASM_SOURCES = # GLw widget sources (Append "GLwMDrawA.c" here and add -lXm to GLW_LIB_DEPS in # order to build the Motif widget too) diff --git a/configs/freebsd-dri b/configs/freebsd-dri index 67d253b8695..6fc1abbc802 100644 --- a/configs/freebsd-dri +++ b/configs/freebsd-dri @@ -22,7 +22,7 @@ CFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) -Wmissing-prototypes -std=c99 - CXXFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(DEFINES) -Wall -ansi -pedantic $(ASM_FLAGS) $(X11_INCLUDES) -ASM_SOURCES = +MESA_ASM_SOURCES = # Library/program dependencies LIBDRM_CFLAGS = `pkg-config --cflags libdrm` diff --git a/configs/freebsd-dri-amd64 b/configs/freebsd-dri-amd64 index 39341b9701a..bb6c361398a 100644 --- a/configs/freebsd-dri-amd64 +++ b/configs/freebsd-dri-amd64 @@ -6,5 +6,5 @@ include $(TOP)/configs/freebsd-dri CONFIG_NAME = freebsd-dri-x86-64 ASM_FLAGS = -DUSE_X86_64_ASM -ASM_SOURCES = $(X86-64_SOURCES) -ASM_API = $(X86-64_API) +MESA_ASM_SOURCES = $(X86-64_SOURCES) +GLAPI_ASM_SOURCES = $(X86-64_API) diff --git a/configs/freebsd-dri-x86 b/configs/freebsd-dri-x86 index af0d27ff47d..9475437fc5b 100644 --- a/configs/freebsd-dri-x86 +++ b/configs/freebsd-dri-x86 @@ -9,5 +9,5 @@ CONFIG_NAME = freebsd-dri-x86 PIC_FLAGS = ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-directfb b/configs/linux-directfb index dff27f78503..2ed94fe2750 100644 --- a/configs/linux-directfb +++ b/configs/linux-directfb @@ -17,8 +17,8 @@ HAVE_X86 = $(shell uname -m | grep 'i[3-6]86' >/dev/null && echo yes) ifeq ($(HAVE_X86), yes) CFLAGS += -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM CXXFLAGS += -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM - ASM_SOURCES = $(X86_SOURCES) - ASM_API = $(X86_API) + MESA_ASM_SOURCES = $(X86_SOURCES) + GLAPI_ASM_SOURCES = $(X86_API) endif # Directories diff --git a/configs/linux-dri b/configs/linux-dri index c45b600013c..67e60cbd4c5 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -33,7 +33,7 @@ CFLAGS = -Wall -Wmissing-prototypes -std=c99 -ffast-math \ CXXFLAGS = -Wall $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) -ASM_SOURCES = +MESA_ASM_SOURCES = # Library/program dependencies EXTRA_LIB_PATH=-L/usr/X11R6/lib diff --git a/configs/linux-dri-ppc b/configs/linux-dri-ppc index fb87688065f..a3a3ca83cb3 100644 --- a/configs/linux-dri-ppc +++ b/configs/linux-dri-ppc @@ -9,7 +9,7 @@ OPT_FLAGS = -Os -mcpu=603 PIC_FLAGS = -fPIC ASM_FLAGS = -DUSE_PPC_ASM -DUSE_VMX_ASM -ASM_SOURCES = $(PPC_SOURCES) +MESA_ASM_SOURCES = $(PPC_SOURCES) # Build only the drivers for cards that exist on PowerPC. At some point MGA # will be added, but not yet. diff --git a/configs/linux-dri-x86 b/configs/linux-dri-x86 index b196004e58f..ec8242dd68f 100644 --- a/configs/linux-dri-x86 +++ b/configs/linux-dri-x86 @@ -12,6 +12,6 @@ PIC_FLAGS = ARCH_FLAGS = -m32 ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-dri-x86-64 b/configs/linux-dri-x86-64 index 821ab3e3366..bb56de375a1 100644 --- a/configs/linux-dri-x86-64 +++ b/configs/linux-dri-x86-64 @@ -8,8 +8,8 @@ CONFIG_NAME = linux-dri-x86-64 ARCH_FLAGS = -m64 ASM_FLAGS = -DUSE_X86_64_ASM -ASM_SOURCES = $(X86-64_SOURCES) -ASM_API = $(X86-64_API) +MESA_ASM_SOURCES = $(X86-64_SOURCES) +GLAPI_ASM_SOURCES = $(X86-64_API) LIB_DIR = lib64 diff --git a/configs/linux-dri-xcb b/configs/linux-dri-xcb index ea4bdf1864c..fbf9b9b2687 100644 --- a/configs/linux-dri-xcb +++ b/configs/linux-dri-xcb @@ -33,7 +33,7 @@ CFLAGS = -Wall -Wmissing-prototypes $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) \ CXXFLAGS = -Wall $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) -ASM_SOURCES = +MESA_ASM_SOURCES = # Library/program dependencies EXTRA_LIB_PATH=$(shell pkg-config --libs-only-L x11) diff --git a/configs/linux-icc b/configs/linux-icc index 978a45af70b..d90a1dab3d4 100644 --- a/configs/linux-icc +++ b/configs/linux-icc @@ -16,7 +16,7 @@ GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -lpthread GLUT_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -lm -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-icc-static b/configs/linux-icc-static index 0c957568c22..384db3bfe48 100644 --- a/configs/linux-icc-static +++ b/configs/linux-icc-static @@ -23,5 +23,5 @@ GL_LIB_DEPS = GLUT_LIB_DEPS = APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm -lpthread -lcxa -lunwind -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-indirect b/configs/linux-indirect index bd33345ed70..0c4805ea87c 100644 --- a/configs/linux-indirect +++ b/configs/linux-indirect @@ -34,7 +34,7 @@ CFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) \ CXXFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) -ASM_SOURCES = +MESA_ASM_SOURCES = # Library/program dependencies EXTRA_LIB_PATH=-L/usr/X11R6/lib diff --git a/configs/linux-solo b/configs/linux-solo index d49b9722282..3145e127757 100644 --- a/configs/linux-solo +++ b/configs/linux-solo @@ -33,7 +33,7 @@ CFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) \ CXXFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) -ASM_SOURCES = +MESA_ASM_SOURCES = # Library/program dependencies DRI_LIB_DEPS = -lm -lpthread -lexpat -ldl -L$(TOP)/$(LIB_DIR) $(PCIACCESS_LIB) diff --git a/configs/linux-solo-x86 b/configs/linux-solo-x86 index 13cab376587..5f5aa09c826 100644 --- a/configs/linux-solo-x86 +++ b/configs/linux-solo-x86 @@ -9,5 +9,5 @@ CONFIG_NAME = linux-solo-x86 PIC_FLAGS = ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-sparc b/configs/linux-sparc index 9925afc19b6..346d438c283 100644 --- a/configs/linux-sparc +++ b/configs/linux-sparc @@ -5,5 +5,5 @@ include $(TOP)/configs/linux CONFIG_NAME = linux-sparc #ASM_FLAGS = -DUSE_SPARC_ASM -#ASM_SOURCES = $(SPARC_SOURCES) -#ASM_API = $(SPARC_API) +#MESA_ASM_SOURCES = $(SPARC_SOURCES) +#GLAPI_ASM_SOURCES = $(SPARC_API) diff --git a/configs/linux-x86 b/configs/linux-x86 index 18fa06101de..a4cf4e8d624 100644 --- a/configs/linux-x86 +++ b/configs/linux-x86 @@ -5,5 +5,5 @@ include $(TOP)/configs/linux CONFIG_NAME = linux-x86 ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-x86-64 b/configs/linux-x86-64 index 67c03918362..c2441e09d07 100644 --- a/configs/linux-x86-64 +++ b/configs/linux-x86-64 @@ -6,8 +6,8 @@ CONFIG_NAME = linux-x86-64 ARCH_FLAGS = -m64 -ASM_SOURCES = $(X86-64_SOURCES) -ASM_API = $(X86-64_API) +MESA_ASM_SOURCES = $(X86-64_SOURCES) +GLAPI_ASM_SOURCES = $(X86-64_API) ASM_FLAGS = -DUSE_X86_64_ASM LIB_DIR = lib64 diff --git a/configs/linux-x86-glide b/configs/linux-x86-glide index f2f8aeea60e..b963fbdc66b 100644 --- a/configs/linux-x86-glide +++ b/configs/linux-x86-glide @@ -15,8 +15,8 @@ CXXFLAGS = -Wall -O3 -ansi -pedantic -fPIC -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199 GLUT_CFLAGS = -fexceptions -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) # Library/program dependencies GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -L/usr/local/glide/lib -lglide3x -lm -lpthread diff --git a/configs/sunos5-gcc b/configs/sunos5-gcc index 77b293c5452..3fa13d0496f 100644 --- a/configs/sunos5-gcc +++ b/configs/sunos5-gcc @@ -16,8 +16,8 @@ ARCH_FLAGS ?= DEFINES = -D_REENTRANT -DUSE_XSHM -ASM_SOURCES = $(SPARC_SOURCES) -ASM_API = $(SPARC_API) +MESA_ASM_SOURCES = $(SPARC_SOURCES) +GLAPI_ASM_SOURCES = $(SPARC_API) ASM_FLAGS = -DUSE_SPARC_ASM CFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) \ diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile index 8248b097fde..3e49266163b 100644 --- a/src/gallium/auxiliary/cso_cache/Makefile +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -3,15 +3,10 @@ include $(TOP)/configs/current LIBNAME = cso_cache -DRIVER_SOURCES = \ +C_SOURCES = \ cso_cache.c \ cso_hash.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index c8000cbe9cb..1ee9eca0ca5 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -3,7 +3,7 @@ include $(TOP)/configs/current LIBNAME = draw -DRIVER_SOURCES = \ +C_SOURCES = \ draw_aaline.c \ draw_clip.c \ draw_vs_exec.c \ @@ -29,11 +29,6 @@ DRIVER_SOURCES = \ draw_vf_sse.c \ draw_wide_prims.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index 588629e8701..a9fa518c674 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = pipebuffer -DRIVER_SOURCES = \ +C_SOURCES = \ pb_buffer_fenced.c \ pb_buffer_malloc.c \ pb_bufmgr_fenced.c \ @@ -12,11 +11,6 @@ DRIVER_SOURCES = \ pb_bufmgr_pool.c \ pb_winsys.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index edfae2a204f..bc339d2aa6e 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -1,18 +1,12 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = rtasm -DRIVER_SOURCES = \ +C_SOURCES = \ rtasm_execmem.c \ rtasm_x86sse.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 8bb62b2a0a5..71f64b747c2 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = tgsi -DRIVER_SOURCES = \ +C_SOURCES = \ exec/tgsi_exec.c \ exec/tgsi_sse2.c \ util/tgsi_build.c \ @@ -13,11 +12,6 @@ DRIVER_SOURCES = \ util/tgsi_transform.c \ util/tgsi_util.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 7cc2aa44f96..906a46d6b4e 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -1,20 +1,14 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = util -DRIVER_SOURCES = \ +C_SOURCES = \ p_debug.c \ p_tile.c \ p_util.c \ u_mm.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile index 14389bd0551..f08b8df07a1 100644 --- a/src/gallium/drivers/failover/Makefile +++ b/src/gallium/drivers/failover/Makefile @@ -1,20 +1,13 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = failover -DRIVER_SOURCES = \ +C_SOURCES = \ fo_state.c \ fo_state_emit.c \ fo_context.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile index ee22ba86f9b..2a75f5d57ce 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915simple/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = i915simple -DRIVER_SOURCES = \ +C_SOURCES = \ i915_blit.c \ i915_clear.c \ i915_flush.c \ @@ -26,12 +25,6 @@ DRIVER_SOURCES = \ i915_fpc_translate.c \ i915_surface.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index 1dec1f97495..cc8580836ce 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -1,14 +1,13 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = i965simple -DRIVER_SOURCES = \ - brw_blit.c \ - brw_flush.c \ - brw_strings.c \ - brw_surface.c \ +C_SOURCES = \ + brw_blit.c \ + brw_flush.c \ + brw_strings.c \ + brw_surface.c \ brw_cc.c \ brw_clip.c \ brw_clip_line.c \ @@ -31,8 +30,8 @@ DRIVER_SOURCES = \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ - brw_shader_info.c \ - brw_state.c \ + brw_shader_info.c \ + brw_state.c \ brw_state_batch.c \ brw_state_cache.c \ brw_state_pool.c \ @@ -51,16 +50,6 @@ DRIVER_SOURCES = \ brw_wm_state.c \ brw_wm_surface_state.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(COMMON_BM_SOURCES) \ - $(MINIGLX_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -DRIVER_DEFINES = -I. - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 5479daf8ea2..539ffb77f55 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = softpipe -DRIVER_SOURCES = \ +C_SOURCES = \ sp_fs_exec.c \ sp_fs_sse.c \ sp_fs_llvm.c \ @@ -41,12 +40,6 @@ DRIVER_SOURCES = \ sp_tile_cache.c \ sp_surface.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template index 65a93bd53e3..3bc1fdd4d40 100644 --- a/src/gallium/winsys/dri/Makefile.template +++ b/src/gallium/winsys/dri/Makefile.template @@ -25,8 +25,9 @@ WINOBJ= WINLIB= INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) -OBJECTS = $(C_SOURCES:.c=.o) \ - $(ASM_SOURCES:.S=.o) +OBJECTS = \ + $(C_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) else # miniglx diff --git a/src/glx/x11/Makefile b/src/glx/x11/Makefile index 5f74fcff060..b404727f08b 100644 --- a/src/glx/x11/Makefile +++ b/src/glx/x11/Makefile @@ -35,7 +35,7 @@ SOURCES = \ include $(TOP)/src/mesa/sources -MESA_ASM_API = $(addprefix $(TOP)/src/mesa/, $(ASM_API)) +MESA_GLAPI_ASM_SOURCES = $(addprefix $(TOP)/src/mesa/, $(GLAPI_ASM_SOURCES)) MESA_GLAPI_SOURCES = $(addprefix $(TOP)/src/mesa/, $(GLAPI_SOURCES)) MESA_GLAPI_OBJECTS = $(addprefix $(TOP)/src/mesa/, $(GLAPI_OBJECTS)) @@ -70,11 +70,11 @@ $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) Makefile -install $(TOP)/$(LIB_DIR) $(GL_LIB_DEPS) $(OBJECTS) -depend: $(SOURCES) $(MESA_GLAPI_SOURCES) $(MESA_ASM_API) Makefile +depend: $(SOURCES) $(MESA_GLAPI_SOURCES) $(MESA_GLAPI_ASM_SOURCES) Makefile rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(SOURCES) \ - $(MESA_GLAPI_SOURCES) $(MESA_ASM_API) + $(MESA_GLAPI_SOURCES) $(MESA_GLAPI_ASM_SOURCES) # Emacs tags diff --git a/src/mesa/sources b/src/mesa/sources index 0d185fd5f3f..9e56694893e 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -320,7 +320,7 @@ FBDEV_DRIVER_SOURCES = \ ALL_SOURCES = \ $(GLAPI_SOURCES) \ $(SOLO_SOURCES) \ - $(ASM_SOURCES) \ + $(MESA_ASM_SOURCES) \ $(COMMON_DRIVER_SOURCES)\ $(X11_DRIVER_SOURCES) \ $(FBDEV_DRIVER_SOURCES) \ @@ -353,11 +353,11 @@ CORE_SOURCES = \ SOLO_OBJECTS = \ $(SOLO_SOURCES:.c=.o) \ - $(ASM_SOURCES:.S=.o) + $(MESA_ASM_SOURCES:.S=.o) GLAPI_OBJECTS = \ $(GLAPI_SOURCES:.c=.o) \ - $(ASM_API:.S=.o) + $(GLAPI_ASM_SOURCES:.S=.o) CORE_OBJECTS = $(SOLO_OBJECTS) $(GLAPI_OBJECTS) -- cgit v1.2.3 From 5d78212d752e021555356bbb9cc5993ad6d9e847 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 19 Feb 2008 14:00:16 +0900 Subject: Bring in ppc spe rtasm into gallium's rtasm module. Moving files since these are not being used outside gallium. --- src/gallium/auxiliary/rtasm/Makefile | 3 +- src/gallium/auxiliary/rtasm/SConscript | 3 +- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 386 +++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 314 ++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_context.h | 2 +- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 2 +- src/mesa/ppc/rtasm/spe_asm.c | 385 ---------------------- src/mesa/ppc/rtasm/spe_asm.h | 314 ------------------ src/mesa/sources | 1 - 9 files changed, 706 insertions(+), 704 deletions(-) create mode 100644 src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c create mode 100644 src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h delete mode 100644 src/mesa/ppc/rtasm/spe_asm.c delete mode 100644 src/mesa/ppc/rtasm/spe_asm.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index 9b972f8f861..39b8a4dbd7a 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -6,7 +6,8 @@ LIBNAME = rtasm C_SOURCES = \ rtasm_cpu.c \ rtasm_execmem.c \ - rtasm_x86sse.c + rtasm_x86sse.c \ + rtasm_ppc_spe.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript index ac41a4f2122..8ea25922aa1 100644 --- a/src/gallium/auxiliary/rtasm/SConscript +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -5,7 +5,8 @@ rtasm = env.ConvenienceLibrary( source = [ 'rtasm_cpu.c', 'rtasm_execmem.c', - 'rtasm_x86sse.c' + 'rtasm_x86sse.c', + 'rtasm_ppc_spe.c', ]) auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c new file mode 100644 index 00000000000..95a2d6fcbbe --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -0,0 +1,386 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Real-time assembly generation interface for Cell B.E. SPEs. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "rtasm_ppc_spe.h" + +#ifdef GALLIUM_CELL +/** + * SPE instruction types + * + * There are 6 primary instruction encodings used on the Cell's SPEs. Each of + * the following unions encodes one type. + * + * \bug + * If, at some point, we start generating SPE code from a little-endian host + * these unions will not work. + */ +/*@{*/ +/** + * Encode one output register with two input registers + */ +union spe_inst_RR { + uint32_t bits; + struct { + unsigned op:11; + unsigned rB:7; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with three input registers + */ +union spe_inst_RRR { + uint32_t bits; + struct { + unsigned op:4; + unsigned rT:7; + unsigned rB:7; + unsigned rA:7; + unsigned rC:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and a 7-bit signed immed + */ +union spe_inst_RI7 { + uint32_t bits; + struct { + unsigned op:11; + unsigned i7:7; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and an 8-bit signed immed + */ +union spe_inst_RI8 { + uint32_t bits; + struct { + unsigned op:10; + unsigned i8:8; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and a 10-bit signed immed + */ +union spe_inst_RI10 { + uint32_t bits; + struct { + unsigned op:8; + unsigned i10:10; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with a 16-bit signed immediate + */ +union spe_inst_RI16 { + uint32_t bits; + struct { + unsigned op:9; + unsigned i16:16; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with a 18-bit signed immediate + */ +union spe_inst_RI18 { + uint32_t bits; + struct { + unsigned op:7; + unsigned i18:18; + unsigned rT:7; + } inst; +}; +/*@}*/ + + +static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, unsigned rB) +{ + union spe_inst_RR inst; + inst.inst.op = op; + inst.inst.rB = rB; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, unsigned rB, unsigned rC) +{ + union spe_inst_RRR inst; + inst.inst.op = op; + inst.inst.rT = rT; + inst.inst.rB = rB; + inst.inst.rA = rA; + inst.inst.rC = rC; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI7 inst; + inst.inst.op = op; + inst.inst.i7 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + +static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI8 inst; + inst.inst.op = op; + inst.inst.i8 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + +static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI10 inst; + inst.inst.op = op; + inst.inst.i10 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, + int imm) +{ + union spe_inst_RI16 inst; + inst.inst.op = op; + inst.inst.i16 = imm; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, + int imm) +{ + union spe_inst_RI18 inst; + inst.inst.op = op; + inst.inst.i18 = imm; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + + +#define EMIT_(_name, _op) \ +void _name (struct spe_function *p, unsigned rT) \ +{ \ + emit_RR(p, _op, rT, 0, 0); \ +} + +#define EMIT_R(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA) \ +{ \ + emit_RR(p, _op, rT, rA, 0); \ +} + +#define EMIT_RR(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ +{ \ + emit_RR(p, _op, rT, rA, rB); \ +} + +#define EMIT_RRR(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ +{ \ + emit_RRR(p, _op, rT, rA, rB, rC); \ +} + +#define EMIT_RI7(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI7(p, _op, rT, rA, imm); \ +} + +#define EMIT_RI8(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI8(p, _op, rT, rA, 155 - imm); \ +} + +#define EMIT_RI10(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI10(p, _op, rT, rA, imm); \ +} + +#define EMIT_RI16(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, int imm) \ +{ \ + emit_RI16(p, _op, rT, imm); \ +} + +#define EMIT_RI18(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, int imm) \ +{ \ + emit_RI18(p, _op, rT, imm); \ +} + +#define EMIT_I16(_name, _op) \ +void _name (struct spe_function *p, int imm) \ +{ \ + emit_RI16(p, _op, 0, imm); \ +} + +#include "rtasm_ppc_spe.h" + + +/* + */ +void spe_init_func(struct spe_function *p, unsigned code_size) +{ + p->store = align_malloc(code_size, 16); + p->csr = p->store; +} + + +void spe_release_func(struct spe_function *p) +{ + align_free(p->store); + p->store = NULL; + p->csr = NULL; +} + + +void spe_bi(struct spe_function *p, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); +} + +void spe_iret(struct spe_function *p, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); +} + +void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); +} + +void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); +} + +void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); +} + + +/* Hint-for-branch instructions + */ +#if 0 +hbr; +hbra; +hbrr; +#endif + + +/* Control instructions + */ +#if 0 +stop; +EMIT_RR (spe_stopd, 0x140); +EMIT_ (spe_lnop, 0x001); +EMIT_ (spe_nop, 0x201); +sync; +EMIT_ (spe_dsync, 0x003); +EMIT_R (spe_mfspr, 0x00c); +EMIT_R (spe_mtspr, 0x10c); +#endif + +#endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h new file mode 100644 index 00000000000..10ce44b3a08 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -0,0 +1,314 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Real-time assembly generation interface for Cell B.E. SPEs. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#ifndef RTASM_PPC_SPE_H +#define RTASM_PPC_SPE_H + +struct spe_function { + /** + * + */ + uint32_t *store; + uint32_t *csr; + const char *fn; +}; + +extern void spe_init_func(struct spe_function *p, unsigned code_size); +extern void spe_release_func(struct spe_function *p); + +#endif /* RTASM_PPC_SPE_H */ + +#ifndef EMIT_ +#define EMIT_(name, _op) \ + extern void _name (struct spe_function *p, unsigned rT) +#define EMIT_R(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA) +#define EMIT_RR(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + unsigned rB) +#define EMIT_RRR(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + unsigned rB, unsigned rC) +#define EMIT_RI7(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI8(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI10(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI16(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, int imm) +#define EMIT_RI18(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, int imm) +#define EMIT_I16(_name, _op) \ + extern void _name (struct spe_function *p, int imm) +#define UNDEF_EMIT_MACROS +#endif /* EMIT_ */ + + +/* Memory load / store instructions + */ +EMIT_RI10(spe_lqd, 0x034); +EMIT_RR (spe_lqx, 0x1c4); +EMIT_RI16(spe_lqa, 0x061); +EMIT_RI16(spe_lqr, 0x067); +EMIT_RI10(spe_stqd, 0x024); +EMIT_RR (spe_stqx, 0x144); +EMIT_RI16(spe_stqa, 0x041); +EMIT_RI16(spe_stqr, 0x047); +EMIT_RI7 (spe_cbd, 0x1f4); +EMIT_RR (spe_cbx, 0x1d4); +EMIT_RI7 (spe_chd, 0x1f5); +EMIT_RI7 (spe_chx, 0x1d5); +EMIT_RI7 (spe_cwd, 0x1f6); +EMIT_RI7 (spe_cwx, 0x1d6); +EMIT_RI7 (spe_cdd, 0x1f7); +EMIT_RI7 (spe_cdx, 0x1d7); + + +/* Constant formation instructions + */ +EMIT_RI16(spe_ilh, 0x083); +EMIT_RI16(spe_ilhu, 0x082); +EMIT_RI16(spe_il, 0x081); +EMIT_RI18(spe_ila, 0x021); +EMIT_RI16(spe_iohl, 0x0c1); +EMIT_RI16(spe_fsmbi, 0x0c5); + + +/* Integer and logical instructions + */ +EMIT_RR (spe_ah, 0x0c8); +EMIT_RI10(spe_ahi, 0x01d); +EMIT_RR (spe_a, 0x0c0); +EMIT_RI10(spe_ai, 0x01c); +EMIT_RR (spe_sfh, 0x048); +EMIT_RI10(spe_sfhi, 0x00d); +EMIT_RR (spe_sf, 0x040); +EMIT_RI10(spe_sfi, 0x00c); +EMIT_RR (spe_addx, 0x340); +EMIT_RR (spe_cg, 0x0c2); +EMIT_RR (spe_cgx, 0x342); +EMIT_RR (spe_sfx, 0x341); +EMIT_RR (spe_bg, 0x042); +EMIT_RR (spe_bgx, 0x343); +EMIT_RR (spe_mpy, 0x3c4); +EMIT_RR (spe_mpyu, 0x3cc); +EMIT_RI10(spe_mpyi, 0x074); +EMIT_RI10(spe_mpyui, 0x075); +EMIT_RRR (spe_mpya, 0x00c); +EMIT_RR (spe_mpyh, 0x3c5); +EMIT_RR (spe_mpys, 0x3c7); +EMIT_RR (spe_mpyhh, 0x3c6); +EMIT_RR (spe_mpyhha, 0x346); +EMIT_RR (spe_mpyhhu, 0x3ce); +EMIT_RR (spe_mpyhhau, 0x34e); +EMIT_R (spe_clz, 0x2a5); +EMIT_R (spe_cntb, 0x2b4); +EMIT_R (spe_fsmb, 0x1b6); +EMIT_R (spe_fsmh, 0x1b5); +EMIT_R (spe_fsm, 0x1b4); +EMIT_R (spe_gbb, 0x1b2); +EMIT_R (spe_gbh, 0x1b1); +EMIT_R (spe_gb, 0x1b0); +EMIT_RR (spe_avgb, 0x0d3); +EMIT_RR (spe_absdb, 0x053); +EMIT_RR (spe_sumb, 0x253); +EMIT_R (spe_xsbh, 0x2b6); +EMIT_R (spe_xshw, 0x2ae); +EMIT_R (spe_xswd, 0x2a6); +EMIT_RR (spe_and, 0x0c1); +EMIT_RR (spe_andc, 0x2c1); +EMIT_RI10(spe_andbi, 0x016); +EMIT_RI10(spe_andhi, 0x015); +EMIT_RI10(spe_andi, 0x014); +EMIT_RR (spe_or, 0x041); +EMIT_RR (spe_orc, 0x2c9); +EMIT_RI10(spe_orbi, 0x006); +EMIT_RI10(spe_orhi, 0x005); +EMIT_RI10(spe_ori, 0x004); +EMIT_R (spe_orx, 0x1f0); +EMIT_RR (spe_xor, 0x241); +EMIT_RI10(spe_xorbi, 0x026); +EMIT_RI10(spe_xorhi, 0x025); +EMIT_RI10(spe_xori, 0x024); +EMIT_RR (spe_nand, 0x0c9); +EMIT_RR (spe_nor, 0x049); +EMIT_RR (spe_eqv, 0x249); +EMIT_RRR (spe_selb, 0x008); +EMIT_RRR (spe_shufb, 0x00b); + + +/* Shift and rotate instructions + */ +EMIT_RR (spe_shlh, 0x05f); +EMIT_RI7 (spe_shlhi, 0x07f); +EMIT_RR (spe_shl, 0x05b); +EMIT_RI7 (spe_shli, 0x07b); +EMIT_RR (spe_shlqbi, 0x1db); +EMIT_RI7 (spe_shlqbii, 0x1fb); +EMIT_RR (spe_shlqby, 0x1df); +EMIT_RI7 (spe_shlqbyi, 0x1ff); +EMIT_RR (spe_shlqbybi, 0x1cf); +EMIT_RR (spe_roth, 0x05c); +EMIT_RI7 (spe_rothi, 0x07c); +EMIT_RR (spe_rot, 0x058); +EMIT_RI7 (spe_roti, 0x078); +EMIT_RR (spe_rotqby, 0x1dc); +EMIT_RI7 (spe_rotqbyi, 0x1fc); +EMIT_RR (spe_rotqbybi, 0x1cc); +EMIT_RR (spe_rotqbi, 0x1d8); +EMIT_RI7 (spe_rotqbii, 0x1f8); +EMIT_RR (spe_rothm, 0x05d); +EMIT_RI7 (spe_rothmi, 0x07d); +EMIT_RR (spe_rotm, 0x059); +EMIT_RI7 (spe_rotmi, 0x079); +EMIT_RR (spe_rotqmby, 0x1dd); +EMIT_RI7 (spe_rotqmbyi, 0x1fd); +EMIT_RR (spe_rotqmbybi, 0x1cd); +EMIT_RR (spe_rotqmbi, 0x1c9); +EMIT_RI7 (spe_rotqmbii, 0x1f9); +EMIT_RR (spe_rotmah, 0x05e); +EMIT_RI7 (spe_rotmahi, 0x07e); +EMIT_RR (spe_rotma, 0x05a); +EMIT_RI7 (spe_rotmai, 0x07a); + + +/* Compare, branch, and halt instructions + */ +EMIT_RR (spe_heq, 0x3d8); +EMIT_RI10(spe_heqi, 0x07f); +EMIT_RR (spe_hgt, 0x258); +EMIT_RI10(spe_hgti, 0x04f); +EMIT_RR (spe_hlgt, 0x2d8); +EMIT_RI10(spe_hlgti, 0x05f); +EMIT_RR (spe_ceqb, 0x3d0); +EMIT_RI10(spe_ceqbi, 0x07e); +EMIT_RR (spe_ceqh, 0x3c8); +EMIT_RI10(spe_ceqhi, 0x07d); +EMIT_RR (spe_ceq, 0x3c0); +EMIT_RI10(spe_ceqi, 0x07c); +EMIT_RR (spe_cgtb, 0x250); +EMIT_RI10(spe_cgtbi, 0x04e); +EMIT_RR (spe_cgth, 0x248); +EMIT_RI10(spe_cgthi, 0x04d); +EMIT_RR (spe_cgt, 0x240); +EMIT_RI10(spe_cgti, 0x04c); +EMIT_RR (spe_clgtb, 0x2d0); +EMIT_RI10(spe_clgtbi, 0x05e); +EMIT_RR (spe_clgth, 0x2c8); +EMIT_RI10(spe_clgthi, 0x05d); +EMIT_RR (spe_clgt, 0x2c0); +EMIT_RI10(spe_clgti, 0x05c); +EMIT_I16 (spe_br, 0x064); +EMIT_I16 (spe_bra, 0x060); +EMIT_RI16(spe_brsl, 0x066); +EMIT_RI16(spe_brasl, 0x062); +EMIT_RI16(spe_brnz, 0x042); +EMIT_RI16(spe_brz, 0x040); +EMIT_RI16(spe_brhnz, 0x046); +EMIT_RI16(spe_brhz, 0x044); + +extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); +extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); +extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); + + +/* Floating-point instructions + */ +EMIT_RR (spe_fa, 0x2c4); +EMIT_RR (spe_dfa, 0x2cc); +EMIT_RR (spe_fs, 0x2c5); +EMIT_RR (spe_dfs, 0x2cd); +EMIT_RR (spe_fm, 0x2c6); +EMIT_RR (spe_dfm, 0x2ce); +EMIT_RRR (spe_fma, 0x00e); +EMIT_RR (spe_dfma, 0x35c); +EMIT_RRR (spe_fnms, 0x00d); +EMIT_RR (spe_dfnms, 0x35e); +EMIT_RRR (spe_fms, 0x00f); +EMIT_RR (spe_dfms, 0x35d); +EMIT_RR (spe_dfnma, 0x35f); +EMIT_R (spe_frest, 0x1b8); +EMIT_R (spe_frsqest, 0x1b9); +EMIT_RR (spe_fi, 0x3d4); +EMIT_RI8 (spe_csflt, 0x1da); +EMIT_RI8 (spe_cflts, 0x1d8); +EMIT_RI8 (spe_cuflt, 0x1db); +EMIT_RI8 (spe_cfltu, 0x1d9); +EMIT_R (spe_frds, 0x3b9); +EMIT_R (spe_fesd, 0x3b8); +EMIT_RR (spe_dfceq, 0x3c3); +EMIT_RR (spe_dfcmeq, 0x3cb); +EMIT_RR (spe_dfcgt, 0x2c3); +EMIT_RR (spe_dfcmgt, 0x2cb); +EMIT_RI7 (spe_dftsv, 0x3bf); +EMIT_RR (spe_fceq, 0x3c2); +EMIT_RR (spe_fcmeq, 0x3ca); +EMIT_RR (spe_fcgt, 0x2c2); +EMIT_RR (spe_fcmgt, 0x2ca); +EMIT_R (spe_fscrwr, 0x3ba); +EMIT_ (spe_fscrrd, 0x398); + + +/* Channel instructions + */ +EMIT_R (spe_rdch, 0x00d); +EMIT_R (spe_rdchcnt, 0x00f); +EMIT_R (spe_wrch, 0x10d); + + +#ifdef UNDEF_EMIT_MACROS +#undef EMIT_ +#undef EMIT_R +#undef EMIT_RR +#undef EMIT_RRR +#undef EMIT_RI7 +#undef EMIT_RI8 +#undef EMIT_RI10 +#undef EMIT_RI16 +#undef EMIT_RI18 +#undef EMIT_I16 +#undef UNDEF_EMIT_MACROS +#endif /* EMIT_ */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 91f8e542a25..3b687bb8689 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -36,7 +36,7 @@ #include "draw/draw_vbuf.h" #include "cell_winsys.h" #include "cell/common.h" -#include "ppc/rtasm/spe_asm.h" +#include "rtasm/rtasm_ppc_spe.h" struct cell_vbuf_render; diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index f10689a959e..9cf74bab477 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -31,7 +31,7 @@ #include "../auxiliary/draw/draw_private.h" #include "cell_context.h" -#include "ppc/rtasm/spe_asm.h" +#include "rtasm/rtasm_ppc_spe.h" typedef uint64_t register_mask; diff --git a/src/mesa/ppc/rtasm/spe_asm.c b/src/mesa/ppc/rtasm/spe_asm.c deleted file mode 100644 index 10376372502..00000000000 --- a/src/mesa/ppc/rtasm/spe_asm.c +++ /dev/null @@ -1,385 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file spe_asm.c - * Real-time assembly generation interface for Cell B.E. SPEs. - * - * \author Ian Romanick <idr@us.ibm.com> - */ -#ifdef GALLIUM_CELL -#include <inttypes.h> -#include <imports.h> -#include "spe_asm.h" - -/** - * SPE instruction types - * - * There are 6 primary instruction encodings used on the Cell's SPEs. Each of - * the following unions encodes one type. - * - * \bug - * If, at some point, we start generating SPE code from a little-endian host - * these unions will not work. - */ -/*@{*/ -/** - * Encode one output register with two input registers - */ -union spe_inst_RR { - uint32_t bits; - struct { - unsigned op:11; - unsigned rB:7; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with three input registers - */ -union spe_inst_RRR { - uint32_t bits; - struct { - unsigned op:4; - unsigned rT:7; - unsigned rB:7; - unsigned rA:7; - unsigned rC:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. and a 7-bit signed immed - */ -union spe_inst_RI7 { - uint32_t bits; - struct { - unsigned op:11; - unsigned i7:7; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. and an 8-bit signed immed - */ -union spe_inst_RI8 { - uint32_t bits; - struct { - unsigned op:10; - unsigned i8:8; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. and a 10-bit signed immed - */ -union spe_inst_RI10 { - uint32_t bits; - struct { - unsigned op:8; - unsigned i10:10; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with a 16-bit signed immediate - */ -union spe_inst_RI16 { - uint32_t bits; - struct { - unsigned op:9; - unsigned i16:16; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with a 18-bit signed immediate - */ -union spe_inst_RI18 { - uint32_t bits; - struct { - unsigned op:7; - unsigned i18:18; - unsigned rT:7; - } inst; -}; -/*@}*/ - - -static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB) -{ - union spe_inst_RR inst; - inst.inst.op = op; - inst.inst.rB = rB; - inst.inst.rA = rA; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - -static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB, unsigned rC) -{ - union spe_inst_RRR inst; - inst.inst.op = op; - inst.inst.rT = rT; - inst.inst.rB = rB; - inst.inst.rA = rA; - inst.inst.rC = rC; - *p->csr = inst.bits; - p->csr++; -} - - -static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) -{ - union spe_inst_RI7 inst; - inst.inst.op = op; - inst.inst.i7 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - - -static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) -{ - union spe_inst_RI8 inst; - inst.inst.op = op; - inst.inst.i8 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - - -static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) -{ - union spe_inst_RI10 inst; - inst.inst.op = op; - inst.inst.i10 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - -static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, - int imm) -{ - union spe_inst_RI16 inst; - inst.inst.op = op; - inst.inst.i16 = imm; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - -static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, - int imm) -{ - union spe_inst_RI18 inst; - inst.inst.op = op; - inst.inst.i18 = imm; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - - - -#define EMIT_(_name, _op) \ -void _name (struct spe_function *p, unsigned rT) \ -{ \ - emit_RR(p, _op, rT, 0, 0); \ -} - -#define EMIT_R(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA) \ -{ \ - emit_RR(p, _op, rT, rA, 0); \ -} - -#define EMIT_RR(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ -{ \ - emit_RR(p, _op, rT, rA, rB); \ -} - -#define EMIT_RRR(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ -{ \ - emit_RRR(p, _op, rT, rA, rB, rC); \ -} - -#define EMIT_RI7(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ -{ \ - emit_RI7(p, _op, rT, rA, imm); \ -} - -#define EMIT_RI8(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ -{ \ - emit_RI8(p, _op, rT, rA, 155 - imm); \ -} - -#define EMIT_RI10(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ -{ \ - emit_RI10(p, _op, rT, rA, imm); \ -} - -#define EMIT_RI16(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, int imm) \ -{ \ - emit_RI16(p, _op, rT, imm); \ -} - -#define EMIT_RI18(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, int imm) \ -{ \ - emit_RI18(p, _op, rT, imm); \ -} - -#define EMIT_I16(_name, _op) \ -void _name (struct spe_function *p, int imm) \ -{ \ - emit_RI16(p, _op, 0, imm); \ -} - -#include "spe_asm.h" - - -/* - */ -void spe_init_func(struct spe_function *p, unsigned code_size) -{ - p->store = _mesa_align_malloc(code_size, 16); - p->csr = p->store; -} - - -void spe_release_func(struct spe_function *p) -{ - _mesa_align_free(p->store); - p->store = NULL; - p->csr = NULL; -} - - -void spe_bi(struct spe_function *p, unsigned rA, int d, int e) -{ - emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); -} - -void spe_iret(struct spe_function *p, unsigned rA, int d, int e) -{ - emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); -} - -void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, - int e) -{ - emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); -} - -void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, - int e) -{ - emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); -} - -void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, - int e) -{ - emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); -} - -void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) -{ - emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); -} - -void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) -{ - emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); -} - -void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) -{ - emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); -} - - -/* Hint-for-branch instructions - */ -#if 0 -hbr; -hbra; -hbrr; -#endif - - -/* Control instructions - */ -#if 0 -stop; -EMIT_RR (spe_stopd, 0x140); -EMIT_ (spe_lnop, 0x001); -EMIT_ (spe_nop, 0x201); -sync; -EMIT_ (spe_dsync, 0x003); -EMIT_R (spe_mfspr, 0x00c); -EMIT_R (spe_mtspr, 0x10c); -#endif - -#endif /* GALLIUM_CELL */ diff --git a/src/mesa/ppc/rtasm/spe_asm.h b/src/mesa/ppc/rtasm/spe_asm.h deleted file mode 100644 index 6d69ae655d1..00000000000 --- a/src/mesa/ppc/rtasm/spe_asm.h +++ /dev/null @@ -1,314 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file spe_asm.h - * Real-time assembly generation interface for Cell B.E. SPEs. - * - * \author Ian Romanick <idr@us.ibm.com> - */ - -#ifndef SPE_ASM_H -#define SPE_ASM_H - -struct spe_function { - /** - * - */ - uint32_t *store; - uint32_t *csr; - const char *fn; -}; - -extern void spe_init_func(struct spe_function *p, unsigned code_size); -extern void spe_release_func(struct spe_function *p); - -#endif /* SPE_ASM_H */ - -#ifndef EMIT_ -#define EMIT_(name, _op) \ - extern void _name (struct spe_function *p, unsigned rT) -#define EMIT_R(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA) -#define EMIT_RR(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB) -#define EMIT_RRR(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB, unsigned rC) -#define EMIT_RI7(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) -#define EMIT_RI8(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) -#define EMIT_RI10(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) -#define EMIT_RI16(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) -#define EMIT_RI18(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) -#define EMIT_I16(_name, _op) \ - extern void _name (struct spe_function *p, int imm) -#define UNDEF_EMIT_MACROS -#endif /* EMIT_ */ - - -/* Memory load / store instructions - */ -EMIT_RI10(spe_lqd, 0x034); -EMIT_RR (spe_lqx, 0x1c4); -EMIT_RI16(spe_lqa, 0x061); -EMIT_RI16(spe_lqr, 0x067); -EMIT_RI10(spe_stqd, 0x024); -EMIT_RR (spe_stqx, 0x144); -EMIT_RI16(spe_stqa, 0x041); -EMIT_RI16(spe_stqr, 0x047); -EMIT_RI7 (spe_cbd, 0x1f4); -EMIT_RR (spe_cbx, 0x1d4); -EMIT_RI7 (spe_chd, 0x1f5); -EMIT_RI7 (spe_chx, 0x1d5); -EMIT_RI7 (spe_cwd, 0x1f6); -EMIT_RI7 (spe_cwx, 0x1d6); -EMIT_RI7 (spe_cdd, 0x1f7); -EMIT_RI7 (spe_cdx, 0x1d7); - - -/* Constant formation instructions - */ -EMIT_RI16(spe_ilh, 0x083); -EMIT_RI16(spe_ilhu, 0x082); -EMIT_RI16(spe_il, 0x081); -EMIT_RI18(spe_ila, 0x021); -EMIT_RI16(spe_iohl, 0x0c1); -EMIT_RI16(spe_fsmbi, 0x0c5); - - -/* Integer and logical instructions - */ -EMIT_RR (spe_ah, 0x0c8); -EMIT_RI10(spe_ahi, 0x01d); -EMIT_RR (spe_a, 0x0c0); -EMIT_RI10(spe_ai, 0x01c); -EMIT_RR (spe_sfh, 0x048); -EMIT_RI10(spe_sfhi, 0x00d); -EMIT_RR (spe_sf, 0x040); -EMIT_RI10(spe_sfi, 0x00c); -EMIT_RR (spe_addx, 0x340); -EMIT_RR (spe_cg, 0x0c2); -EMIT_RR (spe_cgx, 0x342); -EMIT_RR (spe_sfx, 0x341); -EMIT_RR (spe_bg, 0x042); -EMIT_RR (spe_bgx, 0x343); -EMIT_RR (spe_mpy, 0x3c4); -EMIT_RR (spe_mpyu, 0x3cc); -EMIT_RI10(spe_mpyi, 0x074); -EMIT_RI10(spe_mpyui, 0x075); -EMIT_RRR (spe_mpya, 0x00c); -EMIT_RR (spe_mpyh, 0x3c5); -EMIT_RR (spe_mpys, 0x3c7); -EMIT_RR (spe_mpyhh, 0x3c6); -EMIT_RR (spe_mpyhha, 0x346); -EMIT_RR (spe_mpyhhu, 0x3ce); -EMIT_RR (spe_mpyhhau, 0x34e); -EMIT_R (spe_clz, 0x2a5); -EMIT_R (spe_cntb, 0x2b4); -EMIT_R (spe_fsmb, 0x1b6); -EMIT_R (spe_fsmh, 0x1b5); -EMIT_R (spe_fsm, 0x1b4); -EMIT_R (spe_gbb, 0x1b2); -EMIT_R (spe_gbh, 0x1b1); -EMIT_R (spe_gb, 0x1b0); -EMIT_RR (spe_avgb, 0x0d3); -EMIT_RR (spe_absdb, 0x053); -EMIT_RR (spe_sumb, 0x253); -EMIT_R (spe_xsbh, 0x2b6); -EMIT_R (spe_xshw, 0x2ae); -EMIT_R (spe_xswd, 0x2a6); -EMIT_RR (spe_and, 0x0c1); -EMIT_RR (spe_andc, 0x2c1); -EMIT_RI10(spe_andbi, 0x016); -EMIT_RI10(spe_andhi, 0x015); -EMIT_RI10(spe_andi, 0x014); -EMIT_RR (spe_or, 0x041); -EMIT_RR (spe_orc, 0x2c9); -EMIT_RI10(spe_orbi, 0x006); -EMIT_RI10(spe_orhi, 0x005); -EMIT_RI10(spe_ori, 0x004); -EMIT_R (spe_orx, 0x1f0); -EMIT_RR (spe_xor, 0x241); -EMIT_RI10(spe_xorbi, 0x026); -EMIT_RI10(spe_xorhi, 0x025); -EMIT_RI10(spe_xori, 0x024); -EMIT_RR (spe_nand, 0x0c9); -EMIT_RR (spe_nor, 0x049); -EMIT_RR (spe_eqv, 0x249); -EMIT_RRR (spe_selb, 0x008); -EMIT_RRR (spe_shufb, 0x00b); - - -/* Shift and rotate instructions - */ -EMIT_RR (spe_shlh, 0x05f); -EMIT_RI7 (spe_shlhi, 0x07f); -EMIT_RR (spe_shl, 0x05b); -EMIT_RI7 (spe_shli, 0x07b); -EMIT_RR (spe_shlqbi, 0x1db); -EMIT_RI7 (spe_shlqbii, 0x1fb); -EMIT_RR (spe_shlqby, 0x1df); -EMIT_RI7 (spe_shlqbyi, 0x1ff); -EMIT_RR (spe_shlqbybi, 0x1cf); -EMIT_RR (spe_roth, 0x05c); -EMIT_RI7 (spe_rothi, 0x07c); -EMIT_RR (spe_rot, 0x058); -EMIT_RI7 (spe_roti, 0x078); -EMIT_RR (spe_rotqby, 0x1dc); -EMIT_RI7 (spe_rotqbyi, 0x1fc); -EMIT_RR (spe_rotqbybi, 0x1cc); -EMIT_RR (spe_rotqbi, 0x1d8); -EMIT_RI7 (spe_rotqbii, 0x1f8); -EMIT_RR (spe_rothm, 0x05d); -EMIT_RI7 (spe_rothmi, 0x07d); -EMIT_RR (spe_rotm, 0x059); -EMIT_RI7 (spe_rotmi, 0x079); -EMIT_RR (spe_rotqmby, 0x1dd); -EMIT_RI7 (spe_rotqmbyi, 0x1fd); -EMIT_RR (spe_rotqmbybi, 0x1cd); -EMIT_RR (spe_rotqmbi, 0x1c9); -EMIT_RI7 (spe_rotqmbii, 0x1f9); -EMIT_RR (spe_rotmah, 0x05e); -EMIT_RI7 (spe_rotmahi, 0x07e); -EMIT_RR (spe_rotma, 0x05a); -EMIT_RI7 (spe_rotmai, 0x07a); - - -/* Compare, branch, and halt instructions - */ -EMIT_RR (spe_heq, 0x3d8); -EMIT_RI10(spe_heqi, 0x07f); -EMIT_RR (spe_hgt, 0x258); -EMIT_RI10(spe_hgti, 0x04f); -EMIT_RR (spe_hlgt, 0x2d8); -EMIT_RI10(spe_hlgti, 0x05f); -EMIT_RR (spe_ceqb, 0x3d0); -EMIT_RI10(spe_ceqbi, 0x07e); -EMIT_RR (spe_ceqh, 0x3c8); -EMIT_RI10(spe_ceqhi, 0x07d); -EMIT_RR (spe_ceq, 0x3c0); -EMIT_RI10(spe_ceqi, 0x07c); -EMIT_RR (spe_cgtb, 0x250); -EMIT_RI10(spe_cgtbi, 0x04e); -EMIT_RR (spe_cgth, 0x248); -EMIT_RI10(spe_cgthi, 0x04d); -EMIT_RR (spe_cgt, 0x240); -EMIT_RI10(spe_cgti, 0x04c); -EMIT_RR (spe_clgtb, 0x2d0); -EMIT_RI10(spe_clgtbi, 0x05e); -EMIT_RR (spe_clgth, 0x2c8); -EMIT_RI10(spe_clgthi, 0x05d); -EMIT_RR (spe_clgt, 0x2c0); -EMIT_RI10(spe_clgti, 0x05c); -EMIT_I16 (spe_br, 0x064); -EMIT_I16 (spe_bra, 0x060); -EMIT_RI16(spe_brsl, 0x066); -EMIT_RI16(spe_brasl, 0x062); -EMIT_RI16(spe_brnz, 0x042); -EMIT_RI16(spe_brz, 0x040); -EMIT_RI16(spe_brhnz, 0x046); -EMIT_RI16(spe_brhz, 0x044); - -extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); -extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); -extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); -extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); -extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); -extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); -extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); -extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); - - -/* Floating-point instructions - */ -EMIT_RR (spe_fa, 0x2c4); -EMIT_RR (spe_dfa, 0x2cc); -EMIT_RR (spe_fs, 0x2c5); -EMIT_RR (spe_dfs, 0x2cd); -EMIT_RR (spe_fm, 0x2c6); -EMIT_RR (spe_dfm, 0x2ce); -EMIT_RRR (spe_fma, 0x00e); -EMIT_RR (spe_dfma, 0x35c); -EMIT_RRR (spe_fnms, 0x00d); -EMIT_RR (spe_dfnms, 0x35e); -EMIT_RRR (spe_fms, 0x00f); -EMIT_RR (spe_dfms, 0x35d); -EMIT_RR (spe_dfnma, 0x35f); -EMIT_R (spe_frest, 0x1b8); -EMIT_R (spe_frsqest, 0x1b9); -EMIT_RR (spe_fi, 0x3d4); -EMIT_RI8 (spe_csflt, 0x1da); -EMIT_RI8 (spe_cflts, 0x1d8); -EMIT_RI8 (spe_cuflt, 0x1db); -EMIT_RI8 (spe_cfltu, 0x1d9); -EMIT_R (spe_frds, 0x3b9); -EMIT_R (spe_fesd, 0x3b8); -EMIT_RR (spe_dfceq, 0x3c3); -EMIT_RR (spe_dfcmeq, 0x3cb); -EMIT_RR (spe_dfcgt, 0x2c3); -EMIT_RR (spe_dfcmgt, 0x2cb); -EMIT_RI7 (spe_dftsv, 0x3bf); -EMIT_RR (spe_fceq, 0x3c2); -EMIT_RR (spe_fcmeq, 0x3ca); -EMIT_RR (spe_fcgt, 0x2c2); -EMIT_RR (spe_fcmgt, 0x2ca); -EMIT_R (spe_fscrwr, 0x3ba); -EMIT_ (spe_fscrrd, 0x398); - - -/* Channel instructions - */ -EMIT_R (spe_rdch, 0x00d); -EMIT_R (spe_rdchcnt, 0x00f); -EMIT_R (spe_wrch, 0x10d); - - -#ifdef UNDEF_EMIT_MACROS -#undef EMIT_ -#undef EMIT_R -#undef EMIT_RR -#undef EMIT_RRR -#undef EMIT_RI7 -#undef EMIT_RI8 -#undef EMIT_RI10 -#undef EMIT_RI16 -#undef EMIT_RI18 -#undef EMIT_I16 -#undef UNDEF_EMIT_MACROS -#endif /* EMIT_ */ diff --git a/src/mesa/sources b/src/mesa/sources index 9e56694893e..f0bf7b31fbc 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -246,7 +246,6 @@ ASM_C_SOURCES = \ x86/rtasm/x86sse.c \ sparc/sparc.c \ ppc/common_ppc.c \ - ppc/rtasm/spe_asm.c \ x86-64/x86-64.c X86_SOURCES = \ -- cgit v1.2.3 From b9da3791c934e05b82063a8c79c423a0a8e29a94 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 19 Feb 2008 15:07:53 +0900 Subject: Remove src/mesa and src/mesa/main from gallium source include paths. --- SConstruct | 2 - src/gallium/Makefile.template | 2 - src/gallium/auxiliary/draw/draw_vf_sse.c | 3 +- src/gallium/auxiliary/pipebuffer/linked_list.h | 91 ------ .../auxiliary/pipebuffer/pb_buffer_fenced.c | 3 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 3 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 3 +- src/gallium/auxiliary/util/u_double_list.h | 91 ++++++ src/gallium/auxiliary/util/u_simple_list.h | 197 +++++++++++++ src/gallium/drivers/i915simple/i915_debug.c | 2 - src/gallium/include/pipe/p_thread.h | 327 +++++++++++++++++++-- src/gallium/winsys/dri/intel/SConscript | 2 +- src/gallium/winsys/dri/intel/intel_batchpool.c | 15 +- src/gallium/winsys/xlib/SConscript | 6 + src/gallium/winsys/xlib/brw_aub.c | 16 +- src/mesa/SConscript | 7 + src/mesa/x86/common_x86_asm.S | 2 +- 17 files changed, 620 insertions(+), 152 deletions(-) delete mode 100644 src/gallium/auxiliary/pipebuffer/linked_list.h create mode 100644 src/gallium/auxiliary/util/u_double_list.h create mode 100644 src/gallium/auxiliary/util/u_simple_list.h (limited to 'src/gallium/drivers') diff --git a/SConstruct b/SConstruct index b20f88a303e..6cb07302e89 100644 --- a/SConstruct +++ b/SConstruct @@ -109,8 +109,6 @@ else: # Includes env.Append(CPPPATH = [ '#/include', - '#/src/mesa', - '#/src/mesa/main', '#/src/gallium/include', '#/src/gallium/auxiliary', '#/src/gallium/drivers', diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 6698212e77e..4e462b5c976 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -18,8 +18,6 @@ INCLUDES = \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main \ -I$(TOP)/include \ $(DRIVER_INCLUDES) diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c index 6076f9849d0..aff4ffd985c 100644 --- a/src/gallium/auxiliary/draw/draw_vf_sse.c +++ b/src/gallium/auxiliary/draw/draw_vf_sse.c @@ -26,9 +26,8 @@ */ -#include "simple_list.h" - #include "pipe/p_compiler.h" +#include "util/u_simple_list.h" #include "draw_vf.h" diff --git a/src/gallium/auxiliary/pipebuffer/linked_list.h b/src/gallium/auxiliary/pipebuffer/linked_list.h deleted file mode 100644 index e99817fb130..00000000000 --- a/src/gallium/auxiliary/pipebuffer/linked_list.h +++ /dev/null @@ -1,91 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - **************************************************************************/ - -/** - * \file - * List macros heavily inspired by the Linux kernel - * list handling. No list looping yet. - * - * Is not threadsafe, so common operations need to - * be protected using an external mutex. - */ - -#ifndef LINKED_LIST_H_ -#define LINKED_LIST_H_ - - -#include <stddef.h> - - -struct list_head -{ - struct list_head *prev; - struct list_head *next; -}; - - -#define LIST_INITHEAD(__item) \ - do { \ - (__item)->prev = (__item); \ - (__item)->next = (__item); \ - } while (0) - -#define LIST_ADD(__item, __list) \ - do { \ - (__item)->prev = (__list); \ - (__item)->next = (__list)->next; \ - (__list)->next->prev = (__item); \ - (__list)->next = (__item); \ - } while (0) - -#define LIST_ADDTAIL(__item, __list) \ - do { \ - (__item)->next = (__list); \ - (__item)->prev = (__list)->prev; \ - (__list)->prev->next = (__item); \ - (__list)->prev = (__item); \ - } while(0) - -#define LIST_DEL(__item) \ - do { \ - (__item)->prev->next = (__item)->next; \ - (__item)->next->prev = (__item)->prev; \ - } while(0) - -#define LIST_DELINIT(__item) \ - do { \ - (__item)->prev->next = (__item)->next; \ - (__item)->next->prev = (__item)->prev; \ - (__item)->next = (__item); \ - (__item)->prev = (__item); \ - } while(0) - -#define LIST_ENTRY(__type, __item, __field) \ - ((__type *)(((char *)(__item)) - offsetof(__type, __field))) - - -#endif /*LINKED_LIST_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index bc85c4b19fd..e2ee72ed1fa 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -34,13 +34,12 @@ */ -#include "linked_list.h" - #include "pipe/p_compiler.h" #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" #include "pipe/p_util.h" +#include "util/u_double_list.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 983a1053475..ff4fd123f36 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -33,12 +33,11 @@ */ -#include "linked_list.h" - #include "pipe/p_defines.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_util.h" +#include "util/u_double_list.h" #include "util/u_mm.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index beb145b7cbe..528e9528f6c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -35,13 +35,12 @@ */ -#include "linked_list.h" - #include "pipe/p_compiler.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" +#include "util/u_double_list.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h new file mode 100644 index 00000000000..8cb10c98206 --- /dev/null +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/** + * \file + * List macros heavily inspired by the Linux kernel + * list handling. No list looping yet. + * + * Is not threadsafe, so common operations need to + * be protected using an external mutex. + */ + +#ifndef _U_DOUBLE_LIST_H_ +#define _U_DOUBLE_LIST_H_ + + +#include <stddef.h> + + +struct list_head +{ + struct list_head *prev; + struct list_head *next; +}; + + +#define LIST_INITHEAD(__item) \ + do { \ + (__item)->prev = (__item); \ + (__item)->next = (__item); \ + } while (0) + +#define LIST_ADD(__item, __list) \ + do { \ + (__item)->prev = (__list); \ + (__item)->next = (__list)->next; \ + (__list)->next->prev = (__item); \ + (__list)->next = (__item); \ + } while (0) + +#define LIST_ADDTAIL(__item, __list) \ + do { \ + (__item)->next = (__list); \ + (__item)->prev = (__list)->prev; \ + (__list)->prev->next = (__item); \ + (__list)->prev = (__item); \ + } while(0) + +#define LIST_DEL(__item) \ + do { \ + (__item)->prev->next = (__item)->next; \ + (__item)->next->prev = (__item)->prev; \ + } while(0) + +#define LIST_DELINIT(__item) \ + do { \ + (__item)->prev->next = (__item)->next; \ + (__item)->next->prev = (__item)->prev; \ + (__item)->next = (__item); \ + (__item)->prev = (__item); \ + } while(0) + +#define LIST_ENTRY(__type, __item, __field) \ + ((__type *)(((char *)(__item)) - offsetof(__type, __field))) + + +#endif /*_U_DOUBLE_LIST_H_*/ diff --git a/src/gallium/auxiliary/util/u_simple_list.h b/src/gallium/auxiliary/util/u_simple_list.h new file mode 100644 index 00000000000..f5f43b0faa2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_list.h @@ -0,0 +1,197 @@ +/** + * \file simple_list.h + * Simple macros for type-safe, intrusive lists. + * + * Intended to work with a list sentinal which is created as an empty + * list. Insert & delete are O(1). + * + * \author + * (C) 1997, Keith Whitwell + */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef _U_SIMPLE_LIST_H_ +#define _U_SIMPLE_LIST_H_ + +/** + * Remove an element from list. + * + * \param elem element to remove. + */ +#define remove_from_list(elem) \ +do { \ + (elem)->next->prev = (elem)->prev; \ + (elem)->prev->next = (elem)->next; \ +} while (0) + +/** + * Insert an element to the list head. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_head(list, elem) \ +do { \ + (elem)->prev = list; \ + (elem)->next = (list)->next; \ + (list)->next->prev = elem; \ + (list)->next = elem; \ +} while(0) + +/** + * Insert an element to the list tail. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_tail(list, elem) \ +do { \ + (elem)->next = list; \ + (elem)->prev = (list)->prev; \ + (list)->prev->next = elem; \ + (list)->prev = elem; \ +} while(0) + +/** + * Move an element to the list head. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_head(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_head(list, elem); \ +} while (0) + +/** + * Move an element to the list tail. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_tail(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_tail(list, elem); \ +} while (0) + +/** + * Make a empty list empty. + * + * \param sentinal list (sentinal element). + */ +#define make_empty_list(sentinal) \ +do { \ + (sentinal)->next = sentinal; \ + (sentinal)->prev = sentinal; \ +} while (0) + +/** + * Get list first element. + * + * \param list list. + * + * \return pointer to first element. + */ +#define first_elem(list) ((list)->next) + +/** + * Get list last element. + * + * \param list list. + * + * \return pointer to last element. + */ +#define last_elem(list) ((list)->prev) + +/** + * Get next element. + * + * \param elem element. + * + * \return pointer to next element. + */ +#define next_elem(elem) ((elem)->next) + +/** + * Get previous element. + * + * \param elem element. + * + * \return pointer to previous element. + */ +#define prev_elem(elem) ((elem)->prev) + +/** + * Test whether element is at end of the list. + * + * \param list list. + * \param elem element. + * + * \return non-zero if element is at end of list, or zero otherwise. + */ +#define at_end(list, elem) ((elem) == (list)) + +/** + * Test if a list is empty. + * + * \param list list. + * + * \return non-zero if list empty, or zero otherwise. + */ +#define is_empty_list(list) ((list)->next == (list)) + +/** + * Walk through the elements of a list. + * + * \param ptr pointer to the current element. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach(ptr, list) \ + for( ptr=(list)->next ; ptr!=list ; ptr=(ptr)->next ) + +/** + * Walk through the elements of a list. + * + * Same as #foreach but lets you unlink the current value during a list + * traversal. Useful for freeing a list, element by element. + * + * \param ptr pointer to the current element. + * \param t temporary pointer. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach_s(ptr, t, list) \ + for(ptr=(list)->next,t=(ptr)->next; list != ptr; ptr=t, t=(t)->next) + +#endif /* _U_SIMPLE_LIST_H_ */ diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c index 94db44e1aaf..78102dbac26 100644 --- a/src/gallium/drivers/i915simple/i915_debug.c +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -25,8 +25,6 @@ * **************************************************************************/ -//#include "imports.h" - #include "i915_reg.h" #include "i915_context.h" #include "i915_winsys.h" diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index cd432c547ce..4325abc951e 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -1,54 +1,317 @@ /************************************************************************** * + * Copyright 1999-2006 Brian Paul * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ -#ifndef P_THREAD_H -#define P_THREAD_H +/** + * @file + * Thread + * + * Initial version by John Stone (j.stone@acm.org) (johns@cs.umr.edu) + * and Christoph Poliwoda (poliwoda@volumegraphics.com) + * Revised by Keith Whitwell + * Adapted for new gl dispatcher by Brian Paul + * + * + * + * DOCUMENTATION + * + * This thread module exports the following types: + * _glthread_TSD Thread-specific data area + * _glthread_Thread Thread datatype + * _glthread_Mutex Mutual exclusion lock + * + * Macros: + * _glthread_DECLARE_STATIC_MUTEX(name) Declare a non-local mutex + * _glthread_INIT_MUTEX(name) Initialize a mutex + * _glthread_LOCK_MUTEX(name) Lock a mutex + * _glthread_UNLOCK_MUTEX(name) Unlock a mutex + * + * Functions: + * _glthread_GetID(v) Get integer thread ID + * _glthread_InitTSD() Initialize thread-specific data + * _glthread_GetTSD() Get thread-specific data + * _glthread_SetTSD() Set thread-specific data + * + * If this file is accidentally included by a non-threaded build, + * it should not cause the build to fail, or otherwise cause problems. + * In general, it should only be included when needed however. + */ + +#ifndef _P_THREAD_H_ +#define _P_THREAD_H_ + + +#if defined(USE_MGL_NAMESPACE) +#define _glapi_Dispatch _mglapi_Dispatch +#endif + + + +#if (defined(PTHREADS) || defined(SOLARIS_THREADS) ||\ + defined(WIN32_THREADS) || defined(USE_XTHREADS) || defined(BEOS_THREADS)) \ + && !defined(THREADS) +# define THREADS +#endif + +#ifdef VMS +#include <GL/vms_x_fix.h> +#endif + +/* + * POSIX threads. This should be your choice in the Unix world + * whenever possible. When building with POSIX threads, be sure + * to enable any compiler flags which will cause the MT-safe + * libc (if one exists) to be used when linking, as well as any + * header macros for MT-safe errno, etc. For Solaris, this is the -mt + * compiler flag. On Solaris with gcc, use -D_REENTRANT to enable + * proper compiling for MT-safe libc etc. + */ +#if defined(PTHREADS) +#include <pthread.h> /* POSIX threads headers */ + +typedef struct { + pthread_key_t key; + int initMagic; +} _glthread_TSD; + +typedef pthread_t _glthread_Thread; + +typedef pthread_mutex_t _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + static _glthread_Mutex name = PTHREAD_MUTEX_INITIALIZER + +#define _glthread_INIT_MUTEX(name) \ + pthread_mutex_init(&(name), NULL) + +#define _glthread_DESTROY_MUTEX(name) \ + pthread_mutex_destroy(&(name)) + +#define _glthread_LOCK_MUTEX(name) \ + (void) pthread_mutex_lock(&(name)) + +#define _glthread_UNLOCK_MUTEX(name) \ + (void) pthread_mutex_unlock(&(name)) + +#endif /* PTHREADS */ + + + + +/* + * Solaris threads. Use only up to Solaris 2.4. + * Solaris 2.5 and higher provide POSIX threads. + * Be sure to compile with -mt on the Solaris compilers, or + * use -D_REENTRANT if using gcc. + */ +#ifdef SOLARIS_THREADS +#include <thread.h> + +typedef struct { + thread_key_t key; + mutex_t keylock; + int initMagic; +} _glthread_TSD; + +typedef thread_t _glthread_Thread; + +typedef mutex_t _glthread_Mutex; + +/* XXX need to really implement mutex-related macros */ +#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 +#define _glthread_INIT_MUTEX(name) (void) name +#define _glthread_DESTROY_MUTEX(name) (void) name +#define _glthread_LOCK_MUTEX(name) (void) name +#define _glthread_UNLOCK_MUTEX(name) (void) name + +#endif /* SOLARIS_THREADS */ + + + + +/* + * Windows threads. Should work with Windows NT and 95. + * IMPORTANT: Link with multithreaded runtime library when THREADS are + * used! + */ +#ifdef WIN32_THREADS +#include <windows.h> + +typedef struct { + DWORD key; + int initMagic; +} _glthread_TSD; + +typedef HANDLE _glthread_Thread; + +typedef CRITICAL_SECTION _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) /*static*/ _glthread_Mutex name = {0,0,0,0,0,0} +#define _glthread_INIT_MUTEX(name) InitializeCriticalSection(&name) +#define _glthread_DESTROY_MUTEX(name) DeleteCriticalSection(&name) +#define _glthread_LOCK_MUTEX(name) EnterCriticalSection(&name) +#define _glthread_UNLOCK_MUTEX(name) LeaveCriticalSection(&name) + +#endif /* WIN32_THREADS */ + + -#include "p_compiler.h" /* - * XXX: We should come up with a system-independent thread definitions. - * XXX: Patching glthread defs for now. + * XFree86 has its own thread wrapper, Xthreads.h + * We wrap it again for GL. */ +#ifdef USE_XTHREADS +#include <X11/Xthreads.h> + +typedef struct { + xthread_key_t key; + int initMagic; +} _glthread_TSD; + +typedef xthread_t _glthread_Thread; + +typedef xmutex_rec _glthread_Mutex; + +#ifdef XMUTEX_INITIALIZER +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + static _glthread_Mutex name = XMUTEX_INITIALIZER +#else +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + static _glthread_Mutex name +#endif + +#define _glthread_INIT_MUTEX(name) \ + xmutex_init(&(name)) + +#define _glthread_DESTROY_MUTEX(name) \ + xmutex_clear(&(name)) + +#define _glthread_LOCK_MUTEX(name) \ + (void) xmutex_lock(&(name)) + +#define _glthread_UNLOCK_MUTEX(name) \ + (void) xmutex_unlock(&(name)) + +#endif /* USE_XTHREADS */ + + + +/* + * BeOS threads. R5.x required. + */ +#ifdef BEOS_THREADS + +#include <kernel/OS.h> +#include <support/TLS.h> + +typedef struct { + int32 key; + int initMagic; +} _glthread_TSD; + +typedef thread_id _glthread_Thread; + +/* Use Benaphore, aka speeder semaphore */ +typedef struct { + int32 lock; + sem_id sem; +} benaphore; +typedef benaphore _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = { 0, 0 } +#define _glthread_INIT_MUTEX(name) name.sem = create_sem(0, #name"_benaphore"), name.lock = 0 +#define _glthread_DESTROY_MUTEX(name) delete_sem(name.sem), name.lock = 0 +#define _glthread_LOCK_MUTEX(name) if (name.sem == 0) _glthread_INIT_MUTEX(name); \ + if (atomic_add(&(name.lock), 1) >= 1) acquire_sem(name.sem) +#define _glthread_UNLOCK_MUTEX(name) if (atomic_add(&(name.lock), -1) > 1) release_sem(name.sem) + +#endif /* BEOS_THREADS */ + + + +#ifndef THREADS + +/* + * THREADS not defined + */ + +typedef unsigned _glthread_TSD; + +typedef unsigned _glthread_Thread; + +typedef unsigned _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 + +#define _glthread_INIT_MUTEX(name) (void) name + +#define _glthread_DESTROY_MUTEX(name) (void) name + +#define _glthread_LOCK_MUTEX(name) (void) name + +#define _glthread_UNLOCK_MUTEX(name) (void) name + +#endif /* THREADS */ + + + +/* + * Platform independent thread specific data API. + */ + +extern unsigned long +_glthread_GetID(void); + + +extern void +_glthread_InitTSD(_glthread_TSD *); + + +extern void * +_glthread_GetTSD(_glthread_TSD *); -#ifndef __MSC__ -#include "glapi/glthread.h" +extern void +_glthread_SetTSD(_glthread_TSD *, void *); -#else /* __MSC__ */ +#if defined(GLX_USE_TLS) -typedef int _glthread_Mutex; +extern __thread struct _glapi_table * _glapi_tls_Dispatch + __attribute__((tls_model("initial-exec"))); -#define _glthread_INIT_MUTEX( M ) ((void) (M)) -#define _glthread_LOCK_MUTEX( M ) ((void) (M)) -#define _glthread_UNLOCK_MUTEX( M ) ((void) (M)) +#define GET_DISPATCH() _glapi_tls_Dispatch -#define sched_yield() ((void) 0) +#elif !defined(GL_CALL) +# if defined(THREADS) +# define GET_DISPATCH() \ + ((__builtin_expect( _glapi_Dispatch != NULL, 1 )) \ + ? _glapi_Dispatch : _glapi_get_dispatch()) +# else +# define GET_DISPATCH() _glapi_Dispatch +# endif /* defined(THREADS) */ +#endif /* ndef GL_CALL */ -#endif /* __MSC__ */ -#endif /* P_THREAD_H */ +#endif /* _P_THREAD_H_ */ diff --git a/src/gallium/winsys/dri/intel/SConscript b/src/gallium/winsys/dri/intel/SConscript index 525ba580e8e..0ad19d42a85 100644 --- a/src/gallium/winsys/dri/intel/SConscript +++ b/src/gallium/winsys/dri/intel/SConscript @@ -35,5 +35,5 @@ drivers = [ env.SharedLibrary( target ='i915tex_dri.so', source = sources, - LIBS = mesa + drivers + auxiliaries + env['LIBS'], + LIBS = drivers + mesa + auxiliaries + env['LIBS'], ) \ No newline at end of file diff --git a/src/gallium/winsys/dri/intel/intel_batchpool.c b/src/gallium/winsys/dri/intel/intel_batchpool.c index 33b56817f6a..ce154c7b884 100644 --- a/src/gallium/winsys/dri/intel/intel_batchpool.c +++ b/src/gallium/winsys/dri/intel/intel_batchpool.c @@ -36,9 +36,12 @@ #include <xf86drm.h> #include <stdlib.h> +#include <stdio.h> #include <errno.h> -#include "imports.h" -#include "glthread.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_thread.h" + #include "dri_bufpool.h" #include "dri_bufmgr.h" #include "intel_batchpool.h" @@ -196,7 +199,7 @@ pool_create(struct _DriBufferPool *pool, _glthread_LOCK_MUTEX(p->mutex); if (p->numFree == 0) - pool_checkFree(p, GL_TRUE); + pool_checkFree(p, TRUE); if (p->numFree == 0) { fprintf(stderr, "Out of fixed size buffer objects\n"); @@ -278,7 +281,7 @@ pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, return -EBUSY; } - buf->mapped = GL_TRUE; + buf->mapped = TRUE; *virtual = (unsigned char *) p->virtual + buf->start; _glthread_UNLOCK_MUTEX(p->mutex); return 0; @@ -361,7 +364,7 @@ pool_validate(struct _DriBufferPool *pool, void *private) BBuf *buf = (BBuf *) private; BPool *p = buf->parent; _glthread_LOCK_MUTEX(p->mutex); - buf->unfenced = GL_TRUE; + buf->unfenced = TRUE; _glthread_UNLOCK_MUTEX(p->mutex); return 0; } @@ -379,7 +382,7 @@ pool_takedown(struct _DriBufferPool *pool) while ((p->numFree < p->numTot) && p->numDelayed) { _glthread_UNLOCK_MUTEX(p->mutex); sched_yield(); - pool_checkFree(p, GL_TRUE); + pool_checkFree(p, TRUE); _glthread_LOCK_MUTEX(p->mutex); } diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index f8aa5ef945d..c38b5be52c8 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -3,6 +3,12 @@ Import('*') +env = env.Clone() + +env.Append(CPPPATH = [ + '#/src/mesa', + '#/src/mesa/main', +]) sources = [ 'glxapi.c', diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/brw_aub.c index 541d50c6e40..10eedd8402d 100644 --- a/src/gallium/winsys/xlib/brw_aub.c +++ b/src/gallium/winsys/xlib/brw_aub.c @@ -29,11 +29,13 @@ * Keith Whitwell <keith@tungstengraphics.com> */ +#include <stdio.h> +#include <stdlib.h> #include "brw_aub.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "imports.h" -//#include "intel_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" struct brw_aubfile { @@ -350,9 +352,9 @@ struct brw_aubfile *brw_aubfile_create( void ) i++; - if (_mesa_getenv("INTEL_AUBFILE")) { - val = snprintf(filename, sizeof(filename), "%s%d.aub", _mesa_getenv("INTEL_AUBFILE"), i%4); - _mesa_printf("--> Aub file: %s\n", filename); + if (getenv("INTEL_AUBFILE")) { + val = snprintf(filename, sizeof(filename), "%s%d.aub", getenv("INTEL_AUBFILE"), i%4); + debug_printf("--> Aub file: %s\n", filename); aubfile->file = fopen(filename, "w"); } else { @@ -360,12 +362,12 @@ struct brw_aubfile *brw_aubfile_create( void ) if (val < 0 || val > sizeof(filename)) strcpy(filename, "default.aub"); - _mesa_printf("--> Aub file: %s\n", filename); + debug_printf("--> Aub file: %s\n", filename); aubfile->file = fopen(filename, "w"); } if (!aubfile->file) { - _mesa_printf("couldn't open aubfile\n"); + debug_printf("couldn't open aubfile\n"); exit(1); } diff --git a/src/mesa/SConscript b/src/mesa/SConscript index a828133580a..db18c61fac3 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -4,6 +4,13 @@ Import('*') +env = env.Clone() + +# Includes +env.Append(CPPPATH = [ + '#/src/mesa', + '#/src/mesa/main', +]) ####################################################################### # Core sources diff --git a/src/mesa/x86/common_x86_asm.S b/src/mesa/x86/common_x86_asm.S index ef3cc9eb59f..09c86b05ba8 100644 --- a/src/mesa/x86/common_x86_asm.S +++ b/src/mesa/x86/common_x86_asm.S @@ -39,7 +39,7 @@ * in there will break the build on some platforms. */ -#include "matypes.h" +#include "assyntax.h" #include "common_x86_features.h" SEG_TEXT -- cgit v1.2.3 From 4362c6e59d575a039e654e1520bbff89b73fc8f2 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Mon, 18 Feb 2008 18:51:57 -0800 Subject: Cell: trivial clean-ups --- src/gallium/drivers/cell/spu/spu_exec.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 109540b1f7b..0eb5ea1a3f3 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -146,17 +146,13 @@ spu_exec_machine_init(struct spu_exec_machine *mach, struct spu_sampler *samplers, unsigned processor) { - qword zero; - qword not_zero; - uint i; + const qword zero = si_il(0); + const qword not_zero = si_il(~0); mach->Samplers = samplers; mach->Processor = processor; mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; - zero = si_xor(zero, zero); - not_zero = si_xori(zero, 0xff); - /* Setup constants. */ mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; -- cgit v1.2.3 From 66be2810c3be07dd1ee45a60cfc632725837f2cd Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Mon, 18 Feb 2008 18:55:39 -0800 Subject: Cell: emit vertex shaders and uniforms more intelligently --- src/gallium/drivers/cell/common.h | 22 ++++---- src/gallium/drivers/cell/ppu/cell_state_emit.c | 18 ++++++ src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 17 +++--- src/gallium/drivers/cell/spu/spu_main.c | 9 +++ src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 6 +- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 68 ++++++++++------------- src/gallium/drivers/cell/spu/spu_vertex_shader.h | 5 ++ 7 files changed, 85 insertions(+), 60 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 74b131fbefc..cf892206c66 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -87,10 +87,12 @@ #define CELL_CMD_STATE_TEXTURE 13 #define CELL_CMD_STATE_VERTEX_INFO 14 #define CELL_CMD_STATE_VIEWPORT 15 -#define CELL_CMD_STATE_VS_ARRAY_INFO 16 -#define CELL_CMD_STATE_BLEND 17 -#define CELL_CMD_VS_EXECUTE 18 -#define CELL_CMD_STATE_ATTRIB_FETCH 19 +#define CELL_CMD_STATE_UNIFORMS 16 +#define CELL_CMD_STATE_VS_ARRAY_INFO 17 +#define CELL_CMD_STATE_BIND_VS 18 +#define CELL_CMD_STATE_BLEND 19 +#define CELL_CMD_STATE_ATTRIB_FETCH 20 +#define CELL_CMD_VS_EXECUTE 21 #define CELL_NUM_BUFFERS 4 @@ -144,14 +146,13 @@ struct cell_attribute_fetch_code { struct cell_shader_info { - unsigned num_outputs; - uint64_t declarations; - unsigned num_declarations; uint64_t instructions; - unsigned num_instructions; - uint64_t uniforms; uint64_t immediates; + + unsigned num_outputs; + unsigned num_declarations; + unsigned num_instructions; unsigned num_immediates; } ALIGN16_ATTRIB; @@ -160,10 +161,9 @@ struct cell_shader_info struct cell_command_vs { uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */ - struct cell_shader_info shader; + uint64_t vOut[SPU_VERTS_PER_BATCH]; unsigned num_elts; unsigned elts[SPU_VERTS_PER_BATCH]; - uint64_t vOut[SPU_VERTS_PER_BATCH]; float plane[12][4]; unsigned nr_planes; unsigned nr_attrs; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 5d2a7864493..49c0d130c52 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -31,6 +31,8 @@ #include "cell_state_emit.h" #include "cell_batch.h" #include "cell_texture.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" static void @@ -100,4 +102,20 @@ cell_emit_state(struct cell_context *cell) emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, &cell->vertex_info, sizeof(struct vertex_info)); } + + if (cell->dirty & CELL_NEW_VS) { + const struct draw_context *const draw = cell->draw; + struct cell_shader_info info; + + info.num_outputs = draw->num_vs_outputs; + info.declarations = (uintptr_t) draw->machine.Declarations; + info.num_declarations = draw->machine.NumDeclarations; + info.instructions = (uintptr_t) draw->machine.Instructions; + info.num_instructions = draw->machine.NumInstructions; + info.immediates = (uintptr_t) draw->machine.Imms; + info.num_immediates = draw->machine.ImmLimit / 4; + + emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, + & info, sizeof(info)); + } } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 6a1d3bc20a1..64c7821c193 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -54,6 +54,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) struct cell_command_vs *const vs = &cell_global.command[0].vs; uint64_t *batch; struct cell_array_info *array_info; + struct cell_shader_info *shader_info; unsigned i, j; struct cell_attribute_fetch_code *cf; @@ -100,17 +101,17 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) (void) memcpy(&batch[1], &draw->viewport, sizeof(struct pipe_viewport_state)); + { + uint64_t uniforms = (uintptr_t) draw->user.constants; + + batch = cell_batch_alloc(cell, 2 *sizeof(batch[0])); + batch[0] = CELL_CMD_STATE_UNIFORMS; + batch[1] = uniforms; + } + cell_batch_flush(cell); vs->opcode = CELL_CMD_VS_EXECUTE; - vs->shader.num_outputs = draw->num_vs_outputs; - vs->shader.declarations = (uintptr_t) draw->machine.Declarations; - vs->shader.num_declarations = draw->machine.NumDeclarations; - vs->shader.instructions = (uintptr_t) draw->machine.Instructions; - vs->shader.num_instructions = draw->machine.NumInstructions; - vs->shader.uniforms = (uintptr_t) draw->user.constants; - vs->shader.immediates = (uintptr_t) draw->machine.Imms; - vs->shader.num_immediates = draw->machine.ImmLimit / 4; vs->nr_attrs = draw->vertex_fetch.nr_attrs; (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index fcbf0f841e6..dbc3705c241 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -433,10 +433,19 @@ cmd_batch(uint opcode) sizeof(struct pipe_viewport_state)); pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); break; + case CELL_CMD_STATE_UNIFORMS: + draw.constants = (float (*)[4]) (uintptr_t) buffer[pos + 1]; + pos += 2; + break; case CELL_CMD_STATE_VS_ARRAY_INFO: cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; + case CELL_CMD_STATE_BIND_VS: + spu_bind_vertex_shader(&draw, + (struct cell_shader_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); + break; case CELL_CMD_STATE_ATTRIB_FETCH: { struct cell_attribute_fetch_code *code = (struct cell_attribute_fetch_code *) &buffer[pos+1]; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 55c6c287175..e5d9910ff30 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -64,7 +64,7 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[] = { +static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { /* Shuffle used by CVT_64_FLOAT */ { @@ -108,7 +108,7 @@ static const qword fetch_shuffle_data[] = { static INLINE void fetch_unaligned(qword *dst, unsigned ea, unsigned size) { - qword tmp[4]; + qword tmp[4] ALIGN16_ATTRIB; const int shift = ea & 0x0f; const unsigned aligned_start_ea = ea & ~0x0f; const unsigned aligned_end_ea = (ea + size) & ~0x0f; @@ -169,7 +169,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4]; + qword in[2 * 4] ALIGN16_ATTRIB; /* Fetch four attributes for four vertices. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index 3f5bf41aa2f..8363efeeb6e 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -165,63 +165,55 @@ run_vertex_program(struct spu_vs_context *draw, } -static void -spu_bind_vertex_shader(struct spu_vs_context *draw, - void *uniforms, - void *planes, - unsigned nr_planes, - unsigned num_outputs - ) -{ - draw->constants = (float (*)[4]) uniforms; - - (void) memcpy(draw->plane, planes, sizeof(float) * 4 * nr_planes); - draw->nr_planes = nr_planes; - draw->num_vs_outputs = num_outputs; - - /* specify the shader to interpret/execute */ - spu_exec_machine_init(&draw->machine, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/, - PIPE_SHADER_VERTEX); -} - - unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] ALIGN16_ATTRIB; + void -spu_execute_vertex_shader(struct spu_vs_context *draw, - const struct cell_command_vs *vs) +spu_bind_vertex_shader(struct spu_vs_context *draw, + struct cell_shader_info *vs) { - unsigned i; - - const uint64_t immediate_addr = vs->shader.immediates; + const unsigned immediate_addr = vs->immediates; const unsigned immediate_size = - ROUNDUP16((sizeof(float) * 4 * vs->shader.num_immediates) - + (immediate_addr & 0x0f)); + ROUNDUP16((sizeof(float) * 4 * vs->num_immediates) + + (immediate_addr & 0x0f)); + mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, TAG_VERTEX_BUFFER, 0, 0); draw->machine.Instructions = (struct tgsi_full_instruction *) - vs->shader.instructions; - draw->machine.NumInstructions = vs->shader.num_instructions; + vs->instructions; + draw->machine.NumInstructions = vs->num_instructions; draw->machine.Declarations = (struct tgsi_full_declaration *) - vs->shader.declarations; - draw->machine.NumDeclarations = vs->shader.num_declarations; + vs->declarations; + draw->machine.NumDeclarations = vs->num_declarations; - draw->vertex_fetch.nr_attrs = vs->nr_attrs; + draw->num_vs_outputs = vs->num_outputs; + + /* specify the shader to interpret/execute */ + spu_exec_machine_init(&draw->machine, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/, + PIPE_SHADER_VERTEX); wait_on_mask(1 << TAG_VERTEX_BUFFER); (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], - sizeof(float) * 4 * vs->shader.num_immediates); + sizeof(float) * 4 * vs->num_immediates); +} - spu_bind_vertex_shader(draw, vs->shader.uniforms, - vs->plane, vs->nr_planes, - vs->shader.num_outputs); + +void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs) +{ + unsigned i; + + (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes); + draw->nr_planes = vs->nr_planes; + draw->vertex_fetch.nr_attrs = vs->nr_attrs; for (i = 0; i < vs->num_elts; i += 4) { const unsigned batch_size = MIN2(vs->num_elts - i, 4); diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h index 0fb0bc28d03..54a4b8d9b9f 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -1,6 +1,7 @@ #ifndef SPU_VERTEX_SHADER_H #define SPU_VERTEX_SHADER_H +#include "cell/common.h" #include "pipe/p_format.h" #include "spu_exec.h" @@ -54,6 +55,10 @@ static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, struct cell_command_vs; +extern void +spu_bind_vertex_shader(struct spu_vs_context *draw, + struct cell_shader_info *vs); + extern void spu_execute_vertex_shader(struct spu_vs_context *draw, const struct cell_command_vs *vs); -- cgit v1.2.3 From 3e329ea7e41e8d97de5b5f345ecab0833c8afe70 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 19 Feb 2008 11:14:54 -0700 Subject: gallium: updated cell build Building on Ian's Cell build fix. Put libcell.a in the gallium/drivers/cell/ directory. General Makefile clean-up, simplification, updated comments. --- src/gallium/drivers/cell/ppu/Makefile | 12 ++++++++---- src/gallium/winsys/xlib/Makefile | 19 ++++++++++--------- 2 files changed, 18 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 196ab777f54..3c3f622a2fc 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -1,6 +1,6 @@ # Gallium3D Cell driver: PPU code -# This makefile builds the g3dcell.a library which gets pulled into +# This makefile builds the libcell.a library which gets pulled into # the main libGL.so library @@ -8,10 +8,14 @@ TOP = ../../../../.. include $(TOP)/configs/linux-cell -#PROG = gl4 +# This is the "top-level" cell PPU driver code, will get pulled into libGL.so +# by the winsys Makefile. +CELL_LIB = ../libcell.a -CELL_LIB = libcell.a +# This is the SPU code. We'd like to be able to put this into the libcell.a +# archive with the PPU code, but nesting .a libs doesn't seem to work. +# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile SPU_CODE_MODULE = ../spu/g3d_spu.a @@ -56,7 +60,7 @@ default: $(CELL_LIB) $(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) -# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) +# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work ar -ru $(CELL_LIB) $(OBJECTS) #$(PROG): $(PPU_OBJECTS) diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index 17405388eeb..09f10e5ea87 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -1,4 +1,4 @@ -# src/mesa/Makefile +# src/gallium/winsys/xlib/Makefile TOP = ../../../.. include $(TOP)/configs/current @@ -27,16 +27,17 @@ GL_MINOR = 5 GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) +ifeq ($(CONFIG_NAME), linux-cell) +# The SPU code is in a separate .a file, unfortunately +CELL_SPU_LIB = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a +endif + PIPE_LIB = \ $(GALLIUM_DRIVERS) \ $(TOP)/src/mesa/libglapi.a \ $(TOP)/src/mesa/libmesa.a \ - $(GALLIUM_AUXILIARIES) - -ifeq ($(CONFIG_NAME), linux-cell) -CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a -CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a -endif + $(GALLIUM_AUXILIARIES) \ + $(CELL_SPU_LIB) \ .SUFFIXES : .cpp @@ -65,13 +66,13 @@ STAND_ALONE_OBJECTS = \ $(STAND_ALONE_DRIVER_OBJECTS) # Make the GL library -$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(PIPE_LIB) $(TOP)/bin/mklib -o $(GL_LIB) \ -linker "$(CC)" \ -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ -install $(TOP)/$(LIB_DIR) \ $(MKLIB_OPTIONS) $(STAND_ALONE_OBJECTS) \ - --start-group $(PIPE_LIB) --end-group $(CELL_LIB) $(CELL_LIB_SPU) $(GL_LIB_DEPS) + --start-group $(PIPE_LIB) --end-group $(GL_LIB_DEPS) ###################################################################### -- cgit v1.2.3 From b1c8fa5b6002296d9abe21c06d5cb81a3f70828a Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 19 Feb 2008 14:55:18 -0700 Subject: gallium: implement correct sampling for RECT targets / unnormalized texcoords --- src/gallium/drivers/softpipe/sp_tex_sample.c | 168 +++++++++++++++++++++++---- 1 file changed, 143 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index c54e9d385c2..ff872f360e9 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -332,6 +332,61 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size, } +/** + * For RECT textures / unnormalized texcoords + * Only a subset of wrap modes supported. + */ +static INLINE int +nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size) +{ + int i; + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + i = ifloor(s); + return CLAMP(i, 0, size-1); + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return ifloor( CLAMP(s, 0.5F, size - 0.5F) ); + default: + assert(0); + return 0; + } +} + + +/** + * For RECT textures / unnormalized texcoords. + * Only a subset of wrap modes supported. + */ +static INLINE void +linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, + int *i0, int *i1, float *a) +{ + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + /* Not exactly what the spec says, but it matches NVIDIA output */ + s = CLAMP(s - 0.5F, 0.0, size - 1.0); + *i0 = ifloor(s); + *i1 = *i0 + 1; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + s = CLAMP(s, 0.5F, size - 0.5F); + s -= 0.5F; + *i0 = ifloor(s); + *i1 = *i0 + 1; + if (*i1 > size - 1) + *i1 = size - 1; + break; + default: + assert(0); + } + *a = FRAC(s); +} + + static unsigned choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) { @@ -415,15 +470,15 @@ compute_lambda(struct tgsi_sampler *sampler, { float rho, lambda; + assert(sampler->state->normalized_coords); + assert(s); { float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; dsdx = FABSF(dsdx); dsdy = FABSF(dsdy); - rho = MAX2(dsdx, dsdy); - if (sampler->state->normalized_coords) - rho *= sampler->texture->width[0]; + rho = MAX2(dsdx, dsdy) * sampler->texture->width[0]; } if (t) { float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; @@ -431,9 +486,7 @@ compute_lambda(struct tgsi_sampler *sampler, float max; dtdx = FABSF(dtdx); dtdy = FABSF(dtdy); - max = MAX2(dtdx, dtdy); - if (sampler->state->normalized_coords) - max *= sampler->texture->height[0]; + max = MAX2(dtdx, dtdy) * sampler->texture->height[0]; rho = MAX2(rho, max); } if (p) { @@ -442,9 +495,7 @@ compute_lambda(struct tgsi_sampler *sampler, float max; dpdx = FABSF(dpdx); dpdy = FABSF(dpdy); - max = MAX2(dpdx, dpdy); - if (sampler->state->normalized_coords) - max *= sampler->texture->depth[0]; + max = MAX2(dpdx, dpdy) * sampler->texture->depth[0]; rho = MAX2(rho, max); } @@ -628,13 +679,10 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler, choose_mipmap_levels(sampler, s, t, p, lodbias, &level0, &level1, &levelBlend, &imgFilter); - if (sampler->state->normalized_coords) { - width = sampler->texture->width[level0]; - height = sampler->texture->height[level0]; - } - else { - width = height = 1; - } + assert(sampler->state->normalized_coords); + + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; assert(width > 0); @@ -765,14 +813,11 @@ sp_get_samples_3d(struct tgsi_sampler *sampler, choose_mipmap_levels(sampler, s, t, p, lodbias, &level0, &level1, &levelBlend, &imgFilter); - if (sampler->state->normalized_coords) { - width = sampler->texture->width[level0]; - height = sampler->texture->height[level0]; - depth = sampler->texture->depth[level0]; - } - else { - width = height = depth = 1; - } + assert(sampler->state->normalized_coords); + + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + depth = sampler->texture->depth[level0]; assert(width > 0); assert(height > 0); @@ -889,6 +934,73 @@ sp_get_samples_cube(struct tgsi_sampler *sampler, } +static void +sp_get_samples_rect(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + //sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces); + static const uint face = 0; + const uint compare_func = sampler->state->compare_func; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + choose_mipmap_levels(sampler, s, t, p, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + /* texture RECTS cannot be mipmapped */ + assert(level0 == level1); + + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + for (j = 0; j < QUAD_SIZE; j++) { + int x = nearest_texcoord_unnorm(sampler->state->wrap_s, s[j], width); + int y = nearest_texcoord_unnorm(sampler->state->wrap_t, t[j], height); + get_texel(sampler, face, level0, x, y, 0, rgba, j); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, rgba, p, j); + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4], a, b; + int x0, y0, x1, y1, c; + linear_texcoord_unnorm(sampler->state->wrap_s, s[j], width, &x0, &x1, &a); + linear_texcoord_unnorm(sampler->state->wrap_t, t[j], height, &y0, &y1, &b); + get_texel(sampler, face, level0, x0, y0, 0, tx, 0); + get_texel(sampler, face, level0, x1, y0, 0, tx, 1); + get_texel(sampler, face, level0, x0, y1, 0, tx, 2); + get_texel(sampler, face, level0, x1, y1, 0, tx, 3); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + } + break; + default: + assert(0); + } +} + + + + /** * Called via tgsi_sampler::get_samples() * Use the sampler's state setting to get a filtered RGBA value @@ -914,15 +1026,21 @@ sp_get_samples(struct tgsi_sampler *sampler, switch (sampler->texture->target) { case PIPE_TEXTURE_1D: + assert(sampler->state->normalized_coords); sp_get_samples_1d(sampler, s, t, p, lodbias, rgba); break; case PIPE_TEXTURE_2D: - sp_get_samples_2d(sampler, s, t, p, lodbias, rgba); + if (sampler->state->normalized_coords) + sp_get_samples_2d(sampler, s, t, p, lodbias, rgba); + else + sp_get_samples_rect(sampler, s, t, p, lodbias, rgba); break; case PIPE_TEXTURE_3D: + assert(sampler->state->normalized_coords); sp_get_samples_3d(sampler, s, t, p, lodbias, rgba); break; case PIPE_TEXTURE_CUBE: + assert(sampler->state->normalized_coords); sp_get_samples_cube(sampler, s, t, p, lodbias, rgba); break; default: -- cgit v1.2.3 From a2c06c5b5c8b3fb9f6d65bcd288d62e112e6a603 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 19 Feb 2008 14:56:01 -0700 Subject: gallium: don't hard-code attrib slot=0 in setup_fragcoord_coeff() --- src/gallium/drivers/softpipe/sp_prim_setup.c | 41 ++++++++++++---------------- 1 file changed, 18 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index d73521ccbe5..7b1e131ee14 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -476,33 +476,33 @@ static void tri_persp_coeff( struct setup_stage *setup, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coeff(struct setup_stage *setup) +setup_fragcoord_coeff(struct setup_stage *setup, uint slot) { /*X*/ - setup->coef[0].a0[0] = 0; - setup->coef[0].dadx[0] = 1.0; - setup->coef[0].dady[0] = 0.0; + setup->coef[slot].a0[0] = 0; + setup->coef[slot].dadx[0] = 1.0; + setup->coef[slot].dady[0] = 0.0; /*Y*/ if (setup->softpipe->rasterizer->origin_lower_left) { /* y=0=bottom */ const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height; - setup->coef[0].a0[1] = (float) (winHeight - 1); - setup->coef[0].dady[1] = -1.0; + setup->coef[slot].a0[1] = (float) (winHeight - 1); + setup->coef[slot].dady[1] = -1.0; } else { /* y=0=top */ - setup->coef[0].a0[1] = 0.0; - setup->coef[0].dady[1] = 1.0; + setup->coef[slot].a0[1] = 0.0; + setup->coef[slot].dady[1] = 1.0; } - setup->coef[0].dadx[1] = 0.0; + setup->coef[slot].dadx[1] = 0.0; /*Z*/ - setup->coef[0].a0[2] = setup->posCoef.a0[2]; - setup->coef[0].dadx[2] = setup->posCoef.dadx[2]; - setup->coef[0].dady[2] = setup->posCoef.dady[2]; + setup->coef[slot].a0[2] = setup->posCoef.a0[2]; + setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[slot].dady[2] = setup->posCoef.dady[2]; /*W*/ - setup->coef[0].a0[3] = setup->posCoef.a0[3]; - setup->coef[0].dadx[3] = setup->posCoef.dadx[3]; - setup->coef[0].dady[3] = setup->posCoef.dady[3]; + setup->coef[slot].a0[3] = setup->posCoef.a0[3]; + setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[slot].dady[3] = setup->posCoef.dady[3]; } @@ -543,8 +543,7 @@ static void setup_tri_coefficients( struct setup_stage *setup ) tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); break; case INTERP_POS: - assert(fragSlot == 0); - setup_fragcoord_coeff(setup); + setup_fragcoord_coeff(setup, fragSlot); break; default: assert(0); @@ -798,9 +797,7 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); break; case INTERP_POS: - assert(fragSlot == 0); - assert(0); /* XXX fix this: */ - setup_fragcoord_coeff(setup); + setup_fragcoord_coeff(setup, fragSlot); break; default: assert(0); @@ -1022,9 +1019,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) &setup->coef[fragSlot], vertSlot, j); break; case INTERP_POS: - assert(fragSlot == 0); - assert(0); /* XXX fix this: */ - setup_fragcoord_coeff(setup); + setup_fragcoord_coeff(setup, fragSlot); break; default: assert(0); -- cgit v1.2.3 From 4ec46e4869b60b60c7ddf43168604713b5c4c359 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 19 Feb 2008 14:58:23 -0700 Subject: gallium: add some casts to prevent likely msvc warnings --- src/gallium/drivers/softpipe/sp_tex_sample.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index ff872f360e9..43d5085895f 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -347,7 +347,7 @@ nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size) case PIPE_TEX_WRAP_CLAMP_TO_EDGE: /* fall-through */ case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return ifloor( CLAMP(s, 0.5F, size - 0.5F) ); + return ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) ); default: assert(0); return 0; @@ -366,14 +366,14 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, switch (wrapMode) { case PIPE_TEX_WRAP_CLAMP: /* Not exactly what the spec says, but it matches NVIDIA output */ - s = CLAMP(s - 0.5F, 0.0, size - 1.0); + s = CLAMP(s - 0.5F, 0.0, (float) size - 1.0); *i0 = ifloor(s); *i1 = *i0 + 1; break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: /* fall-through */ case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - s = CLAMP(s, 0.5F, size - 0.5F); + s = CLAMP(s, 0.5F, (float) size - 0.5F); s -= 0.5F; *i0 = ifloor(s); *i1 = *i0 + 1; -- cgit v1.2.3 From 75a4524f2c6444b27055e539da052827670b62cf Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 19 Feb 2008 16:28:25 -0700 Subject: gallium: initialize the killmask register to zero before running shader This fixes mysterious missing fragments when running with SSE. --- src/gallium/drivers/softpipe/sp_fs_sse.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index b18772f4e6d..53050b78230 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -124,6 +124,9 @@ fs_sse_run( struct sp_fragment_shader *base, (float)quad->x0, (float)quad->y0, machine->Temps); + /* init kill mask */ + machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = 0x0; + shader->func( machine->Inputs, machine->Outputs, machine->Consts, -- cgit v1.2.3 From 46c3d0918dd7a47f69c21e4eb1a3fd2a2fbe6223 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <darktama@beleth.(none)> Date: Wed, 20 Feb 2008 16:21:28 +1100 Subject: nv40: keep track of generated context state vs current channel state --- src/gallium/drivers/nv40/nv40_context.h | 26 ++++++----- src/gallium/drivers/nv40/nv40_fragprog.c | 6 +-- src/gallium/drivers/nv40/nv40_state_emit.c | 67 ++++++++++----------------- src/gallium/drivers/nv40/nv40_state_scissor.c | 8 ++-- src/gallium/drivers/nv40/nv40_state_stipple.c | 8 ++-- src/gallium/drivers/nv40/nv40_vertprog.c | 6 +-- 6 files changed, 53 insertions(+), 68 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 28a0274d771..2dd137ead0f 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -22,6 +22,15 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +enum nv40_state_index { + NV40_STATE_CLIP = 1ULL, + NV40_STATE_SCISSOR = 2ULL, + NV40_STATE_STIPPLE = 3ULL, + NV40_STATE_FRAGPROG = 4ULL, + NV40_STATE_VERTPROG = 5ULL, + NV40_STATE_MAX = 6ULL +}; + #define NV40_NEW_BLEND (1 << 0) #define NV40_NEW_RAST (1 << 1) #define NV40_NEW_ZSA (1 << 2) @@ -56,6 +65,9 @@ struct nv40_channel_context { /* Vtxprog resources */ struct nouveau_resource *vp_exec_heap; struct nouveau_resource *vp_data_heap; + + /* Current 3D state of channel */ + struct nouveau_stateobj *state[NV40_STATE_MAX]; }; struct nv40_rasterizer_state { @@ -64,18 +76,10 @@ struct nv40_rasterizer_state { }; struct nv40_state { - struct { - unsigned enabled; - struct nouveau_stateobj *so; - } scissor; - - struct { - unsigned enabled; - struct nouveau_stateobj *so; - } stipple; + unsigned scissor_enabled; + unsigned stipple_enabled; - struct nouveau_stateobj *fragprog; - struct nouveau_stateobj *vertprog; + struct nouveau_stateobj *hw[NV40_STATE_MAX]; }; struct nv40_context { diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 77ac8ab2c61..db2613ef8bb 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -841,8 +841,8 @@ update_constants: nv40_fragprog_upload(nv40, fp); } - if (fp->so != nv40->state.fragprog) { - so_ref(fp->so, &nv40->state.fragprog); + if (fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { + so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]); return TRUE; } @@ -861,7 +861,7 @@ struct nv40_state_entry nv40_state_fragprog = { .validate = nv40_fragprog_validate, .dirty = { .pipe = NV40_NEW_FRAGPROG, - .hw = NV40_NEW_FRAGPROG + .hw = NV40_STATE_FRAGPROG } }; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index e8230111bb4..58beb72389a 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -1,27 +1,6 @@ #include "nv40_context.h" #include "nv40_state.h" -/* Emit relocs for every referenced buffer. - * - * This is to ensure the bufmgr has an accurate idea of how - * the buffer is used. These relocs appear in the push buffer as - * NOPs, and will only be turned into state changes if a buffer - * actually moves. - */ -static void -nv40_state_emit_dummy_relocs(struct nv40_context *nv40) -{ - unsigned i; - - so_emit_reloc_markers(nv40->nvws, nv40->so_framebuffer); - for (i = 0; i < 16; i++) { - if (!(nv40->fp_samplers & (1 << i))) - continue; - so_emit_reloc_markers(nv40->nvws, nv40->so_fragtex[i]); - } - so_emit_reloc_markers(nv40->nvws, nv40->state.fragprog); -} - static struct nv40_state_entry *render_states[] = { &nv40_state_clip, &nv40_state_scissor, @@ -45,7 +24,7 @@ nv40_state_validate(struct nv40_context *nv40) if (nv40->dirty & e->dirty.pipe) { if (e->validate(nv40)) - nv40->hw_dirty |= e->dirty.hw; + nv40->hw_dirty |= (1 << e->dirty.hw); } states++; @@ -70,6 +49,28 @@ nv40_state_validate(struct nv40_context *nv40) } } +static void +nv40_state_emit(struct nv40_context *nv40) +{ + unsigned i; + + while (nv40->hw_dirty) { + unsigned idx = ffs(nv40->hw_dirty) - 1; + nv40->hw_dirty &= ~(1 << idx); + + so_ref (nv40->state.hw[idx], &nv40->hw->state[idx]); + so_emit(nv40->nvws, nv40->hw->state[idx]); + } + + so_emit_reloc_markers(nv40->nvws, nv40->so_framebuffer); + for (i = 0; i < 16; i++) { + if (!(nv40->fp_samplers & (1 << i))) + continue; + so_emit_reloc_markers(nv40->nvws, nv40->so_fragtex[i]); + } + so_emit_reloc_markers(nv40->nvws, nv40->state.hw[NV40_STATE_FRAGPROG]); +} + void nv40_emit_hw_state(struct nv40_context *nv40) { @@ -90,24 +91,9 @@ nv40_emit_hw_state(struct nv40_context *nv40) if (nv40->dirty & NV40_NEW_BCOL) so_emit(nv40->nvws, nv40->so_bcol); - if (nv40->hw_dirty & NV40_NEW_SCISSOR) { - so_emit(nv40->nvws, nv40->state.scissor.so); - nv40->hw_dirty &= ~NV40_NEW_SCISSOR; - } - if (nv40->dirty & NV40_NEW_VIEWPORT) so_emit(nv40->nvws, nv40->so_viewport); - if (nv40->hw_dirty & NV40_NEW_STIPPLE) { - so_emit(nv40->nvws, nv40->state.stipple.so); - nv40->hw_dirty &= ~NV40_NEW_STIPPLE; - } - - if (nv40->hw_dirty & NV40_NEW_FRAGPROG) { - so_emit(nv40->nvws, nv40->state.fragprog); - nv40->hw_dirty &= ~NV40_NEW_FRAGPROG; - } - if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) { nv40_fragtex_bind(nv40); @@ -118,14 +104,9 @@ nv40_emit_hw_state(struct nv40_context *nv40) nv40->dirty &= ~NV40_NEW_FRAGPROG; } - if (nv40->hw_dirty & NV40_NEW_VERTPROG) { - so_emit(nv40->nvws, nv40->state.vertprog); - nv40->hw_dirty &= ~NV40_NEW_VERTPROG; - } + nv40_state_emit(nv40); nv40->dirty_samplers = 0; nv40->dirty = 0; - - nv40_state_emit_dummy_relocs(nv40); } diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index 556b820e581..dc7b6d3a9d8 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -7,8 +7,8 @@ nv40_state_scissor_validate(struct nv40_context *nv40) struct pipe_scissor_state *s = &nv40->pipe_state.scissor; struct nouveau_stateobj *so; - if (nv40->state.scissor.so && - (rast->scissor == 0 && nv40->state.scissor.enabled == 0)) + if (nv40->state.hw[NV40_STATE_SCISSOR] && + (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) return FALSE; so = so_new(3, 0); @@ -21,7 +21,7 @@ nv40_state_scissor_validate(struct nv40_context *nv40) so_data (so, 4096 << 16); } - so_ref(so, &nv40->state.scissor.so); + so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]); so_ref(NULL, &so); return TRUE; } @@ -30,6 +30,6 @@ struct nv40_state_entry nv40_state_scissor = { .validate = nv40_state_scissor_validate, .dirty = { .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, - .hw = NV40_NEW_SCISSOR + .hw = NV40_STATE_SCISSOR } }; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c index 52462a0b502..1b0b194432e 100644 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -7,8 +7,8 @@ nv40_state_stipple_validate(struct nv40_context *nv40) struct nouveau_grobj *curie = nv40->hw->curie; struct nouveau_stateobj *so; - if (nv40->state.stipple.so && (rast->poly_stipple_enable == 0 && - nv40->state.stipple.enabled == 0)) + if (nv40->state.hw[NV40_STATE_STIPPLE] && + (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0)) return FALSE; if (rast->poly_stipple_enable) { @@ -26,7 +26,7 @@ nv40_state_stipple_validate(struct nv40_context *nv40) so_data (so, 0); } - so_ref(so, &nv40->state.stipple.so); + so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]); so_ref(NULL, &so); return TRUE; } @@ -35,6 +35,6 @@ struct nv40_state_entry nv40_state_stipple = { .validate = nv40_state_stipple_validate, .dirty = { .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, - .hw = NV40_NEW_STIPPLE + .hw = NV40_STATE_STIPPLE, } }; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 4a15e51eb33..8a2d2336974 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -786,8 +786,8 @@ check_gpu_resources: } } - if (vp->so != nv40->state.vertprog) { - so_ref(vp->so, &nv40->state.vertprog); + if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) { + so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]); return TRUE; } @@ -807,7 +807,7 @@ struct nv40_state_entry nv40_state_vertprog = { .validate = nv40_vertprog_validate, .dirty = { .pipe = NV40_NEW_VERTPROG, - .hw = NV40_NEW_VERTPROG + .hw = NV40_STATE_VERTPROG, } }; -- cgit v1.2.3 From 9cd10d7618a226fe46395b08beb19e420bc14a4f Mon Sep 17 00:00:00 2001 From: Ben Skeggs <darktama@beleth.(none)> Date: Wed, 20 Feb 2008 17:14:41 +1100 Subject: nv40: almost there.. --- src/gallium/drivers/nv40/Makefile | 6 ++- src/gallium/drivers/nv40/nv40_context.h | 66 +++++++++++++++++++----- src/gallium/drivers/nv40/nv40_state.c | 62 +++++++++------------- src/gallium/drivers/nv40/nv40_state_blend.c | 41 +++++++++++++++ src/gallium/drivers/nv40/nv40_state_emit.c | 20 ++----- src/gallium/drivers/nv40/nv40_state_rasterizer.c | 17 ++++++ src/gallium/drivers/nv40/nv40_state_scissor.c | 2 +- src/gallium/drivers/nv40/nv40_state_stipple.c | 2 +- src/gallium/drivers/nv40/nv40_state_viewport.c | 30 +++++++++++ src/gallium/drivers/nv40/nv40_state_zsa.c | 17 ++++++ 10 files changed, 194 insertions(+), 69 deletions(-) create mode 100644 src/gallium/drivers/nv40/nv40_state_blend.c create mode 100644 src/gallium/drivers/nv40/nv40_state_rasterizer.c create mode 100644 src/gallium/drivers/nv40/nv40_state_viewport.c create mode 100644 src/gallium/drivers/nv40/nv40_state_zsa.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 12b8eef2590..82295cbefca 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -12,10 +12,14 @@ DRIVER_SOURCES = \ nv40_miptree.c \ nv40_query.c \ nv40_state.c \ - nv40_state_emit.c \ + nv40_state_blend.c \ nv40_state_clip.c \ + nv40_state_emit.c \ + nv40_state_rasterizer.c \ nv40_state_scissor.c \ nv40_state_stipple.c \ + nv40_state_viewport.c \ + nv40_state_zsa.c \ nv40_surface.c \ nv40_vbo.c \ nv40_vertprog.c diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 2dd137ead0f..d71fe11a4ab 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -23,12 +23,38 @@ fprintf(stderr, "nouveau: "fmt, ##args); enum nv40_state_index { - NV40_STATE_CLIP = 1ULL, - NV40_STATE_SCISSOR = 2ULL, - NV40_STATE_STIPPLE = 3ULL, - NV40_STATE_FRAGPROG = 4ULL, - NV40_STATE_VERTPROG = 5ULL, - NV40_STATE_MAX = 6ULL + NV40_STATE_FB = 0, + NV40_STATE_VIEWPORT = 1, + NV40_STATE_BLEND = 2, + NV40_STATE_RAST = 3, + NV40_STATE_ZSA = 4, + NV40_STATE_BCOL = 5, + NV40_STATE_CLIP = 6, + NV40_STATE_SCISSOR = 7, + NV40_STATE_STIPPLE = 8, + NV40_STATE_FRAGPROG = 9, + NV40_STATE_VERTPROG = 10, + NV40_STATE_FRAGTEX0 = 11, + NV40_STATE_FRAGTEX1 = 12, + NV40_STATE_FRAGTEX2 = 13, + NV40_STATE_FRAGTEX3 = 14, + NV40_STATE_FRAGTEX4 = 15, + NV40_STATE_FRAGTEX5 = 16, + NV40_STATE_FRAGTEX6 = 17, + NV40_STATE_FRAGTEX7 = 18, + NV40_STATE_FRAGTEX8 = 19, + NV40_STATE_FRAGTEX9 = 20, + NV40_STATE_FRAGTEX10 = 21, + NV40_STATE_FRAGTEX11 = 22, + NV40_STATE_FRAGTEX12 = 23, + NV40_STATE_FRAGTEX13 = 24, + NV40_STATE_FRAGTEX14 = 25, + NV40_STATE_FRAGTEX15 = 26, + NV40_STATE_VERTTEX0 = 27, + NV40_STATE_VERTTEX1 = 28, + NV40_STATE_VERTTEX2 = 29, + NV40_STATE_VERTTEX3 = 30, + NV40_STATE_MAX = 31 }; #define NV40_NEW_BLEND (1 << 0) @@ -75,6 +101,17 @@ struct nv40_rasterizer_state { struct nouveau_stateobj *so; }; +struct nv40_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv40_blend_state { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + + struct nv40_state { unsigned scissor_enabled; unsigned stipple_enabled; @@ -107,6 +144,11 @@ struct nv40_context { struct nv40_vertex_program *vertprog; struct nv40_fragment_program *fragprog; struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + struct nv40_rasterizer_state *rasterizer; + struct nv40_zsa_state *zsa; + struct nv40_blend_state *blend; + struct pipe_blend_color blend_colour; + struct pipe_viewport_state viewport; } pipe_state; struct nv40_state state; @@ -115,13 +157,6 @@ struct nv40_context { struct nouveau_stateobj *so_framebuffer; struct nouveau_stateobj *so_fragtex[16]; struct nouveau_stateobj *so_vtxbuf; - struct nouveau_stateobj *so_blend; - struct nv40_rasterizer_state *rasterizer; - struct nouveau_stateobj *so_rast; - struct nouveau_stateobj *so_zsa; - struct nouveau_stateobj *so_bcol; - struct nouveau_stateobj *so_viewport; - struct nouveau_stateobj *so_stipple; struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; @@ -164,10 +199,15 @@ extern void nv40_fragtex_bind(struct nv40_context *); extern void nv40_emit_hw_state(struct nv40_context *nv40); extern void nv40_state_tex_update(struct nv40_context *nv40); extern struct nv40_state_entry nv40_state_clip; +extern struct nv40_state_entry nv40_state_rasterizer; extern struct nv40_state_entry nv40_state_scissor; extern struct nv40_state_entry nv40_state_stipple; extern struct nv40_state_entry nv40_state_fragprog; extern struct nv40_state_entry nv40_state_vertprog; +extern struct nv40_state_entry nv40_state_blend; +extern struct nv40_state_entry nv40_state_blend_colour; +extern struct nv40_state_entry nv40_state_zsa; +extern struct nv40_state_entry nv40_state_viewport; /* nv40_vbo.c */ extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 8ffbb131f73..41631ef2dd6 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -11,6 +11,7 @@ nv40_blend_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nouveau_grobj *curie = nv40->hw->curie; + struct nv40_blend_state *bso = MALLOC(sizeof(*bso)); struct nouveau_stateobj *so = so_new(16, 0); if (cso->blend_enable) { @@ -46,7 +47,9 @@ nv40_blend_state_create(struct pipe_context *pipe, so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); so_data (so, cso->dither ? 1 : 0); - return (void *)so; + bso->so = so; + bso->pipe = *cso; + return (void *)bso; } static void @@ -54,16 +57,17 @@ nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - so_ref(hwcso, &nv40->so_blend); + nv40->pipe_state.blend = hwcso; nv40->dirty |= NV40_NEW_BLEND; } static void nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nouveau_stateobj *so = hwcso; + struct nv40_blend_state *bso = hwcso; - so_ref(NULL, &so); + so_ref(NULL, &bso->so); + FREE(bso); } @@ -260,7 +264,7 @@ nv40_sampler_state_bind(struct pipe_context *pipe, unsigned unit, static void nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { - free(hwcso); + FREE(hwcso); } static void @@ -392,10 +396,8 @@ static void nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_rasterizer_state *rsso = hwcso; - so_ref(rsso->so, &nv40->so_rast); - nv40->rasterizer = rsso; + nv40->pipe_state.rasterizer = hwcso; nv40->dirty |= NV40_NEW_RAST; } @@ -405,7 +407,7 @@ nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) struct nv40_rasterizer_state *rsso = hwcso; so_ref(NULL, &rsso->so); - free(rsso); + FREE(rsso); } static void * @@ -413,6 +415,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *cso) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_zsa_state *zsaso = MALLOC(sizeof(*zsaso)); struct nouveau_stateobj *so = so_new(32, 0); so_method(so, nv40->hw->curie, NV40TCL_DEPTH_FUNC, 3); @@ -455,7 +458,9 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, 0); } - return (void *)so; + zsaso->so = so; + zsaso->pipe = *cso; + return (void *)zsaso; } static void @@ -463,16 +468,17 @@ nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - so_ref(hwcso, &nv40->so_zsa); + nv40->pipe_state.zsa = hwcso; nv40->dirty |= NV40_NEW_ZSA; } static void nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nouveau_stateobj *so = hwcso; + struct nv40_zsa_state *zsaso = hwcso; - so_ref(NULL, &so); + so_ref(NULL, &zsaso->so); + FREE(zsaso); } static void * @@ -503,7 +509,7 @@ nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv40_vertex_program *vp = hwcso; nv40_vertprog_destroy(nv40, vp); - free(vp); + FREE(vp); } static void * @@ -534,7 +540,7 @@ nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv40_fragment_program *fp = hwcso; nv40_fragprog_destroy(nv40, fp); - free(fp); + FREE(fp); } static void @@ -542,16 +548,8 @@ nv40_set_blend_color(struct pipe_context *pipe, const struct pipe_blend_color *bcol) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(2, 0); - - so_method(so, nv40->hw->curie, NV40TCL_BLEND_COLOR, 1); - so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0))); - so_ref(so, &nv40->so_bcol); - so_ref(NULL, &so); + nv40->pipe_state.blend_colour = *bcol; nv40->dirty |= NV40_NEW_BCOL; } @@ -754,20 +752,8 @@ nv40_set_viewport_state(struct pipe_context *pipe, const struct pipe_viewport_state *vpt) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_stateobj *so = so_new(9, 0); - - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); - - so_ref(so, &nv40->so_viewport); - so_ref(NULL, &so); + + nv40->pipe_state.viewport = *vpt; nv40->dirty |= NV40_NEW_VIEWPORT; } diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c new file mode 100644 index 00000000000..b12f8b03dd8 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -0,0 +1,41 @@ +#include "nv40_context.h" + +static boolean +nv40_state_blend_validate(struct nv40_context *nv40) +{ + so_ref(nv40->pipe_state.blend->so, &nv40->state.hw[NV40_STATE_BLEND]); + return TRUE; +} + +struct nv40_state_entry nv40_state_blend = { + .validate = nv40_state_blend_validate, + .dirty = { + .pipe = NV40_NEW_BLEND, + .hw = NV40_STATE_BLEND + } +}; + +static boolean +nv40_state_blend_colour_validate(struct nv40_context *nv40) +{ + struct nouveau_stateobj *so = so_new(2, 0); + struct pipe_blend_color *bcol = &nv40->pipe_state.blend_colour; + + so_method(so, nv40->hw->curie, NV40TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]); + so_ref(NULL, &so); + return TRUE; +} + +struct nv40_state_entry nv40_state_blend_colour = { + .validate = nv40_state_blend_colour_validate, + .dirty = { + .pipe = NV40_NEW_BCOL, + .hw = NV40_STATE_BCOL + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 58beb72389a..65d7e2978a8 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -2,11 +2,16 @@ #include "nv40_state.h" static struct nv40_state_entry *render_states[] = { + &nv40_state_rasterizer, &nv40_state_clip, &nv40_state_scissor, &nv40_state_stipple, &nv40_state_fragprog, &nv40_state_vertprog, + &nv40_state_blend, + &nv40_state_blend_colour, + &nv40_state_zsa, + &nv40_state_viewport, NULL }; @@ -79,21 +84,6 @@ nv40_emit_hw_state(struct nv40_context *nv40) if (nv40->dirty & NV40_NEW_FB) so_emit(nv40->nvws, nv40->so_framebuffer); - if (nv40->dirty & NV40_NEW_BLEND) - so_emit(nv40->nvws, nv40->so_blend); - - if (nv40->dirty & NV40_NEW_RAST) - so_emit(nv40->nvws, nv40->so_rast); - - if (nv40->dirty & NV40_NEW_ZSA) - so_emit(nv40->nvws, nv40->so_zsa); - - if (nv40->dirty & NV40_NEW_BCOL) - so_emit(nv40->nvws, nv40->so_bcol); - - if (nv40->dirty & NV40_NEW_VIEWPORT) - so_emit(nv40->nvws, nv40->so_viewport); - if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) { nv40_fragtex_bind(nv40); diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c new file mode 100644 index 00000000000..59b35d1d50a --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_rasterizer_validate(struct nv40_context *nv40) +{ + so_ref(nv40->pipe_state.rasterizer->so, + &nv40->state.hw[NV40_STATE_RAST]); + return TRUE; +} + +struct nv40_state_entry nv40_state_rasterizer = { + .validate = nv40_state_rasterizer_validate, + .dirty = { + .pipe = NV40_NEW_RAST, + .hw = NV40_STATE_RAST + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index dc7b6d3a9d8..2871fa2516d 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -3,7 +3,7 @@ static boolean nv40_state_scissor_validate(struct nv40_context *nv40) { - struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct pipe_rasterizer_state *rast = &nv40->pipe_state.rasterizer->pipe; struct pipe_scissor_state *s = &nv40->pipe_state.scissor; struct nouveau_stateobj *so; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c index 1b0b194432e..bd163582a39 100644 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -3,7 +3,7 @@ static boolean nv40_state_stipple_validate(struct nv40_context *nv40) { - struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct pipe_rasterizer_state *rast = &nv40->pipe_state.rasterizer->pipe; struct nouveau_grobj *curie = nv40->hw->curie; struct nouveau_stateobj *so; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c new file mode 100644 index 00000000000..79fcc31a8bf --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -0,0 +1,30 @@ +#include "nv40_context.h" + +static boolean +nv40_state_viewport_validate(struct nv40_context *nv40) +{ + struct nouveau_stateobj *so = so_new(9, 0); + struct pipe_viewport_state *vpt = &nv40->pipe_state.viewport; + + so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + + so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]); + so_ref(NULL, &so); + return TRUE; +} + +struct nv40_state_entry nv40_state_viewport = { + .validate = nv40_state_viewport_validate, + .dirty = { + .pipe = NV40_NEW_VIEWPORT, + .hw = NV40_STATE_VIEWPORT + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c new file mode 100644 index 00000000000..061a3555cbe --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_zsa.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_zsa_validate(struct nv40_context *nv40) +{ + so_ref(nv40->pipe_state.zsa->so, + &nv40->state.hw[NV40_STATE_ZSA]); + return TRUE; +} + +struct nv40_state_entry nv40_state_zsa = { + .validate = nv40_state_zsa_validate, + .dirty = { + .pipe = NV40_NEW_ZSA, + .hw = NV40_STATE_ZSA + } +}; -- cgit v1.2.3 From 759fa5fcc8038af4845a6d9c57b75933ef26559c Mon Sep 17 00:00:00 2001 From: Ben Skeggs <darktama@beleth.(none)> Date: Wed, 20 Feb 2008 17:22:40 +1100 Subject: nv40: fb state --- src/gallium/drivers/nv40/Makefile | 1 + src/gallium/drivers/nv40/nv40_context.h | 3 +- src/gallium/drivers/nv40/nv40_state.c | 140 +------------------------- src/gallium/drivers/nv40/nv40_state_emit.c | 6 +- src/gallium/drivers/nv40/nv40_state_fb.c | 156 +++++++++++++++++++++++++++++ 5 files changed, 162 insertions(+), 144 deletions(-) create mode 100644 src/gallium/drivers/nv40/nv40_state_fb.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 82295cbefca..fd002b54e78 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -15,6 +15,7 @@ DRIVER_SOURCES = \ nv40_state_blend.c \ nv40_state_clip.c \ nv40_state_emit.c \ + nv40_state_fb.c \ nv40_state_rasterizer.c \ nv40_state_scissor.c \ nv40_state_stipple.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index d71fe11a4ab..69062a8a202 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -149,12 +149,12 @@ struct nv40_context { struct nv40_blend_state *blend; struct pipe_blend_color blend_colour; struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; } pipe_state; struct nv40_state state; unsigned fallback; - struct nouveau_stateobj *so_framebuffer; struct nouveau_stateobj *so_fragtex[16]; struct nouveau_stateobj *so_vtxbuf; @@ -208,6 +208,7 @@ extern struct nv40_state_entry nv40_state_blend; extern struct nv40_state_entry nv40_state_blend_colour; extern struct nv40_state_entry nv40_state_zsa; extern struct nv40_state_entry nv40_state_viewport; +extern struct nv40_state_entry nv40_state_framebuffer; /* nv40_vbo.c */ extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 41631ef2dd6..84818d6729e 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -584,146 +584,8 @@ nv40_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct nv40_context *nv40 = nv40_context(pipe); - struct pipe_surface *rt[4], *zeta; - uint32_t rt_enable, rt_format, w, h; - int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(64, 10); - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - - rt_enable = 0; - for (i = 0; i < 4; i++) { - if (!fb->cbufs[i]) - continue; - - if (colour_format) { - assert(w == fb->cbufs[i]->width); - assert(h == fb->cbufs[i]->height); - assert(colour_format == fb->cbufs[i]->format); - } else { - w = fb->cbufs[i]->width; - h = fb->cbufs[i]->height; - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); - rt[i] = fb->cbufs[i]; - } - } - - if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | - NV40TCL_RT_ENABLE_COLOR3)) - rt_enable |= NV40TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - if (colour_format) { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } else { - w = fb->zsbuf->width; - h = fb->zsbuf->height; - } - - zeta_format = fb->zsbuf->format; - zeta = fb->zsbuf; - } - - rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; - - switch (colour_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case 0: - rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR0, 1); - so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR0_PITCH, 2); - so_data (so, rt[0]->pitch * rt[0]->cpp); - so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR1, 1); - so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR1_OFFSET, 2); - so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_data (so, rt[1]->pitch * rt[1]->cpp); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR2, 1); - so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR2_OFFSET, 1); - so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_COLOR2_PITCH, 1); - so_data (so, rt[2]->pitch * rt[2]->cpp); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR3, 1); - so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR3_OFFSET, 1); - so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_COLOR3_PITCH, 1); - so_data (so, rt[3]->pitch * rt[3]->cpp); - } - - if (zeta_format) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_ZETA, 1); - so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv40->nvws->channel->vram->handle, - nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_ZETA_OFFSET, 1); - so_reloc (so, zeta->buffer, zeta->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_ZETA_PITCH, 1); - so_data (so, zeta->pitch * zeta->cpp); - } - so_method(so, nv40->hw->curie, NV40TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, nv40->hw->curie, NV40TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - - so_ref(so, &nv40->so_framebuffer); - so_ref(NULL, &so); + nv40->pipe_state.framebuffer = *fb; nv40->dirty |= NV40_NEW_FB; } diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 65d7e2978a8..a9ca71c5e93 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -2,6 +2,7 @@ #include "nv40_state.h" static struct nv40_state_entry *render_states[] = { + &nv40_state_framebuffer, &nv40_state_rasterizer, &nv40_state_clip, &nv40_state_scissor, @@ -67,7 +68,7 @@ nv40_state_emit(struct nv40_context *nv40) so_emit(nv40->nvws, nv40->hw->state[idx]); } - so_emit_reloc_markers(nv40->nvws, nv40->so_framebuffer); + so_emit_reloc_markers(nv40->nvws, nv40->state.hw[NV40_STATE_FB]); for (i = 0; i < 16; i++) { if (!(nv40->fp_samplers & (1 << i))) continue; @@ -81,9 +82,6 @@ nv40_emit_hw_state(struct nv40_context *nv40) { nv40_state_validate(nv40); - if (nv40->dirty & NV40_NEW_FB) - so_emit(nv40->nvws, nv40->so_framebuffer); - if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) { nv40_fragtex_bind(nv40); diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c new file mode 100644 index 00000000000..d3032f1be5a --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -0,0 +1,156 @@ +#include "nv40_context.h" + +static boolean +nv40_state_framebuffer_validate(struct nv40_context *nv40) +{ + struct pipe_framebuffer_state *fb = &nv40->pipe_state.framebuffer; + struct pipe_surface *rt[4], *zeta; + uint32_t rt_enable, rt_format, w, h; + int i, colour_format = 0, zeta_format = 0; + struct nouveau_stateobj *so = so_new(64, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + + rt_enable = 0; + for (i = 0; i < 4; i++) { + if (!fb->cbufs[i]) + continue; + + if (colour_format) { + assert(w == fb->cbufs[i]->width); + assert(h == fb->cbufs[i]->height); + assert(colour_format == fb->cbufs[i]->format); + } else { + w = fb->cbufs[i]->width; + h = fb->cbufs[i]->height; + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); + rt[i] = fb->cbufs[i]; + } + } + + if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | + NV40TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV40TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case 0: + rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { + so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR0, 1); + so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->hw->curie, NV40TCL_COLOR0_PITCH, 2); + so_data (so, rt[0]->pitch * rt[0]->cpp); + so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { + so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR1, 1); + so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->hw->curie, NV40TCL_COLOR1_OFFSET, 2); + so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_data (so, rt[1]->pitch * rt[1]->cpp); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { + so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR2, 1); + so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->hw->curie, NV40TCL_COLOR2_OFFSET, 1); + so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->hw->curie, NV40TCL_COLOR2_PITCH, 1); + so_data (so, rt[2]->pitch * rt[2]->cpp); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { + so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR3, 1); + so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->hw->curie, NV40TCL_COLOR3_OFFSET, 1); + so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->hw->curie, NV40TCL_COLOR3_PITCH, 1); + so_data (so, rt[3]->pitch * rt[3]->cpp); + } + + if (zeta_format) { + so_method(so, nv40->hw->curie, NV40TCL_DMA_ZETA, 1); + so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->hw->curie, NV40TCL_ZETA_OFFSET, 1); + so_reloc (so, zeta->buffer, zeta->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->hw->curie, NV40TCL_ZETA_PITCH, 1); + so_data (so, zeta->pitch * zeta->cpp); + } + + so_method(so, nv40->hw->curie, NV40TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, nv40->hw->curie, NV40TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + + so_ref(so, &nv40->state.hw[NV40_STATE_FB]); + so_ref(NULL, &so); + return TRUE; +} + +struct nv40_state_entry nv40_state_framebuffer = { + .validate = nv40_state_framebuffer_validate, + .dirty = { + .pipe = NV40_NEW_FB, + .hw = NV40_STATE_FB + } +}; -- cgit v1.2.3 From 22a0b85eaebf767f5b03bf899596e09f5cc03876 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 11:15:59 -0700 Subject: gallium: use pipe_texture_reference() in sp_tile_cache_set_texture() --- src/gallium/drivers/softpipe/sp_state_sampler.c | 2 +- src/gallium/drivers/softpipe/sp_tile_cache.c | 5 +++-- src/gallium/drivers/softpipe/sp_tile_cache.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 460adccec4f..9246915e19e 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -84,7 +84,7 @@ softpipe_set_sampler_texture(struct pipe_context *pipe, assert(unit < PIPE_MAX_SAMPLERS); softpipe->texture[unit] = softpipe_texture(texture); /* ptr, not struct */ - sp_tile_cache_set_texture(softpipe->tex_cache[unit], texture); + sp_tile_cache_set_texture(pipe, softpipe->tex_cache[unit], texture); softpipe->dirty |= SP_NEW_TEXTURE; } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index dde3fabc81e..9ed3c5072d0 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -212,14 +212,15 @@ sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc) * Specify the texture to cache. */ void -sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, +sp_tile_cache_set_texture(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, struct pipe_texture *texture) { uint i; assert(!tc->surface); - tc->texture = texture; + pipe_texture_reference(pipe, &tc->texture, texture); if (tc->tex_surf_map) { pipe_surface_unmap(tc->tex_surf); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 7fd10812863..2631e29a3a6 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -80,7 +80,8 @@ extern void sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc); extern void -sp_tile_cache_set_texture(struct softpipe_tile_cache *tc, +sp_tile_cache_set_texture(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, struct pipe_texture *texture); extern void -- cgit v1.2.3 From d5640a2dbdc4454d0405f2cd5b18fc49b1ca7694 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 13:24:52 -0700 Subject: gallium: new pipe->texture_update() function Called whenever texture data is changed (glTexImage, glTexSubImage, glCopyTexSubImage, etc). --- src/gallium/drivers/cell/ppu/cell_context.c | 1 + src/gallium/drivers/cell/ppu/cell_texture.c | 8 +++++ src/gallium/drivers/cell/ppu/cell_texture.h | 3 ++ src/gallium/drivers/failover/fo_context.c | 9 +++-- src/gallium/drivers/i915simple/i915_context.c | 1 + src/gallium/drivers/i915simple/i915_texture.c | 7 ++++ src/gallium/drivers/i915simple/i915_texture.h | 4 +++ src/gallium/drivers/i965simple/brw_context.c | 1 + src/gallium/drivers/i965simple/brw_tex_layout.c | 8 +++++ src/gallium/drivers/i965simple/brw_tex_layout.h | 3 ++ src/gallium/drivers/softpipe/sp_context.c | 1 + src/gallium/drivers/softpipe/sp_texture.c | 15 +++++++++ src/gallium/drivers/softpipe/sp_texture.h | 4 +++ src/gallium/drivers/softpipe/sp_tile_cache.c | 45 ++++++++++++++----------- src/gallium/include/pipe/p_context.h | 8 +++++ src/mesa/state_tracker/st_atom_texture.c | 14 +++++--- 16 files changed, 104 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index e1eb22f4685..b6ba14578c6 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -244,6 +244,7 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) /* textures */ cell->pipe.texture_create = cell_texture_create; cell->pipe.texture_release = cell_texture_release; + cell->pipe.texture_update = cell_texture_update; cell->pipe.get_tex_surface = cell_get_tex_surface; cell->pipe.set_sampler_texture = cell_set_sampler_texture; diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index c8ef36002f7..4629eb13201 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -128,6 +128,14 @@ cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } +void +cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +{ + /* XXX TO DO: re-tile the texture data ... */ + +} + + /** * Called via pipe->get_tex_surface() */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 0264fed88e6..07e81582f40 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -67,6 +67,9 @@ cell_texture_create(struct pipe_context *pipe, extern void cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); +extern void +cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture); + extern struct pipe_surface * cell_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 7ce4a7df17e..156f7399b03 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -137,15 +137,14 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover_init_state_functions( failover ); -#if 0 - failover->pipe.surface_alloc = hw->surface_alloc; -#endif - failover->pipe.get_tex_surface = hw->get_tex_surface; - failover->pipe.surface_copy = hw->surface_copy; failover->pipe.surface_fill = hw->surface_fill; + failover->pipe.texture_create = hw->texture_create; failover->pipe.texture_release = hw->texture_release; + failover->pipe.texture_update = hw->texture_update; + failover->pipe.get_tex_surface = hw->get_tex_surface; + failover->pipe.flush = hw->flush; failover->dirty = 0; diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 7f71f8fd4f5..97773f12568 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -302,6 +302,7 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, i915->pipe.texture_create = i915_texture_create; i915->pipe.texture_release = i915_texture_release; + i915->pipe.texture_update = i915_texture_update; i915->dirty = ~0; i915->hardware_dirty = ~0; diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 6d37ae3d747..4ba76d19adc 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -534,3 +534,10 @@ i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } *pt = NULL; } + + +void +i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +{ + /* no-op? */ +} diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h index 330d111dc78..03129775520 100644 --- a/src/gallium/drivers/i915simple/i915_texture.h +++ b/src/gallium/drivers/i915simple/i915_texture.h @@ -14,4 +14,8 @@ extern void i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); +extern void +i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture); + + #endif /* I915_TEXTURE_H */ diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index 5e58701e91c..2e2380a8d6f 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -224,6 +224,7 @@ struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys, brw->pipe.clear = brw_clear; brw->pipe.texture_create = brw_texture_create; brw->pipe.texture_release = brw_texture_release; + brw->pipe.texture_update = brw_texture_update; brw_init_surface_functions(brw); brw_init_state_functions(brw); diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 90561f1307d..220591da9a4 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -351,3 +351,11 @@ brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } *pt = NULL; } + + +void +brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +{ + /* no-op? */ +} + diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h index cfd6b1ef3ae..7d118d0fa89 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.h +++ b/src/gallium/drivers/i965simple/brw_tex_layout.h @@ -12,4 +12,7 @@ brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat extern void brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); +extern void +brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture); + #endif diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 254c6adca44..316020cba63 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -283,6 +283,7 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, /* textures */ softpipe->pipe.texture_create = softpipe_texture_create; softpipe->pipe.texture_release = softpipe_texture_release; + softpipe->pipe.texture_update = softpipe_texture_update; softpipe->pipe.get_tex_surface = softpipe_get_tex_surface; /* diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 6de7a9b543b..8f31f05e478 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -39,6 +39,7 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" +#include "sp_tile_cache.h" /* Simple, maximally packed layout. @@ -128,6 +129,20 @@ softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } +void +softpipe_texture_update(struct pipe_context *pipe, + struct pipe_texture *texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint unit; + for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { + if (softpipe->texture[unit] == texture) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); + } + } +} + + /** * Called via pipe->get_tex_surface() */ diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index fa646c0de9f..50fc1004274 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -62,6 +62,10 @@ softpipe_texture_create(struct pipe_context *pipe, extern void softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); +extern void +softpipe_texture_update(struct pipe_context *pipe, + struct pipe_texture *texture); + extern struct pipe_surface * softpipe_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 9ed3c5072d0..da30dd6c484 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -359,30 +359,37 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, struct pipe_surface *ps = tc->surface; int inuse = 0, pos; - if (!ps || !ps->buffer) - return; - - for (pos = 0; pos < NUM_ENTRIES; pos++) { - struct softpipe_cached_tile *tile = tc->entries + pos; - if (tile->x >= 0) { - if (tc->depth_stencil) { - pipe_put_tile_raw(pipe, ps, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, - tile->data.depth32, 0/*STRIDE*/); - } - else { - pipe_put_tile_rgba(pipe, ps, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, - (float *) tile->data.color); + if (ps && ps->buffer) { + /* caching a drawing surface */ + for (pos = 0; pos < NUM_ENTRIES; pos++) { + struct softpipe_cached_tile *tile = tc->entries + pos; + if (tile->x >= 0) { + if (tc->depth_stencil) { + pipe_put_tile_raw(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + tile->x = tile->y = -1; /* mark as empty */ + inuse++; } - tile->x = tile->y = -1; /* mark as empty */ - inuse++; } - } #if TILE_CLEAR_OPTIMIZATION - sp_tile_cache_flush_clear(&softpipe->pipe, tc); + sp_tile_cache_flush_clear(&softpipe->pipe, tc); #endif + } + else if (tc->texture) { + /* caching a texture, mark all entries as embpy */ + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].x = -1; + } + tc->tex_face = -1; + } #if 0 debug_printf("flushed tiles in use: %d\n", inuse); diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 39f95695fb6..036c4c89649 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -206,6 +206,14 @@ struct pipe_context { void (*texture_release)(struct pipe_context *pipe, struct pipe_texture **pt); + /** + * Called when texture data is changed. + * Note: we could pass some hints about which mip levels or cube faces + * have changed... + */ + void (*texture_update)(struct pipe_context *pipe, + struct pipe_texture *texture); + /** Get a surface which is a "view" into a texture */ struct pipe_surface *(*get_tex_surface)(struct pipe_context *pipe, struct pipe_texture *texture, diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 9fead7e314d..a4ac7268160 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -67,14 +67,20 @@ update_textures(struct st_context *st) * this table before being deleted, otherwise the pointer * comparison below could fail. */ - if (st->state.sampler_texture[unit] != stObj || - (stObj && stObj->dirtyData)) { + if (st->state.sampler_texture[unit] != stObj) { struct pipe_texture *pt = st_get_stobj_texture(stObj); st->state.sampler_texture[unit] = stObj; st->pipe->set_sampler_texture(st->pipe, unit, pt); - if (stObj) - stObj->dirtyData = GL_FALSE; } + + stObj = st->state.sampler_texture[unit]; + + if (stObj && stObj->dirtyData) { + struct pipe_texture *pt = st_get_stobj_texture(stObj); + st->pipe->texture_update(st->pipe, pt); + stObj->dirtyData = GL_FALSE; + } + } } -- cgit v1.2.3 From 882a4b505484a50f1ccc2cf3ae0c3a52d4ec1be3 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 14:00:42 -0700 Subject: gallium: minor re-org of 915 surface/texture code --- src/gallium/drivers/i915simple/i915_context.c | 4 +- src/gallium/drivers/i915simple/i915_surface.c | 45 +-------------------- src/gallium/drivers/i915simple/i915_texture.c | 58 +++++++++++++++++++++++++-- src/gallium/drivers/i915simple/i915_texture.h | 37 ++++++++++++----- 4 files changed, 84 insertions(+), 60 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 97773f12568..acfa3494397 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -296,13 +296,11 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, i915_init_state_functions(i915); i915_init_flush_functions(i915); i915_init_string_functions(i915); + i915_init_texture_functions(i915); i915->pci_id = pci_id; i915->flags.is_i945 = is_i945; - i915->pipe.texture_create = i915_texture_create; - i915->pipe.texture_release = i915_texture_release; - i915->pipe.texture_update = i915_texture_update; i915->dirty = ~0; i915->hardware_dirty = ~0; diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 17fd27895a2..f4fbedbe9bc 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -36,48 +36,6 @@ #include "util/p_tile.h" -/* - * XXX note: same as code in sp_surface.c - */ -static struct pipe_surface * -i915_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct i915_texture *tex = (struct i915_texture *)pt; - struct pipe_surface *ps; - unsigned offset; /* in bytes */ - - offset = tex->level_offset[level]; - - if (pt->target == PIPE_TEXTURE_CUBE) { - offset += tex->image_offset[level][face] * pt->cpp; - } - else if (pt->target == PIPE_TEXTURE_3D) { - offset += tex->image_offset[level][zslice] * pt->cpp; - } - else { - assert(face == 0); - assert(zslice == 0); - } - - ps = pipe->winsys->surface_alloc(pipe->winsys); - if (ps) { - assert(ps->refcount); - assert(ps->winsys); - pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = tex->pitch; - ps->offset = offset; - } - return ps; -} - - - /* Assumes all values are within bounds -- no checking at this level - * do it higher up if required. */ @@ -115,6 +73,7 @@ i915_surface_copy(struct pipe_context *pipe, } } + /* Fill a rectangular sub-region. Need better logic about when to * push buffers into AGP - will currently do so whenever possible. */ @@ -184,8 +143,6 @@ i915_surface_fill(struct pipe_context *pipe, void i915_init_surface_functions(struct i915_context *i915) { - i915->pipe.get_tex_surface = i915_get_tex_surface; - i915->pipe.surface_copy = i915_surface_copy; i915->pipe.surface_fill = i915_surface_fill; } diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 4ba76d19adc..b235fae96d9 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -478,7 +478,7 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) } -struct pipe_texture * +static struct pipe_texture * i915_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) { @@ -506,7 +506,7 @@ i915_texture_create(struct pipe_context *pipe, } -void +static void i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) { if (!*pt) @@ -536,8 +536,60 @@ i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } -void +static void i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) { /* no-op? */ } + + +/* + * XXX note: same as code in sp_surface.c + */ +static struct pipe_surface * +i915_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct i915_texture *tex = (struct i915_texture *)pt; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + offset = tex->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset += tex->image_offset[level][face] * pt->cpp; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset += tex->image_offset[level][zslice] * pt->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = tex->pitch; + ps->offset = offset; + } + return ps; +} + + + +void +i915_init_texture_functions(struct i915_context *i915) +{ + i915->pipe.texture_create = i915_texture_create; + i915->pipe.texture_release = i915_texture_release; + i915->pipe.texture_update = i915_texture_update; + i915->pipe.get_tex_surface = i915_get_tex_surface; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h index 03129775520..6d8d41178f7 100644 --- a/src/gallium/drivers/i915simple/i915_texture.h +++ b/src/gallium/drivers/i915simple/i915_texture.h @@ -1,21 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ #ifndef I915_TEXTURE_H #define I915_TEXTURE_H struct pipe_context; -struct pipe_texture; - - -struct pipe_texture * -i915_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat); - -extern void -i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); extern void -i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture); +i915_init_texture_functions(struct i915_context *i915); #endif /* I915_TEXTURE_H */ -- cgit v1.2.3 From 4eae65c8e052976a130564560699e60e1a3a9cc3 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 14:04:05 -0700 Subject: gallium: re-org of i965 texture/surface code, functions --- src/gallium/drivers/i965simple/brw_context.c | 4 +- src/gallium/drivers/i965simple/brw_surface.c | 43 +----------------- src/gallium/drivers/i965simple/brw_tex_layout.c | 60 +++++++++++++++++++++++-- src/gallium/drivers/i965simple/brw_tex_layout.h | 10 +---- 4 files changed, 60 insertions(+), 57 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index 2e2380a8d6f..6fb840708e5 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -222,11 +222,9 @@ struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys, brw->pipe.get_param = brw_get_param; brw->pipe.get_paramf = brw_get_paramf; brw->pipe.clear = brw_clear; - brw->pipe.texture_create = brw_texture_create; - brw->pipe.texture_release = brw_texture_release; - brw->pipe.texture_update = brw_texture_update; brw_init_surface_functions(brw); + brw_init_texture_functions(brw); brw_init_state_functions(brw); brw_init_flush_functions(brw); brw_init_string_functions(brw); diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 376a42b1a6a..dc4846d39fc 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -35,47 +35,6 @@ #include "util/p_tile.h" -/* - * XXX note: same as code in sp_surface.c - */ -static struct pipe_surface * -brw_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct brw_texture *tex = (struct brw_texture *)pt; - struct pipe_surface *ps; - unsigned offset; /* in bytes */ - - offset = tex->level_offset[level]; - - if (pt->target == PIPE_TEXTURE_CUBE) { - offset += tex->image_offset[level][face] * pt->cpp; - } - else if (pt->target == PIPE_TEXTURE_3D) { - offset += tex->image_offset[level][zslice] * pt->cpp; - } - else { - assert(face == 0); - assert(zslice == 0); - } - - ps = pipe->winsys->surface_alloc(pipe->winsys); - if (ps) { - assert(ps->format); - assert(ps->refcount); - pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = tex->pitch; - ps->offset = offset; - } - return ps; -} - - /* Upload data to a rectangular sub-region. Lots of choices how to do this: * * - memcpy by span to current destination @@ -201,10 +160,10 @@ brw_surface_fill(struct pipe_context *pipe, } } + void brw_init_surface_functions(struct brw_context *brw) { - brw->pipe.get_tex_surface = brw_get_tex_surface; brw->pipe.surface_copy = brw_surface_copy; brw->pipe.surface_fill = brw_surface_fill; } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 220591da9a4..043a2ff9a45 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -300,8 +300,9 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture } -struct pipe_texture * -brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) +static struct pipe_texture * +brw_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat) { struct brw_texture *tex = CALLOC_STRUCT(brw_texture); @@ -323,7 +324,8 @@ brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat return &tex->base; } -void + +static void brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) { if (!*pt) @@ -353,9 +355,59 @@ brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } -void +static void brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) { /* no-op? */ } + +/* + * XXX note: same as code in sp_surface.c + */ +static struct pipe_surface * +brw_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct brw_texture *tex = (struct brw_texture *)pt; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + offset = tex->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset += tex->image_offset[level][face] * pt->cpp; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset += tex->image_offset[level][zslice] * pt->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->format); + assert(ps->refcount); + pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = tex->pitch; + ps->offset = offset; + } + return ps; +} + + +void +brw_init_texture_functions(struct brw_context *brw) +{ + brw->pipe.texture_create = brw_texture_create; + brw->pipe.texture_release = brw_texture_release; + brw->pipe.texture_update = brw_texture_update; + brw->pipe.get_tex_surface = brw_get_tex_surface; +} diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h index 7d118d0fa89..ed49baeef80 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.h +++ b/src/gallium/drivers/i965simple/brw_tex_layout.h @@ -1,18 +1,12 @@ #ifndef BRW_TEX_LAYOUT_H #define BRW_TEX_LAYOUT_H -#include "pipe/p_compiler.h" -struct pipe_context; -struct pipe_texture; +struct brw_context; -extern struct pipe_texture * -brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat); extern void -brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); +brw_init_texture_functions(struct brw_context *brw); -extern void -brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture); #endif -- cgit v1.2.3 From 9171e63f414866aef155b17d3c85c9a236a872d6 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 14:21:45 -0700 Subject: cell: put most simple state-setter functions in new cell_pipe_state.c file Also, re-org of texture/surface functions. --- src/gallium/drivers/cell/ppu/Makefile | 6 +- src/gallium/drivers/cell/ppu/cell_context.c | 35 +-- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 323 +++++++++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_pipe_state.h | 39 +++ src/gallium/drivers/cell/ppu/cell_state.h | 69 +----- src/gallium/drivers/cell/ppu/cell_texture.c | 21 +- src/gallium/drivers/cell/ppu/cell_texture.h | 20 +- 7 files changed, 389 insertions(+), 124 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_pipe_state.c create mode 100644 src/gallium/drivers/cell/ppu/cell_pipe_state.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 3c3f622a2fc..6b6dfca890e 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -25,14 +25,10 @@ SOURCES = \ cell_context.c \ cell_draw_arrays.c \ cell_flush.c \ - cell_state_blend.c \ - cell_state_clip.c \ cell_state_derived.c \ cell_state_emit.c \ cell_state_fs.c \ - cell_state_rasterizer.c \ - cell_state_sampler.c \ - cell_state_surface.c \ + cell_pipe_state.c \ cell_state_vertex.c \ cell_spu.c \ cell_surface.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index b6ba14578c6..9f0ecb2be8b 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -48,6 +48,7 @@ #include "cell_state.h" #include "cell_surface.h" #include "cell_spu.h" +#include "cell_pipe_state.h" #include "cell_texture.h" #include "cell_vbuf.h" @@ -198,22 +199,6 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) /* state setters */ - cell->pipe.create_blend_state = cell_create_blend_state; - cell->pipe.bind_blend_state = cell_bind_blend_state; - cell->pipe.delete_blend_state = cell_delete_blend_state; - - cell->pipe.create_sampler_state = cell_create_sampler_state; - cell->pipe.bind_sampler_state = cell_bind_sampler_state; - cell->pipe.delete_sampler_state = cell_delete_sampler_state; - - cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; - cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; - cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; - - cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; - cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; - cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; - cell->pipe.create_fs_state = cell_create_fs_state; cell->pipe.bind_fs_state = cell_bind_fs_state; cell->pipe.delete_fs_state = cell_delete_fs_state; @@ -222,16 +207,8 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) cell->pipe.bind_vs_state = cell_bind_vs_state; cell->pipe.delete_vs_state = cell_delete_vs_state; - cell->pipe.set_blend_color = cell_set_blend_color; - cell->pipe.set_clip_state = cell_set_clip_state; cell->pipe.set_constant_buffer = cell_set_constant_buffer; - cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; - - cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; - cell->pipe.set_scissor_state = cell_set_scissor_state; - cell->pipe.set_viewport_state = cell_set_viewport_state; - cell->pipe.set_vertex_buffer = cell_set_vertex_buffer; cell->pipe.set_vertex_element = cell_set_vertex_element; @@ -241,21 +218,15 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) cell->pipe.clear = cell_clear_surface; cell->pipe.flush = cell_flush; - /* textures */ - cell->pipe.texture_create = cell_texture_create; - cell->pipe.texture_release = cell_texture_release; - cell->pipe.texture_update = cell_texture_update; - cell->pipe.get_tex_surface = cell_get_tex_surface; - - cell->pipe.set_sampler_texture = cell_set_sampler_texture; - #if 0 cell->pipe.begin_query = cell_begin_query; cell->pipe.end_query = cell_end_query; cell->pipe.wait_query = cell_wait_query; #endif + cell_init_state_functions(cell); cell_init_surface_functions(cell); + cell_init_texture_functions(cell); cell->draw = cell_draw_create(cell); diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c new file mode 100644 index 00000000000..aef50725e87 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -0,0 +1,323 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + + +static void * +cell_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + return mem_dup(blend, sizeof(*blend)); +} + + +static void +cell_bind_blend_state(struct pipe_context *pipe, void *blend) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend = (const struct pipe_blend_state *)blend; + + cell->dirty |= CELL_NEW_BLEND; +} + + +static void +cell_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + + +static void +cell_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend_color = *blend_color; + + cell->dirty |= CELL_NEW_BLEND; +} + + + + +static void * +cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + + +static void +cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->depth_stencil + = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; + + cell->dirty |= CELL_NEW_DEPTH_STENCIL; +} + + +static void +cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) +{ + FREE(depth); +} + + +static void cell_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(cell->draw, clip); +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void +cell_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct cell_context *cell = cell_context(pipe); + + cell->viewport = *viewport; /* struct copy */ + cell->dirty |= CELL_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(cell->draw, viewport); + + /* Using tnl/ and vf/ modules is temporary while getting started. + * Full pipe will have vertex shader, vertex fetch of its own. + */ +} + + +static void +cell_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->scissor, scissor, sizeof(*scissor) ); + cell->dirty |= CELL_NEW_SCISSOR; +} + + +static void +cell_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); + cell->dirty |= CELL_NEW_STIPPLE; +} + + + +static void * +cell_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *setup) +{ + struct pipe_rasterizer_state *state + = MALLOC(sizeof(struct pipe_rasterizer_state)); + memcpy(state, setup, sizeof(struct pipe_rasterizer_state)); + return state; +} + + +static void +cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass-through to draw module */ + draw_set_rasterizer_state(cell->draw, setup); + + cell->rasterizer = (struct pipe_rasterizer_state *)setup; + + cell->dirty |= CELL_NEW_RASTERIZER; +} + + +static void +cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) +{ + FREE(rasterizer); +} + + + +static void * +cell_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + return mem_dup(sampler, sizeof(*sampler)); +} + + +static void +cell_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + assert(unit < PIPE_MAX_SAMPLERS); + cell->sampler[unit] = (struct pipe_sampler_state *)sampler; + + cell->dirty |= CELL_NEW_SAMPLER; +} + + +static void +cell_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + + +static void +cell_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, + struct pipe_texture *texture) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->texture[sampler] = cell_texture(texture); + + cell_update_texture_mapping(cell); + + cell->dirty |= CELL_NEW_TEXTURE; +} + + + +static void +cell_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct cell_context *cell = cell_context(pipe); + + if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { + struct pipe_surface *csurf = fb->cbufs[0]; + struct pipe_surface *zsurf = fb->zsbuf; + uint i; + + /* unmap old surfaces */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) { + pipe_surface_unmap(cell->framebuffer.cbufs[i]); + cell->cbuf_map[i] = NULL; + } + } + + if (cell->framebuffer.zsbuf && cell->zsbuf_map) { + pipe_surface_unmap(cell->framebuffer.zsbuf); + cell->zsbuf_map = NULL; + } + + /* update my state */ + cell->framebuffer = *fb; + + /* map new surfaces */ + if (csurf) + cell->cbuf_map[0] = pipe_surface_map(csurf); + + if (zsurf) + cell->zsbuf_map = pipe_surface_map(zsurf); + + cell->dirty |= CELL_NEW_FRAMEBUFFER; + } +} + + + +void +cell_init_state_functions(struct cell_context *cell) +{ + cell->pipe.create_blend_state = cell_create_blend_state; + cell->pipe.bind_blend_state = cell_bind_blend_state; + cell->pipe.delete_blend_state = cell_delete_blend_state; + + cell->pipe.create_sampler_state = cell_create_sampler_state; + cell->pipe.bind_sampler_state = cell_bind_sampler_state; + cell->pipe.delete_sampler_state = cell_delete_sampler_state; + + cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; + cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; + cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; + + cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; + cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; + cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; + + cell->pipe.set_blend_color = cell_set_blend_color; + cell->pipe.set_clip_state = cell_set_clip_state; + cell->pipe.set_constant_buffer = cell_set_constant_buffer; + + cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; + + cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; + cell->pipe.set_scissor_state = cell_set_scissor_state; + cell->pipe.set_viewport_state = cell_set_viewport_state; +} + diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h new file mode 100644 index 00000000000..1889bd52ff5 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_PIPE_STATE_H +#define CELL_PIPE_STATE_H + + +struct cell_context; + +extern void +cell_init_state_functions(struct cell_context *cell); + + +#endif /* CELL_PIPE_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index 3a71ba14fa8..2af7770ebbb 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -23,42 +23,6 @@ -extern void -cell_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - - - -extern void * -cell_create_blend_state(struct pipe_context *, const struct pipe_blend_state *); -extern void cell_bind_blend_state(struct pipe_context *, void *); -extern void cell_delete_blend_state(struct pipe_context *, void *); - -extern void cell_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - - -void * -cell_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); - -extern void -cell_bind_sampler_state(struct pipe_context *, unsigned, void *); - -extern void -cell_delete_sampler_state(struct pipe_context *, void *); - - -extern void * -cell_create_depth_stencil_alpha_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); - -extern void -cell_bind_depth_stencil_alpha_state(struct pipe_context *, void *); - -extern void -cell_delete_depth_stencil_alpha_state(struct pipe_context *, void *); - void *cell_create_fs_state(struct pipe_context *, const struct pipe_shader_state *); @@ -69,34 +33,11 @@ void *cell_create_vs_state(struct pipe_context *, void cell_bind_vs_state(struct pipe_context *, void *); void cell_delete_vs_state(struct pipe_context *, void *); - -void * -cell_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void cell_bind_rasterizer_state(struct pipe_context *, void *); -void cell_delete_rasterizer_state(struct pipe_context *, void *); - - -void cell_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void cell_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf); - -void cell_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - void -cell_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, - struct pipe_texture *texture); - -void cell_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); +cell_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf); -void cell_set_texture_state( struct pipe_context *, - unsigned unit, struct pipe_texture * ); void cell_set_vertex_element(struct pipe_context *, unsigned index, @@ -106,10 +47,6 @@ void cell_set_vertex_buffer(struct pipe_context *, unsigned index, const struct pipe_vertex_buffer *); -void cell_set_viewport_state( struct pipe_context *, - const struct pipe_viewport_state * ); - - void cell_update_derived( struct cell_context *softpipe ); #endif diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 4629eb13201..e1b91075b2c 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -79,8 +79,9 @@ cell_texture_layout(struct cell_texture * spt) } -struct pipe_texture * -cell_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) +static struct pipe_texture * +cell_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat) { struct cell_texture *spt = CALLOC_STRUCT(cell_texture); if (!spt) @@ -103,7 +104,7 @@ cell_texture_create(struct pipe_context *pipe, const struct pipe_texture *templa } -void +static void cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) { if (!*pt) @@ -128,7 +129,7 @@ cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } -void +static void cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) { /* XXX TO DO: re-tile the texture data ... */ @@ -139,7 +140,7 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) /** * Called via pipe->get_tex_surface() */ -struct pipe_surface * +static struct pipe_surface * cell_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) @@ -258,3 +259,13 @@ cell_update_texture_mapping(struct cell_context *cell) cell->tex_map = pipe_surface_map(cell->tex_surf); #endif } + + +void +cell_init_texture_functions(struct cell_context *cell) +{ + cell->pipe.texture_create = cell_texture_create; + cell->pipe.texture_release = cell_texture_release; + cell->pipe.texture_update = cell_texture_update; + cell->pipe.get_tex_surface = cell_get_tex_surface; +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 07e81582f40..824fb3e20fc 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -29,7 +29,7 @@ #define CELL_TEXTURE_H -struct pipe_context; +struct cell_context; struct pipe_texture; @@ -60,24 +60,12 @@ cell_texture(struct pipe_texture *pt) -extern struct pipe_texture * -cell_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat); - -extern void -cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); - extern void -cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture); - -extern struct pipe_surface * -cell_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice); +cell_update_texture_mapping(struct cell_context *cell); extern void -cell_update_texture_mapping(struct cell_context *cell); +cell_init_texture_functions(struct cell_context *cell); -#endif /* CELL_TEXTURE */ +#endif /* CELL_TEXTURE_H */ -- cgit v1.2.3 From acd2253ae80d133bc84a5e78909ec72464e3f901 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 14:24:46 -0700 Subject: Remove obsolete files replaced by cell_pipe_state.c --- src/gallium/drivers/cell/ppu/cell_state_blend.c | 109 --------------------- src/gallium/drivers/cell/ppu/cell_state_clip.c | 84 ---------------- .../drivers/cell/ppu/cell_state_rasterizer.c | 106 -------------------- src/gallium/drivers/cell/ppu/cell_state_sampler.c | 84 ---------------- src/gallium/drivers/cell/ppu/cell_state_surface.c | 71 -------------- 5 files changed, 454 deletions(-) delete mode 100644 src/gallium/drivers/cell/ppu/cell_state_blend.c delete mode 100644 src/gallium/drivers/cell/ppu/cell_state_clip.c delete mode 100644 src/gallium/drivers/cell/ppu/cell_state_rasterizer.c delete mode 100644 src/gallium/drivers/cell/ppu/cell_state_sampler.c delete mode 100644 src/gallium/drivers/cell/ppu/cell_state_surface.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_blend.c b/src/gallium/drivers/cell/ppu/cell_state_blend.c deleted file mode 100644 index b6d6d71f0ce..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_blend.c +++ /dev/null @@ -1,109 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "cell_context.h" -#include "cell_state.h" - - - -void * -cell_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - return mem_dup(blend, sizeof(*blend)); -} - - -void -cell_bind_blend_state(struct pipe_context *pipe, void *blend) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->blend = (const struct pipe_blend_state *)blend; - - cell->dirty |= CELL_NEW_BLEND; -} - - -void -cell_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - FREE(blend); -} - - -void -cell_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *blend_color) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->blend_color = *blend_color; - - cell->dirty |= CELL_NEW_BLEND; -} - - - - -void * -cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) -{ - return mem_dup(depth_stencil, sizeof(*depth_stencil)); -} - - -void -cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, - void *depth_stencil) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->depth_stencil - = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; - - cell->dirty |= CELL_NEW_DEPTH_STENCIL; -} - - -void -cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) -{ - FREE(depth); -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_clip.c b/src/gallium/drivers/cell/ppu/cell_state_clip.c deleted file mode 100644 index 0482f87e889..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_clip.c +++ /dev/null @@ -1,84 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "cell_context.h" -#include "cell_state.h" -#include "draw/draw_context.h" - - -void cell_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) -{ - struct cell_context *cell = cell_context(pipe); - - /* pass the clip state to the draw module */ - draw_set_clip_state(cell->draw, clip); -} - - - -/* Called when driver state tracker notices changes to the viewport - * matrix: - */ -void cell_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct cell_context *cell = cell_context(pipe); - - cell->viewport = *viewport; /* struct copy */ - cell->dirty |= CELL_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - draw_set_viewport_state(cell->draw, viewport); - - /* Using tnl/ and vf/ modules is temporary while getting started. - * Full pipe will have vertex shader, vertex fetch of its own. - */ -} - - -void cell_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct cell_context *cell = cell_context(pipe); - - memcpy( &cell->scissor, scissor, sizeof(*scissor) ); - cell->dirty |= CELL_NEW_SCISSOR; -} - - -void cell_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ - struct cell_context *cell = cell_context(pipe); - - memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); - cell->dirty |= CELL_NEW_STIPPLE; -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c deleted file mode 100644 index 7eca5b57656..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c +++ /dev/null @@ -1,106 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "cell_context.h" -#include "cell_state.h" - - - -struct spu_rasterizer_state -{ - unsigned flatshade:1; -#if 0 - unsigned light_twoside:1; - unsigned front_winding:2; /**< PIPE_WINDING_x */ - unsigned cull_mode:2; /**< PIPE_WINDING_x */ - unsigned fill_cw:2; /**< PIPE_POLYGON_MODE_x */ - unsigned fill_ccw:2; /**< PIPE_POLYGON_MODE_x */ - unsigned offset_cw:1; - unsigned offset_ccw:1; -#endif - unsigned scissor:1; - unsigned poly_smooth:1; - unsigned poly_stipple_enable:1; - unsigned point_smooth:1; -#if 0 - unsigned point_sprite:1; - unsigned point_size_per_vertex:1; /**< size computed in vertex shader */ -#endif - unsigned multisample:1; /* XXX maybe more ms state in future */ - unsigned line_smooth:1; - unsigned line_stipple_enable:1; - unsigned line_stipple_factor:8; /**< [1..256] actually */ - unsigned line_stipple_pattern:16; -#if 0 - unsigned bypass_clipping:1; -#endif - unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ - - float line_width; - float point_size; /**< used when no per-vertex size */ -#if 0 - float offset_units; - float offset_scale; - ubyte sprite_coord_mode[PIPE_MAX_SHADER_OUTPUTS]; /**< PIPE_SPRITE_COORD_ */ -#endif -}; - - - -void * -cell_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *setup) -{ - struct pipe_rasterizer_state *state - = MALLOC(sizeof(struct pipe_rasterizer_state)); - memcpy(state, setup, sizeof(struct pipe_rasterizer_state)); - return state; -} - - -void -cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup) -{ - struct cell_context *cell = cell_context(pipe); - - /* pass-through to draw module */ - draw_set_rasterizer_state(cell->draw, setup); - - cell->rasterizer = (struct pipe_rasterizer_state *)setup; - - cell->dirty |= CELL_NEW_RASTERIZER; -} - - -void -cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) -{ - FREE(rasterizer); -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_sampler.c b/src/gallium/drivers/cell/ppu/cell_state_sampler.c deleted file mode 100644 index a33421a4ad0..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_sampler.c +++ /dev/null @@ -1,84 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: - * Brian Paul - */ - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "cell_context.h" -#include "cell_state.h" -#include "cell_texture.h" - - -void * -cell_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - return mem_dup(sampler, sizeof(*sampler)); -} - -void -cell_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - assert(unit < PIPE_MAX_SAMPLERS); - cell->sampler[unit] = (struct pipe_sampler_state *)sampler; - - cell->dirty |= CELL_NEW_SAMPLER; -} - - -void -cell_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - FREE( sampler ); -} - - - -void -cell_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, - struct pipe_texture *texture) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->texture[sampler] = texture; - - cell_update_texture_mapping(cell); - - cell->dirty |= CELL_NEW_TEXTURE; -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_surface.c b/src/gallium/drivers/cell/ppu/cell_state_surface.c deleted file mode 100644 index 287610b76b9..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_surface.c +++ /dev/null @@ -1,71 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "pipe/p_inlines.h" -#include "cell_context.h" -#include "cell_state.h" - - -void -cell_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct cell_context *cell = cell_context(pipe); - - if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { - struct pipe_surface *csurf = fb->cbufs[0]; - struct pipe_surface *zsurf = fb->zsbuf; - uint i; - - /* unmap old surfaces */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) { - pipe_surface_unmap(cell->framebuffer.cbufs[i]); - cell->cbuf_map[i] = NULL; - } - } - - if (cell->framebuffer.zsbuf && cell->zsbuf_map) { - pipe_surface_unmap(cell->framebuffer.zsbuf); - cell->zsbuf_map = NULL; - } - - /* update my state */ - cell->framebuffer = *fb; - - /* map new surfaces */ - if (csurf) - cell->cbuf_map[0] = pipe_surface_map(csurf); - - if (zsurf) - cell->zsbuf_map = pipe_surface_map(zsurf); - - cell->dirty |= CELL_NEW_FRAMEBUFFER; - } -} - -- cgit v1.2.3 From f6e1654e22d983ec9015fe4e7445e03df1227c71 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 14:25:07 -0700 Subject: cell: plug in cell_set_sampler_texture --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index aef50725e87..c5ddf6a09ea 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -302,6 +302,8 @@ cell_init_state_functions(struct cell_context *cell) cell->pipe.bind_sampler_state = cell_bind_sampler_state; cell->pipe.delete_sampler_state = cell_delete_sampler_state; + cell->pipe.set_sampler_texture = cell_set_sampler_texture; + cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; -- cgit v1.2.3 From 9e57e70b42471ff587441fb8c1b5de728521fafd Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 14:27:08 -0700 Subject: cell: #includes to silence warnings --- src/gallium/drivers/cell/ppu/cell_clear.c | 1 + src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index e588a30d5bc..3ffe09add66 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -41,6 +41,7 @@ #include "cell_batch.h" #include "cell_flush.h" #include "cell_spu.h" +#include "cell_state.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 64c7821c193..f7ef72e5a2c 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -35,6 +35,7 @@ #include "cell_context.h" #include "cell_draw_arrays.h" +#include "cell_flush.h" #include "cell_spu.h" #include "cell_batch.h" -- cgit v1.2.3 From 64683473753de6eb978245e348e9b20cd1d42883 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 14:30:50 -0700 Subject: cell: init shader-related functions in cell_init_shader_functions() --- src/gallium/drivers/cell/ppu/cell_context.c | 11 +----- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 1 - src/gallium/drivers/cell/ppu/cell_state.h | 50 ++++++++++++++++---------- src/gallium/drivers/cell/ppu/cell_state_fs.c | 29 +++++++++++---- 4 files changed, 55 insertions(+), 36 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 9f0ecb2be8b..98c314f45c4 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -199,16 +199,6 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) /* state setters */ - cell->pipe.create_fs_state = cell_create_fs_state; - cell->pipe.bind_fs_state = cell_bind_fs_state; - cell->pipe.delete_fs_state = cell_delete_fs_state; - - cell->pipe.create_vs_state = cell_create_vs_state; - cell->pipe.bind_vs_state = cell_bind_vs_state; - cell->pipe.delete_vs_state = cell_delete_vs_state; - - cell->pipe.set_constant_buffer = cell_set_constant_buffer; - cell->pipe.set_vertex_buffer = cell_set_vertex_buffer; cell->pipe.set_vertex_element = cell_set_vertex_element; @@ -225,6 +215,7 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) #endif cell_init_state_functions(cell); + cell_init_shader_functions(cell); cell_init_surface_functions(cell); cell_init_texture_functions(cell); diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index c5ddf6a09ea..35e88f79b12 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -314,7 +314,6 @@ cell_init_state_functions(struct cell_context *cell) cell->pipe.set_blend_color = cell_set_blend_color; cell->pipe.set_clip_state = cell_set_clip_state; - cell->pipe.set_constant_buffer = cell_set_constant_buffer; cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index 2af7770ebbb..31ce505e211 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -1,3 +1,29 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ #ifndef CELL_STATE_H @@ -22,23 +48,6 @@ #define CELL_NEW_VERTEX_INFO 0x8000 - - -void *cell_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void cell_bind_fs_state(struct pipe_context *, void *); -void cell_delete_fs_state(struct pipe_context *, void *); -void *cell_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void cell_bind_vs_state(struct pipe_context *, void *); -void cell_delete_vs_state(struct pipe_context *, void *); - -void -cell_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf); - - void cell_set_vertex_element(struct pipe_context *, unsigned index, const struct pipe_vertex_element *); @@ -49,4 +58,9 @@ void cell_set_vertex_buffer(struct pipe_context *, void cell_update_derived( struct cell_context *softpipe ); -#endif + +void +cell_init_shader_functions(struct cell_context *cell); + +#endif /* CELL_STATE_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_fs.c index f3958fa429e..935501441b1 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_fs.c +++ b/src/gallium/drivers/cell/ppu/cell_state_fs.c @@ -41,7 +41,7 @@ #include "cell_state.h" -void * +static void * cell_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -80,7 +80,7 @@ cell_create_fs_state(struct pipe_context *pipe, } -void +static void cell_bind_fs_state(struct pipe_context *pipe, void *fs) { struct cell_context *cell = cell_context(pipe); @@ -91,7 +91,7 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs) } -void +static void cell_delete_fs_state(struct pipe_context *pipe, void *fs) { struct cell_fragment_shader_state *state = @@ -101,7 +101,7 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs) } -void * +static void * cell_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { @@ -124,7 +124,7 @@ cell_create_vs_state(struct pipe_context *pipe, } -void +static void cell_bind_vs_state(struct pipe_context *pipe, void *vs) { struct cell_context *cell = cell_context(pipe); @@ -137,7 +137,7 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs) } -void +static void cell_delete_vs_state(struct pipe_context *pipe, void *vs) { struct cell_context *cell = cell_context(pipe); @@ -150,7 +150,7 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) } -void +static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, const struct pipe_constant_buffer *buf) @@ -169,3 +169,18 @@ cell_set_constant_buffer(struct pipe_context *pipe, cell->dirty |= CELL_NEW_CONSTANTS; } + + +void +cell_init_shader_functions(struct cell_context *cell) +{ + cell->pipe.create_fs_state = cell_create_fs_state; + cell->pipe.bind_fs_state = cell_bind_fs_state; + cell->pipe.delete_fs_state = cell_delete_fs_state; + + cell->pipe.create_vs_state = cell_create_vs_state; + cell->pipe.bind_vs_state = cell_bind_vs_state; + cell->pipe.delete_vs_state = cell_delete_vs_state; + + cell->pipe.set_constant_buffer = cell_set_constant_buffer; +} -- cgit v1.2.3 From fd4bdd020a9e1999b87d553b50151405c054a619 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 14:32:43 -0700 Subject: cell: rename cell_state_fs.c -> cell_state_shader.c --- src/gallium/drivers/cell/ppu/Makefile | 2 +- src/gallium/drivers/cell/ppu/cell_state_fs.c | 186 ----------------------- src/gallium/drivers/cell/ppu/cell_state_shader.c | 186 +++++++++++++++++++++++ 3 files changed, 187 insertions(+), 187 deletions(-) delete mode 100644 src/gallium/drivers/cell/ppu/cell_state_fs.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_shader.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 6b6dfca890e..164dde762c0 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -27,7 +27,7 @@ SOURCES = \ cell_flush.c \ cell_state_derived.c \ cell_state_emit.c \ - cell_state_fs.c \ + cell_state_shader.c \ cell_pipe_state.c \ cell_state_vertex.c \ cell_spu.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_fs.c deleted file mode 100644 index 935501441b1..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_fs.c +++ /dev/null @@ -1,186 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" -#include "draw/draw_context.h" -#if 0 -#include "pipe/p_shader_tokens.h" -#include "gallivm/gallivm.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/exec/tgsi_sse2.h" -#endif - -#include "cell_context.h" -#include "cell_state.h" - - -static void * -cell_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - /*struct cell_context *cell = cell_context(pipe);*/ - struct cell_fragment_shader_state *state; - - state = CALLOC_STRUCT(cell_fragment_shader_state); - if (!state) - return NULL; - - state->shader = *templ; - -#if 0 - if (cell->dump_fs) { - tgsi_dump(state->shader.tokens, 0); - } - -#if defined(__i386__) || defined(__386__) - if (cell->use_sse) { - x86_init_func( &state->sse2_program ); - tgsi_emit_sse2_fs( state->shader.tokens, &state->sse2_program ); - } -#endif - -#ifdef MESA_LLVM - state->llvm_prog = 0; - if (!gallivm_global_cpu_engine()) { - gallivm_cpu_engine_create(state->llvm_prog); - } - else - gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); -#endif -#endif - - return state; -} - - -static void -cell_bind_fs_state(struct pipe_context *pipe, void *fs) -{ - struct cell_context *cell = cell_context(pipe); - - cell->fs = (struct cell_fragment_shader_state *) fs; - - cell->dirty |= CELL_NEW_FS; -} - - -static void -cell_delete_fs_state(struct pipe_context *pipe, void *fs) -{ - struct cell_fragment_shader_state *state = - (struct cell_fragment_shader_state *) fs; - - FREE( state ); -} - - -static void * -cell_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_vertex_shader_state *state; - - state = CALLOC_STRUCT(cell_vertex_shader_state); - if (!state) - return NULL; - - state->shader = *templ; - - state->draw_data = draw_create_vertex_shader(cell->draw, &state->shader); - if (state->draw_data == NULL) { - FREE( state ); - return NULL; - } - - return state; -} - - -static void -cell_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct cell_context *cell = cell_context(pipe); - - cell->vs = (const struct cell_vertex_shader_state *) vs; - - draw_bind_vertex_shader(cell->draw, cell->vs->draw_data); - - cell->dirty |= CELL_NEW_VS; -} - - -static void -cell_delete_vs_state(struct pipe_context *pipe, void *vs) -{ - struct cell_context *cell = cell_context(pipe); - - struct cell_vertex_shader_state *state = - (struct cell_vertex_shader_state *) vs; - - draw_delete_vertex_shader(cell->draw, state->draw_data); - FREE( state ); -} - - -static void -cell_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf) -{ - struct cell_context *cell = cell_context(pipe); - struct pipe_winsys *ws = pipe->winsys; - - assert(shader < PIPE_SHADER_TYPES); - assert(index == 0); - - /* note: reference counting */ - pipe_buffer_reference(ws, - &cell->constants[shader].buffer, - buf->buffer); - cell->constants[shader].size = buf->size; - - cell->dirty |= CELL_NEW_CONSTANTS; -} - - -void -cell_init_shader_functions(struct cell_context *cell) -{ - cell->pipe.create_fs_state = cell_create_fs_state; - cell->pipe.bind_fs_state = cell_bind_fs_state; - cell->pipe.delete_fs_state = cell_delete_fs_state; - - cell->pipe.create_vs_state = cell_create_vs_state; - cell->pipe.bind_vs_state = cell_bind_vs_state; - cell->pipe.delete_vs_state = cell_delete_vs_state; - - cell->pipe.set_constant_buffer = cell_set_constant_buffer; -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c new file mode 100644 index 00000000000..935501441b1 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -0,0 +1,186 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "draw/draw_context.h" +#if 0 +#include "pipe/p_shader_tokens.h" +#include "gallivm/gallivm.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_sse2.h" +#endif + +#include "cell_context.h" +#include "cell_state.h" + + +static void * +cell_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + /*struct cell_context *cell = cell_context(pipe);*/ + struct cell_fragment_shader_state *state; + + state = CALLOC_STRUCT(cell_fragment_shader_state); + if (!state) + return NULL; + + state->shader = *templ; + +#if 0 + if (cell->dump_fs) { + tgsi_dump(state->shader.tokens, 0); + } + +#if defined(__i386__) || defined(__386__) + if (cell->use_sse) { + x86_init_func( &state->sse2_program ); + tgsi_emit_sse2_fs( state->shader.tokens, &state->sse2_program ); + } +#endif + +#ifdef MESA_LLVM + state->llvm_prog = 0; + if (!gallivm_global_cpu_engine()) { + gallivm_cpu_engine_create(state->llvm_prog); + } + else + gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); +#endif +#endif + + return state; +} + + +static void +cell_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->fs = (struct cell_fragment_shader_state *) fs; + + cell->dirty |= CELL_NEW_FS; +} + + +static void +cell_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct cell_fragment_shader_state *state = + (struct cell_fragment_shader_state *) fs; + + FREE( state ); +} + + +static void * +cell_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct cell_context *cell = cell_context(pipe); + struct cell_vertex_shader_state *state; + + state = CALLOC_STRUCT(cell_vertex_shader_state); + if (!state) + return NULL; + + state->shader = *templ; + + state->draw_data = draw_create_vertex_shader(cell->draw, &state->shader); + if (state->draw_data == NULL) { + FREE( state ); + return NULL; + } + + return state; +} + + +static void +cell_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->vs = (const struct cell_vertex_shader_state *) vs; + + draw_bind_vertex_shader(cell->draw, cell->vs->draw_data); + + cell->dirty |= CELL_NEW_VS; +} + + +static void +cell_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + + struct cell_vertex_shader_state *state = + (struct cell_vertex_shader_state *) vs; + + draw_delete_vertex_shader(cell->draw, state->draw_data); + FREE( state ); +} + + +static void +cell_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct cell_context *cell = cell_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* note: reference counting */ + pipe_buffer_reference(ws, + &cell->constants[shader].buffer, + buf->buffer); + cell->constants[shader].size = buf->size; + + cell->dirty |= CELL_NEW_CONSTANTS; +} + + +void +cell_init_shader_functions(struct cell_context *cell) +{ + cell->pipe.create_fs_state = cell_create_fs_state; + cell->pipe.bind_fs_state = cell_bind_fs_state; + cell->pipe.delete_fs_state = cell_delete_fs_state; + + cell->pipe.create_vs_state = cell_create_vs_state; + cell->pipe.bind_vs_state = cell_bind_vs_state; + cell->pipe.delete_vs_state = cell_delete_vs_state; + + cell->pipe.set_constant_buffer = cell_set_constant_buffer; +} -- cgit v1.2.3 From fce61f341faf6a2e1a8497ab963985ddbffa8b0a Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 14:44:15 -0700 Subject: gallium: fix bad ptr comparison --- src/gallium/drivers/softpipe/sp_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 8f31f05e478..295704c05f8 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -136,7 +136,7 @@ softpipe_texture_update(struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context(pipe); uint unit; for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { - if (softpipe->texture[unit] == texture) { + if (softpipe->texture[unit] == softpipe_texture(texture)) { sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); } } -- cgit v1.2.3 From 25ea1901b44107a5bc5351487e18d52d75df8ffd Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 15:09:27 -0700 Subject: gallium: replace some ordinary assignments with pipe_reference_texture() This fixes at least one instance of dereferencing an invalid texture pointer. --- src/gallium/drivers/cell/ppu/cell_texture.c | 1 + src/gallium/drivers/i915simple/i915_state.c | 5 ++++- src/gallium/drivers/i915simple/i915_texture.c | 1 + src/gallium/drivers/i965simple/brw_state.c | 4 +++- src/gallium/drivers/i965simple/brw_tex_layout.c | 1 + src/gallium/drivers/softpipe/sp_context.h | 2 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 3 ++- src/gallium/drivers/softpipe/sp_texture.c | 5 ++++- src/mesa/state_tracker/st_texture.c | 9 ++++++--- 10 files changed, 24 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index e1b91075b2c..0edefa5f056 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -88,6 +88,7 @@ cell_texture_create(struct pipe_context *pipe, return NULL; spt->base = *templat; + spt->base.refcount = 1; cell_texture_layout(spt); diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 294e6fad035..e055eed7e02 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -32,6 +32,7 @@ #include "draw/draw_context.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "i915_context.h" #include "i915_reg.h" @@ -505,7 +506,9 @@ static void i915_set_sampler_texture(struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); - i915->texture[sampler] = (struct i915_texture*)texture; /* ptr, not struct */ + pipe_texture_reference(pipe, + (struct pipe_texture **) &i915->texture[sampler], + texture); i915->dirty |= I915_NEW_TEXTURE; } diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index b235fae96d9..1b415a94d40 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -488,6 +488,7 @@ i915_texture_create(struct pipe_context *pipe, struct i915_context *i915 = i915_context(pipe); tex->base = *templat; + tex->base.refcount = 1; if (i915->flags.is_i945 ? i945_miptree_layout(pipe, tex) : i915_miptree_layout(pipe, tex)) diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index f746d1cc57c..f269b2882ca 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -327,7 +327,9 @@ static void brw_set_sampler_texture(struct pipe_context *pipe, { struct brw_context *brw = brw_context(pipe); - brw->attribs.Texture[unit] = (struct brw_texture*)texture; /* ptr, not struct */ + pipe_reference_texture(pipe, + (struct pipe_texture **) &brw->attribs.Texture[unit], + texture); brw->state.dirty.brw |= BRW_NEW_TEXTURE; } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 043a2ff9a45..86ce3d0cc38 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -308,6 +308,7 @@ brw_texture_create(struct pipe_context *pipe, if (tex) { tex->base = *templat; + tex->base.refcount = 1; if (brw_miptree_layout(pipe, tex)) tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index b70d4fea85a..a50cee76486 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -68,7 +68,7 @@ struct softpipe_context { struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; - struct softpipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index cf1b1eff75c..2f40e09d5c2 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -142,7 +142,7 @@ static void shade_begin(struct quad_stage *qs) /* set TGSI sampler state that varies */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { qss->samplers[i].state = softpipe->sampler[i]; - qss->samplers[i].texture = &softpipe->texture[i]->base; + qss->samplers[i].texture = softpipe->texture[i]; } /* find output slots for depth, color */ diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 9246915e19e..18669a1c6ef 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -30,6 +30,7 @@ */ #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "draw/draw_context.h" @@ -82,7 +83,7 @@ softpipe_set_sampler_texture(struct pipe_context *pipe, draw_flush(softpipe->draw); assert(unit < PIPE_MAX_SAMPLERS); - softpipe->texture[unit] = softpipe_texture(texture); /* ptr, not struct */ + pipe_texture_reference(pipe, &softpipe->texture[unit], texture); sp_tile_cache_set_texture(pipe, softpipe->tex_cache[unit], texture); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 295704c05f8..6ba0f09e0a1 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -89,6 +89,7 @@ softpipe_texture_create(struct pipe_context *pipe, return NULL; spt->base = *templat; + spt->base.refcount = 1; softpipe_texture_layout(spt); @@ -100,6 +101,8 @@ softpipe_texture_create(struct pipe_context *pipe, return NULL; } + assert(spt->base.refcount == 1); + return &spt->base; } @@ -136,7 +139,7 @@ softpipe_texture_update(struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context(pipe); uint unit; for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { - if (softpipe->texture[unit] == softpipe_texture(texture)) { + if (softpipe->texture[unit] == texture) { sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); } } diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index b86f416c9b4..ad284170e44 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -76,7 +76,7 @@ st_texture_create(struct st_context *st, GLuint depth0, GLuint compress_byte) { - struct pipe_texture pt; + struct pipe_texture pt, *newtex; assert(target <= PIPE_TEXTURE_CUBE); @@ -95,9 +95,12 @@ st_texture_create(struct st_context *st, pt.depth[0] = depth0; pt.compressed = compress_byte ? 1 : 0; pt.cpp = pt.compressed ? compress_byte : st_sizeof_format(format); - pt.refcount = 1; - return st->pipe->texture_create(st->pipe, &pt); + newtex = st->pipe->texture_create(st->pipe, &pt); + + assert(!newtex || newtex->refcount == 1); + + return newtex; } -- cgit v1.2.3 From e523ef72044d7f8137a298d60597b8913bae9145 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 15:13:33 -0700 Subject: cell: use pipe_texture_reference() --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 35e88f79b12..95bfc29fbeb 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -242,7 +242,9 @@ cell_set_sampler_texture(struct pipe_context *pipe, draw_flush(cell->draw); - cell->texture[sampler] = cell_texture(texture); + pipe_texture_reference(pipe, + (struct pipe_texture **) &cell->texture[sampler], + texture); cell_update_texture_mapping(cell); -- cgit v1.2.3 From d3b7d26b0bab6587cfad64735aefa28d8377c358 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 17:57:40 -0700 Subject: gallium: s/pipe_reference_texture/pipe_texture_reference/ --- src/gallium/drivers/i965simple/brw_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index f269b2882ca..254e3f7245e 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -327,7 +327,7 @@ static void brw_set_sampler_texture(struct pipe_context *pipe, { struct brw_context *brw = brw_context(pipe); - pipe_reference_texture(pipe, + pipe_texture_reference(pipe, (struct pipe_texture **) &brw->attribs.Texture[unit], texture); -- cgit v1.2.3 From 8be9bc08e1da31619f1b1c49aa6280d44f94c442 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 20 Feb 2008 18:00:03 -0700 Subject: gallium: include p_inlines.h --- src/gallium/drivers/i965simple/brw_state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 254e3f7245e..2fc048bde00 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -32,6 +32,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_dump.h" -- cgit v1.2.3 From 7c74037852a484a8a50e8bc540b954a624de4d33 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Wed, 20 Feb 2008 14:32:25 -0800 Subject: Cell: Initial pass at unified data cache --- src/gallium/drivers/cell/common.h | 8 ++ src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 6 +- src/gallium/drivers/cell/ppu/cell_flush.c | 14 ++++ src/gallium/drivers/cell/spu/spu_dcache.c | 100 ++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_dcache.h | 34 ++++++++ src/gallium/drivers/cell/spu/spu_exec.c | 49 ++++++------ src/gallium/drivers/cell/spu/spu_main.c | 8 ++ src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 70 +---------------- 8 files changed, 194 insertions(+), 95 deletions(-) create mode 100644 src/gallium/drivers/cell/spu/spu_dcache.c create mode 100644 src/gallium/drivers/cell/spu/spu_dcache.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index cf892206c66..f32ad5bfbe6 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -93,6 +93,7 @@ #define CELL_CMD_STATE_BLEND 19 #define CELL_CMD_STATE_ATTRIB_FETCH 20 #define CELL_CMD_VS_EXECUTE 21 +#define CELL_CMD_FLUSH_BUFFER_RANGE 22 #define CELL_NUM_BUFFERS 4 @@ -144,6 +145,13 @@ struct cell_attribute_fetch_code { uint size; }; + +struct cell_buffer_range { + uint64_t base; + unsigned size; +}; + + struct cell_shader_info { uint64_t declarations; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index f12613649b9..cbd387f0142 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -49,9 +49,12 @@ cell_map_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].size) + if (sp->constants[i].size) { sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); + cell_flush_buffer_range(sp, sp->mapped_constants[i], + sp->constants[i].buffer->size); + } } draw_set_mapped_constant_buffer(sp->draw, @@ -124,6 +127,7 @@ cell_draw_elements(struct pipe_context *pipe, void *buf = pipe->winsys->buffer_map(pipe->winsys, sp->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); + cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); draw_set_mapped_vertex_buffer(draw, i, buf); } } diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index 20f27531fce..66a5627d844 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -82,3 +82,17 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags) flushing = FALSE; } + + +void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, + unsigned size) +{ + uint64_t batch[1 + (ROUNDUP8(sizeof(struct cell_buffer_range)) / 8)]; + struct cell_buffer_range *br = (struct cell_buffer_range *) & batch[1]; + + batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE; + br->base = (uintptr_t) ptr; + br->size = size; + cell_batch_append(cell, batch, sizeof(batch)); +} diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c new file mode 100644 index 00000000000..9e30e178804 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -0,0 +1,100 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "spu_main.h" +#include "spu_dcache.h" + +#define CACHE_NAME data +#define CACHED_TYPE qword +#define CACHE_TYPE CACHE_TYPE_RO +#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER +#define CACHE_LOG2NNWAY 2 +#define CACHE_LOG2NSETS 6 +#include <cache-api.h> + +/* Yes folks, this is ugly. + */ +#undef CACHE_NWAY +#undef CACHE_NSETS +#define CACHE_NAME data +#define CACHE_NWAY 4 +#define CACHE_NSETS (1U << 6) + + +/** + * Fetch between arbitrary number of bytes from an unaligned address + */ +void +spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) +{ + const int shift = ea & 0x0f; + const unsigned aligned_start_ea = ea & ~0x0f; + const unsigned aligned_end_ea = (ea + size) & ~0x0f; + const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1; + unsigned i; + + + if (shift == 0) { + /* Data is already aligned. Fetch directly into the destination buffer. + */ + for (i = 0; i < num_entries; i++) { + dst[i] = cache_rd(data, (ea & ~0x0f) + (i * 16)); + } + } else { + qword tmp[2] ALIGN16_ATTRIB; + + + tmp[0] = cache_rd(data, (ea & ~0x0f)); + for (i = 0; i < (num_entries & ~1); i++) { + const unsigned curr = i & 1; + const unsigned next = curr ^ 1; + + tmp[next] = cache_rd(data, (ea & ~0x0f) + (next * 16)); + + dst[i] = si_or((qword) spu_slqwbyte(tmp[curr], shift), + (qword) spu_rlmaskqwbyte(tmp[next], shift - 16)); + } + + if (i < num_entries) { + dst[i] = si_or((qword) spu_slqwbyte(tmp[(i & 1)], shift), + si_il(0)); + } + } +} + + +void +spu_dcache_mark_dirty(unsigned ea, unsigned size) +{ + unsigned i; + + (void) ea; + (void) size; + + /* Invalidate the whole cache for now. + */ + for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { + CACHELINE_CLEARVALID(i); + } +} diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h new file mode 100644 index 00000000000..7a06b8c25af --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_dcache.h @@ -0,0 +1,34 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SPU_DCACHE_H +#define SPU_DCACHE_H + +extern void +spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size); + +extern void +spu_dcache_mark_dirty(unsigned ea, unsigned size); + +#endif /* SPU_DCACHE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 0eb5ea1a3f3..94ac6a28850 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -72,6 +72,7 @@ #include "spu_exec.h" #include "spu_main.h" #include "spu_vertex_shader.h" +#include "spu_dcache.h" #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 @@ -352,19 +353,17 @@ fetch_src_file_channel( case TGSI_EXTSWIZZLE_W: switch( file ) { case TGSI_FILE_CONSTANT: { - unsigned char buffer[32] ALIGN16_ATTRIB; unsigned i; for (i = 0; i < 4; i++) { const float *ptr = mach->Consts[index->i[i]]; - const uint64_t addr = (uint64_t)(uintptr_t) ptr; - const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32; + float tmp[4]; - mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); - wait_on_mask(1 << TAG_VERTEX_BUFFER); + spu_dcache_fetch_unaligned((qword *) tmp, + (uintptr_t)(ptr + swizzle), + sizeof(float)); - (void) memcpy(& chan->f[i], &buffer[(addr & 0x0f) - + (sizeof(float) * swizzle)], sizeof(float)); + chan->f[i] = tmp[0]; } break; } @@ -1899,32 +1898,30 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { - uint8_t buffer[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB; - struct tgsi_full_declaration decl; - unsigned long decl_addr = (unsigned long) (mach->Declarations+i); - unsigned size = ((sizeof(decl) + (decl_addr & 0x0f) + 0x0f) & ~0x0f); + union { + struct tgsi_full_declaration decl; + qword buffer[2 * ((sizeof(struct tgsi_full_declaration) + 31) + / 32)]; + } d ALIGN16_ATTRIB; + unsigned ea = (unsigned) (mach->Declarations + pc); - mfc_get(buffer, decl_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); - wait_on_mask(1 << TAG_INSTRUCTION_FETCH); + spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); - memcpy(& decl, buffer + (decl_addr & 0x0f), sizeof(decl)); - exec_declaration( mach, &decl ); + exec_declaration( mach, &d.decl ); } } /* execute instructions, until pc is set to -1 */ while (pc != -1) { - uint8_t buffer[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB; - struct tgsi_full_instruction inst; - unsigned long inst_addr = (unsigned long) (mach->Instructions + pc); - unsigned size = ((sizeof(inst) + (inst_addr & 0x0f) + 0x0f) & ~0x0f); - - assert(pc < mach->NumInstructions); - mfc_get(buffer, inst_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); - wait_on_mask(1 << TAG_INSTRUCTION_FETCH); - - memcpy(& inst, buffer + (inst_addr & 0x0f), sizeof(inst)); - exec_instruction( mach, & inst, &pc ); + union { + struct tgsi_full_instruction inst; + qword buffer[2 * ((sizeof(struct tgsi_full_instruction) + 31) + / 32)]; + } i ALIGN16_ATTRIB; + unsigned ea = (unsigned) (mach->Instructions + pc); + + spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); + exec_instruction( mach, & i.inst, &pc ); } #if 0 diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index dbc3705c241..1136dba62d5 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -462,6 +462,14 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); break; } + case CELL_CMD_FLUSH_BUFFER_RANGE: { + struct cell_buffer_range *br = (struct cell_buffer_range *) + &buffer[pos+1]; + + spu_dcache_mark_dirty((unsigned) br->base, br->size); + pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8); + break; + } default: printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); ASSERT(0); diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index e5d9910ff30..f7e4e653e31 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -40,25 +40,7 @@ #include "spu_exec.h" #include "spu_vertex_shader.h" #include "spu_main.h" - -#define CACHE_NAME attribute -#define CACHED_TYPE qword -#define CACHE_TYPE CACHE_TYPE_RO -#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER -#define CACHE_LOG2NNWAY 2 -#define CACHE_LOG2NSETS 6 -#include <cache-api.h> - -/* Yes folks, this is ugly. - */ -#undef CACHE_NWAY -#undef CACHE_NSETS -#define CACHE_NAME attribute -#define CACHE_NWAY 4 -#define CACHE_NSETS (1U << 6) - - -#define DRAW_DBG 0 +#include "spu_dcache.h" typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); @@ -102,44 +84,6 @@ static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { }; -/** - * Fetch between 1 and 32 bytes from an unaligned address - */ -static INLINE void -fetch_unaligned(qword *dst, unsigned ea, unsigned size) -{ - qword tmp[4] ALIGN16_ATTRIB; - const int shift = ea & 0x0f; - const unsigned aligned_start_ea = ea & ~0x0f; - const unsigned aligned_end_ea = (ea + size) & ~0x0f; - const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1; - unsigned i; - - - if (shift == 0) { - /* Data is already aligned. Fetch directly into the destination buffer. - */ - for (i = 0; i < num_entries; i++) { - dst[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); - } - } else { - /* Fetch data from the cache to the local buffer. - */ - for (i = 0; i < num_entries; i++) { - tmp[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); - } - - - /* Fix the alignment of the data and write to the destination buffer. - */ - for (i = 0; i < ((size + 15) / 16); i++) { - dst[i] = si_or((qword) spu_slqwbyte(tmp[i], shift), - (qword) spu_rlmaskqwbyte(tmp[i + 1], shift - 16)); - } - } -} - - /** * Fetch vertex attributes for 'count' vertices. */ @@ -182,7 +126,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, printf("SPU: fetching = 0x%llx\n", addr); #endif - fetch_unaligned(& in[idx], addr, bytes_per_entry); + spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry); idx += quads_per_entry; } @@ -200,15 +144,5 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, void spu_update_vertex_fetch( struct spu_vs_context *draw ) { - unsigned i; - - - /* Invalidate the vertex cache. - */ - for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { - CACHELINE_CLEARVALID(i); - } - - draw->vertex_fetch.fetch_func = generic_vertex_fetch; } -- cgit v1.2.3 From 2d1f086c12b6d64f5c3fb80474f26775aeb71370 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Wed, 20 Feb 2008 14:45:08 -0800 Subject: Cell: Fix off-by-one error in spu_dcache_fetch_unaligned An off-by-one error caused an extra qword to be fetched under certain alignment / size combinations. --- src/gallium/drivers/cell/spu/spu_dcache.c | 5 +++-- src/gallium/drivers/cell/spu/spu_exec.c | 7 +++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c index 9e30e178804..68aa5c4ae8e 100644 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "cell/common.h" #include "spu_main.h" #include "spu_dcache.h" @@ -50,8 +51,8 @@ spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) { const int shift = ea & 0x0f; const unsigned aligned_start_ea = ea & ~0x0f; - const unsigned aligned_end_ea = (ea + size) & ~0x0f; - const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1; + const unsigned aligned_end_ea = ROUNDUP16(ea + size); + const unsigned num_entries = (aligned_end_ea - aligned_start_ea) / 16; unsigned i; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 94ac6a28850..cf81bee8fde 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -73,6 +73,7 @@ #include "spu_main.h" #include "spu_vertex_shader.h" #include "spu_dcache.h" +#include "cell/common.h" #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 @@ -1900,8 +1901,7 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) for (i = 0; i < mach->NumDeclarations; i++) { union { struct tgsi_full_declaration decl; - qword buffer[2 * ((sizeof(struct tgsi_full_declaration) + 31) - / 32)]; + qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; } d ALIGN16_ATTRIB; unsigned ea = (unsigned) (mach->Declarations + pc); @@ -1915,8 +1915,7 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) while (pc != -1) { union { struct tgsi_full_instruction inst; - qword buffer[2 * ((sizeof(struct tgsi_full_instruction) + 31) - / 32)]; + qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; } i ALIGN16_ATTRIB; unsigned ea = (unsigned) (mach->Instructions + pc); -- cgit v1.2.3 From e78fc9f2f4d89b0cae0d56d84dd16cb76a6757dc Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Thu, 21 Feb 2008 09:03:29 -0800 Subject: Cell: Initial scalar implementation of spu_dcache_mark_dirty --- src/gallium/drivers/cell/spu/spu_dcache.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c index 68aa5c4ae8e..698a5790bb0 100644 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -26,6 +26,10 @@ #include "spu_main.h" #include "spu_dcache.h" +#define CACHELINE_LOG2SIZE 7 +#define LINE_SIZE (1U << 7) +#define ALIGN_MASK (~(LINE_SIZE - 1)) + #define CACHE_NAME data #define CACHED_TYPE qword #define CACHE_TYPE CACHE_TYPE_RO @@ -60,7 +64,7 @@ spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) /* Data is already aligned. Fetch directly into the destination buffer. */ for (i = 0; i < num_entries; i++) { - dst[i] = cache_rd(data, (ea & ~0x0f) + (i * 16)); + dst[i] = cache_rd(data, ea + (i * 16)); } } else { qword tmp[2] ALIGN16_ATTRIB; @@ -85,17 +89,23 @@ spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) } +/** + * Notify the cache that a range of main memory may have been modified + */ void spu_dcache_mark_dirty(unsigned ea, unsigned size) { unsigned i; + const unsigned aligned_start = (ea & ALIGN_MASK); + const unsigned aligned_end = (ea + size + (LINE_SIZE - 1)) + & ALIGN_MASK; - (void) ea; - (void) size; - /* Invalidate the whole cache for now. - */ for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { - CACHELINE_CLEARVALID(i); + const unsigned entry = __cache_dir[i]; + const unsigned addr = entry & ~0x0f; + + __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end)) + ? (entry & ~CACHELINE_VALID) : entry; } } -- cgit v1.2.3 From 6dd47c264a8642a4e3dbe0b4fc194174743c64fc Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Thu, 21 Feb 2008 10:24:29 -0800 Subject: Cell: Add spu_dcache.c to Makefile. This was erroneously missing in previous commits. --- src/gallium/drivers/cell/spu/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index 30ef2450ece..c071de1900b 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -18,6 +18,7 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o SOURCES = \ spu_main.c \ spu_blend.c \ + spu_dcache.c \ spu_render.c \ spu_texture.c \ spu_tile.c \ -- cgit v1.2.3 From de5c64e0af0b1a1ce3ee12f361341880dc260868 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Thu, 21 Feb 2008 10:32:02 -0800 Subject: Cell: Remove erroneous ALIGN16_ATTRIB attributes If a structure is marked as being aligned the SPE compiler performs extra optimizations (sadly, only -O2 is used) when reading the structure. Since most of the structures sent in batch buffers are only 8-byte aligned, this resulted in mysterous bugs with -O2. --- src/gallium/drivers/cell/common.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index f32ad5bfbe6..9a4004535ea 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -137,7 +137,7 @@ struct cell_array_info uint pitch; /**< Byte pitch from one entry to the next. */ uint size; uint function_offset; -} ALIGN16_ATTRIB; +}; struct cell_attribute_fetch_code { @@ -162,7 +162,7 @@ struct cell_shader_info unsigned num_declarations; unsigned num_instructions; unsigned num_immediates; -} ALIGN16_ATTRIB; +}; #define SPU_VERTS_PER_BATCH 64 @@ -175,7 +175,7 @@ struct cell_command_vs float plane[12][4]; unsigned nr_planes; unsigned nr_attrs; -} ALIGN16_ATTRIB; +}; struct cell_command_render -- cgit v1.2.3 From 20fbcbf5801c28865c0bfab3cda45302c8474a66 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 21 Feb 2008 19:07:31 +0000 Subject: [PATCH] softpipe: unbreak sp_setup_pos_vector on non-x86 systems --- src/gallium/drivers/softpipe/sp_fs_exec.c | 35 +++++++++++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_fs_sse.c | 35 ------------------------------- 2 files changed, 35 insertions(+), 35 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 9ad30a76817..8cb0534342d 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -44,6 +44,41 @@ struct sp_exec_fragment_shader { +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * + * This should really be part of the compiled shader. + */ +void +sp_setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) +{ + uint chan; + /* do X */ + quadpos->xyzw[0].f[0] = x; + quadpos->xyzw[0].f[1] = x + 1; + quadpos->xyzw[0].f[2] = x; + quadpos->xyzw[0].f[3] = x + 1; + + /* do Y */ + quadpos->xyzw[1].f[0] = y; + quadpos->xyzw[1].f[1] = y; + quadpos->xyzw[1].f[2] = y + 1; + quadpos->xyzw[1].f[3] = y + 1; + + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + quadpos->xyzw[chan].f[0] = a0; + quadpos->xyzw[chan].f[1] = a0 + dadx; + quadpos->xyzw[chan].f[2] = a0 + dady; + quadpos->xyzw[chan].f[3] = a0 + dadx + dady; + } +} + static void exec_prepare( struct sp_fragment_shader *base, diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 53050b78230..8095d662ee6 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -63,41 +63,6 @@ struct sp_sse_fragment_shader { }; -/** - * Compute quad X,Y,Z,W for the four fragments in a quad. - * - * This should really be part of the compiled shader. - */ -void -sp_setup_pos_vector(const struct tgsi_interp_coef *coef, - float x, float y, - struct tgsi_exec_vector *quadpos) -{ - uint chan; - /* do X */ - quadpos->xyzw[0].f[0] = x; - quadpos->xyzw[0].f[1] = x + 1; - quadpos->xyzw[0].f[2] = x; - quadpos->xyzw[0].f[3] = x + 1; - - /* do Y */ - quadpos->xyzw[1].f[0] = y; - quadpos->xyzw[1].f[1] = y; - quadpos->xyzw[1].f[2] = y + 1; - quadpos->xyzw[1].f[3] = y + 1; - - /* do Z and W for all fragments in the quad */ - for (chan = 2; chan < 4; chan++) { - const float dadx = coef->dadx[chan]; - const float dady = coef->dady[chan]; - const float a0 = coef->a0[chan] + dadx * x + dady * y; - quadpos->xyzw[chan].f[0] = a0; - quadpos->xyzw[chan].f[1] = a0 + dadx; - quadpos->xyzw[chan].f[2] = a0 + dady; - quadpos->xyzw[chan].f[3] = a0 + dadx + dady; - } -} - static void fs_sse_prepare( struct sp_fragment_shader *base, -- cgit v1.2.3 From eb4dc2dd5ed62e6ccb55ccc2bc13f6a2f3fc1f76 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 21 Feb 2008 16:18:05 -0700 Subject: gallium: new AA point drawing stage AA points are drawn by converting the point to a quad, then modifying the user's fragment shader to compute a coverage value. The final fragment color's alpha is modulated by the coverage value. Fragments outside the point's radius are killed. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_aapoint.c | 875 +++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_context.c | 2 + src/gallium/auxiliary/draw/draw_context.h | 3 + src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_validate.c | 5 + src/gallium/drivers/softpipe/sp_context.c | 6 +- 7 files changed, 892 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/draw/draw_aapoint.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 1ee9eca0ca5..b7e71ed3ec7 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -5,6 +5,7 @@ LIBNAME = draw C_SOURCES = \ draw_aaline.c \ + draw_aapoint.c \ draw_clip.c \ draw_vs_exec.c \ draw_vs_sse.c \ diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c new file mode 100644 index 00000000000..43119cc70b4 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -0,0 +1,875 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA point stage: AA points are converted to quads and rendered with a + * special fragment shader. Another approach would be to use a texture + * map image of a point, but experiments indicate the quality isn't nearly + * as good as this approach. + * + * Note: this looks a lot like draw_aaline.c but there's actually little + * if any code that can be shared. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + +/* + * Enabling NORMALIZE might give _slightly_ better results. + * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or + * d=x*x+y*y. Since we're working with a unit circle, the later seems + * close enough and saves some costly instructions. + */ +#define NORMALIZE 0 + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aapoint_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; /**< the regular shader */ + void *aapoint_fs; /**< the aa point-augmented shader */ +}; + + +/** + * Subclass of draw_stage + */ +struct aapoint_stage +{ + struct draw_stage stage; + + int psize_slot; + float radius; + + /** this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + + /* + * Currently bound state + */ + struct aapoint_fragment_shader *fs; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + int maxInput, maxGeneric; /**< max input index found */ + int tmp0, colorTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for two free temp regs and available input reg for new texcoords. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->u.DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + struct tgsi_full_instruction newInst; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + const int texInput = aactx->maxInput + 1; + int tmp0; + uint i; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->tmp0 < 0) + aactx->tmp0 = i; + else if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else + break; + } + } + + assert(aactx->colorTemp != aactx->tmp0); + + tmp0 = aactx->tmp0; + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Declaration.Interpolate = 1; + /* XXX this could be linear... */ + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = texInput; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = tmp0; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + + + /* + * Emit code to compute fragment coverage, kill if outside point radius + * + * Temp reg0 usage: + * t0.x = distance of fragment from center point + * t0.y = boolean, is t0.x > 1 ? + * t0.z = temporary for computing 1/(1-k) value + * t0.w = final coverage value + */ + + /* MUL t0.xy, tex, tex; # compute x^2, y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + ctx->emit_instruction(ctx, &newInst); + + /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ADD; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + ctx->emit_instruction(ctx, &newInst); + +#if NORMALIZE /* OPTIONAL normalization of length */ + /* RSQ t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); +#endif + + /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + ctx->emit_instruction(ctx, &newInst); + + /* KILP -t0.yyyy; # if b, KILL */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + + /* SGT t0.y, t0.x, tex.z; # bool b = distance > k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* IF t0.y # if b then */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_IF; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ctx->emit_instruction(ctx, &newInst); + + { + /* compute coverage factor = (1-d)/(1-k) */ + + /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.z, t0.z; # t0.z = 1 / m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SUB t0.x, 1, t0.x; # d = 1 - d */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + ctx->emit_instruction(ctx, &newInst); + + /* MUL t0.w, t0.x, t0.z; # coverage = d * m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + } + + /* ELSE */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ELSE; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + + { + /* MOV t0.w, tex.w; # coverage = 1.0 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + ctx->emit_instruction(ctx, &newInst); + } + + /* ENDIF */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ENDIF; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + /* add alpha modulation code at tail of program */ + + /* MOV result.color.xyz, colorTemp; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL result.color.w, colorTemp, tmp0.w; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + } + + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. + */ +static void +generate_aapoint_fs(struct aapoint_stage *aapoint) +{ + const struct pipe_shader_state *orig_fs = &aapoint->fs->state; + struct draw_context *draw = aapoint->stage.draw; + struct pipe_shader_state aapoint_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aapoint_fs = *orig_fs; /* copy to init */ + aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.tmp0 = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aapoint_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(aapoint_fs.tokens, 0); +#endif + + aapoint_fs.input_semantic_name[aapoint_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; + aapoint_fs.input_semantic_index[aapoint_fs.num_inputs] = transform.maxGeneric + 1; + aapoint_fs.num_inputs++; + + aapoint->fs->aapoint_fs + = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); + + /* advertise the extra post-transform vertex attributes which will have + * the texcoords. + */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1; +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) +{ + if (!aapoint->fs->aapoint_fs) { + generate_aapoint_fs(aapoint); + } + aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); +} + + + +static INLINE struct aapoint_stage * +aapoint_stage( struct draw_stage *stage ) +{ + return (struct aapoint_stage *) stage; +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw an AA point by drawing a quad. + */ +static void +aapoint_point(struct draw_stage *stage, struct prim_header *header) +{ + const struct aapoint_stage *aapoint = aapoint_stage(stage); + struct prim_header tri; + struct vertex_header *v[4]; + uint texPos = aapoint->tex_slot; + float radius, *pos, *tex; + uint i; + float k; + + if (aapoint->psize_slot >= 0) { + radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0]; + } + else { + radius = aapoint->radius; + } + + /* + * Note: the texcoords (generic attrib, really) we use are special: + * The S and T components simply vary from -1 to +1. + * The R component is k, below. + * The Q component is 1.0 and will used as a handy constant in the + * fragment shader. + */ + + /* + * k is the threshold distance from the point's center at which + * we begin alpha attenuation (the coverage value). + * Operating within a unit circle, we'll compute the fragment's + * distance 'd' from the center point using the texcoords. + * IF d > 1.0 THEN + * KILL fragment + * ELSE IF d > k THEN + * compute coverage in [0,1] proportional to d in [k, 1]. + * ELSE + * coverage = 1.0; // full coverage + * ENDIF + */ + +#if !NORMALIZE + k = 1.0 / radius; + k = 1.0 - 2.0 * k + k * k; +#else + k = 1.0 - 1.0 / radius; +#endif + + /* allocate/dup new verts */ + for (i = 0; i < 4; i++) { + v[i] = dup_vert(stage, header->v[0], i); + } + + /* new verts */ + pos = v[0]->data[0]; + pos[0] -= radius; + pos[1] -= radius; + + pos = v[1]->data[0]; + pos[0] += radius; + pos[1] -= radius; + + pos = v[2]->data[0]; + pos[0] += radius; + pos[1] += radius; + + pos = v[3]->data[0]; + pos[0] -= radius; + pos[1] += radius; + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, -1, -1, k, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 1, -1, k, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, 1, 1, k, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, -1, 1, k, 1); + + /* emit 2 tris for the quad strip */ + tri.v[0] = v[0]; + tri.v[1] = v[1]; + tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[0]; + tri.v[1] = v[2]; + tri.v[2] = v[3]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aapoint_first_point(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aapoint_stage *aapoint = aapoint_stage(stage); + struct draw_context *draw = stage->draw; + + assert(draw->rasterizer->point_smooth); + + if (draw->rasterizer->point_size <= 2.0) + aapoint->radius = 1.0; + else + aapoint->radius = 0.5f * draw->rasterizer->point_size; + + aapoint->tex_slot = draw->num_vs_outputs; + assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ + draw->extra_vp_outputs.slot = aapoint->tex_slot; + + /* + * Bind our fragprog. + */ + bind_aapoint_fragment_shader(aapoint); + + /* find psize slot in post-transform vertex */ + aapoint->psize_slot = -1; + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->state->num_outputs; i++) { + if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + aapoint->psize_slot = i; + break; + } + } + } + + /* now really draw first line */ + stage->point = aapoint_point; + stage->point(stage, header); +} + + +static void +aapoint_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aapoint_stage *aapoint = aapoint_stage(stage); + struct pipe_context *pipe = aapoint->pipe; + + stage->point = aapoint_first_point; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aapoint_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aapoint_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct aapoint_stage * +draw_aapoint_stage(struct draw_context *draw) +{ + struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); + + draw_alloc_temp_verts( &aapoint->stage, 4 ); + + aapoint->stage.draw = draw; + aapoint->stage.next = NULL; + aapoint->stage.point = aapoint_first_point; + aapoint->stage.line = passthrough_line; + aapoint->stage.tri = passthrough_tri; + aapoint->stage.flush = aapoint_flush; + aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; + aapoint->stage.destroy = aapoint_destroy; + + return aapoint; +} + + +/* + * XXX temporary? solution to mapping a pipe_context to a aapoint_stage. + */ + +#define MAX_CONTEXTS 10 + +static struct pipe_context *Pipe[MAX_CONTEXTS]; +static struct aapoint_stage *Stage[MAX_CONTEXTS]; +static uint NumContexts; + +static void +add_aa_pipe_context(struct pipe_context *pipe, struct aapoint_stage *aa) +{ + assert(NumContexts < MAX_CONTEXTS); + Pipe[NumContexts] = pipe; + Stage[NumContexts] = aa; + NumContexts++; +} + +static struct aapoint_stage * +aapoint_stage_from_pipe(struct pipe_context *pipe) +{ + uint i; + for (i = 0; i < NumContexts; i++) { + if (Pipe[i] == pipe) + return Stage[i]; + } + assert(0); + return NULL; +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aapoint_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); + } + + return aafs; +} + + +static void +aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* save current */ + aapoint->fs = aafs; + /* pass-through */ + aapoint->driver_bind_fs_state(aapoint->pipe, aafs->driver_fs); +} + + +static void +aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* pass-through */ + aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs); + FREE(aafs); +} + + +/** + * Called by drivers that want to install this AA point prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA points. + */ +void +draw_install_aapoint_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct aapoint_stage *aapoint; + + /* + * Create / install AA point drawing / prim stage + */ + aapoint = draw_aapoint_stage( draw ); + assert(aapoint); + draw->pipeline.aapoint = &aapoint->stage; + + aapoint->pipe = pipe; + + /* save original driver functions */ + aapoint->driver_create_fs_state = pipe->create_fs_state; + aapoint->driver_bind_fs_state = pipe->bind_fs_state; + aapoint->driver_delete_fs_state = pipe->delete_fs_state; + + /* override the driver's functions */ + pipe->create_fs_state = aapoint_create_fs_state; + pipe->bind_fs_state = aapoint_bind_fs_state; + pipe->delete_fs_state = aapoint_delete_fs_state; + + add_aa_pipe_context(pipe, aapoint); +} diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index b90a9f474de..c28e78d33a3 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -105,6 +105,8 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.validate->destroy( draw->pipeline.validate ); if (draw->pipeline.aaline) draw->pipeline.aaline->destroy( draw->pipeline.aaline ); + if (draw->pipeline.aapoint) + draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 82035aa8ada..43b58bc0563 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -99,6 +99,9 @@ boolean draw_use_sse(struct draw_context *draw); void draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); +void +draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); + int draw_find_vs_output(struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 492c152ff98..8c469e74b73 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -180,6 +180,7 @@ struct draw_context struct draw_stage *offset; struct draw_stage *unfilled; struct draw_stage *stipple; + struct draw_stage *aapoint; struct draw_stage *aaline; struct draw_stage *wide; struct draw_stage *rasterize; diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 97e40e372b8..5167ef1e754 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -63,6 +63,11 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.aaline; } + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) { + draw->pipeline.aapoint->next = next; + next = draw->pipeline.aapoint; + } + if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines && !draw->rasterizer->line_smooth) || (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) || diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 316020cba63..de4c6c18e7f 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -328,8 +328,12 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, draw_set_rasterize_stage(softpipe->draw, softpipe->setup); } - /* enable aaline stage */ + /* plug in AA line/point stages */ draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); + draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); + + /* sp_prim_setup can do wide points (don't convert to quads) */ + draw_convert_wide_points(softpipe->draw, FALSE); sp_init_surface_functions(softpipe); -- cgit v1.2.3 From 446bfc32a83008e0865ec869bc80b920c907f10f Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 21 Feb 2008 16:56:32 -0700 Subject: gallium: new draw stage for polygon stipple. For hardware without native polygon stipple. Create a 32x32 alpha texture that encodes the stipple pattern. Modify the user's fragment program to sample the texture (with gl_FragCoord) and kill the fragment according to the texel value. Temporarily enabled in softpipe driver, replacing the sp_quad_stipple.c step. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_context.h | 3 + src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_pstipple.c | 717 +++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_validate.c | 6 + src/gallium/drivers/softpipe/sp_context.c | 5 + src/gallium/drivers/softpipe/sp_context.h | 7 + src/gallium/drivers/softpipe/sp_quad.c | 2 + 8 files changed, 742 insertions(+) create mode 100644 src/gallium/auxiliary/draw/draw_pstipple.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index b7e71ed3ec7..c9980f0b835 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -16,6 +16,7 @@ C_SOURCES = \ draw_flatshade.c \ draw_offset.c \ draw_prim.c \ + draw_pstipple.c \ draw_stipple.c \ draw_twoside.c \ draw_unfilled.c \ diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 43b58bc0563..c25301f71df 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -102,6 +102,9 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); void draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); +void +draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); + int draw_find_vs_output(struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 8c469e74b73..6abced139ba 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -182,6 +182,7 @@ struct draw_context struct draw_stage *stipple; struct draw_stage *aapoint; struct draw_stage *aaline; + struct draw_stage *pstipple; struct draw_stage *wide; struct draw_stage *rasterize; } pipeline; diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c new file mode 100644 index 00000000000..4048abf8567 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -0,0 +1,717 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Polygon stipple stage: implement polygon stipple with texture map and + * fragment program. The fragment program samples the texture and does + * a fragment kill for the stipple-failing fragments. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct pstip_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *pstip_fs; +}; + + +/** + * Subclass of draw_stage + */ +struct pstip_stage +{ + struct draw_stage stage; + + void *sampler_cso; + struct pipe_texture *texture; + uint sampler_unit; + + /* + * Currently bound state + */ + struct pstip_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + const struct pipe_poly_stipple *stipple; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + + void (*driver_set_sampler_texture)(struct pipe_context *, + unsigned sampler, + struct pipe_texture *); + + void (*driver_set_polygon_stipple)(struct pipe_context *, + const struct pipe_poly_stipple *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct pstip_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int wincoordInput; + int maxInput; + int maxSampler; /**< max sampler index found */ + int texTemp; /**< temp registers */ + int numImmed; + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +pstip_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + if ((int) decl->u.DeclarationRange.Last > pctx->maxSampler) + pctx->maxSampler = (int) decl->u.DeclarationRange.Last; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + pctx->maxInput = MAX2(pctx->maxInput, decl->u.DeclarationRange.Last); + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) + pctx->wincoordInput = (int) decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + pctx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +static void +pstip_transform_immed(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *immed) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + pctx->numImmed++; +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +pstip_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (pctx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction newInst; + uint i; + int wincoordInput; + const int sampler = pctx->maxSampler + 1; + + if (pctx->wincoordInput < 0) + wincoordInput = pctx->maxInput + 1; + else + wincoordInput = pctx->wincoordInput; + + /* find one free temp reg */ + for (i = 0; i < 32; i++) { + if ((pctx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (pctx->texTemp < 0) + pctx->texTemp = i; + else + break; + } + } + assert(pctx->texTemp >= 0); + + if (pctx->wincoordInput < 0) { + /* declare new position input reg */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = wincoordInput; + ctx->emit_declaration(ctx, &decl); + } + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = sampler; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = pctx->texTemp; + ctx->emit_declaration(ctx, &decl); + + /* emit immediate = {1/32, 1/32, 1, 1} + * The index/position of this immediate will be pctx->numImmed + */ + { + static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 }; + struct tgsi_full_immediate immed; + uint size = 4; + immed = tgsi_default_full_immediate(); + immed.Immediate.Size = 1 + size; /* one for the token itself */ + immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value; + ctx->emit_immediate(ctx, &immed); + } + + pctx->firstInstruction = FALSE; + + + /* + * Insert new MUL/TEX/KILP instructions at start of program + * Take gl_FragCoord, divide by 32 (stipple size), sample the + * texture and kill fragment if needed. + * + * We'd like to use non-normalized texcoords to index into a RECT + * texture, but we can only use GL_REPEAT wrap mode with normalized + * texcoords. Darn. + */ + + /* MUL texTemp, INPUT[wincoord], 1/32; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; + newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; + ctx->emit_instruction(ctx, &newInst); + + /* TEX texTemp, texTemp, sampler; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = sampler; + ctx->emit_instruction(ctx, &newInst); + + /* KILP texTemp; # if texTemp < 0, KILL fragment */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + } + + /* emit this instruction */ + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for doing polygon stipple. + * This will be the user's shader prefixed with a TEX and KIL instruction. + */ +static void +generate_pstip_fs(struct pstip_stage *pstip) +{ + const struct pipe_shader_state *orig_fs = &pstip->fs->state; + /*struct draw_context *draw = pstip->stage.draw;*/ + struct pipe_shader_state pstip_fs; + struct pstip_transform_context transform; + +#define MAX 1000 + + pstip_fs = *orig_fs; /* copy to init */ + pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.wincoordInput = -1; + transform.maxInput = -1; + transform.maxSampler = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = pstip_transform_inst; + transform.base.transform_declaration = pstip_transform_decl; + transform.base.transform_immediate = pstip_transform_immed; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) pstip_fs.tokens, + MAX, &transform.base); + +#if 1 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(pstip_fs.tokens, 0); +#endif + + pstip->sampler_unit = transform.maxSampler + 1; + + if (transform.wincoordInput < 0) { + pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; + pstip_fs.input_semantic_index[pstip_fs.num_inputs] = transform.maxInput; + pstip_fs.num_inputs++; + } + + pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); +} + + +/** + * Load texture image with current stipple pattern. + */ +static void +pstip_update_texture(struct pstip_stage *pstip) +{ + static const uint bit31 = 1 << 31; + struct pipe_context *pipe = pstip->pipe; + struct pipe_surface *surface; + const uint *stipple = pstip->state.stipple->stipple; + uint i, j; + ubyte *data; + + surface = pipe->get_tex_surface(pipe, pstip->texture, 0, 0, 0); + data = pipe_surface_map(surface); + + /* + * Load alpha texture. + * Note: 0 means keep the fragment, 255 means kill it. + * We'll negate the texel value and use KILP which kills if value + * is negative. + */ + for (i = 0; i < 32; i++) { + for (j = 0; j < 32; j++) { + if (stipple[i] & (bit31 >> j)) { + /* fragment "on" */ + data[i * surface->pitch + j] = 0; + } + else { + /* fragment "off" */ + data[i * surface->pitch + j] = 255; + } + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, pstip->texture); +} + + +/** + * Create the texture map we'll use for stippling. + */ +static void +pstip_create_texture(struct pstip_stage *pstip) +{ + struct pipe_context *pipe = pstip->pipe; + struct pipe_texture texTemp; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = 0; + texTemp.width[0] = 32; + texTemp.height[0] = 32; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + pstip->texture = pipe->texture_create(pipe, &texTemp); + + //pstip_update_texture(pstip); +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +pstip_create_sampler(struct pstip_stage *pstip) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = pstip->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = 0.0f; + + pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_pstip_fragment_shader(struct pstip_stage *pstip) +{ + if (!pstip->fs->pstip_fs) { + generate_pstip_fs(pstip); + } + pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); +} + + + +static INLINE struct pstip_stage * +pstip_stage( struct draw_stage *stage ) +{ + return (struct pstip_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + + +static void +pstip_first_tri(struct draw_stage *stage, struct prim_header *header) +{ + struct pstip_stage *pstip = pstip_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = pstip->pipe; + + assert(draw->rasterizer->poly_stipple_enable); + + /* + * Bind our fragprog, sampler and texture + */ + bind_pstip_fragment_shader(pstip); + + pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, pstip->sampler_cso); + pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, pstip->texture); + + /* now really draw first line */ + stage->tri = passthrough_tri; + stage->tri(stage, header); +} + + +static void +pstip_flush(struct draw_stage *stage, unsigned flags) +{ + /*struct draw_context *draw = stage->draw;*/ + struct pstip_stage *pstip = pstip_stage(stage); + struct pipe_context *pipe = pstip->pipe; + + stage->tri = pstip_first_tri; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, + pstip->state.sampler[pstip->sampler_unit]); + pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, + pstip->state.texture[pstip->sampler_unit]); +} + + +static void +pstip_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +pstip_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct pstip_stage * +draw_pstip_stage(struct draw_context *draw) +{ + struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); + + draw_alloc_temp_verts( &pstip->stage, 8 ); + + pstip->stage.draw = draw; + pstip->stage.next = NULL; + pstip->stage.point = passthrough_point; + pstip->stage.line = passthrough_line; + pstip->stage.tri = pstip_first_tri; + pstip->stage.flush = pstip_flush; + pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; + pstip->stage.destroy = pstip_destroy; + + return pstip; +} + + +/* + * XXX temporary? solution to mapping a pipe_context to a pstip_stage. + */ + +#define MAX_CONTEXTS 10 + +static struct pipe_context *Pipe[MAX_CONTEXTS]; +static struct pstip_stage *Stage[MAX_CONTEXTS]; +static uint NumContexts; + +static void +add_pstip_pipe_context(struct pipe_context *pipe, struct pstip_stage *pstip) +{ + assert(NumContexts < MAX_CONTEXTS); + Pipe[NumContexts] = pipe; + Stage[NumContexts] = pstip; + NumContexts++; +} + +static struct pstip_stage * +pstip_stage_from_pipe(struct pipe_context *pipe) +{ + uint i; + for (i = 0; i < NumContexts; i++) { + if (Pipe[i] == pipe) + return Stage[i]; + } + assert(0); + return NULL; +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +pstip_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); + } + + return aafs; +} + + +static void +pstip_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* save current */ + pstip->fs = aafs; + /* pass-through */ + pstip->driver_bind_fs_state(pstip->pipe, aafs->driver_fs); +} + + +static void +pstip_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* pass-through */ + pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +pstip_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.sampler[unit] = sampler; + /* pass-through */ + pstip->driver_bind_sampler_state(pstip->pipe, unit, sampler); +} + + +static void +pstip_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, struct pipe_texture *texture) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.texture[sampler] = texture; + /* pass-through */ + pstip->driver_set_sampler_texture(pstip->pipe, sampler, texture); +} + + +static void +pstip_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.stipple = stipple; + /* pass-through */ + pstip->driver_set_polygon_stipple(pstip->pipe, stipple); + + pstip_update_texture(pstip); +} + + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_pstipple_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct pstip_stage *pstip; + + /* + * Create / install AA line drawing / prim stage + */ + pstip = draw_pstip_stage( draw ); + assert(pstip); + draw->pipeline.pstipple = &pstip->stage; + + pstip->pipe = pipe; + + /* create special texture, sampler state */ + pstip_create_texture(pstip); + pstip_create_sampler(pstip); + + /* save original driver functions */ + pstip->driver_create_fs_state = pipe->create_fs_state; + pstip->driver_bind_fs_state = pipe->bind_fs_state; + pstip->driver_delete_fs_state = pipe->delete_fs_state; + + pstip->driver_bind_sampler_state = pipe->bind_sampler_state; + pstip->driver_set_sampler_texture = pipe->set_sampler_texture; + pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; + + /* override the driver's functions */ + pipe->create_fs_state = pstip_create_fs_state; + pipe->bind_fs_state = pstip_bind_fs_state; + pipe->delete_fs_state = pstip_delete_fs_state; + + pipe->bind_sampler_state = pstip_bind_sampler_state; + pipe->set_sampler_texture = pstip_set_sampler_texture; + pipe->set_polygon_stipple = pstip_set_polygon_stipple; + + add_pstip_pipe_context(pipe, pstip); +} diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 5167ef1e754..efd6793f2bb 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -82,6 +82,12 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) precalc_flat = 1; /* only needed for lines really */ } + if (draw->rasterizer->poly_stipple_enable + && draw->pipeline.pstipple) { + draw->pipeline.pstipple->next = next; + next = draw->pipeline.pstipple; + } + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { draw->pipeline.unfilled->next = next; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index de4c6c18e7f..2cdf3c75bf3 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -332,6 +332,11 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); +#if USE_DRAW_STAGE_PSTIPPLE + /* Do polygon stipple w/ texture map + frag prog? */ + draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); +#endif + /* sp_prim_setup can do wide points (don't convert to quads) */ draw_convert_wide_points(softpipe->draw, FALSE); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index a50cee76486..feeafc70846 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -39,6 +39,13 @@ #include "sp_quad.h" +/** + * This is a temporary variable for testing draw-stage polygon stipple. + * If zero, do stipple in sp_quad_stipple.c + */ +#define USE_DRAW_STAGE_PSTIPPLE 1 + + struct softpipe_winsys; struct softpipe_vbuf_render; struct draw_context; diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 6bd468a51cf..15b5594547d 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -112,7 +112,9 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp_push_quad_first( sp, sp->quad.earlyz ); } +#if !USE_DRAW_STAGE_PSTIPPLE if (sp->rasterizer->poly_stipple_enable) { sp_push_quad_first( sp, sp->quad.polygon_stipple ); } +#endif } -- cgit v1.2.3 From 73e0e567dea3cf4e1591acb3e894eecef812f367 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <darktama@beleth.(none)> Date: Fri, 22 Feb 2008 12:36:48 +1100 Subject: nouveau: fix build --- configs/default | 2 +- src/gallium/drivers/nv40/nv40_miptree.c | 38 +++++++++++++++++++++++++++++++++ src/gallium/drivers/nv40/nv40_surface.c | 31 --------------------------- 3 files changed, 39 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/configs/default b/configs/default index 48ddd29282b..af9ff6f4261 100644 --- a/configs/default +++ b/configs/default @@ -70,7 +70,7 @@ PROGRAM_DIRS = demos redbook samples glsl xdemos # Gallium directories and GALLIUM_AUXILIARY_DIRS = draw cso_cache pipebuffer tgsi rtasm util GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) -GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple failover +GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple nv30 nv40 nv50 failover GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVER_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) GALLIUM_WINSYS_DIRS = xlib diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 92e6b3a43df..5e1c7ade31b 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -63,6 +63,7 @@ nv40_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) if (!mt) return NULL; mt->base = *pt; + mt->base.refcount = 1; nv40_miptree_layout(mt); mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, @@ -95,10 +96,47 @@ nv40_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) } } +static void +nv40_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt) +{ +} + +static struct pipe_surface * +nv40_miptree_surface(struct pipe_context *pipe, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv40_miptree *nv40mt = (struct nv40_miptree *)pt; + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + pipe_buffer_reference(ws, &ps->buffer, nv40mt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = nv40mt->level[level].pitch / ps->cpp; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv40mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = nv40mt->level[level].image_offset[zslice]; + } else { + ps->offset = nv40mt->level[level].image_offset[0]; + } + + return ps; +} + void nv40_init_miptree_functions(struct nv40_context *nv40) { nv40->pipe.texture_create = nv40_miptree_create; nv40->pipe.texture_release = nv40_miptree_release; + nv40->pipe.texture_update = nv40_miptree_update; + nv40->pipe.get_tex_surface = nv40_miptree_surface; } diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c index 9726ab4e4dc..df5d7abdbfc 100644 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -73,36 +73,6 @@ nv40_surface_format_supported(struct pipe_context *pipe, return FALSE; } -static struct pipe_surface * -nv40_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct pipe_winsys *ws = pipe->winsys; - struct nv40_miptree *nv40mt = (struct nv40_miptree *)pt; - struct pipe_surface *ps; - - ps = ws->surface_alloc(ws); - if (!ps) - return NULL; - pipe_buffer_reference(ws, &ps->buffer, nv40mt->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = nv40mt->level[level].pitch / ps->cpp; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset = nv40mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ps->offset = nv40mt->level[level].image_offset[zslice]; - } else { - ps->offset = nv40mt->level[level].image_offset[0]; - } - - return ps; -} - static void nv40_surface_copy(struct pipe_context *pipe, unsigned do_flip, struct pipe_surface *dest, unsigned destx, unsigned desty, @@ -131,7 +101,6 @@ void nv40_init_surface_functions(struct nv40_context *nv40) { nv40->pipe.is_format_supported = nv40_surface_format_supported; - nv40->pipe.get_tex_surface = nv40_get_tex_surface; nv40->pipe.surface_copy = nv40_surface_copy; nv40->pipe.surface_fill = nv40_surface_fill; } -- cgit v1.2.3 From 5b2ff28a2fd3bb0ca9df569edcaf80e8141ccaa1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <darktama@beleth.(none)> Date: Fri, 22 Feb 2008 13:32:51 +1100 Subject: nv40: rework fragment texture state --- src/gallium/drivers/nv40/nv40_context.h | 2 +- src/gallium/drivers/nv40/nv40_fragtex.c | 36 +++++++++++++++++++++--------- src/gallium/drivers/nv40/nv40_state.c | 2 ++ src/gallium/drivers/nv40/nv40_state_emit.c | 22 +++++++----------- 4 files changed, 36 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 69062a8a202..cbc798fbd6e 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -155,7 +155,6 @@ struct nv40_context { struct nv40_state state; unsigned fallback; - struct nouveau_stateobj *so_fragtex[16]; struct nouveau_stateobj *so_vtxbuf; struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; @@ -209,6 +208,7 @@ extern struct nv40_state_entry nv40_state_blend_colour; extern struct nv40_state_entry nv40_state_zsa; extern struct nv40_state_entry nv40_state_viewport; extern struct nv40_state_entry nv40_state_framebuffer; +extern struct nv40_state_entry nv40_state_fragtex; /* nv40_vbo.c */ extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 811f3098ba7..826d4a9478a 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -52,7 +52,7 @@ nv40_fragtex_format(uint pipe_format) } -static void +static struct nouveau_stateobj * nv40_fragtex_build(struct nv40_context *nv40, int unit) { struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; @@ -90,7 +90,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) break; default: NOUVEAU_ERR("Unknown target %d\n", pt->target); - return; + return NULL; } if (swizzled) { @@ -117,15 +117,14 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) so_method(so, nv40->hw->curie, NV40TCL_TEX_SIZE1(unit), 1); so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); - so_emit(nv40->nvws, so); - so_ref (so, &nv40->so_fragtex[unit]); - so_ref (NULL, &so); + return so; } -void -nv40_fragtex_bind(struct nv40_context *nv40) +static boolean +nv40_fragtex_validate(struct nv40_context *nv40) { struct nv40_fragment_program *fp = nv40->pipe_state.fragprog; + struct nouveau_stateobj *so; unsigned samplers, unit; samplers = nv40->fp_samplers & ~fp->samplers; @@ -133,9 +132,12 @@ nv40_fragtex_bind(struct nv40_context *nv40) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so_ref(NULL, &nv40->so_fragtex[unit]); - BEGIN_RING(curie, NV40TCL_TEX_ENABLE(unit), 1); - OUT_RING (0); + so = so_new(2, 0); + so_method(so, nv40->hw->curie, NV40TCL_TEX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); + so_ref(NULL, &so); + nv40->hw_dirty |= (1 << (NV40_STATE_FRAGTEX0 + unit)); } samplers = nv40->dirty_samplers & fp->samplers; @@ -143,9 +145,21 @@ nv40_fragtex_bind(struct nv40_context *nv40) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - nv40_fragtex_build(nv40, unit); + so = nv40_fragtex_build(nv40, unit); + so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); + so_ref(NULL, &so); + nv40->hw_dirty |= (1 << (NV40_STATE_FRAGTEX0 + unit)); } nv40->fp_samplers = fp->samplers; + return FALSE; } +struct nv40_state_entry nv40_state_fragtex = { + .validate = nv40_fragtex_validate, + .dirty = { + .pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG, + .hw = 0 + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 84818d6729e..74cbabb023e 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -259,6 +259,7 @@ nv40_sampler_state_bind(struct pipe_context *pipe, unsigned unit, nv40->tex_sampler[unit] = ps; nv40->dirty_samplers |= (1 << unit); + nv40->dirty |= NV40_NEW_SAMPLER; } static void @@ -275,6 +276,7 @@ nv40_set_sampler_texture(struct pipe_context *pipe, unsigned unit, nv40->tex_miptree[unit] = (struct nv40_miptree *)miptree; nv40->dirty_samplers |= (1 << unit); + nv40->dirty |= NV40_NEW_SAMPLER; } static void * diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index a9ca71c5e93..6d87b7b52b2 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -8,6 +8,7 @@ static struct nv40_state_entry *render_states[] = { &nv40_state_scissor, &nv40_state_stipple, &nv40_state_fragprog, + &nv40_state_fragtex, &nv40_state_vertprog, &nv40_state_blend, &nv40_state_blend_colour, @@ -35,6 +36,7 @@ nv40_state_validate(struct nv40_context *nv40) states++; } + nv40->dirty = 0; if (nv40->fallback & NV40_FALLBACK_TNL && !(last_fallback & NV40_FALLBACK_TNL)) { @@ -72,7 +74,8 @@ nv40_state_emit(struct nv40_context *nv40) for (i = 0; i < 16; i++) { if (!(nv40->fp_samplers & (1 << i))) continue; - so_emit_reloc_markers(nv40->nvws, nv40->so_fragtex[i]); + so_emit_reloc_markers(nv40->nvws, + nv40->state.hw[NV40_STATE_FRAGTEX0+i]); } so_emit_reloc_markers(nv40->nvws, nv40->state.hw[NV40_STATE_FRAGPROG]); } @@ -81,20 +84,11 @@ void nv40_emit_hw_state(struct nv40_context *nv40) { nv40_state_validate(nv40); - - if (nv40->dirty_samplers || (nv40->dirty & NV40_NEW_FRAGPROG)) { - nv40_fragtex_bind(nv40); - - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (1); - nv40->dirty &= ~NV40_NEW_FRAGPROG; - } - nv40_state_emit(nv40); - nv40->dirty_samplers = 0; - nv40->dirty = 0; + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (2); + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (1); } -- cgit v1.2.3 From c2e36bdd1a58ba6f58c4e72db1f7f64e8bd05901 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <darktama@beleth.(none)> Date: Fri, 22 Feb 2008 13:55:18 +1100 Subject: nv40: move hw_dirty --- src/gallium/drivers/nv40/nv40_context.h | 5 ++--- src/gallium/drivers/nv40/nv40_fragtex.c | 9 +++++---- src/gallium/drivers/nv40/nv40_state_emit.c | 24 ++++++++++++------------ 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index cbc798fbd6e..c533b9e0ef9 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -115,7 +115,9 @@ struct nv40_blend_state { struct nv40_state { unsigned scissor_enabled; unsigned stipple_enabled; + unsigned fp_samplers; + unsigned dirty; struct nouveau_stateobj *hw[NV40_STATE_MAX]; }; @@ -129,13 +131,10 @@ struct nv40_context { int chipset; unsigned dirty; - unsigned hw_dirty; struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; unsigned dirty_samplers; - unsigned fp_samplers; - unsigned vp_samplers; struct { struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 826d4a9478a..3d27a9bf134 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -124,10 +124,11 @@ static boolean nv40_fragtex_validate(struct nv40_context *nv40) { struct nv40_fragment_program *fp = nv40->pipe_state.fragprog; + struct nv40_state *state = &nv40->state; struct nouveau_stateobj *so; unsigned samplers, unit; - samplers = nv40->fp_samplers & ~fp->samplers; + samplers = state->fp_samplers & ~fp->samplers; while (samplers) { unit = ffs(samplers) - 1; samplers &= ~(1 << unit); @@ -137,7 +138,7 @@ nv40_fragtex_validate(struct nv40_context *nv40) so_data (so, 0); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); so_ref(NULL, &so); - nv40->hw_dirty |= (1 << (NV40_STATE_FRAGTEX0 + unit)); + state->dirty |= (1 << (NV40_STATE_FRAGTEX0 + unit)); } samplers = nv40->dirty_samplers & fp->samplers; @@ -148,10 +149,10 @@ nv40_fragtex_validate(struct nv40_context *nv40) so = nv40_fragtex_build(nv40, unit); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); so_ref(NULL, &so); - nv40->hw_dirty |= (1 << (NV40_STATE_FRAGTEX0 + unit)); + state->dirty |= (1 << (NV40_STATE_FRAGTEX0 + unit)); } - nv40->fp_samplers = fp->samplers; + nv40->state.fp_samplers = fp->samplers; return FALSE; } diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 6d87b7b52b2..af09ed47d6e 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -31,7 +31,7 @@ nv40_state_validate(struct nv40_context *nv40) if (nv40->dirty & e->dirty.pipe) { if (e->validate(nv40)) - nv40->hw_dirty |= (1 << e->dirty.hw); + nv40->state.dirty |= (1 << e->dirty.hw); } states++; @@ -60,24 +60,24 @@ nv40_state_validate(struct nv40_context *nv40) static void nv40_state_emit(struct nv40_context *nv40) { - unsigned i; + struct nv40_state *state = &nv40->state; + unsigned i, samplers; - while (nv40->hw_dirty) { - unsigned idx = ffs(nv40->hw_dirty) - 1; - nv40->hw_dirty &= ~(1 << idx); + while (state->dirty) { + unsigned idx = ffs(state->dirty) - 1; - so_ref (nv40->state.hw[idx], &nv40->hw->state[idx]); + so_ref (state->hw[idx], &nv40->hw->state[idx]); so_emit(nv40->nvws, nv40->hw->state[idx]); + state->dirty &= ~(1 << idx); } - so_emit_reloc_markers(nv40->nvws, nv40->state.hw[NV40_STATE_FB]); - for (i = 0; i < 16; i++) { - if (!(nv40->fp_samplers & (1 << i))) - continue; + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { so_emit_reloc_markers(nv40->nvws, - nv40->state.hw[NV40_STATE_FRAGTEX0+i]); + state->hw[NV40_STATE_FRAGTEX0+i]); + samplers &= ~(1 << i); } - so_emit_reloc_markers(nv40->nvws, nv40->state.hw[NV40_STATE_FRAGPROG]); + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]); } void -- cgit v1.2.3 From 7b938431d0ab5ccce1e7e2b1c38e1dcbdc6001e8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <darktama@beleth.(none)> Date: Fri, 22 Feb 2008 14:46:48 +1100 Subject: nv40: stateobj start out with 0 refcount --- src/gallium/drivers/nouveau/nouveau_stateobj.h | 2 +- src/gallium/drivers/nv40/nv40_fragprog.c | 1 - src/gallium/drivers/nv40/nv40_fragtex.c | 2 -- src/gallium/drivers/nv40/nv40_state.c | 12 ++++++------ src/gallium/drivers/nv40/nv40_state_blend.c | 1 - src/gallium/drivers/nv40/nv40_state_fb.c | 1 - src/gallium/drivers/nv40/nv40_state_scissor.c | 1 - src/gallium/drivers/nv40/nv40_state_stipple.c | 1 - src/gallium/drivers/nv40/nv40_state_viewport.c | 1 - src/gallium/drivers/nv40/nv40_vbo.c | 1 - src/gallium/drivers/nv40/nv40_vertprog.c | 1 - 11 files changed, 7 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index 459cc7d77ae..439c7e4734a 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -32,7 +32,7 @@ so_new(unsigned push, unsigned reloc) struct nouveau_stateobj *so; so = MALLOC(sizeof(struct nouveau_stateobj)); - so->refcount = 1; + so->refcount = 0; so->push = MALLOC(sizeof(unsigned) * push); so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index db2613ef8bb..a4a1ea01e03 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -817,7 +817,6 @@ nv40_fragprog_validate(struct nv40_context *nv40) so_method(so, nv40->hw->curie, NV40TCL_FP_CONTROL, 1); so_data (so, fp->fp_control); so_ref(so, &fp->so); - so_ref(NULL, &so); update_constants: if (fp->nr_consts) { diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 3d27a9bf134..c8a8120f305 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -137,7 +137,6 @@ nv40_fragtex_validate(struct nv40_context *nv40) so_method(so, nv40->hw->curie, NV40TCL_TEX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); state->dirty |= (1 << (NV40_STATE_FRAGTEX0 + unit)); } @@ -148,7 +147,6 @@ nv40_fragtex_validate(struct nv40_context *nv40) so = nv40_fragtex_build(nv40, unit); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); state->dirty |= (1 << (NV40_STATE_FRAGTEX0 + unit)); } diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 74cbabb023e..107e60f1798 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -11,7 +11,7 @@ nv40_blend_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nouveau_grobj *curie = nv40->hw->curie; - struct nv40_blend_state *bso = MALLOC(sizeof(*bso)); + struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); struct nouveau_stateobj *so = so_new(16, 0); if (cso->blend_enable) { @@ -47,7 +47,7 @@ nv40_blend_state_create(struct pipe_context *pipe, so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); so_data (so, cso->dither ? 1 : 0); - bso->so = so; + so_ref(so, &bso->so); bso->pipe = *cso; return (void *)bso; } @@ -284,7 +284,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_rasterizer_state *rsso = MALLOC(sizeof(*rsso)); + struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); struct nouveau_stateobj *so = so_new(32, 0); struct nouveau_grobj *curie = nv40->hw->curie; @@ -389,7 +389,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, so_data(so, 0); } - rsso->so = so; + so_ref(so, &rsso->so); rsso->pipe = *cso; return (void *)rsso; } @@ -417,7 +417,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *cso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_zsa_state *zsaso = MALLOC(sizeof(*zsaso)); + struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); struct nouveau_stateobj *so = so_new(32, 0); so_method(so, nv40->hw->curie, NV40TCL_DEPTH_FUNC, 3); @@ -460,7 +460,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, 0); } - zsaso->so = so; + so_ref(so, &zsaso->so); zsaso->pipe = *cso; return (void *)zsaso; } diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c index b12f8b03dd8..81b927a67a9 100644 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -28,7 +28,6 @@ nv40_state_blend_colour_validate(struct nv40_context *nv40) (float_to_ubyte(bcol->color[2]) << 0))); so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]); - so_ref(NULL, &so); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index d3032f1be5a..c3bf4d43a33 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -143,7 +143,6 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) so_data (so, ((h - 1) << 16) | 0); so_ref(so, &nv40->state.hw[NV40_STATE_FB]); - so_ref(NULL, &so); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index 2871fa2516d..ee797094d39 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -22,7 +22,6 @@ nv40_state_scissor_validate(struct nv40_context *nv40) } so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]); - so_ref(NULL, &so); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c index bd163582a39..aad4d179ac4 100644 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -27,7 +27,6 @@ nv40_state_stipple_validate(struct nv40_context *nv40) } so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]); - so_ref(NULL, &so); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 79fcc31a8bf..68820d31339 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -17,7 +17,6 @@ nv40_state_viewport_validate(struct nv40_context *nv40) so_data (so, fui(vpt->scale[3])); so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]); - so_ref(NULL, &so); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 3bfcb264db1..5abe4c9af10 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -159,7 +159,6 @@ nv40_vbo_arrays_update(struct nv40_context *nv40, struct pipe_buffer *ib, so_emit(nv40->nvws, vtxfmt); so_emit(nv40->nvws, vtxbuf); so_ref (vtxbuf, &nv40->so_vtxbuf); - so_ref (NULL, &vtxbuf); so_ref (NULL, &vtxfmt); } diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 8a2d2336974..c482964adc0 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -678,7 +678,6 @@ check_gpu_resources: so_data (so, vp->ir); so_data (so, vp->or); so_ref(so, &vp->so); - so_ref(NULL, &so); upload_code = TRUE; } -- cgit v1.2.3 From 69a7c9739bc0f11e66e11ab410d813fa69fe5fc9 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Wed, 20 Feb 2008 22:05:52 +0100 Subject: gallium: Silence compiler warnings on Windows. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 43d5085895f..0ced585c7f3 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -343,7 +343,7 @@ nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size) switch (wrapMode) { case PIPE_TEX_WRAP_CLAMP: i = ifloor(s); - return CLAMP(i, 0, size-1); + return CLAMP(i, 0, (int) size-1); case PIPE_TEX_WRAP_CLAMP_TO_EDGE: /* fall-through */ case PIPE_TEX_WRAP_CLAMP_TO_BORDER: @@ -366,7 +366,7 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, switch (wrapMode) { case PIPE_TEX_WRAP_CLAMP: /* Not exactly what the spec says, but it matches NVIDIA output */ - s = CLAMP(s - 0.5F, 0.0, (float) size - 1.0); + s = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f); *i0 = ifloor(s); *i1 = *i0 + 1; break; @@ -377,7 +377,7 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, s -= 0.5F; *i0 = ifloor(s); *i1 = *i0 + 1; - if (*i1 > size - 1) + if (*i1 > (int) size - 1) *i1 = size - 1; break; default: -- cgit v1.2.3 From e0de82fbcb181f5c3e372ed66b692970a9e80766 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Wed, 20 Feb 2008 22:25:18 +0100 Subject: gallium: Fix build on Windows. --- src/gallium/drivers/softpipe/sp_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 6ba0f09e0a1..3d26e8ca669 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -139,7 +139,7 @@ softpipe_texture_update(struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context(pipe); uint unit; for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { - if (softpipe->texture[unit] == texture) { + if (&softpipe->texture[unit]->base == texture) { sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); } } -- cgit v1.2.3 From b4d050ffccf46e5e7b40d8a2f3868cc73fcf110e Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Fri, 22 Feb 2008 08:50:37 -0700 Subject: cell: fix build: s/dest/vertex/ --- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index f7ef72e5a2c..42cc47cbfea 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -123,7 +123,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) for (j = 0; j < n; j++) { vs->elts[j] = draw->vs.queue[i + j].elt; - vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].dest; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; } for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { -- cgit v1.2.3 From 0bb53709e837ac709f31ace8852599e885f4d090 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Fri, 22 Feb 2008 08:52:24 -0700 Subject: cell: added function prototypes to silence warnings --- src/gallium/drivers/cell/ppu/cell_context.h | 2 ++ src/gallium/drivers/cell/ppu/cell_flush.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 3b687bb8689..1433a4925fa 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -134,6 +134,8 @@ extern void cell_vertex_shader_queue_flush(struct draw_context *draw); +/* XXX find a better home for this */ +extern void cell_update_vertex_fetch(struct draw_context *draw); #endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h index eda351b1cbc..7f940ae76b6 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -35,4 +35,8 @@ cell_flush(struct pipe_context *pipe, unsigned flags); extern void cell_flush_int(struct pipe_context *pipe, unsigned flags); +extern void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, + unsigned size); + #endif -- cgit v1.2.3 From 12c14c31b71d4bac494c4470a34e28ec66309254 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Fri, 22 Feb 2008 08:56:55 -0700 Subject: gallium: fix brokenb build --- src/gallium/drivers/softpipe/sp_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 3d26e8ca669..6ba0f09e0a1 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -139,7 +139,7 @@ softpipe_texture_update(struct pipe_context *pipe, struct softpipe_context *softpipe = softpipe_context(pipe); uint unit; for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { - if (&softpipe->texture[unit]->base == texture) { + if (softpipe->texture[unit] == texture) { sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); } } -- cgit v1.2.3 From 0a5ed0667e6bd934a150bf7f784349fa7f595309 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 22 Feb 2008 17:18:27 +0100 Subject: nv30: wrong number of parameters --- src/gallium/drivers/nv30/nv30_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 53368561e07..db4dac24c74 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -609,7 +609,7 @@ nv30_set_framebuffer_state(struct pipe_context *pipe, } if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - BEGIN_RING(rankine, NV34TCL_COLOR1_PITCH, 2); + BEGIN_RING(rankine, NV34TCL_COLOR1_PITCH, 1); OUT_RING (rt[1]->pitch * rt[1]->cpp); nv30->rt[1] = rt[1]->buffer; } -- cgit v1.2.3 From d8a9d850b9d63c7398d596fad2dfd2f05e55ef7d Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 22 Feb 2008 22:21:15 +0100 Subject: nv30: init zeta to NULL, use color pitch if no zeta --- src/gallium/drivers/nv30/nv30_state.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index db4dac24c74..aa3fe7837e4 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -537,7 +537,7 @@ nv30_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct nv30_context *nv30 = nv30_context(pipe); - struct pipe_surface *rt[4], *zeta; + struct pipe_surface *rt[4], *zeta = NULL; uint32_t rt_enable, rt_format, w, h; int i, colour_format = 0, zeta_format = 0; @@ -603,8 +603,15 @@ nv30_set_framebuffer_state(struct pipe_context *pipe, } if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { + uint32_t pitch = rt[0]->pitch * rt[0]->cpp; + if (zeta) { + pitch |= (zeta->pitch * zeta->cpp)<<16; + } else { + pitch |= pitch<<16; + } + BEGIN_RING(rankine, NV34TCL_COLOR0_PITCH, 1); - OUT_RING ( (rt[0]->pitch * rt[0]->cpp) | ( (zeta->pitch * zeta->cpp) << 16) ); + OUT_RING ( pitch ); nv30->rt[0] = rt[0]->buffer; } -- cgit v1.2.3 From c74900ee5d80c7c2b7cbe4ed87395526a742a13e Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Fri, 22 Feb 2008 16:48:05 -0700 Subject: gallium/i915: overhaul of fragment shader compilation, constant/immediate allocation Before, fragment shaders were translated to i915 hw code at bind time, rather than create time. Now there's an i915_fragment_shader struct with the expected contents that's created by i915_create_fs_state(). Translation to i915 code takes place there too. Immediates are handled correctly now. During program translation we keep track of which constant buffer slots are free (i.e. not referenced by the shader). Then the TGSI immediates and ancillary immediates (introduced for SIN/COS/etc) are put into those free slots. When it's time to upload the constant buffer, use the fp->constant_flags[] array to determine if we should grab an immediate from the shader, or a user-defined parameter from the gallium constant buffer. --- src/gallium/drivers/i915simple/i915_context.h | 42 ++++++- src/gallium/drivers/i915simple/i915_fpc.h | 23 ++-- src/gallium/drivers/i915simple/i915_fpc_emit.c | 101 +++++----------- .../drivers/i915simple/i915_fpc_translate.c | 132 +++++++++++++-------- src/gallium/drivers/i915simple/i915_state.c | 36 ++++-- .../drivers/i915simple/i915_state_derived.c | 3 +- src/gallium/drivers/i915simple/i915_state_emit.c | 39 ++++-- 7 files changed, 226 insertions(+), 150 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 2d876925b2c..5cc38cdc851 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -79,6 +79,40 @@ #define I915_MAX_CONSTANT 32 +/** See constant_flags[] below */ +#define I915_CONSTFLAG_USER 0x1f + + +/** + * Subclass of pipe_shader_state + */ +struct i915_fragment_shader +{ + struct pipe_shader_state state; + uint *program; + uint program_len; + + /** + * constants introduced during translation. + * These are placed at the end of the constant buffer and grow toward + * the beginning (eg: slot 31, 30 29, ...) + * User-provided constants start at 0. + * This allows both types of constants to co-exist (until there's too many) + * and doesn't require regenerating/changing the fragment program to + * shuffle constants around. + */ + uint num_constants; + float constants[I915_MAX_CONSTANT][4]; + + /** + * Status of each constant + * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding + * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) + * Else, the bitmask indicates which components are occupied by immediates. + */ + ubyte constant_flags[I915_MAX_CONSTANT]; +}; + struct i915_cache_context; @@ -93,11 +127,6 @@ struct i915_state float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; /** number of constants passed in through a constant buffer */ uint num_user_constants[PIPE_SHADER_TYPES]; - /** user constants, plus extra constants from shader translation */ - uint num_constants[PIPE_SHADER_TYPES]; - - uint *program; - uint program_len; /* texture sampler state */ unsigned sampler[I915_TEX_UNITS][3]; @@ -187,7 +216,8 @@ struct i915_context const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; const struct i915_depth_stencil_state *depth_stencil; const struct i915_rasterizer_state *rasterizer; - const struct pipe_shader_state *fs; + + struct i915_fragment_shader *fs; struct pipe_blend_color blend_color; struct pipe_clip_state clip; diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h index 8c7b68aefb5..250dfe6dbf0 100644 --- a/src/gallium/drivers/i915simple/i915_fpc.h +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -44,9 +44,16 @@ * Program translation state */ struct i915_fp_compile { - const struct pipe_shader_state *shader; + struct i915_fragment_shader *shader; /* the shader we're compiling */ - struct vertex_info *vertex_info; + boolean used_constants[I915_MAX_CONSTANT]; + + /** maps TGSI immediate index to constant slot */ + uint num_immediates; + uint immediates_map[I915_MAX_CONSTANT]; + float immediates[I915_MAX_CONSTANT][4]; + + boolean first_instruction; uint declarations[I915_PROGRAM_SIZE]; uint program[I915_PROGRAM_SIZE]; @@ -57,11 +64,6 @@ struct i915_fp_compile { uint output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; uint output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - /** points into the i915->current.constants array: */ - float (*constants)[4]; - uint num_constants; - uint constant_flags[I915_MAX_CONSTANT]; /**< status of each constant */ - uint *csr; /**< Cursor, points into program. */ uint *decl; /**< Cursor, points into declarations. */ @@ -155,7 +157,9 @@ swizzle(int reg, uint x, uint y, uint z, uint w) /*********************************************************************** * Public interface for the compiler */ -extern void i915_translate_fragment_program( struct i915_context *i915 ); +extern void +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs); @@ -206,8 +210,5 @@ extern void i915_disassemble_program(const uint * program, uint sz); extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...); -extern void -i915_translate_fragment_program(struct i915_context *i915); - #endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c index 74924ff0a1d..a59ee234037 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_emit.c +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -61,8 +61,6 @@ (REG_NR_MASK << UREG_NR_SHIFT)) -#define I915_CONSTFLAG_PARAM 0x1f - uint i915_get_temp(struct i915_fp_compile *p) { @@ -235,6 +233,7 @@ uint i915_emit_texld( struct i915_fp_compile *p, uint i915_emit_const1f(struct i915_fp_compile * p, float c0) { + struct i915_fragment_shader *ifs = p->shader; unsigned reg, idx; if (c0 == 0.0) @@ -243,15 +242,15 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0) return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) continue; for (idx = 0; idx < 4; idx++) { - if (!(p->constant_flags[reg] & (1 << idx)) || - p->constants[reg][idx] == c0) { - p->constants[reg][idx] = c0; - p->constant_flags[reg] |= 1 << idx; - if (reg + 1 > p->num_constants) - p->num_constants = reg + 1; + if (!(ifs->constant_flags[reg] & (1 << idx)) || + ifs->constants[reg][idx] == c0) { + ifs->constants[reg][idx] = c0; + ifs->constant_flags[reg] |= 1 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); } } @@ -264,6 +263,7 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0) uint i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) { + struct i915_fragment_shader *ifs = p->shader; unsigned reg, idx; if (c0 == 0.0) @@ -277,16 +277,16 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (p->constant_flags[reg] == 0xf || - p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + if (ifs->constant_flags[reg] == 0xf || + ifs->constant_flags[reg] == I915_CONSTFLAG_USER) continue; for (idx = 0; idx < 3; idx++) { - if (!(p->constant_flags[reg] & (3 << idx))) { - p->constants[reg][idx + 0] = c0; - p->constants[reg][idx + 1] = c1; - p->constant_flags[reg] |= 3 << idx; - if (reg + 1 > p->num_constants) - p->num_constants = reg + 1; + if (!(ifs->constant_flags[reg] & (3 << idx))) { + ifs->constants[reg][idx + 0] = c0; + ifs->constants[reg][idx + 1] = c1; + ifs->constant_flags[reg] |= 3 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); } } @@ -302,25 +302,26 @@ uint i915_emit_const4f(struct i915_fp_compile * p, float c0, float c1, float c2, float c3) { + struct i915_fragment_shader *ifs = p->shader; unsigned reg; for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (p->constant_flags[reg] == 0xf && - p->constants[reg][0] == c0 && - p->constants[reg][1] == c1 && - p->constants[reg][2] == c2 && - p->constants[reg][3] == c3) { + if (ifs->constant_flags[reg] == 0xf && + ifs->constants[reg][0] == c0 && + ifs->constants[reg][1] == c1 && + ifs->constants[reg][2] == c2 && + ifs->constants[reg][3] == c3) { return UREG(REG_TYPE_CONST, reg); } - else if (p->constant_flags[reg] == 0) { - - p->constants[reg][0] = c0; - p->constants[reg][1] = c1; - p->constants[reg][2] = c2; - p->constants[reg][3] = c3; - p->constant_flags[reg] = 0xf; - if (reg + 1 > p->num_constants) - p->num_constants = reg + 1; + else if (ifs->constant_flags[reg] == 0) { + + ifs->constants[reg][0] = c0; + ifs->constants[reg][1] = c1; + ifs->constants[reg][2] = c2; + ifs->constants[reg][3] = c3; + ifs->constant_flags[reg] = 0xf; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; return UREG(REG_TYPE_CONST, reg); } } @@ -335,41 +336,3 @@ i915_emit_const4fv(struct i915_fp_compile * p, const float * c) { return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); } - - -#if 00000/*UNUSED*/ -/* Reserve a slot in the constant file for a Mesa state parameter. - * These will later need to be tracked on statechanges, but that is - * done elsewhere. - */ -uint -i915_emit_param4fv(struct i915_fp_compile * p, const float * values) -{ - struct i915_fragment_program *fp = p->fp; - int i; - - for (i = 0; i < fp->nr_params; i++) { - if (fp->param[i].values == values) - return UREG(REG_TYPE_CONST, fp->param[i].reg); - } - - if (p->constants->nr_constants == I915_MAX_CONSTANT || - fp->nr_params == I915_MAX_CONSTANT) { - i915_program_error(p, "i915_emit_param4fv: out of constants\n"); - return 0; - } - - { - int reg = p->constants->nr_constants++; - int i = fp->nr_params++; - - assert (p->constant_flags[reg] == 0); - p->constant_flags[reg] = I915_CONSTFLAG_PARAM; - - fp->param[i].values = values; - fp->param[i].reg = reg; - - return UREG(REG_TYPE_CONST, reg); - } -} -#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 6c1524c768e..afe7e25dce7 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -34,6 +34,7 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_dump.h" #include "draw/draw_vertex.h" @@ -97,19 +98,19 @@ negate(int reg, int x, int y, int z, int w) } +/** + * In the event of a translation failure, we'll generate a simple color + * pass-through program. + */ static void -i915_use_passthrough_shader(struct i915_context *i915) +i915_use_passthrough_shader(struct i915_fragment_shader *fs) { - debug_printf("**** Using i915 pass-through fragment shader\n"); - - i915->current.program = (uint *) MALLOC(sizeof(passthrough)); - if (i915->current.program) { - memcpy(i915->current.program, passthrough, sizeof(passthrough)); - i915->current.program_len = Elements(passthrough); + fs->program = (uint *) MALLOC(sizeof(passthrough)); + if (fs->program) { + memcpy(fs->program, passthrough, sizeof(passthrough)); + fs->program_len = Elements(passthrough); } - - i915->current.num_constants[PIPE_SHADER_FRAGMENT] = 0; - i915->current.num_user_constants[PIPE_SHADER_FRAGMENT] = 0; + fs->num_constants = 0; } @@ -161,9 +162,6 @@ src_vector(struct i915_fp_compile *p, * We also use a texture coordinate to pass wpos when possible. */ - /* use vertex format info to map a slot number to a VF attrib */ - assert(index < p->vertex_info->num_attribs); - sem_name = p->input_semantic_name[index]; sem_ind = p->input_semantic_index[index]; @@ -201,7 +199,8 @@ src_vector(struct i915_fp_compile *p, break; case TGSI_FILE_IMMEDIATE: - /* XXX unfinished - need to append immediates onto const buffer */ + assert(index < p->num_immediates); + index = p->immediates_map[index]; /* fall-through */ case TGSI_FILE_CONSTANT: src = UREG(REG_TYPE_CONST, index); @@ -896,6 +895,7 @@ static void i915_translate_instructions(struct i915_fp_compile *p, const struct tgsi_token *tokens) { + struct i915_fragment_shader *ifs = p->shader; struct tgsi_parse_context parse; tgsi_parse_init( &parse, tokens ); @@ -928,13 +928,54 @@ i915_translate_instructions(struct i915_fp_compile *p, p->output_semantic_name[ind] = sem; p->output_semantic_index[ind] = semi; } + else if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_CONSTANT) { + uint i; + for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + i++) { + assert(ifs->constant_flags[i] == 0x0); + ifs->constant_flags[i] = I915_CONSTFLAG_USER; + ifs->num_constants = MAX2(ifs->num_constants, i + 1); + } + } break; case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX append the immediate to the const buffer... */ + { + const struct tgsi_full_immediate *imm + = &parse.FullToken.FullImmediate; + const uint pos = p->num_immediates++; + uint j; + for (j = 0; j < imm->Immediate.Size; j++) { + p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float; + } + } break; case TGSI_TOKEN_TYPE_INSTRUCTION: + if (p->first_instruction) { + /* resolve location of immediates */ + uint i, j; + for (i = 0; i < p->num_immediates; i++) { + /* find constant slot for this immediate */ + for (j = 0; j < I915_MAX_CONSTANT; j++) { + if (ifs->constant_flags[j] == 0x0) { + memcpy(ifs->constants[j], + p->immediates[i], + 4 * sizeof(float)); + /*printf("immediate %d maps to const %d\n", i, j);*/ + ifs->constant_flags[j] = 0xf; /* all four comps used */ + p->immediates_map[i] = j; + ifs->num_constants = MAX2(ifs->num_constants, j + 1); + break; + } + } + } + + p->first_instruction = FALSE; + } + i915_translate_instruction(p, &parse.FullToken.FullInstruction); break; @@ -950,27 +991,28 @@ i915_translate_instructions(struct i915_fp_compile *p, static struct i915_fp_compile * i915_init_compile(struct i915_context *i915, - const struct pipe_shader_state *fs) + struct i915_fragment_shader *ifs) { struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); - p->shader = i915->fs; + p->shader = ifs; - p->vertex_info = &i915->current.vertex_info; - - /* new constants found during translation get appended after the - * user-provided constants. + /* Put new constants at end of const buffer, growing downward. + * The problem is we don't know how many user-defined constants might + * be specified with pipe->set_constant_buffer(). + * Should pre-scan the user's program to determine the highest-numbered + * constant referenced. */ - p->constants = i915->current.constants[PIPE_SHADER_FRAGMENT]; - p->num_constants = i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]; + ifs->num_constants = 0; + memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + + p->first_instruction = TRUE; p->nr_tex_indirect = 1; /* correct? */ p->nr_tex_insn = 0; p->nr_alu_insn = 0; p->nr_decl_insn = 0; - memset(p->constant_flags, 0, sizeof(p->constant_flags)); - p->csr = p->program; p->decl = p->declarations; p->decl_s = 0; @@ -993,6 +1035,7 @@ i915_init_compile(struct i915_context *i915, static void i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) { + struct i915_fragment_shader *ifs = p->shader; unsigned long program_size = (unsigned long) (p->csr - p->program); unsigned long decl_size = (unsigned long) (p->decl - p->declarations); @@ -1008,19 +1051,13 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) if (p->nr_decl_insn > I915_MAX_DECL_INSN) i915_program_error(p, "Exceeded max DECL instructions"); - /* free old program, if present */ - if (i915->current.program) { - FREE(i915->current.program); - i915->current.program_len = 0; - } - if (p->error) { p->NumNativeInstructions = 0; p->NumNativeAluInstructions = 0; p->NumNativeTexInstructions = 0; p->NumNativeTexIndirections = 0; - i915_use_passthrough_shader(i915); + i915_use_passthrough_shader(ifs); } else { p->NumNativeInstructions @@ -1034,24 +1071,20 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) /* Copy compilation results to fragment program struct: */ - i915->current.program + assert(!ifs->program); + ifs->program = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); - if (i915->current.program) { - i915->current.program_len = program_size + decl_size; + if (ifs->program) { + ifs->program_len = program_size + decl_size; - memcpy(i915->current.program, + memcpy(ifs->program, p->declarations, decl_size * sizeof(uint)); - memcpy(i915->current.program + decl_size, + memcpy(ifs->program + decl_size, p->program, program_size * sizeof(uint)); } - - /* update number of constants */ - i915->current.num_constants[PIPE_SHADER_FRAGMENT] = p->num_constants; - assert(i915->current.num_constants[PIPE_SHADER_FRAGMENT] - >= i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]); } /* Release the compilation struct: @@ -1085,7 +1118,7 @@ i915_find_wpos_space(struct i915_fp_compile *p) i915_program_error(p, "No free texcoord for wpos value"); } #else - if (p->shader->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + if (p->shader->state.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { /* frag shader using the fragment position input */ #if 0 assert(0); @@ -1106,7 +1139,7 @@ static void i915_fixup_depth_write(struct i915_fp_compile *p) { /* XXX assuming pos/depth is always in output[0] */ - if (p->shader->output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + if (p->shader->state.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { const uint depth = UREG(REG_TYPE_OD, 0); i915_emit_arith(p, @@ -1121,13 +1154,18 @@ i915_fixup_depth_write(struct i915_fp_compile *p) void -i915_translate_fragment_program( struct i915_context *i915 ) +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs) { - struct i915_fp_compile *p = i915_init_compile(i915, i915->fs); - const struct tgsi_token *tokens = i915->fs->tokens; + struct i915_fp_compile *p = i915_init_compile(i915, fs); + const struct tgsi_token *tokens = fs->state.tokens; i915_find_wpos_space(p); +#if 0 + tgsi_dump(tokens, 0); +#endif + i915_translate_instructions(p, tokens); i915_fixup_depth_write(p); diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index e055eed7e02..e4288d4e319 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -38,6 +38,7 @@ #include "i915_reg.h" #include "i915_state.h" #include "i915_state_inlines.h" +#include "i915_fpc.h" /* The i915 (and related graphics cores) do not support GL_CLAMP. The @@ -416,26 +417,47 @@ static void i915_set_polygon_stipple( struct pipe_context *pipe, } -static void * i915_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) + +static void * +i915_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) { - return 0; + struct i915_context *i915 = i915_context(pipe); + struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader); + if (!ifs) + return NULL; + + ifs->state = *templ; + + /* The shader's compiled to i915 instructions here */ + i915_translate_fragment_program(i915, ifs); + + return ifs; } -static void i915_bind_fs_state(struct pipe_context *pipe, void *fs) +static void +i915_bind_fs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); - i915->fs = (struct pipe_shader_state *)fs; + i915->fs = (struct i915_fragment_shader*) shader; i915->dirty |= I915_NEW_FS; } -static void i915_delete_fs_state(struct pipe_context *pipe, void *shader) +static +void i915_delete_fs_state(struct pipe_context *pipe, void *shader) { - /*do nothing*/ + struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader; + + if (ifs->program) + FREE(ifs->program); + ifs->program_len = 0; + + FREE(ifs); } + static void * i915_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 4767584fc60..f654f543cc6 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -43,7 +43,7 @@ */ static void calculate_vertex_layout( struct i915_context *i915 ) { - const struct pipe_shader_state *fs = i915->fs; + const struct pipe_shader_state *fs = &i915->fs->state; const enum interp_mode colorInterp = i915->rasterizer->color_interp; struct vertex_info vinfo; uint front0 = 0, back0 = 0, front1 = 0, back1 = 0; @@ -164,7 +164,6 @@ void i915_update_derived( struct i915_context *i915 ) i915_update_dynamic( i915 ); if (i915->dirty & I915_NEW_FS) { - i915_translate_fragment_program(i915); i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ } diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 3339287f498..6bbaac4e34c 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -99,7 +99,11 @@ i915_emit_hardware_state(struct i915_context *i915 ) 2 + I915_TEX_UNITS*3 + 2 + I915_TEX_UNITS*3 + 2 + I915_MAX_CONSTANT*4 + +#if 0 i915->current.program_len + +#else + i915->fs->program_len + +#endif 6 ) * 3/2; /* plus 50% margin */ const unsigned relocs = ( I915_TEX_UNITS + @@ -325,15 +329,34 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_PROGRAM) { - const uint nr = i915->current.num_constants[PIPE_SHADER_FRAGMENT]; - assert(nr <= I915_MAX_CONSTANT); - if (nr > 0) { - const uint *c - = (const uint *) i915->current.constants[PIPE_SHADER_FRAGMENT]; + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + if (nr) { uint i; + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } +#if 0 /* debug */ + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? "user" : "immediate")); + } +#endif OUT_BATCH(*c++); OUT_BATCH(*c++); OUT_BATCH(*c++); @@ -348,9 +371,9 @@ i915_emit_hardware_state(struct i915_context *i915 ) { uint i; /* we should always have, at least, a pass-through program */ - assert(i915->current.program_len > 0); - for (i = 0; i < i915->current.program_len; i++) { - OUT_BATCH(i915->current.program[i]); + assert(i915->fs->program_len > 0); + for (i = 0; i < i915->fs->program_len; i++) { + OUT_BATCH(i915->fs->program[i]); } } -- cgit v1.2.3 From aec315f05f860337bccfce827a7c1c80960dd476 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Fri, 22 Feb 2008 18:34:31 -0700 Subject: gallium: fix a state validation bug found w/ pointblast.c --- src/gallium/drivers/softpipe/sp_prim_setup.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 7b1e131ee14..b6a3fddb29c 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -1165,6 +1165,10 @@ static void setup_begin( struct draw_stage *stage ) struct softpipe_context *sp = setup->softpipe; const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + if (sp->dirty) { + softpipe_update_derived(sp); + } + setup->quad.nr_attrs = fs->num_inputs; sp->quad.first->begin(sp->quad.first); -- cgit v1.2.3 From e8de5c70e3370e9112a5facc870075eea60c4c46 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 23 Feb 2008 14:14:54 +0900 Subject: Bring in several forgotten MSVC fixes. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 1 + src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 4 ++-- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 2 +- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 4 +++- src/gallium/drivers/softpipe/sp_fs_exec.c | 4 ++-- src/gallium/drivers/softpipe/sp_fs_llvm.c | 2 +- src/gallium/include/pipe/p_compiler.h | 8 ++++++++ 7 files changed, 18 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 97beb5f72a9..f5b5f4052f8 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -166,6 +166,7 @@ static INLINE void pb_destroy(struct pb_buffer *buf) { assert(buf); + assert(buf->vtbl); buf->vtbl->destroy(buf); } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index ff4fd123f36..66256f3fa70 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -192,8 +192,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, } /* Some sanity checks */ - assert(0 <= mm_buf->block->ofs && mm_buf->block->ofs < mm->size); - assert(size <= mm_buf->block->size && mm_buf->block->ofs + mm_buf->block->size <= mm->size); + assert(0 <= (unsigned)mm_buf->block->ofs && (unsigned)mm_buf->block->ofs < mm->size); + assert(size <= (unsigned)mm_buf->block->size && (unsigned)mm_buf->block->ofs + (unsigned)mm_buf->block->size <= mm->size); _glthread_UNLOCK_MUTEX(mm->mutex); return SUPER(mm_buf); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index d7b18dc9c59..ac524414001 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -2455,7 +2455,7 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { - assert(pc < mach->NumInstructions); + assert(pc < (int) mach->NumInstructions); exec_instruction( mach, mach->Instructions + pc, &pc ); } diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index b5c54847e0b..ff74e6117c4 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -299,7 +299,8 @@ static const char *TGSI_SEMANTICS[] = "SEMANTIC_BCOLOR", "SEMANTIC_FOG", "SEMANTIC_PSIZE", - "SEMANTIC_GENERIC," + "SEMANTIC_GENERIC", + "SEMANTIC_NORMAL" }; static const char *TGSI_SEMANTICS_SHORT[] = @@ -310,6 +311,7 @@ static const char *TGSI_SEMANTICS_SHORT[] = "FOG", "PSIZE", "GENERIC", + "NORMAL" }; static const char *TGSI_IMMS[] = diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 8cb0534342d..d5bd7a702f1 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -81,7 +81,7 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef, static void -exec_prepare( struct sp_fragment_shader *base, +exec_prepare( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers ) { @@ -98,7 +98,7 @@ exec_prepare( struct sp_fragment_shader *base, * interface: */ static unsigned -exec_run( struct sp_fragment_shader *base, +exec_run( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct quad_header *quad ) { diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 22da4714533..34b2b7d4e24 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -146,7 +146,7 @@ shade_quad_llvm(struct quad_stage *qs, unsigned -run_llvm_fs( struct sp_fragment_shader *base, +run_llvm_fs( const struct sp_fragment_shader *base, struct foo *machine ) { } diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index ab527f2afe3..91f3d2ac2d9 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -42,6 +42,14 @@ #endif +#if defined(__MSC__) + +/* Avoid 'expression is always true' warning */ +#pragma warning(disable: 4296) + +#endif /* __MSC__ */ + + typedef unsigned int uint; typedef unsigned char ubyte; typedef unsigned char boolean; -- cgit v1.2.3 From e9bb63c8e20361597463b2f7f88d84fe2770c8b9 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sun, 24 Feb 2008 02:16:28 +0900 Subject: gallium: MSVC fixes. --- src/gallium/auxiliary/draw/draw_aapoint.c | 6 +++--- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 2 ++ src/gallium/drivers/softpipe/sp_state.h | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 43119cc70b4..27a81f3e90c 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -616,10 +616,10 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header) */ #if !NORMALIZE - k = 1.0 / radius; - k = 1.0 - 2.0 * k + k * k; + k = 1.0f / radius; + k = 1.0f - 2.0f * k + k * k; #else - k = 1.0 - 1.0 / radius; + k = 1.0f - 1.0f / radius; #endif /* allocate/dup new verts */ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index e2ee72ed1fa..6e217eb2e06 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -285,7 +285,9 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) /* Wait on outstanding fences */ while (fenced_list->numDelayed) { _glthread_UNLOCK_MUTEX(fenced_list->mutex); +#ifndef __MSC__ sched_yield(); +#endif _fenced_buffer_list_check_free(fenced_list, 1); _glthread_LOCK_MUTEX(fenced_list->mutex); } diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index ef8cf67d4c3..6ff31e601ff 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -63,14 +63,14 @@ struct tgsi_exec_machine; struct sp_fragment_shader { struct pipe_shader_state shader; - void (*prepare)( struct sp_fragment_shader *shader, + void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers); /* Run the shader - this interface will get cleaned up in the * future: */ - unsigned (*run)( struct sp_fragment_shader *shader, + unsigned (*run)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct quad_header *quad ); -- cgit v1.2.3 From 012391357fcbefd2b34e999eed91a129d5efd77c Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Sat, 23 Feb 2008 16:17:17 -0700 Subject: gallium: disable early Z test if fragment shader contains KIL instruction. Use tgsi_scan_shader() to determine if the fragment shader uses KIL or writes fragment.z --- src/gallium/drivers/softpipe/sp_quad.c | 5 +++-- src/gallium/drivers/softpipe/sp_state.h | 3 +++ src/gallium/drivers/softpipe/sp_state_fs.c | 22 +++++++++++++--------- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 15b5594547d..142dbcc7710 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -56,11 +56,12 @@ sp_build_depth_stencil( void sp_build_quad_pipeline(struct softpipe_context *sp) { - boolean early_depth_test = + boolean early_depth_test = sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && - sp->fs->shader.output_semantic_name[0] != TGSI_SEMANTIC_POSITION; + !sp->fs->uses_kill && + !sp->fs->writes_z; /* build up the pipeline in reverse order... */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 6ff31e601ff..5aaa9e346bc 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -63,6 +63,9 @@ struct tgsi_exec_machine; struct sp_fragment_shader { struct pipe_shader_state shader; + boolean uses_kill; + boolean writes_z; + void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers); diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index b0238f81737..b184ac61bb9 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -36,6 +36,7 @@ #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_scan.h" void * @@ -44,21 +45,24 @@ softpipe_create_fs_state(struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state; + struct tgsi_shader_info info; + + tgsi_scan_shader(templ->tokens, &info); if (softpipe->dump_fs) tgsi_dump(templ->tokens, 0); state = softpipe_create_fs_llvm( softpipe, templ ); - if (state) - return state; - - state = softpipe_create_fs_sse( softpipe, templ ); - if (state) - return state; - - state = softpipe_create_fs_exec( softpipe, templ ); - + if (!state) { + state = softpipe_create_fs_sse( softpipe, templ ); + if (!state) { + state = softpipe_create_fs_exec( softpipe, templ ); + } + } assert(state); + state->uses_kill = (info.opcode_count[TGSI_OPCODE_KIL] || + info.opcode_count[TGSI_OPCODE_KILP]); + state->writes_z = info.writes_z; return state; } -- cgit v1.2.3 From fdcb9260eea8f9b9deaeeade2a46cffbf3dcaa59 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sun, 24 Feb 2008 17:58:05 +0900 Subject: Add new files. --- src/gallium/auxiliary/draw/SConscript | 3 +++ src/gallium/auxiliary/tgsi/SConscript | 2 ++ src/gallium/drivers/softpipe/SConscript | 3 +++ 3 files changed, 8 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 8e3a8caa74c..3302dc44f78 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -3,6 +3,8 @@ Import('*') draw = env.ConvenienceLibrary( target = 'draw', source = [ + 'draw_aaline.c', + 'draw_aapoint.c', 'draw_clip.c', 'draw_vs_exec.c', 'draw_vs_sse.c', @@ -13,6 +15,7 @@ draw = env.ConvenienceLibrary( 'draw_flatshade.c', 'draw_offset.c', 'draw_prim.c', + 'draw_pstipple.c', 'draw_stipple.c', 'draw_twoside.c', 'draw_unfilled.c', diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 8464bfe9444..4632dcc0728 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -8,6 +8,8 @@ tgsi = env.ConvenienceLibrary( 'util/tgsi_build.c', 'util/tgsi_dump.c', 'util/tgsi_parse.c', + 'util/tgsi_scan.c', + 'util/tgsi_transform.c', 'util/tgsi_util.c', ]) diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index d581ee8d3ca..4c1a6d5df0b 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -5,6 +5,9 @@ env = env.Clone() softpipe = env.ConvenienceLibrary( target = 'softpipe', source = [ + 'sp_fs_exec.c', + 'sp_fs_sse.c', + 'sp_fs_llvm.c', 'sp_clear.c', 'sp_context.c', 'sp_draw_arrays.c', -- cgit v1.2.3 From 14de997d5df48512c751c627ab19d486691f591d Mon Sep 17 00:00:00 2001 From: Ben Skeggs <darktama@beleth.(none)> Date: Mon, 25 Feb 2008 12:26:48 +1100 Subject: nv40: dump meaningful names for surface formats --- src/gallium/drivers/nv40/nv40_fragtex.c | 3 +++ src/gallium/drivers/nv40/nv40_vbo.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index c8a8120f305..7adee8858d0 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -41,6 +41,7 @@ static struct nv40_texture_format * nv40_fragtex_format(uint pipe_format) { struct nv40_texture_format *tf = nv40_texture_formats; + char fs[128]; while (tf->defined) { if (tf->pipe == pipe_format) @@ -48,6 +49,8 @@ nv40_fragtex_format(uint pipe_format) tf++; } + pf_sprint_name(fs, pipe_format); + NOUVEAU_ERR("unknown texture format %s\n", fs); return NULL; } diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 5abe4c9af10..753c2fe64b9 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -30,9 +30,13 @@ nv40_vbo_type(uint format) case PIPE_FORMAT_TYPE_UNORM: return NV40TCL_VTXFMT_TYPE_UBYTE; default: - NOUVEAU_ERR("Unknown format 0x%08x\n", format); + { + char fs[128]; + pf_sprint_name(fs, format); + NOUVEAU_ERR("Unknown format %s\n", fs); return NV40TCL_VTXFMT_TYPE_FLOAT; } + } } static boolean -- cgit v1.2.3 From 4058a9012764ce3bc7b90d03c4d79d020540f8e4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 25 Feb 2008 13:29:16 +1100 Subject: nv40: construct vbo state the same way as the rest --- src/gallium/drivers/nv40/nv40_context.h | 11 +- src/gallium/drivers/nv40/nv40_fragtex.c | 4 +- src/gallium/drivers/nv40/nv40_state.c | 2 - src/gallium/drivers/nv40/nv40_state_emit.c | 10 +- src/gallium/drivers/nv40/nv40_vbo.c | 246 ++++++++++++++--------------- 5 files changed, 137 insertions(+), 136 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index c533b9e0ef9..110d9d7ab70 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -54,7 +54,9 @@ enum nv40_state_index { NV40_STATE_VERTTEX1 = 28, NV40_STATE_VERTTEX2 = 29, NV40_STATE_VERTTEX3 = 30, - NV40_STATE_MAX = 31 + NV40_STATE_VTXBUF = 31, + NV40_STATE_VTXFMT = 32, + NV40_STATE_MAX = 33 }; #define NV40_NEW_BLEND (1 << 0) @@ -117,7 +119,7 @@ struct nv40_state { unsigned stipple_enabled; unsigned fp_samplers; - unsigned dirty; + uint64_t dirty; struct nouveau_stateobj *hw[NV40_STATE_MAX]; }; @@ -149,13 +151,13 @@ struct nv40_context { struct pipe_blend_color blend_colour; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; } pipe_state; struct nv40_state state; unsigned fallback; - struct nouveau_stateobj *so_vtxbuf; - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; }; @@ -208,6 +210,7 @@ extern struct nv40_state_entry nv40_state_zsa; extern struct nv40_state_entry nv40_state_viewport; extern struct nv40_state_entry nv40_state_framebuffer; extern struct nv40_state_entry nv40_state_fragtex; +extern struct nv40_state_entry nv40_state_vbo; /* nv40_vbo.c */ extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 7adee8858d0..ed47d707b2f 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -140,7 +140,7 @@ nv40_fragtex_validate(struct nv40_context *nv40) so_method(so, nv40->hw->curie, NV40TCL_TEX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); - state->dirty |= (1 << (NV40_STATE_FRAGTEX0 + unit)); + state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); } samplers = nv40->dirty_samplers & fp->samplers; @@ -150,7 +150,7 @@ nv40_fragtex_validate(struct nv40_context *nv40) so = nv40_fragtex_build(nv40, unit); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); - state->dirty |= (1 << (NV40_STATE_FRAGTEX0 + unit)); + state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); } nv40->state.fp_samplers = fp->samplers; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 107e60f1798..e6f2754dc51 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -628,7 +628,6 @@ nv40_set_vertex_buffer(struct pipe_context *pipe, unsigned index, struct nv40_context *nv40 = nv40_context(pipe); nv40->vtxbuf[index] = *vb; - nv40->dirty |= NV40_NEW_ARRAYS; } @@ -639,7 +638,6 @@ nv40_set_vertex_element(struct pipe_context *pipe, unsigned index, struct nv40_context *nv40 = nv40_context(pipe); nv40->vtxelt[index] = *ve; - nv40->dirty |= NV40_NEW_ARRAYS; } diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index af09ed47d6e..bb2ce0f7221 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -14,6 +14,7 @@ static struct nv40_state_entry *render_states[] = { &nv40_state_blend_colour, &nv40_state_zsa, &nv40_state_viewport, + &nv40_state_vbo, NULL }; @@ -31,7 +32,7 @@ nv40_state_validate(struct nv40_context *nv40) if (nv40->dirty & e->dirty.pipe) { if (e->validate(nv40)) - nv40->state.dirty |= (1 << e->dirty.hw); + nv40->state.dirty |= (1ULL << e->dirty.hw); } states++; @@ -64,20 +65,21 @@ nv40_state_emit(struct nv40_context *nv40) unsigned i, samplers; while (state->dirty) { - unsigned idx = ffs(state->dirty) - 1; + unsigned idx = ffsll(state->dirty) - 1; so_ref (state->hw[idx], &nv40->hw->state[idx]); so_emit(nv40->nvws, nv40->hw->state[idx]); - state->dirty &= ~(1 << idx); + state->dirty &= ~(1ULL << idx); } so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]); for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGTEX0+i]); - samplers &= ~(1 << i); + samplers &= ~(1ULL << i); } so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]); + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]); } void diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 753c2fe64b9..b5faf06291c 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -39,6 +39,43 @@ nv40_vbo_type(uint format) } } +static boolean +nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, + unsigned ib_size) +{ + unsigned type; + + if (!ib) { + nv40->pipe_state.idxbuf = NULL; + nv40->pipe_state.idxbuf_format = 0xdeadbeef; + return FALSE; + } + + /* No support for 8bit indices, no support at all on 0x4497 chips */ + if (nv40->hw->curie->grclass == NV44TCL || ib_size == 1) + return FALSE; + + switch (ib_size) { + case 2: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + return FALSE; + } + + if (ib != nv40->pipe_state.idxbuf || + type != nv40->pipe_state.idxbuf_format) { + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->pipe_state.idxbuf = ib; + nv40->pipe_state.idxbuf_format = type; + } + + return TRUE; +} + static boolean nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib, struct pipe_vertex_element *ve, @@ -101,104 +138,15 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib, return TRUE; } -static void -nv40_vbo_arrays_update(struct nv40_context *nv40, struct pipe_buffer *ib, - unsigned ib_format) -{ - struct nv40_vertex_program *vp = nv40->pipe_state.vertprog; - struct nouveau_stateobj *vtxbuf, *vtxfmt; - unsigned inputs, hw, num_hw; - unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - - inputs = vp->ir; - for (hw = 0; hw < 16 && inputs; hw++) { - if (inputs & (1 << hw)) { - num_hw = hw; - inputs &= ~(1 << hw); - } - } - num_hw++; - - vtxbuf = so_new(20, 18); - so_method(vtxbuf, nv40->hw->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); - vtxfmt = so_new(17, 0); - so_method(vtxfmt, nv40->hw->curie, NV40TCL_VTXFMT(0), num_hw); - - inputs = vp->ir; - for (hw = 0; hw < num_hw; hw++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - - if (!(inputs & (1 << hw))) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); - continue; - } - - ve = &nv40->vtxelt[hw]; - vb = &nv40->vtxbuf[ve->vertex_buffer_index]; - - if (!vb->pitch && nv40_vbo_static_attrib(nv40, hw, ve, vb)) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); - continue; - } - - so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV40TCL_VTXBUF_ADDRESS_DMA1); - so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) | - (nv40_vbo_ncomp(ve->src_format) << - NV40TCL_VTXFMT_SIZE_SHIFT) | - nv40_vbo_type(ve->src_format))); - } - - if (ib) { - so_method(vtxbuf, nv40->hw->curie, NV40TCL_IDXBUF_ADDRESS, 2); - so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, - 0, NV40TCL_IDXBUF_FORMAT_DMA1); - } - - so_emit(nv40->nvws, vtxfmt); - so_emit(nv40->nvws, vtxbuf); - so_ref (vtxbuf, &nv40->so_vtxbuf); - so_ref (NULL, &vtxfmt); -} - -static boolean -nv40_vbo_validate_state(struct nv40_context *nv40, - struct pipe_buffer *ib, unsigned ib_format) -{ - unsigned vdn = nv40->dirty & NV40_NEW_ARRAYS; - - nv40_emit_hw_state(nv40); - if (vdn || ib) { - nv40_vbo_arrays_update(nv40, ib, ib_format); - nv40->dirty &= ~NV40_NEW_ARRAYS; - } - - so_emit_reloc_markers(nv40->nvws, nv40->so_vtxbuf); - - BEGIN_RING(curie, 0x1710, 1); - OUT_RING (0); /* vtx cache flush */ - - return TRUE; -} - boolean nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); unsigned nr; - boolean ret; - ret = nv40_vbo_validate_state(nv40, NULL, 0); - if (!ret) { - NOUVEAU_ERR("state validate failed\n"); - return FALSE; - } + nv40_vbo_set_idxbuf(nv40, NULL, 0); + nv40_emit_hw_state(nv40); BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); OUT_RING (nvgl_primitive(mode)); @@ -305,14 +253,9 @@ nv40_draw_elements_inline(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct pipe_winsys *ws = pipe->winsys; - boolean ret; void *map; - ret = nv40_vbo_validate_state(nv40, NULL, 0); - if (!ret) { - NOUVEAU_ERR("state validate failed\n"); - return FALSE; - } + nv40_emit_hw_state(nv40); map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { @@ -348,30 +291,12 @@ nv40_draw_elements_inline(struct pipe_context *pipe, static boolean nv40_draw_elements_vbo(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - unsigned nr, type; - boolean ret; - - switch (ib_size) { - case 2: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - return FALSE; - } + unsigned nr; - ret = nv40_vbo_validate_state(nv40, ib, type); - if (!ret) { - NOUVEAU_ERR("failed state validation\n"); - return FALSE; - } + nv40_emit_hw_state(nv40); BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); OUT_RING (nvgl_primitive(mode)); @@ -409,19 +334,92 @@ nv40_draw_elements(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - /* 0x4497 doesn't support real index buffers, and there doesn't appear - * to be support on any chipset for 8-bit indices. - */ - if (nv40->hw->curie->grclass == NV44TCL || indexSize == 1) { + if (nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize)) { + nv40_draw_elements_vbo(pipe, mode, start, count); + } else { nv40_draw_elements_inline(pipe, indexBuffer, indexSize, mode, start, count); - } else { - nv40_draw_elements_vbo(pipe, indexBuffer, indexSize, - mode, start, count); } pipe->flush(pipe, 0); return TRUE; } +static boolean +nv40_vbo_validate(struct nv40_context *nv40) +{ + struct nv40_vertex_program *vp = nv40->pipe_state.vertprog; + struct nouveau_stateobj *vtxbuf, *vtxfmt; + struct pipe_buffer *ib = nv40->pipe_state.idxbuf; + unsigned ib_format = nv40->pipe_state.idxbuf_format; + unsigned inputs, hw, num_hw; + unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + inputs = vp->ir; + for (hw = 0; hw < 16 && inputs; hw++) { + if (inputs & (1 << hw)) { + num_hw = hw; + inputs &= ~(1 << hw); + } + } + num_hw++; + + vtxbuf = so_new(20, 18); + so_method(vtxbuf, nv40->hw->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); + vtxfmt = so_new(17, 0); + so_method(vtxfmt, nv40->hw->curie, NV40TCL_VTXFMT(0), num_hw); + + inputs = vp->ir; + for (hw = 0; hw < num_hw; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + + if (!(inputs & (1 << hw))) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); + continue; + } + + ve = &nv40->vtxelt[hw]; + vb = &nv40->vtxbuf[ve->vertex_buffer_index]; + + if (!vb->pitch && nv40_vbo_static_attrib(nv40, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); + continue; + } + + so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV40TCL_VTXBUF_ADDRESS_DMA1); + so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) | + (nv40_vbo_ncomp(ve->src_format) << + NV40TCL_VTXFMT_SIZE_SHIFT) | + nv40_vbo_type(ve->src_format))); + } + + if (ib) { + so_method(vtxbuf, nv40->hw->curie, NV40TCL_IDXBUF_ADDRESS, 2); + so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, + 0, NV40TCL_IDXBUF_FORMAT_DMA1); + } + + so_method(vtxbuf, nv40->hw->curie, 0x1710, 1); + so_data (vtxbuf, 0); + + so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF); + so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT); + return FALSE; +} + +struct nv40_state_entry nv40_state_vbo = { + .validate = nv40_vbo_validate, + .dirty = { + .pipe = NV40_NEW_ARRAYS, + .hw = 0, + } +}; -- cgit v1.2.3 From 026e2fd3c6eb87a010a9c90341e8a77b09376b5b Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 25 Feb 2008 13:33:08 +1100 Subject: nv40: remove pipe_state struct now. --- src/gallium/drivers/nv40/nv40_context.h | 41 +++++++++++------------- src/gallium/drivers/nv40/nv40_fragprog.c | 4 +-- src/gallium/drivers/nv40/nv40_fragtex.c | 2 +- src/gallium/drivers/nv40/nv40_state.c | 26 +++++++-------- src/gallium/drivers/nv40/nv40_state_blend.c | 4 +-- src/gallium/drivers/nv40/nv40_state_clip.c | 2 +- src/gallium/drivers/nv40/nv40_state_fb.c | 2 +- src/gallium/drivers/nv40/nv40_state_rasterizer.c | 2 +- src/gallium/drivers/nv40/nv40_state_scissor.c | 4 +-- src/gallium/drivers/nv40/nv40_state_stipple.c | 4 +-- src/gallium/drivers/nv40/nv40_state_viewport.c | 2 +- src/gallium/drivers/nv40/nv40_state_zsa.c | 2 +- src/gallium/drivers/nv40/nv40_vbo.c | 18 +++++------ src/gallium/drivers/nv40/nv40_vertprog.c | 4 +-- 14 files changed, 57 insertions(+), 60 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 110d9d7ab70..16cc053ad9f 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -132,32 +132,29 @@ struct nv40_context { int chipset; - unsigned dirty; + /* HW state derived from pipe states */ + struct nv40_state state; + unsigned fallback; + /* Context state */ + unsigned dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; + struct pipe_clip_state clip; + struct nv40_vertex_program *vertprog; + struct nv40_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + struct nv40_rasterizer_state *rasterizer; + struct nv40_zsa_state *zsa; + struct nv40_blend_state *blend; + struct pipe_blend_color blend_colour; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; unsigned dirty_samplers; - - struct { - struct pipe_scissor_state scissor; - unsigned stipple[32]; - struct pipe_clip_state clip; - struct nv40_vertex_program *vertprog; - struct nv40_fragment_program *fragprog; - struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; - struct nv40_rasterizer_state *rasterizer; - struct nv40_zsa_state *zsa; - struct nv40_blend_state *blend; - struct pipe_blend_color blend_colour; - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state framebuffer; - struct pipe_buffer *idxbuf; - unsigned idxbuf_format; - } pipe_state; - - struct nv40_state state; - unsigned fallback; - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; }; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index a4a1ea01e03..2a8abb32a30 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -790,9 +790,9 @@ nv40_fragprog_upload(struct nv40_context *nv40, static boolean nv40_fragprog_validate(struct nv40_context *nv40) { - struct nv40_fragment_program *fp = nv40->pipe_state.fragprog; + struct nv40_fragment_program *fp = nv40->fragprog; struct pipe_buffer *constbuf = - nv40->pipe_state.constbuf[PIPE_SHADER_FRAGMENT]; + nv40->constbuf[PIPE_SHADER_FRAGMENT]; struct pipe_winsys *ws = nv40->pipe.winsys; struct nouveau_stateobj *so; int i; diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index ed47d707b2f..6be8378c087 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -126,7 +126,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) static boolean nv40_fragtex_validate(struct nv40_context *nv40) { - struct nv40_fragment_program *fp = nv40->pipe_state.fragprog; + struct nv40_fragment_program *fp = nv40->fragprog; struct nv40_state *state = &nv40->state; struct nouveau_stateobj *so; unsigned samplers, unit; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index e6f2754dc51..24335fbc44d 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -57,7 +57,7 @@ nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - nv40->pipe_state.blend = hwcso; + nv40->blend = hwcso; nv40->dirty |= NV40_NEW_BLEND; } @@ -399,7 +399,7 @@ nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - nv40->pipe_state.rasterizer = hwcso; + nv40->rasterizer = hwcso; nv40->dirty |= NV40_NEW_RAST; } @@ -470,7 +470,7 @@ nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - nv40->pipe_state.zsa = hwcso; + nv40->zsa = hwcso; nv40->dirty |= NV40_NEW_ZSA; } @@ -500,7 +500,7 @@ nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - nv40->pipe_state.vertprog = hwcso; + nv40->vertprog = hwcso; nv40->dirty |= NV40_NEW_VERTPROG; } @@ -531,7 +531,7 @@ nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - nv40->pipe_state.fragprog = hwcso; + nv40->fragprog = hwcso; nv40->dirty |= NV40_NEW_FRAGPROG; } @@ -551,7 +551,7 @@ nv40_set_blend_color(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - nv40->pipe_state.blend_colour = *bcol; + nv40->blend_colour = *bcol; nv40->dirty |= NV40_NEW_BCOL; } @@ -561,7 +561,7 @@ nv40_set_clip_state(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - nv40->pipe_state.clip = *clip; + nv40->clip = *clip; nv40->dirty |= NV40_NEW_UCP; } @@ -572,11 +572,11 @@ nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct nv40_context *nv40 = nv40_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv40->pipe_state.constbuf[PIPE_SHADER_VERTEX] = buf->buffer; + nv40->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; nv40->dirty |= NV40_NEW_VERTPROG; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv40->pipe_state.constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; + nv40->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; nv40->dirty |= NV40_NEW_FRAGPROG; } } @@ -587,7 +587,7 @@ nv40_set_framebuffer_state(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - nv40->pipe_state.framebuffer = *fb; + nv40->framebuffer = *fb; nv40->dirty |= NV40_NEW_FB; } @@ -597,7 +597,7 @@ nv40_set_polygon_stipple(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - memcpy(nv40->pipe_state.stipple, stipple->stipple, 4 * 32); + memcpy(nv40->stipple, stipple->stipple, 4 * 32); nv40->dirty |= NV40_NEW_STIPPLE; } @@ -607,7 +607,7 @@ nv40_set_scissor_state(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - nv40->pipe_state.scissor = *s; + nv40->scissor = *s; nv40->dirty |= NV40_NEW_SCISSOR; } @@ -617,7 +617,7 @@ nv40_set_viewport_state(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - nv40->pipe_state.viewport = *vpt; + nv40->viewport = *vpt; nv40->dirty |= NV40_NEW_VIEWPORT; } diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c index 81b927a67a9..dd09830aa3d 100644 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -3,7 +3,7 @@ static boolean nv40_state_blend_validate(struct nv40_context *nv40) { - so_ref(nv40->pipe_state.blend->so, &nv40->state.hw[NV40_STATE_BLEND]); + so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]); return TRUE; } @@ -19,7 +19,7 @@ static boolean nv40_state_blend_colour_validate(struct nv40_context *nv40) { struct nouveau_stateobj *so = so_new(2, 0); - struct pipe_blend_color *bcol = &nv40->pipe_state.blend_colour; + struct pipe_blend_color *bcol = &nv40->blend_colour; so_method(so, nv40->hw->curie, NV40TCL_BLEND_COLOR, 1); so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | diff --git a/src/gallium/drivers/nv40/nv40_state_clip.c b/src/gallium/drivers/nv40/nv40_state_clip.c index 19f1c3b36de..93e690161f3 100644 --- a/src/gallium/drivers/nv40/nv40_state_clip.c +++ b/src/gallium/drivers/nv40/nv40_state_clip.c @@ -3,7 +3,7 @@ static boolean nv40_state_clip_validate(struct nv40_context *nv40) { - if (nv40->pipe_state.clip.nr) + if (nv40->clip.nr) nv40->fallback |= NV40_FALLBACK_TNL; return FALSE; diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index c3bf4d43a33..3d0ab920030 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -3,7 +3,7 @@ static boolean nv40_state_framebuffer_validate(struct nv40_context *nv40) { - struct pipe_framebuffer_state *fb = &nv40->pipe_state.framebuffer; + struct pipe_framebuffer_state *fb = &nv40->framebuffer; struct pipe_surface *rt[4], *zeta; uint32_t rt_enable, rt_format, w, h; int i, colour_format = 0, zeta_format = 0; diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c index 59b35d1d50a..9ecda5990f0 100644 --- a/src/gallium/drivers/nv40/nv40_state_rasterizer.c +++ b/src/gallium/drivers/nv40/nv40_state_rasterizer.c @@ -3,7 +3,7 @@ static boolean nv40_state_rasterizer_validate(struct nv40_context *nv40) { - so_ref(nv40->pipe_state.rasterizer->so, + so_ref(nv40->rasterizer->so, &nv40->state.hw[NV40_STATE_RAST]); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index ee797094d39..09ffc49f965 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -3,8 +3,8 @@ static boolean nv40_state_scissor_validate(struct nv40_context *nv40) { - struct pipe_rasterizer_state *rast = &nv40->pipe_state.rasterizer->pipe; - struct pipe_scissor_state *s = &nv40->pipe_state.scissor; + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct pipe_scissor_state *s = &nv40->scissor; struct nouveau_stateobj *so; if (nv40->state.hw[NV40_STATE_SCISSOR] && diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c index aad4d179ac4..001c396d747 100644 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -3,7 +3,7 @@ static boolean nv40_state_stipple_validate(struct nv40_context *nv40) { - struct pipe_rasterizer_state *rast = &nv40->pipe_state.rasterizer->pipe; + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; struct nouveau_grobj *curie = nv40->hw->curie; struct nouveau_stateobj *so; @@ -19,7 +19,7 @@ nv40_state_stipple_validate(struct nv40_context *nv40) so_data (so, 1); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) - so_data(so, nv40->pipe_state.stipple[i]); + so_data(so, nv40->stipple[i]); } else { so = so_new(2, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 68820d31339..9616be5052a 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -4,7 +4,7 @@ static boolean nv40_state_viewport_validate(struct nv40_context *nv40) { struct nouveau_stateobj *so = so_new(9, 0); - struct pipe_viewport_state *vpt = &nv40->pipe_state.viewport; + struct pipe_viewport_state *vpt = &nv40->viewport; so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); so_data (so, fui(vpt->translate[0])); diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c index 061a3555cbe..fb760677c88 100644 --- a/src/gallium/drivers/nv40/nv40_state_zsa.c +++ b/src/gallium/drivers/nv40/nv40_state_zsa.c @@ -3,7 +3,7 @@ static boolean nv40_state_zsa_validate(struct nv40_context *nv40) { - so_ref(nv40->pipe_state.zsa->so, + so_ref(nv40->zsa->so, &nv40->state.hw[NV40_STATE_ZSA]); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index b5faf06291c..1653ebf2a7e 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -46,8 +46,8 @@ nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, unsigned type; if (!ib) { - nv40->pipe_state.idxbuf = NULL; - nv40->pipe_state.idxbuf_format = 0xdeadbeef; + nv40->idxbuf = NULL; + nv40->idxbuf_format = 0xdeadbeef; return FALSE; } @@ -66,11 +66,11 @@ nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, return FALSE; } - if (ib != nv40->pipe_state.idxbuf || - type != nv40->pipe_state.idxbuf_format) { + if (ib != nv40->idxbuf || + type != nv40->idxbuf_format) { nv40->dirty |= NV40_NEW_ARRAYS; - nv40->pipe_state.idxbuf = ib; - nv40->pipe_state.idxbuf_format = type; + nv40->idxbuf = ib; + nv40->idxbuf_format = type; } return TRUE; @@ -348,10 +348,10 @@ nv40_draw_elements(struct pipe_context *pipe, static boolean nv40_vbo_validate(struct nv40_context *nv40) { - struct nv40_vertex_program *vp = nv40->pipe_state.vertprog; + struct nv40_vertex_program *vp = nv40->vertprog; struct nouveau_stateobj *vtxbuf, *vtxfmt; - struct pipe_buffer *ib = nv40->pipe_state.idxbuf; - unsigned ib_format = nv40->pipe_state.idxbuf_format; + struct pipe_buffer *ib = nv40->idxbuf; + unsigned ib_format = nv40->idxbuf_format; unsigned inputs, hw, num_hw; unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index c482964adc0..d3ed57b1998 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -634,9 +634,9 @@ out_err: static boolean nv40_vertprog_validate(struct nv40_context *nv40) { - struct nv40_vertex_program *vp = nv40->pipe_state.vertprog; + struct nv40_vertex_program *vp = nv40->vertprog; struct pipe_buffer *constbuf = - nv40->pipe_state.constbuf[PIPE_SHADER_VERTEX]; + nv40->constbuf[PIPE_SHADER_VERTEX]; struct nouveau_winsys *nvws = nv40->nvws; struct pipe_winsys *ws = nv40->pipe.winsys; boolean upload_code = FALSE, upload_data = FALSE; -- cgit v1.2.3 From 20839b37ed61b044d6224c0e373ce10d74be4f3d Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 11:13:58 -0700 Subject: gallium/i915: added SGT/SLE opcodes --- .../drivers/i915simple/i915_fpc_translate.c | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index afe7e25dce7..50c9e654095 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -385,6 +385,26 @@ emit_simple_arith(struct i915_fp_compile *p, arg3 ); } + +/** As above, but swap the first two src regs */ +static void +emit_simple_arith_swap2(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + struct tgsi_full_instruction inst2; + + assert(numArgs == 2); + + /* transpose first two registers */ + inst2 = *inst; + inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; + inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + + emit_simple_arith(p, &inst2, opcode, numArgs); +} + + #ifndef M_PI #define M_PI 3.14159265358979323846 #endif @@ -772,6 +792,11 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_SGE, 2); break; + case TGSI_OPCODE_SLE: + /* like SGE, but swap reg0, reg1 */ + emit_simple_arith_swap2(p, inst, A0_SGE, 2); + break; + case TGSI_OPCODE_SIN: src0 = src_vector(p, &inst->FullSrcRegisters[0]); tmp = i915_get_utemp(p); @@ -826,6 +851,11 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_SLT, 2); break; + case TGSI_OPCODE_SGT: + /* like SLT, but swap reg0, reg1 */ + emit_simple_arith_swap2(p, inst, A0_SLT, 2); + break; + case TGSI_OPCODE_SUB: src0 = src_vector(p, &inst->FullSrcRegisters[0]); src1 = src_vector(p, &inst->FullSrcRegisters[1]); @@ -879,6 +909,7 @@ i915_translate_instruction(struct i915_fp_compile *p, default: i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); + p->error = 1; return; } -- cgit v1.2.3 From c037b4d45a551dc7b7dd33950c2e8df60449061c Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 14:47:13 -0700 Subject: softpipe: use draw_find_vs_output() directly --- src/gallium/drivers/softpipe/sp_state_derived.c | 39 +++++-------------------- 1 file changed, 7 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f9f2c5eaa8f..4c6313001f4 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -34,33 +34,6 @@ #include "sp_state.h" -/** - * Search vertex program's outputs to find a match for the given - * semantic name/index. Return the index of the output slot. - * - * Return 0 if not found. This will cause the fragment program to use - * vertex attrib 0 (position) in the cases where the fragment program - * attempts to use a missing vertex program output. This is an undefined - * condition that users shouldn't hit anyway. - */ -static int -find_vs_output(struct softpipe_context *sp, - const struct pipe_shader_state *vs, - uint semantic_name, - uint semantic_index) -{ - uint i; - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == semantic_name && - vs->output_semantic_index[i] == semantic_index) - return i; - } - - /* See if the draw module is introducing a new attribute... */ - return draw_find_vs_output(sp->draw, semantic_name, semantic_index); -} - - /** * Mark the current vertex layout as "invalid". * We'll validate the vertex layout later, when we start to actually @@ -114,24 +87,25 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) int src; switch (fs->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_vs_output(softpipe->draw, + TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); break; case TGSI_SEMANTIC_COLOR: - src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_COLOR, + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR, fs->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_FOG, 0); + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_GENERIC, + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, fs->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; @@ -141,7 +115,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) } } - softpipe->psize_slot = find_vs_output(softpipe, vs, TGSI_SEMANTIC_PSIZE, 0); + softpipe->psize_slot = draw_find_vs_output(softpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); if (softpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, softpipe->psize_slot); -- cgit v1.2.3 From 846b7fbc6c9cbd57eed01bd04b1da73109935091 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 14:48:31 -0700 Subject: gallium/i915: use draw_find_vs_output() directly, fix broken fogcoords. We now produce the correct 915 vertex layout regardless of the order in which fragment shader inputs are declared. --- .../drivers/i915simple/i915_state_derived.c | 99 ++++++++++++---------- 1 file changed, 53 insertions(+), 46 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index f654f543cc6..fe52fe6e6bd 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -27,104 +27,111 @@ #include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" #include "i915_fpc.h" -#include "pipe/p_shader_tokens.h" + /** - * Determine which post-transform / pre-rasterization vertex attributes - * we need. - * Derived from: fs, setup states. + * Determine the hardware vertex layout. + * Depends on vertex/fragment shader state. */ static void calculate_vertex_layout( struct i915_context *i915 ) { const struct pipe_shader_state *fs = &i915->fs->state; const enum interp_mode colorInterp = i915->rasterizer->color_interp; struct vertex_info vinfo; - uint front0 = 0, back0 = 0, front1 = 0, back1 = 0; - boolean needW = 0; + boolean texCoords[8], colors[2], fog, needW; uint i; - boolean texCoords[8]; - uint src = 0; + int src; memset(texCoords, 0, sizeof(texCoords)); + colors[0] = colors[1] = fog = needW = FALSE; memset(&vinfo, 0, sizeof(vinfo)); - /* pos */ - draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src++); - /* Note: we'll set the S4_VFMT_XYZ[W] bits below */ - + /* Determine which fragment program inputs are needed. Setup HW vertex + * layout below, in the HW-specific attribute order. + */ for (i = 0; i < fs->num_inputs; i++) { switch (fs->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: break; case TGSI_SEMANTIC_COLOR: - if (fs->input_semantic_index[i] == 0) { - front0 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++); - vinfo.hwfmt[0] |= S4_VFMT_COLOR; - } - else { - assert(fs->input_semantic_index[i] == 1); - front1 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++); - vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; - } + assert(fs->input_semantic_index[i] < 2); + colors[fs->input_semantic_index[i]] = TRUE; break; case TGSI_SEMANTIC_GENERIC: /* usually a texcoord */ { const uint unit = fs->input_semantic_index[i]; - uint hwtc; + assert(unit < 8); texCoords[unit] = TRUE; - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); - hwtc = TEXCOORDFMT_4D; needW = TRUE; - vinfo.hwfmt[1] |= hwtc << (unit * 4); } break; case TGSI_SEMANTIC_FOG: - debug_printf("i915 fogcoord not implemented yet\n"); - draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src++); + fog = TRUE; break; default: assert(0); } - } - /* finish up texcoord fields */ - for (i = 0; i < 8; i++) { - if (!texCoords[i]) { - const uint hwtc = TEXCOORDFMT_NOT_PRESENT; - vinfo.hwfmt[1] |= hwtc << (i* 4); - } - } - - /* go back and fill in the vertex position info now that we have needW */ + + /* pos */ + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); if (needW) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; vinfo.emit[0] = EMIT_4F; } else { + draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZ; vinfo.emit[0] = EMIT_3F; } - /* Additional attributes required for setup: Just twosided - * lighting. Edgeflag is dealt with specially by setting bits in - * the vertex header. - */ - if (i915->rasterizer->light_twoside) { - if (front0) { - back0 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++); + /* hardware point size */ + /* XXX todo */ + + /* primary color */ + if (colors[0]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_COLOR; + } + + /* secondary color */ + if (colors[1]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; + } + + /* fog coord, not fog blend factor */ + if (fog) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; + } + + /* texcoords */ + for (i = 0; i < 8; i++) { + uint hwtc; + if (texCoords[i]) { + hwtc = TEXCOORDFMT_4D; + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } - if (back0) { - back1 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++); + else { + hwtc = TEXCOORDFMT_NOT_PRESENT; } + vinfo.hwfmt[1] |= hwtc << (i * 4); } draw_compute_vertex_size(&vinfo); -- cgit v1.2.3 From b53110c78941e7fcaa41921cce07ca00ec117a97 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 16:16:07 -0700 Subject: gallium/i915: call draw_flush() in i915_flush() --- src/gallium/drivers/i915simple/i915_flush.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c index 3c2069b8273..96a54281f11 100644 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -31,6 +31,7 @@ #include "pipe/p_defines.h" +#include "draw/draw_context.h" #include "i915_context.h" #include "i915_reg.h" #include "i915_batch.h" @@ -44,6 +45,8 @@ static void i915_flush( struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + /* Do we need to emit an MI_FLUSH command to flush the hardware * caches? */ -- cgit v1.2.3 From f41e95755757cb1452697fafa1dd5288390ed57e Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 16:20:04 -0700 Subject: gallium/i915: need to recompute vertex info if vertex shader changes --- src/gallium/drivers/i915simple/i915_context.h | 1 + src/gallium/drivers/i915simple/i915_state.c | 2 ++ src/gallium/drivers/i915simple/i915_state_derived.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 5cc38cdc851..d32dded6bdc 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -263,6 +263,7 @@ struct i915_context #define I915_NEW_TEXTURE 0x800 #define I915_NEW_CONSTANTS 0x1000 #define I915_NEW_VBO 0x2000 +#define I915_NEW_VS 0x4000 /* Driver's internally generated state flags: diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index e4288d4e319..a35bdf941fc 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -474,6 +474,8 @@ static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) /* just pass-through to draw module */ draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); + + i915->dirty |= I915_NEW_VS; } static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index fe52fe6e6bd..5cf70acdf3b 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -155,7 +155,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) */ void i915_update_derived( struct i915_context *i915 ) { - if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS)) + if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) calculate_vertex_layout( i915 ); if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE)) -- cgit v1.2.3 From 92650aeaddd1bd729f3a90383f05c8148f678066 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 16:22:58 -0700 Subject: gallium/i915: make sure state is up to date in i915_vbuf_render_get_vertex_info(), also disable bogus assertion --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index c5bf6174f68..9d5f609220a 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -83,6 +83,12 @@ i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; + + if (i915->dirty) { + /* make sure we have up to date vertex layout */ + i915_update_derived( i915 ); + } + return &i915->current.vertex_info; } @@ -143,7 +149,8 @@ i915_vbuf_render_draw( struct vbuf_render *render, assert(nr_indices); - assert((i915->dirty & ~I915_NEW_VBO) == 0); + /* this seems to be bogus, since we validate state right after this */ + /*assert((i915->dirty & ~I915_NEW_VBO) == 0);*/ if (i915->dirty) i915_update_derived( i915 ); -- cgit v1.2.3 From 99047e0968882a4e3f9577fa2352d91733181339 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 16:24:47 -0700 Subject: gallium/i915: plug in aaline draw stage --- src/gallium/drivers/i915simple/i915_context.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index acfa3494397..36f6429d463 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -298,10 +298,14 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, i915_init_string_functions(i915); i915_init_texture_functions(i915); + /* not working yet: + draw_install_aapoint_stage(i915->draw, &i915->pipe); + */ + draw_install_aaline_stage(i915->draw, &i915->pipe); + i915->pci_id = pci_id; i915->flags.is_i945 = is_i945; - i915->dirty = ~0; i915->hardware_dirty = ~0; -- cgit v1.2.3 From b02fc948348db5559d658251dd3a6d4f3390d686 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 17:01:20 -0700 Subject: gallium/i915: compute vertex size _after_ state validation in emit_prim(). Fixes crash when drawing aa lines. --- src/gallium/drivers/i915simple/i915_prim_emit.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index 44c43259369..6da42b3846c 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -72,6 +72,8 @@ emit_hw_vertex( struct i915_context *i915, uint i; uint count = 0; /* for debug/sanity */ + assert(!i915->dirty); + for (i = 0; i < vinfo->num_attribs; i++) { switch (vinfo->emit[i]) { case EMIT_OMIT: @@ -122,17 +124,19 @@ emit_prim( struct draw_stage *stage, unsigned nr ) { struct i915_context *i915 = setup_stage(stage)->i915; - unsigned vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + unsigned vertex_size; unsigned i; - assert(vertex_size >= 12); /* never smaller than 12 bytes */ - if (i915->dirty) i915_update_derived( i915 ); if (i915->hardware_dirty) i915_emit_hardware_state( i915 ); + /* need to do this after validation! */ + vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + assert(vertex_size >= 12); /* never smaller than 12 bytes */ + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { FLUSH_BATCH(); -- cgit v1.2.3 From 7976a084e792daf0b23c688bfa8f577de141ecca Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Thu, 21 Feb 2008 11:01:35 -0800 Subject: Cell: Use multiple DMA tags for the dcache. --- src/gallium/drivers/cell/spu/spu_dcache.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c index 698a5790bb0..3baeaea9981 100644 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -33,7 +33,7 @@ #define CACHE_NAME data #define CACHED_TYPE qword #define CACHE_TYPE CACHE_TYPE_RO -#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER +#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) #define CACHE_LOG2NNWAY 2 #define CACHE_LOG2NSETS 6 #include <cache-api.h> diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 5c95d112ac1..d14f1abbe74 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -131,7 +131,10 @@ extern boolean Debug; #define TAG_BATCH_BUFFER 17 #define TAG_MISC 18 #define TAG_TEXTURE_TILE 19 -#define TAG_INSTRUCTION_FETCH 20 +#define TAG_DCACHE0 20 +#define TAG_DCACHE1 21 +#define TAG_DCACHE2 22 +#define TAG_DCACHE3 23 -- cgit v1.2.3 From fb68daceec312a90910dad3882e6ef57c370b7fd Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Thu, 21 Feb 2008 16:41:12 -0800 Subject: Cell: Remove unnecessary include files --- src/gallium/drivers/cell/spu/spu_exec.c | 2 -- src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index cf81bee8fde..fff0114a236 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -50,8 +50,6 @@ * Brian Paul */ -#include <libmisc.h> -#include <spu_mfcio.h> #include <transpose_matrix4x4.h> #include <simdmath/ceilf4.h> #include <simdmath/cosf4.h> diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index f7e4e653e31..219fd90cc0e 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -32,8 +32,6 @@ * Ian Romanick <idr@us.ibm.com> */ -#include <spu_mfcio.h> - #include "pipe/p_util.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -- cgit v1.2.3 From a63fd641a01a50e1be51664bf863e01ddaf61d3e Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Fri, 22 Feb 2008 16:27:39 -0800 Subject: cell: Trivial compiler warning clean-ups. --- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 1 + src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 1 - src/gallium/drivers/cell/spu/spu_exec.c | 6 ++++-- src/gallium/drivers/cell/spu/spu_main.c | 5 +++-- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index cbd387f0142..c839fb4d12d 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -38,6 +38,7 @@ #include "cell_context.h" #include "cell_draw_arrays.h" #include "cell_state.h" +#include "cell_flush.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 42cc47cbfea..17141924be8 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -55,7 +55,6 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) struct cell_command_vs *const vs = &cell_global.command[0].vs; uint64_t *batch; struct cell_array_info *array_info; - struct cell_shader_info *shader_info; unsigned i, j; struct cell_attribute_fetch_code *cf; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index fff0114a236..1560c0f1574 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -149,6 +149,7 @@ spu_exec_machine_init(struct spu_exec_machine *mach, const qword zero = si_il(0); const qword not_zero = si_il(~0); + (void) numSamplers; mach->Samplers = samplers; mach->Processor = processor; mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; @@ -657,9 +658,10 @@ fetch_texel( struct spu_sampler *sampler, qword rgba[4]; qword out[4]; - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba); + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, + (float (*)[4]) rgba); - _transpose_matrix4x4(out, rgba); + _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); r->q = out[0]; g->q = out[1]; b->q = out[2]; diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 1136dba62d5..cc4bafdb3ac 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -38,6 +38,7 @@ #include "spu_tile.h" //#include "spu_test.h" #include "spu_vertex_shader.h" +#include "spu_dcache.h" #include "cell/common.h" #include "pipe/p_defines.h" @@ -434,7 +435,7 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); break; case CELL_CMD_STATE_UNIFORMS: - draw.constants = (float (*)[4]) (uintptr_t) buffer[pos + 1]; + draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1]; pos += 2; break; case CELL_CMD_STATE_VS_ARRAY_INFO: @@ -583,7 +584,7 @@ main(main_param_t speid, main_param_t argp) one_time_init(); if (Debug) - printf("SPU: main() speid=%lu\n", speid); + printf("SPU: main() speid=%lu\n", (unsigned long) speid); mfc_get(&spu.init, /* dest */ (unsigned int) argp, /* src */ -- cgit v1.2.3 From 2efa7e9489541b6a86c3d46e3d58cbf5bf399189 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Fri, 22 Feb 2008 17:51:55 -0800 Subject: cell: Fix off-by-one error in spu_dcache_fetch_unaligned This time the off-by-one error caused an extra qword to be fetched under certain circumstances when the source ea was not qword aligned. --- src/gallium/drivers/cell/spu/spu_dcache.c | 50 ++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c index 3baeaea9981..a1701d80d18 100644 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -49,43 +49,57 @@ /** * Fetch between arbitrary number of bytes from an unaligned address + * + * \param dst Destination data buffer + * \param ea Main memory effective address of source data + * \param size Number of bytes to read + * + * \warning + * As is hinted by the type of the \c dst pointer, this function writes + * multiples of 16-bytes. */ void spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) { const int shift = ea & 0x0f; - const unsigned aligned_start_ea = ea & ~0x0f; - const unsigned aligned_end_ea = ROUNDUP16(ea + size); - const unsigned num_entries = (aligned_end_ea - aligned_start_ea) / 16; + const unsigned read_size = ROUNDUP16(size + shift); + const unsigned last_read = ROUNDUP16(ea + size); + const qword *const last_write = dst + (ROUNDUP16(size) / 16); unsigned i; if (shift == 0) { /* Data is already aligned. Fetch directly into the destination buffer. */ - for (i = 0; i < num_entries; i++) { - dst[i] = cache_rd(data, ea + (i * 16)); + for (i = 0; i < size; i += 16) { + *(dst++) = cache_rd(data, ea + i); } } else { - qword tmp[2] ALIGN16_ATTRIB; - + qword hi; - tmp[0] = cache_rd(data, (ea & ~0x0f)); - for (i = 0; i < (num_entries & ~1); i++) { - const unsigned curr = i & 1; - const unsigned next = curr ^ 1; - tmp[next] = cache_rd(data, (ea & ~0x0f) + (next * 16)); - - dst[i] = si_or((qword) spu_slqwbyte(tmp[curr], shift), - (qword) spu_rlmaskqwbyte(tmp[next], shift - 16)); + /* Please exercise extreme caution when modifying this code. This code + * must not read past the end of the page containing the source data, + * and it must not write more than ((size + 15) / 16) qwords to the + * destination buffer. + */ + ea &= ~0x0f; + hi = cache_rd(data, ea); + for (i = 16; i < read_size; i += 16) { + qword lo = cache_rd(data, ea + i); + + *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), + (qword) spu_rlmaskqwbyte(lo, shift - 16)); + hi = lo; } - if (i < num_entries) { - dst[i] = si_or((qword) spu_slqwbyte(tmp[(i & 1)], shift), - si_il(0)); + if (dst != last_write) { + *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0)); } } + + ASSERT((ea + i) == last_read); + ASSERT(dst == last_write); } -- cgit v1.2.3 From 2fc9d0ffac4b44dad1f13443e5dedd545675a7ee Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Mon, 25 Feb 2008 16:15:59 -0800 Subject: cell: Additional changes to match changes in draw/draw_vertex_shader.c --- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 17141924be8..f5c27852c14 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -127,7 +127,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { vs->elts[j] = vs->elts[0]; - vs->vOut[j] = vs->vOut[0]; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; } vs->num_elts = n; @@ -136,5 +136,6 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT); } + draw->vs.post_nr = draw->vs.queue_nr; draw->vs.queue_nr = 0; } -- cgit v1.2.3 From 0235b3252100eda553babea42c014445358a2985 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 17:59:51 -0700 Subject: gallium/i915: fix i915_emit_texld() to handle swizzled texcoords Allocate a temporary register, insert MOV instruction, etc. --- src/gallium/drivers/i915simple/i915_fpc_emit.c | 63 +++++++++++++++++----- .../drivers/i915simple/i915_fpc_translate.c | 20 +++++-- 2 files changed, 67 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c index a59ee234037..ad6dab6c117 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_emit.c +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -71,10 +71,21 @@ i915_get_temp(struct i915_fp_compile *p) } p->temp_flag |= 1 << (bit - 1); - return UREG(REG_TYPE_R, (bit - 1)); + return bit - 1; } +static void +i915_release_temp(struct i915_fp_compile *p, int reg) +{ + p->temp_flag &= ~(1 << reg); +} + + +/** + * Get unpreserved temporary, a temp whose value is not preserved between + * PS program phases. + */ uint i915_get_utemp(struct i915_fp_compile * p) { @@ -183,41 +194,63 @@ i915_emit_arith(struct i915_fp_compile * p, return dest; } + +/** + * Emit a texture load or texkill instruction. + * \param dest the dest i915 register + * \param destmask the dest register writemask + * \param sampler the i915 sampler register + * \param coord the i915 source texcoord operand + * \param opcode the instruction opcode + */ uint i915_emit_texld( struct i915_fp_compile *p, uint dest, uint destmask, uint sampler, uint coord, - uint op ) + uint opcode ) { - uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + int temp = -1; + if (coord != k) { - /* No real way to work around this in the general case - need to - * allocate and declare a new temporary register (a utemp won't - * do). Will fallback for now. + /* texcoord is swizzled or negated. Need to allocate a new temporary + * register (a utemp / unpreserved temp) won't do. */ - i915_program_error(p, "Can't (yet) swizzle TEX arguments"); - assert(0); - return 0; + uint tempReg; + + temp = i915_get_temp(p); /* get temp reg index */ + printf("***** ALLOC TEMP %d\n", temp); + tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ + + i915_emit_arith( p, A0_MOV, + tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ + 0, /* saturate */ + coord, 0, 0 ); /* src0, src1, src2 */ + + /* new src texcoord is tempReg */ + coord = tempReg; } /* Don't worry about saturate as we only support */ if (destmask != A0_DEST_CHANNEL_ALL) { + /* if not writing to XYZW... */ uint tmp = i915_get_utemp(p); - i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op ); + i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); - return dest; + /* XXX release utemp here? */ } else { assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + /* is the sampler coord a texcoord input reg? */ if (GET_UREG_TYPE(coord) != REG_TYPE_T) { p->nr_tex_indirect++; } - *(p->csr++) = (op | + *(p->csr++) = (opcode | T0_DEST( dest ) | T0_SAMPLER( sampler )); @@ -225,8 +258,12 @@ uint i915_emit_texld( struct i915_fp_compile *p, *(p->csr++) = T2_MBZ; p->nr_tex_insn++; - return dest; } + + if (temp >= 0) + i915_release_temp(p, temp); + + return dest; } diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 50c9e654095..76a2184e9ab 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -575,8 +575,12 @@ i915_translate_instruction(struct i915_fp_compile *p, src0 = src_vector(p, &inst->FullSrcRegisters[0]); tmp = i915_get_utemp(p); - i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ - 0, src0, T0_TEXKILL); + i915_emit_texld(p, + tmp, /* dest reg: a dummy reg */ + A0_DEST_CHANNEL_ALL, /* dest writemask */ + 0, /* sampler */ + src0, /* coord*/ + T0_TEXKILL); /* opcode */ break; case TGSI_OPCODE_LG2: @@ -970,6 +974,16 @@ i915_translate_instructions(struct i915_fp_compile *p, ifs->num_constants = MAX2(ifs->num_constants, i + 1); } } + else if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_TEMPORARY) { + uint i; + for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + i++) { + assert(i < I915_MAX_TEMPORARY); + p->temp_flag |= (1 << i); /* mark temp as used */ + } + } break; case TGSI_TOKEN_TYPE_IMMEDIATE: @@ -1048,7 +1062,7 @@ i915_init_compile(struct i915_context *i915, p->decl = p->declarations; p->decl_s = 0; p->decl_t = 0; - p->temp_flag = 0xffff000; + p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; p->utemp_flag = ~0x7; p->wpos_tex = -1; -- cgit v1.2.3 From 7ed9beef5f08554f126c64aa172cd03fd810f1db Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 18:00:14 -0700 Subject: gallium/i915: remove debug code --- src/gallium/drivers/i915simple/i915_fpc_emit.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c index ad6dab6c117..4bdeefb449b 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_emit.c +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -220,7 +220,6 @@ uint i915_emit_texld( struct i915_fp_compile *p, uint tempReg; temp = i915_get_temp(p); /* get temp reg index */ - printf("***** ALLOC TEMP %d\n", temp); tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ i915_emit_arith( p, A0_MOV, -- cgit v1.2.3 From f43c44b5c9bbbd37e0d40488f911f81e5f3a0367 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 18:53:22 -0700 Subject: gallium/i915: Fix emit_hw_vertex(): need to use vinfo->src_index[] --- src/gallium/drivers/i915simple/i915_prim_emit.c | 30 +++++++++++++------------ 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index 6da42b3846c..d8de5178f60 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -75,37 +75,39 @@ emit_hw_vertex( struct i915_context *i915, assert(!i915->dirty); for (i = 0; i < vinfo->num_attribs; i++) { + const uint j = vinfo->src_index[i]; + const float *attrib = vertex->data[j]; switch (vinfo->emit[i]) { case EMIT_OMIT: /* no-op */ break; case EMIT_1F: - OUT_BATCH( fui(vertex->data[i][0]) ); + OUT_BATCH( fui(attrib[0]) ); count++; break; case EMIT_2F: - OUT_BATCH( fui(vertex->data[i][0]) ); - OUT_BATCH( fui(vertex->data[i][1]) ); + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); count += 2; break; case EMIT_3F: - OUT_BATCH( fui(vertex->data[i][0]) ); - OUT_BATCH( fui(vertex->data[i][1]) ); - OUT_BATCH( fui(vertex->data[i][2]) ); + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); count += 3; break; case EMIT_4F: - OUT_BATCH( fui(vertex->data[i][0]) ); - OUT_BATCH( fui(vertex->data[i][1]) ); - OUT_BATCH( fui(vertex->data[i][2]) ); - OUT_BATCH( fui(vertex->data[i][3]) ); + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); + OUT_BATCH( fui(attrib[3]) ); count += 4; break; case EMIT_4UB: - OUT_BATCH( pack_ub4(float_to_ubyte( vertex->data[i][2] ), - float_to_ubyte( vertex->data[i][1] ), - float_to_ubyte( vertex->data[i][0] ), - float_to_ubyte( vertex->data[i][3] )) ); + OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ), + float_to_ubyte( attrib[1] ), + float_to_ubyte( attrib[0] ), + float_to_ubyte( attrib[3] )) ); count += 1; break; default: -- cgit v1.2.3 From ba376b33140f722b9f935960f450bbca8873439e Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Mon, 25 Feb 2008 18:53:57 -0700 Subject: gallium/i915: plug in aapoint draw stage --- src/gallium/drivers/i915simple/i915_context.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 36f6429d463..c3955bbd2dd 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -298,10 +298,8 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, i915_init_string_functions(i915); i915_init_texture_functions(i915); - /* not working yet: - draw_install_aapoint_stage(i915->draw, &i915->pipe); - */ draw_install_aaline_stage(i915->draw, &i915->pipe); + draw_install_aapoint_stage(i915->draw, &i915->pipe); i915->pci_id = pci_id; i915->flags.is_i945 = is_i945; -- cgit v1.2.3 From f74279002a0ae0b106bd5410487ef9c0e9b1d8b6 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 26 Feb 2008 10:13:39 -0700 Subject: gallium: added tgsi_shader_field to sp_fragment_shader Use the shader semantic info from there, instead of from pipe_shader_state. Carry this idea to draw module and other drivers... --- src/gallium/drivers/softpipe/sp_fs_llvm.c | 2 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 18 +++++++++--------- src/gallium/drivers/softpipe/sp_quad.c | 2 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 4 ++-- src/gallium/drivers/softpipe/sp_state.h | 6 ++++-- src/gallium/drivers/softpipe/sp_state_derived.c | 9 +++++---- src/gallium/drivers/softpipe/sp_state_fs.c | 16 ++++++++++------ 7 files changed, 32 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 34b2b7d4e24..07d058155f2 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -96,7 +96,7 @@ shade_quad_llvm(struct quad_stage *qs, if (qss->colorOutSlot >= 0) { unsigned i; /* XXX need to handle multiple color outputs someday */ - allvmrt(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot] + allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] == TGSI_SEMANTIC_COLOR); for (i = 0; i < QUAD_SIZE; ++i) { quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0]; diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index b6a3fddb29c..17284539b00 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -514,7 +514,7 @@ setup_fragcoord_coeff(struct setup_stage *setup, uint slot) static void setup_tri_coefficients( struct setup_stage *setup ) { struct softpipe_context *softpipe = setup->softpipe; - const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; @@ -525,7 +525,7 @@ static void setup_tri_coefficients( struct setup_stage *setup ) /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { const uint vertSlot = vinfo->src_index[fragSlot]; uint j; @@ -549,7 +549,7 @@ static void setup_tri_coefficients( struct setup_stage *setup ) assert(0); } - if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; setup->coef[fragSlot].dadx[1] = 0.0; @@ -757,7 +757,7 @@ static INLINE void setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) { struct softpipe_context *softpipe = setup->softpipe; - const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; @@ -779,7 +779,7 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) /* setup interpolation for all the remaining attributes: */ - for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { const uint vertSlot = vinfo->src_index[fragSlot]; uint j; @@ -803,7 +803,7 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) assert(0); } - if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; setup->coef[fragSlot].dadx[1] = 0.0; @@ -967,7 +967,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); struct softpipe_context *softpipe = setup->softpipe; - const struct pipe_shader_state *fs = &softpipe->fs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_header *v0 = prim->v[0]; const int sizeAttr = setup->softpipe->psize_slot; const float size @@ -1002,7 +1002,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) const_coeff(setup, &setup->posCoef, 0, 2); const_coeff(setup, &setup->posCoef, 0, 3); - for (fragSlot = 0; fragSlot < fs->num_inputs; fragSlot++) { + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { const uint vertSlot = vinfo->src_index[fragSlot]; uint j; @@ -1025,7 +1025,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) assert(0); } - if (fs->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; setup->coef[fragSlot].dadx[1] = 0.0; diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 142dbcc7710..0aaf94021f8 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -61,7 +61,7 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && !sp->fs->uses_kill && - !sp->fs->writes_z; + !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 2f40e09d5c2..1794fb5a616 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -91,7 +91,7 @@ shade_quad( /* store result color */ if (qss->colorOutSlot >= 0) { /* XXX need to handle multiple color outputs someday */ - assert(qss->stage.softpipe->fs->shader.output_semantic_name[qss->colorOutSlot] + assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] == TGSI_SEMANTIC_COLOR); memcpy( quad->outputs.color, @@ -149,7 +149,7 @@ static void shade_begin(struct quad_stage *qs) qss->colorOutSlot = -1; qss->depthOutSlot = -1; for (i = 0; i < qss->stage.softpipe->fs->shader.num_outputs; i++) { - switch (qss->stage.softpipe->fs->shader.output_semantic_name[i]) { + switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: qss->depthOutSlot = i; break; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 5aaa9e346bc..b1070e185a2 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -32,6 +32,7 @@ #define SP_STATE_H #include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" #define SP_NEW_VIEWPORT 0x1 @@ -61,10 +62,11 @@ struct tgsi_exec_machine; * This is starting to look an awful lot like a quad pipeline stage... */ struct sp_fragment_shader { - struct pipe_shader_state shader; + struct pipe_shader_state shader; + + struct tgsi_shader_info info; boolean uses_kill; - boolean writes_z; void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 4c6313001f4..68702eafcf3 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -62,6 +62,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ const struct pipe_shader_state *vs = &softpipe->vs->shader; + const struct sp_fragment_shader *spfs = softpipe->fs; const struct pipe_shader_state *fs = &softpipe->fs->shader; const enum interp_mode colorInterp = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; @@ -83,9 +84,9 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * from the vertex shader. */ vinfo->num_attribs = 0; - for (i = 0; i < fs->num_inputs; i++) { + for (i = 0; i < spfs->info.num_inputs; i++) { int src; - switch (fs->input_semantic_name[i]) { + switch (spfs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_POSITION, 0); @@ -94,7 +95,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) case TGSI_SEMANTIC_COLOR: src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR, - fs->input_semantic_index[i]); + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; @@ -106,7 +107,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, - fs->input_semantic_index[i]); + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index b184ac61bb9..2715dca0f09 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -45,13 +45,12 @@ softpipe_create_fs_state(struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state; - struct tgsi_shader_info info; - - tgsi_scan_shader(templ->tokens, &info); + /* debug */ if (softpipe->dump_fs) tgsi_dump(templ->tokens, 0); + /* codegen */ state = softpipe_create_fs_llvm( softpipe, templ ); if (!state) { state = softpipe_create_fs_sse( softpipe, templ ); @@ -59,10 +58,15 @@ softpipe_create_fs_state(struct pipe_context *pipe, state = softpipe_create_fs_exec( softpipe, templ ); } } + assert(state); - state->uses_kill = (info.opcode_count[TGSI_OPCODE_KIL] || - info.opcode_count[TGSI_OPCODE_KILP]); - state->writes_z = info.writes_z; + + /* get/save the summary info for this shader */ + tgsi_scan_shader(templ->tokens, &state->info); + + /* convenience field */ + state->uses_kill = (state->info.opcode_count[TGSI_OPCODE_KIL] || + state->info.opcode_count[TGSI_OPCODE_KILP]); return state; } -- cgit v1.2.3 From 33d213b6776701ec16b5c02b111ed31de5e93f43 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Tue, 26 Feb 2008 10:44:44 -0700 Subject: gallium: remove unused var --- src/gallium/drivers/softpipe/sp_state_derived.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 68702eafcf3..aa6e3291167 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -63,7 +63,6 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* compute vertex layout now */ const struct pipe_shader_state *vs = &softpipe->vs->shader; const struct sp_fragment_shader *spfs = softpipe->fs; - const struct pipe_shader_state *fs = &softpipe->fs->shader; const enum interp_mode colorInterp = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; uint i; -- cgit v1.2.3 From 4901410293b35ac6bb4759142b50fcc0be8a1b25 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Tue, 26 Feb 2008 10:47:42 -0700 Subject: gallium/i915: Use tgsi_scan_shader() to collect shader info No longer use semantic info in pipe_shader_state. Also, remove redundant semantic info from i915_fp_compile struct. --- src/gallium/drivers/i915simple/i915_context.h | 5 ++++ src/gallium/drivers/i915simple/i915_fpc.h | 6 ---- .../drivers/i915simple/i915_fpc_translate.c | 33 ++++------------------ src/gallium/drivers/i915simple/i915_state.c | 2 ++ .../drivers/i915simple/i915_state_derived.c | 12 ++++---- 5 files changed, 19 insertions(+), 39 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index d32dded6bdc..20cf7d3c3bf 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -35,6 +35,8 @@ #include "draw/draw_vertex.h" +#include "tgsi/util/tgsi_scan.h" + #define I915_TEX_UNITS 8 @@ -89,6 +91,9 @@ struct i915_fragment_shader { struct pipe_shader_state state; + + struct tgsi_shader_info info; + uint *program; uint program_len; diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h index 250dfe6dbf0..80a9576304c 100644 --- a/src/gallium/drivers/i915simple/i915_fpc.h +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -58,12 +58,6 @@ struct i915_fp_compile { uint declarations[I915_PROGRAM_SIZE]; uint program[I915_PROGRAM_SIZE]; - uint input_semantic_name[PIPE_MAX_SHADER_INPUTS]; - uint input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - - uint output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; - uint output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - uint *csr; /**< Cursor, points into program. */ uint *decl; /**< Cursor, points into declarations. */ diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 76a2184e9ab..c2d9a93c6c8 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -162,8 +162,8 @@ src_vector(struct i915_fp_compile *p, * We also use a texture coordinate to pass wpos when possible. */ - sem_name = p->input_semantic_name[index]; - sem_ind = p->input_semantic_index[index]; + sem_name = p->shader->info.input_semantic_name[index]; + sem_ind = p->shader->info.input_semantic_index[index]; switch (sem_name) { case TGSI_SEMANTIC_POSITION: @@ -265,7 +265,7 @@ get_result_vector(struct i915_fp_compile *p, switch (dest->DstRegister.File) { case TGSI_FILE_OUTPUT: { - uint sem_name = p->output_semantic_name[dest->DstRegister.Index]; + uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; switch (sem_name) { case TGSI_SEMANTIC_POSITION: return UREG(REG_TYPE_OD, 0); @@ -942,28 +942,6 @@ i915_translate_instructions(struct i915_fp_compile *p, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_INPUT) { - /* save input register info for use in src_vector() */ - uint ind, sem, semi; - ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - sem = parse.FullToken.FullDeclaration.Semantic.SemanticName; - semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - /*debug_printf("FS Input DECL [%u] sem %u\n", ind, sem);*/ - p->input_semantic_name[ind] = sem; - p->input_semantic_index[ind] = semi; - } - else if (parse.FullToken.FullDeclaration.Declaration.File - == TGSI_FILE_OUTPUT) { - /* save output register info for use in get_result_vector() */ - uint ind, sem, semi; - ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - sem = parse.FullToken.FullDeclaration.Semantic.SemanticName; - semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - /*debug_printf("FS Output DECL [%u] sem %u\n", ind, sem);*/ - p->output_semantic_name[ind] = sem; - p->output_semantic_index[ind] = semi; - } - else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) { uint i; for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; @@ -981,6 +959,7 @@ i915_translate_instructions(struct i915_fp_compile *p, i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; i++) { assert(i < I915_MAX_TEMPORARY); + /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ p->temp_flag |= (1 << i); /* mark temp as used */ } } @@ -1163,7 +1142,7 @@ i915_find_wpos_space(struct i915_fp_compile *p) i915_program_error(p, "No free texcoord for wpos value"); } #else - if (p->shader->state.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { /* frag shader using the fragment position input */ #if 0 assert(0); @@ -1184,7 +1163,7 @@ static void i915_fixup_depth_write(struct i915_fp_compile *p) { /* XXX assuming pos/depth is always in output[0] */ - if (p->shader->state.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { const uint depth = UREG(REG_TYPE_OD, 0); i915_emit_arith(p, diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index a35bdf941fc..9df0e125406 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -429,6 +429,8 @@ i915_create_fs_state(struct pipe_context *pipe, ifs->state = *templ; + tgsi_scan_shader(templ->tokens, &ifs->info); + /* The shader's compiled to i915 instructions here */ i915_translate_fragment_program(i915, ifs); diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 5cf70acdf3b..4daccec6e0c 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -43,7 +43,7 @@ */ static void calculate_vertex_layout( struct i915_context *i915 ) { - const struct pipe_shader_state *fs = &i915->fs->state; + const struct i915_fragment_shader *fs = i915->fs; const enum interp_mode colorInterp = i915->rasterizer->color_interp; struct vertex_info vinfo; boolean texCoords[8], colors[2], fog, needW; @@ -57,18 +57,18 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* Determine which fragment program inputs are needed. Setup HW vertex * layout below, in the HW-specific attribute order. */ - for (i = 0; i < fs->num_inputs; i++) { - switch (fs->input_semantic_name[i]) { + for (i = 0; i < fs->info.num_inputs; i++) { + switch (fs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: break; case TGSI_SEMANTIC_COLOR: - assert(fs->input_semantic_index[i] < 2); - colors[fs->input_semantic_index[i]] = TRUE; + assert(fs->info.input_semantic_index[i] < 2); + colors[fs->info.input_semantic_index[i]] = TRUE; break; case TGSI_SEMANTIC_GENERIC: /* usually a texcoord */ { - const uint unit = fs->input_semantic_index[i]; + const uint unit = fs->info.input_semantic_index[i]; assert(unit < 8); texCoords[unit] = TRUE; needW = TRUE; -- cgit v1.2.3 From 75dac3959f959f0227b4e172696ac43910f9257a Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Tue, 26 Feb 2008 11:18:51 -0800 Subject: cell: Multiple rendering contexts don't work yet Log a message and forcibly exit. This prevents silly fools from thinking there's a bug...instead of just an unimplemented feature. :) --- src/gallium/drivers/cell/ppu/cell_spu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 419e74dc402..973c0b1aa12 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -97,8 +97,18 @@ static void *cell_thread_function(void *arg) void cell_start_spus(struct cell_context *cell) { + static boolean one_time_init = FALSE; uint i, j; + + if (one_time_init) { + fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " + "on Cell.\n"); + abort(); + } + + one_time_init = TRUE; + assert(cell->num_spus <= MAX_SPUS); ASSERT_ALIGN16(&cell_global.command[0]); -- cgit v1.2.3 From 5e29aab1752c3e07ae2ebde4cb00e6550dab0eb2 Mon Sep 17 00:00:00 2001 From: Brian <brian@poulsbo.localnet.net> Date: Tue, 26 Feb 2008 14:29:35 -0700 Subject: gallium: replace draw_convert_wide_points() with draw_wide_point_threshold() Specifying a threshold size is a bit more flexible, and allows the option of converting even 1-pixel points to triangles (set threshold=0). Also, remove 0.25 pixel bias in wide_point(). --- src/gallium/auxiliary/draw/draw_context.c | 10 +++++----- src/gallium/auxiliary/draw/draw_private.h | 2 +- src/gallium/auxiliary/draw/draw_validate.c | 23 +++++++++++++---------- src/gallium/auxiliary/draw/draw_wide_prims.c | 7 ++++--- src/gallium/drivers/softpipe/sp_context.c | 3 --- 5 files changed, 23 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 7dd1c6f6faa..48f00946eae 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -80,7 +80,7 @@ struct draw_context *draw_create( void ) draw->shader_queue_flush = draw_vertex_shader_queue_flush; - draw->convert_wide_points = TRUE; + draw->wide_point_threshold = 1000000.0; /* infinity */ draw->convert_wide_lines = TRUE; draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ @@ -220,14 +220,14 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, /** - * Tells the draw module whether to convert wide points (size != 1) - * into triangles. + * Tells the draw module to draw points with triangles if their size + * is greater than this threshold. */ void -draw_convert_wide_points(struct draw_context *draw, boolean enable) +draw_wide_point_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->convert_wide_points = enable; + draw->wide_point_threshold = threshold; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 6abced139ba..03e3d3a66b6 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -215,7 +215,7 @@ struct draw_context float plane[12][4]; unsigned nr_planes; - boolean convert_wide_points; /**< convert wide points to tris? */ + float wide_point_threshold; /**< convert pnts to tris if larger than this */ boolean convert_wide_lines; /**< convert wide lines to tris? */ boolean use_sse; diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 3a19dd4cd78..ded7d10c081 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -52,6 +52,19 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) */ stage->next = next; + /* drawing wide lines? */ + wide_lines = (draw->rasterizer->line_width != 1.0 + && draw->convert_wide_lines + && !draw->rasterizer->line_smooth); + + /* drawing large points? */ + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + wide_points = FALSE; + else if (draw->rasterizer->point_size > draw->wide_point_threshold) + wide_points = TRUE; + else + wide_points = FALSE; + /* * NOTE: we build up the pipeline in end-to-start order. * @@ -69,16 +82,6 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.aapoint; } - /* drawing wide lines? */ - wide_lines = (draw->rasterizer->line_width != 1.0 - && draw->convert_wide_lines - && !draw->rasterizer->line_smooth); - - /* drawing large points? */ - wide_points = (draw->rasterizer->point_size != 1.0 - && draw->convert_wide_points - && !draw->pipeline.aapoint); - if (wide_lines || wide_points || draw->rasterizer->point_sprite) { diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c index 1f8069bdcaa..d967810dd49 100644 --- a/src/gallium/auxiliary/draw/draw_wide_prims.c +++ b/src/gallium/auxiliary/draw/draw_wide_prims.c @@ -219,8 +219,8 @@ static void wide_point( struct draw_stage *stage, half_size = wide->half_point_size; } - left_adj = -half_size + 0.25f; - right_adj = half_size + 0.25f; + left_adj = -half_size; /* + 0.25f;*/ + right_adj = half_size; /* + 0.25f;*/ pos0[0] += left_adj; pos0[1] -= half_size; @@ -266,7 +266,8 @@ static void wide_first_point( struct draw_stage *stage, wide->half_point_size = 0.5f * draw->rasterizer->point_size; - if (draw->rasterizer->point_size != 1.0) { + /* XXX we won't know the real size if it's computed by the vertex shader! */ + if (draw->rasterizer->point_size > draw->wide_point_threshold) { stage->point = wide_point; } else { diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 2cdf3c75bf3..6a884327e00 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -337,9 +337,6 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); #endif - /* sp_prim_setup can do wide points (don't convert to quads) */ - draw_convert_wide_points(softpipe->draw, FALSE); - sp_init_surface_functions(softpipe); return &softpipe->pipe; -- cgit v1.2.3 From 4da19dbcaa9f3e2d20fffd0145bf0bc756dd7542 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Tue, 26 Feb 2008 19:31:22 -0700 Subject: gallium: remove pipe parameter from pipe_texture_reference() Added pipe field to pipe_texture (temporary, see comments). First step toward context-less texture creation... --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 3 +-- src/gallium/drivers/i915simple/i915_state.c | 3 +-- src/gallium/drivers/i915simple/i915_texture.c | 1 + src/gallium/drivers/i965simple/brw_state.c | 3 +-- src/gallium/drivers/softpipe/sp_state_sampler.c | 2 +- src/gallium/drivers/softpipe/sp_texture.c | 1 + src/gallium/drivers/softpipe/sp_tile_cache.c | 2 +- src/gallium/include/pipe/p_inlines.h | 14 +++++++++++++- src/gallium/include/pipe/p_state.h | 5 +++++ src/mesa/state_tracker/st_cb_texture.c | 6 +++--- 10 files changed, 28 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 95bfc29fbeb..075e0a0c471 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -242,8 +242,7 @@ cell_set_sampler_texture(struct pipe_context *pipe, draw_flush(cell->draw); - pipe_texture_reference(pipe, - (struct pipe_texture **) &cell->texture[sampler], + pipe_texture_reference((struct pipe_texture **) &cell->texture[sampler], texture); cell_update_texture_mapping(cell); diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 9df0e125406..27af46bea03 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -532,8 +532,7 @@ static void i915_set_sampler_texture(struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); - pipe_texture_reference(pipe, - (struct pipe_texture **) &i915->texture[sampler], + pipe_texture_reference((struct pipe_texture **) &i915->texture[sampler], texture); i915->dirty |= I915_NEW_TEXTURE; diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 1b415a94d40..7fcf4332e14 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -489,6 +489,7 @@ i915_texture_create(struct pipe_context *pipe, tex->base = *templat; tex->base.refcount = 1; + tex->base.pipe = pipe; if (i915->flags.is_i945 ? i945_miptree_layout(pipe, tex) : i915_miptree_layout(pipe, tex)) diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 2fc048bde00..7466fdc403d 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -328,8 +328,7 @@ static void brw_set_sampler_texture(struct pipe_context *pipe, { struct brw_context *brw = brw_context(pipe); - pipe_texture_reference(pipe, - (struct pipe_texture **) &brw->attribs.Texture[unit], + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[unit], texture); brw->state.dirty.brw |= BRW_NEW_TEXTURE; diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 18669a1c6ef..1d6dd17d1d9 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -83,7 +83,7 @@ softpipe_set_sampler_texture(struct pipe_context *pipe, draw_flush(softpipe->draw); assert(unit < PIPE_MAX_SAMPLERS); - pipe_texture_reference(pipe, &softpipe->texture[unit], texture); + pipe_texture_reference(&softpipe->texture[unit], texture); sp_tile_cache_set_texture(pipe, softpipe->tex_cache[unit], texture); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 6ba0f09e0a1..a96447fa7a1 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -90,6 +90,7 @@ softpipe_texture_create(struct pipe_context *pipe, spt->base = *templat; spt->base.refcount = 1; + spt->base.pipe = pipe; softpipe_texture_layout(spt); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index da30dd6c484..0ff93c55277 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -220,7 +220,7 @@ sp_tile_cache_set_texture(struct pipe_context *pipe, assert(!tc->surface); - pipe_texture_reference(pipe, &tc->texture, texture); + pipe_texture_reference(&tc->texture, texture); if (tc->tex_surf_map) { pipe_surface_unmap(tc->tex_surf); diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index de3fa555c5c..21d4827e675 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -97,7 +97,7 @@ pipe_buffer_reference(struct pipe_winsys *winsys, * \sa pipe_surface_reference */ static INLINE void -pipe_texture_reference(struct pipe_context *pipe, struct pipe_texture **ptr, +pipe_texture_reference(struct pipe_texture **ptr, struct pipe_texture *pt) { assert(ptr); @@ -106,6 +106,7 @@ pipe_texture_reference(struct pipe_context *pipe, struct pipe_texture **ptr, pt->refcount++; if (*ptr) { + struct pipe_context *pipe = (*ptr)->pipe; pipe->texture_release(pipe, ptr); assert(!*ptr); } @@ -114,6 +115,17 @@ pipe_texture_reference(struct pipe_context *pipe, struct pipe_texture **ptr, } +static INLINE void +pipe_texture_release(struct pipe_texture **ptr) +{ + struct pipe_context *pipe; + assert(ptr); + pipe = (*ptr)->pipe; + pipe->texture_release(pipe, ptr); + *ptr = NULL; +} + + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index ddf3c1c79bf..25a6fcc9e63 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -296,6 +296,11 @@ struct pipe_texture /* These are also refcounted: */ unsigned refcount; + + /**< pipe that created the texture + * XXX this'll change to a pipe_winsys (or pipe_screen)... + */ + struct pipe_context *pipe; }; diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 778fb536bc7..f5f956f6ead 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -573,7 +573,7 @@ st_TexImage(GLcontext * ctx, st_texture_match_image(stObj->pt, &stImage->base, stImage->face, stImage->level)) { - pipe_texture_reference(ctx->st->pipe, &stImage->pt, stObj->pt); + pipe_texture_reference(&stImage->pt, stObj->pt); assert(stImage->pt); } @@ -1371,7 +1371,7 @@ copy_image_data_to_texture(struct st_context *st, stImage->base.Data = NULL; } - pipe_texture_reference(st->pipe, &stImage->pt, stObj->pt); + pipe_texture_reference(&stImage->pt, stObj->pt); } @@ -1426,7 +1426,7 @@ st_finalize_texture(GLcontext *ctx, if (stObj->pt) ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); - pipe_texture_reference(ctx->st->pipe, &stObj->pt, firstImage->pt); + pipe_texture_reference(&stObj->pt, firstImage->pt); } if (firstImage->base.IsCompressed) { -- cgit v1.2.3 From aa59a937ccf41609081d3f9a4973df5478979785 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Tue, 26 Feb 2008 20:15:14 -0700 Subject: gallium: introduce 'pipe_screen' for context-independent functions This will allow creating textures before a rendering context exists, for example. Only implemented in i915 driver for now. i915pipe->texture_create() just dispatches through to the i915screen->texture_create() to avoid state tracker changes for now. --- src/gallium/drivers/i915simple/Makefile | 1 + src/gallium/drivers/i915simple/i915_context.c | 32 +----- src/gallium/drivers/i915simple/i915_context.h | 7 +- src/gallium/drivers/i915simple/i915_screen.c | 139 +++++++++++++++++++++++ src/gallium/drivers/i915simple/i915_screen.h | 60 ++++++++++ src/gallium/drivers/i915simple/i915_strings.c | 27 ++++- src/gallium/drivers/i915simple/i915_texture.c | 76 +++++++++---- src/gallium/drivers/i915simple/i915_texture.h | 7 +- src/gallium/drivers/i915simple/i915_winsys.h | 8 +- src/gallium/include/pipe/p_context.h | 3 + src/gallium/include/pipe/p_inlines.h | 11 +- src/gallium/include/pipe/p_screen.h | 98 ++++++++++++++++ src/gallium/include/pipe/p_state.h | 2 + src/gallium/winsys/dri/intel/intel_winsys_i915.c | 10 +- 14 files changed, 418 insertions(+), 63 deletions(-) create mode 100644 src/gallium/drivers/i915simple/i915_screen.c create mode 100644 src/gallium/drivers/i915simple/i915_screen.h create mode 100644 src/gallium/include/pipe/p_screen.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile index 2a75f5d57ce..3400747a730 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915simple/Makefile @@ -17,6 +17,7 @@ C_SOURCES = \ i915_state_derived.c \ i915_state_emit.c \ i915_state_sampler.c \ + i915_screen.c \ i915_strings.c \ i915_prim_emit.c \ i915_prim_vbuf.c \ diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index c3955bbd2dd..8478cd76a54 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -234,33 +234,11 @@ static boolean i915_draw_arrays( struct pipe_context *pipe, -struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, - struct i915_winsys *i915_winsys, - unsigned pci_id ) +struct pipe_context *i915_create_context( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + struct i915_winsys *i915_winsys ) { struct i915_context *i915; - unsigned is_i945 = 0; - - switch (pci_id) { - case PCI_CHIP_I915_G: - case PCI_CHIP_I915_GM: - break; - - case PCI_CHIP_I945_G: - case PCI_CHIP_I945_GM: - case PCI_CHIP_I945_GME: - case PCI_CHIP_G33_G: - case PCI_CHIP_Q33_G: - case PCI_CHIP_Q35_G: - is_i945 = 1; - break; - - default: - pipe_winsys->printf(pipe_winsys, - "%s: unknown pci id 0x%x, cannot create context\n", - __FUNCTION__, pci_id); - return NULL; - } i915 = CALLOC_STRUCT(i915_context); if (i915 == NULL) @@ -268,6 +246,7 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, i915->winsys = i915_winsys; i915->pipe.winsys = pipe_winsys; + i915->pipe.screen = screen; i915->pipe.destroy = i915_destroy; i915->pipe.is_format_supported = i915_is_format_supported; @@ -301,9 +280,6 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys, draw_install_aaline_stage(i915->draw, &i915->pipe); draw_install_aapoint_stage(i915->draw, &i915->pipe); - i915->pci_id = pci_id; - i915->flags.is_i945 = is_i945; - i915->dirty = ~0; i915->hardware_dirty = ~0; diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 20cf7d3c3bf..9fb85c122d1 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -245,11 +245,6 @@ struct i915_context unsigned hardware_dirty; unsigned debug; - unsigned pci_id; - - struct { - unsigned is_i945:1; - } flags; }; /* A flag for each state_tracker state object: @@ -322,6 +317,8 @@ void i915_init_surface_functions( struct i915_context *i915 ); void i915_init_state_functions( struct i915_context *i915 ); void i915_init_flush_functions( struct i915_context *i915 ); void i915_init_string_functions( struct i915_context *i915 ); +void i915_init_screen_string_functions(struct pipe_screen *screen); + diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c new file mode 100644 index 00000000000..7e9d971d384 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -0,0 +1,139 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "i915_reg.h" +#include "i915_screen.h" +#include "i915_texture.h" + + +static const char * +i915_get_vendor( struct pipe_screen *pscreen ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +i915_get_name( struct pipe_screen *pscreen ) +{ + static char buffer[128]; + const char *chipset; + + switch (i915_screen(pscreen)->pci_id) { + case PCI_CHIP_I915_G: + chipset = "915G"; + break; + case PCI_CHIP_I915_GM: + chipset = "915GM"; + break; + case PCI_CHIP_I945_G: + chipset = "945G"; + break; + case PCI_CHIP_I945_GM: + chipset = "945GM"; + break; + case PCI_CHIP_I945_GME: + chipset = "945GME"; + break; + case PCI_CHIP_G33_G: + chipset = "G33"; + break; + case PCI_CHIP_Q35_G: + chipset = "Q35"; + break; + case PCI_CHIP_Q33_G: + chipset = "Q33"; + break; + default: + chipset = "unknown"; + break; + } + + sprintf(buffer, "i915 (chipset: %s)", chipset); + return buffer; +} + + + +static void +i915_destroy_screen( struct pipe_screen *screen ) +{ + FREE(screen); +} + + +/** + * Create a new i915_screen object + */ +struct pipe_screen * +i915_create_screen(struct pipe_winsys *winsys, uint pci_id) +{ + struct i915_screen *i915screen = CALLOC_STRUCT(i915_screen); + + if (!i915screen) + return NULL; + + switch (pci_id) { + case PCI_CHIP_I915_G: + case PCI_CHIP_I915_GM: + i915screen->is_i945 = FALSE; + break; + + case PCI_CHIP_I945_G: + case PCI_CHIP_I945_GM: + case PCI_CHIP_I945_GME: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q33_G: + case PCI_CHIP_Q35_G: + i915screen->is_i945 = TRUE; + break; + + default: + winsys->printf(winsys, + "%s: unknown pci id 0x%x, cannot create screen\n", + __FUNCTION__, pci_id); + return NULL; + } + + i915screen->pci_id = pci_id; + + i915screen->screen.winsys = winsys; + + i915screen->screen.destroy = i915_destroy_screen; + + i915screen->screen.get_name = i915_get_name; + i915screen->screen.get_vendor = i915_get_vendor; + + i915_init_screen_string_functions(&i915screen->screen); + i915_init_screen_texture_functions(&i915screen->screen); + + return &i915screen->screen; +} diff --git a/src/gallium/drivers/i915simple/i915_screen.h b/src/gallium/drivers/i915simple/i915_screen.h new file mode 100644 index 00000000000..8394ddbe896 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_screen.h @@ -0,0 +1,60 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_SCREEN_H +#define I915_SCREEN_H + + +#include "pipe/p_screen.h" + + +/** + * Subclass of pipe_screen + */ +struct i915_screen +{ + struct pipe_screen screen; + + boolean is_i945; + uint pci_id; +}; + + +/** cast wrapper */ +static INLINE struct i915_screen * +i915_screen(struct pipe_screen *pscreen) +{ + return (struct i915_screen *) pscreen; +} + + +extern struct pipe_screen * +i915_create_screen(struct pipe_winsys *winsys, uint pci_id); + + +#endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915simple/i915_strings.c b/src/gallium/drivers/i915simple/i915_strings.c index 301fedea197..ee62bb2e5de 100644 --- a/src/gallium/drivers/i915simple/i915_strings.c +++ b/src/gallium/drivers/i915simple/i915_strings.c @@ -26,21 +26,31 @@ **************************************************************************/ #include "i915_context.h" +#include "i915_screen.h" #include "i915_reg.h" +/** XXX temporary screen/pipe duplication here */ + + +static const char *i915_get_vendor_screen( struct pipe_screen *screen ) +{ + return "Tungsten Graphics, Inc."; +} + static const char *i915_get_vendor( struct pipe_context *pipe ) { return "Tungsten Graphics, Inc."; } -static const char *i915_get_name( struct pipe_context *pipe ) +static const char *i915_get_name_screen( struct pipe_screen *screen ) { + struct i915_screen *i915screen = i915_screen(screen); static char buffer[128]; const char *chipset; - switch (i915_context(pipe)->pci_id) { + switch (i915screen->pci_id) { case PCI_CHIP_I915_G: chipset = "915G"; break; @@ -75,9 +85,22 @@ static const char *i915_get_name( struct pipe_context *pipe ) } +static const char *i915_get_name( struct pipe_context *pipe ) +{ + return pipe->screen->get_name(pipe->screen); +} + + void i915_init_string_functions(struct i915_context *i915) { i915->pipe.get_name = i915_get_name; i915->pipe.get_vendor = i915_get_vendor; } + +void +i915_init_screen_string_functions(struct pipe_screen *screen) +{ + screen->get_name = i915_get_name_screen; + screen->get_vendor = i915_get_vendor_screen; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 7fcf4332e14..3c9509dee34 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -40,6 +40,7 @@ #include "i915_context.h" #include "i915_texture.h" #include "i915_debug.h" +#include "i915_screen.h" static unsigned minify( unsigned d ) @@ -187,7 +188,7 @@ static const int step_offsets[6][2] = { static boolean -i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) +i915_miptree_layout(struct i915_texture * tex) { struct pipe_texture *pt = &tex->base; unsigned level; @@ -311,7 +312,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) static boolean -i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) +i945_miptree_layout(struct i915_texture * tex) { struct pipe_texture *pt = &tex->base; unsigned level; @@ -479,24 +480,26 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) static struct pipe_texture * -i915_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) +i915_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) { struct i915_texture *tex = CALLOC_STRUCT(i915_texture); if (tex) { - struct i915_context *i915 = i915_context(pipe); + struct i915_screen *i915screen = i915_screen(screen); + struct pipe_winsys *ws = screen->winsys; tex->base = *templat; tex->base.refcount = 1; - tex->base.pipe = pipe; + tex->base.pipe = NULL; + tex->base.screen = screen; - if (i915->flags.is_i945 ? i945_miptree_layout(pipe, tex) : - i915_miptree_layout(pipe, tex)) - tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); + if (i915screen->is_i945 ? i945_miptree_layout(tex) : + i915_miptree_layout(tex)) + tex->buffer = ws->buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); if (!tex->buffer) { FREE(tex); @@ -508,8 +511,17 @@ i915_texture_create(struct pipe_context *pipe, } +static struct pipe_texture * +i915_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat) +{ + return pipe->screen->texture_create(pipe->screen, templat); +} + + static void -i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +i915_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -526,7 +538,7 @@ i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - pipe_buffer_reference(pipe->winsys, &tex->buffer, NULL); + pipe_buffer_reference(screen->winsys, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -538,6 +550,13 @@ i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } +static void +i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + i915_texture_release_screen(pipe->screen, pt); +} + + static void i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) { @@ -549,11 +568,12 @@ i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) * XXX note: same as code in sp_surface.c */ static struct pipe_surface * -i915_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) +i915_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) { struct i915_texture *tex = (struct i915_texture *)pt; + struct pipe_winsys *ws = screen->winsys; struct pipe_surface *ps; unsigned offset; /* in bytes */ @@ -570,11 +590,11 @@ i915_get_tex_surface(struct pipe_context *pipe, assert(zslice == 0); } - ps = pipe->winsys->surface_alloc(pipe->winsys); + ps = ws->surface_alloc(ws); if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); + pipe_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; ps->cpp = pt->cpp; ps->width = pt->width[level]; @@ -586,6 +606,14 @@ i915_get_tex_surface(struct pipe_context *pipe, } +static struct pipe_surface * +i915_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + return i915_get_tex_surface_screen(pipe->screen, pt, face, level, zslice); +} + void i915_init_texture_functions(struct i915_context *i915) @@ -595,3 +623,13 @@ i915_init_texture_functions(struct i915_context *i915) i915->pipe.texture_update = i915_texture_update; i915->pipe.get_tex_surface = i915_get_tex_surface; } + + + +void +i915_init_screen_texture_functions(struct pipe_screen *screen) +{ + screen->texture_create = i915_texture_create_screen; + screen->texture_release = i915_texture_release_screen; + screen->get_tex_surface = i915_get_tex_surface_screen; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h index 6d8d41178f7..7225016a9f4 100644 --- a/src/gallium/drivers/i915simple/i915_texture.h +++ b/src/gallium/drivers/i915simple/i915_texture.h @@ -28,11 +28,16 @@ #ifndef I915_TEXTURE_H #define I915_TEXTURE_H -struct pipe_context; +struct i915_context; +struct pipe_screen; extern void i915_init_texture_functions(struct i915_context *i915); +extern void +i915_init_screen_texture_functions(struct pipe_screen *screen); + + #endif /* I915_TEXTURE_H */ diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h index fe49710852b..e6b0ac9c52e 100644 --- a/src/gallium/drivers/i915simple/i915_winsys.h +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -52,6 +52,7 @@ struct pipe_buffer; struct pipe_winsys; +struct pipe_screen; /** @@ -107,9 +108,8 @@ struct i915_winsys { #define I915_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) -struct pipe_context *i915_create( struct pipe_winsys *, - struct i915_winsys *, - unsigned pci_id ); - +struct pipe_context *i915_create_context( struct pipe_screen *, + struct pipe_winsys *, + struct i915_winsys * ); #endif diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index f69b52f5e3f..93fcb1c3e96 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -36,6 +36,8 @@ extern "C" { #endif +struct pipe_screen; + struct pipe_state_cache; /* Opaque driver handles: @@ -51,6 +53,7 @@ struct pipe_query; */ struct pipe_context { struct pipe_winsys *winsys; + struct pipe_screen *screen; void *priv; /** context private data (for DRI for example) */ diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 21d4827e675..a7e97fcd7db 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -30,6 +30,7 @@ #include "p_context.h" #include "p_defines.h" +#include "p_screen.h" #include "p_winsys.h" @@ -107,7 +108,15 @@ pipe_texture_reference(struct pipe_texture **ptr, if (*ptr) { struct pipe_context *pipe = (*ptr)->pipe; - pipe->texture_release(pipe, ptr); + /* XXX temporary mess here */ + if (pipe) { + pipe->texture_release(pipe, ptr); + } + else { + struct pipe_screen *screen = (*ptr)->screen; + screen->texture_release(screen, ptr); + } + assert(!*ptr); } diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h new file mode 100644 index 00000000000..6be9a82b68d --- /dev/null +++ b/src/gallium/include/pipe/p_screen.h @@ -0,0 +1,98 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Screen, Adapter or GPU + * + * These are driver functions/facilities that are context independent. + */ + + +#ifndef P_SCREEN_H +#define P_SCREEN_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + + + +#ifdef __cplusplus +extern "C" { +#endif + + + +/** + * Gallium screen/adapter context. Basically everything + * hardware-specific that doesn't actually require a rendering + * context. + */ +struct pipe_screen { + struct pipe_winsys *winsys; + + void (*destroy)( struct pipe_screen * ); + + + /* + * Capability queries + */ + const char *(*get_name)( struct pipe_screen * ); + + const char *(*get_vendor)( struct pipe_screen * ); + + int (*get_param)( struct pipe_screen *, int param ); + + float (*get_paramf)( struct pipe_screen *, int param ); + + boolean (*is_format_supported)( struct pipe_screen *, + enum pipe_format format, + uint type ); + + + /* + * Texture functions + */ + struct pipe_texture * (*texture_create)(struct pipe_screen *, + const struct pipe_texture *templat); + + void (*texture_release)(struct pipe_screen *, + struct pipe_texture **pt); + + /** Get a surface which is a "view" into a texture */ + struct pipe_surface *(*get_tex_surface)(struct pipe_screen *, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice); +}; + + +#ifdef __cplusplus +} +#endif + +#endif /* P_SCREEN_H */ diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 25a6fcc9e63..bb4a6cb23ed 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -63,6 +63,7 @@ extern "C" { /* fwd decls */ +struct pipe_screen; struct pipe_surface; struct pipe_winsys; @@ -301,6 +302,7 @@ struct pipe_texture * XXX this'll change to a pipe_winsys (or pipe_screen)... */ struct pipe_context *pipe; + struct pipe_screen *screen; }; diff --git a/src/gallium/winsys/dri/intel/intel_winsys_i915.c b/src/gallium/winsys/dri/intel/intel_winsys_i915.c index 0ed3890e936..2def1afc31e 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_i915.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_i915.c @@ -40,6 +40,7 @@ #include "pipe/p_util.h" #include "i915simple/i915_winsys.h" +#include "i915simple/i915_screen.h" struct intel_i915_winsys { @@ -135,6 +136,7 @@ intel_create_i915simple( struct intel_context *intel, struct pipe_winsys *winsys ) { struct intel_i915_winsys *iws = CALLOC_STRUCT( intel_i915_winsys ); + struct pipe_screen *screen; /* Fill in this struct with callbacks that i915simple will need to * communicate with the window system, buffer manager, etc. @@ -146,9 +148,11 @@ intel_create_i915simple( struct intel_context *intel, iws->winsys.batch_finish = intel_i915_batch_finish; iws->intel = intel; + screen = i915_create_screen(winsys, intel->intelScreen->deviceID); + /* Create the i915simple context: */ - return i915_create( winsys, - &iws->winsys, - intel->intelScreen->deviceID ); + return i915_create_context( screen, + winsys, + &iws->winsys ); } -- cgit v1.2.3 From 94047122571ac78717874bfb42598ab210cd7f80 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 26 Feb 2008 20:12:29 -0700 Subject: cell: insert a (disabled) call to spe_cpu_info_get() Found on the Cell devel forum, but doesn't appear to be available in SDK 2.1. --- src/gallium/drivers/cell/ppu/cell_context.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 98c314f45c4..5cbc922cb90 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -228,6 +228,9 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) * SPU stuff */ cell->num_spus = 6; + /* XXX is this in SDK 3.0 only? + cell->num_spus = spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1); + */ cell_start_spus(cell); -- cgit v1.2.3 From f81b7a6285455e838adb061dcca90036c9f99522 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Wed, 27 Feb 2008 15:59:09 +0900 Subject: gallium: update for new i915_screen.c file; fix some warnings. --- src/gallium/drivers/i915simple/SConscript | 1 + src/gallium/drivers/i915simple/i915_screen.c | 1 + src/gallium/drivers/i915simple/i915_screen.h | 9 +++++++++ src/gallium/drivers/i915simple/i915_winsys.h | 9 +++++++++ 4 files changed, 20 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915simple/SConscript index f5fb96b995c..3e1beaea6d8 100644 --- a/src/gallium/drivers/i915simple/SConscript +++ b/src/gallium/drivers/i915simple/SConscript @@ -15,6 +15,7 @@ i915simple = env.ConvenienceLibrary( 'i915_fpc_translate.c', 'i915_prim_emit.c', 'i915_prim_vbuf.c', + 'i915_screen.c', 'i915_state.c', 'i915_state_derived.c', 'i915_state_dynamic.c', diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 7e9d971d384..5630440a5ac 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -30,6 +30,7 @@ #include "pipe/p_winsys.h" #include "i915_reg.h" +#include "i915_context.h" #include "i915_screen.h" #include "i915_texture.h" diff --git a/src/gallium/drivers/i915simple/i915_screen.h b/src/gallium/drivers/i915simple/i915_screen.h index 8394ddbe896..73b0ff05ce7 100644 --- a/src/gallium/drivers/i915simple/i915_screen.h +++ b/src/gallium/drivers/i915simple/i915_screen.h @@ -33,6 +33,11 @@ #include "pipe/p_screen.h" +#ifdef __cplusplus +extern "C" { +#endif + + /** * Subclass of pipe_screen */ @@ -57,4 +62,8 @@ extern struct pipe_screen * i915_create_screen(struct pipe_winsys *winsys, uint pci_id); +#ifdef __cplusplus +} +#endif + #endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h index e6b0ac9c52e..aea30032818 100644 --- a/src/gallium/drivers/i915simple/i915_winsys.h +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -40,6 +40,11 @@ #include "pipe/p_defines.h" +#ifdef __cplusplus +extern "C" { +#endif + + /* Pipe drivers are (meant to be!) independent of both GL and the * window system. The window system provides a buffer manager and a * set of additional hooks for things like command buffer submission, @@ -112,4 +117,8 @@ struct pipe_context *i915_create_context( struct pipe_screen *, struct pipe_winsys *, struct i915_winsys * ); +#ifdef __cplusplus +} +#endif + #endif -- cgit v1.2.3 From e8c0162fa0c4720aea612a617cbd02b83590763c Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 08:58:12 -0700 Subject: gallium: softpipe_init_texture_funcs(), make texture func static --- src/gallium/drivers/softpipe/sp_context.c | 7 +------ src/gallium/drivers/softpipe/sp_texture.c | 19 +++++++++++++++---- src/gallium/drivers/softpipe/sp_texture.h | 16 +--------------- 3 files changed, 17 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 6a884327e00..78acf51433a 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -279,12 +279,7 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, softpipe->pipe.flush = softpipe_flush; softpipe_init_query_funcs( softpipe ); - - /* textures */ - softpipe->pipe.texture_create = softpipe_texture_create; - softpipe->pipe.texture_release = softpipe_texture_release; - softpipe->pipe.texture_update = softpipe_texture_update; - softpipe->pipe.get_tex_surface = softpipe_get_tex_surface; + softpipe_init_texture_funcs( softpipe ); /* * Alloc caches for accessing drawing surfaces and textures. diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index a96447fa7a1..15945219612 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -80,7 +80,7 @@ softpipe_texture_layout(struct softpipe_texture * spt) } -struct pipe_texture * +static struct pipe_texture * softpipe_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) { @@ -108,7 +108,7 @@ softpipe_texture_create(struct pipe_context *pipe, } -void +static void softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) { if (!*pt) @@ -133,7 +133,7 @@ softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) } -void +static void softpipe_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) { @@ -150,7 +150,7 @@ softpipe_texture_update(struct pipe_context *pipe, /** * Called via pipe->get_tex_surface() */ -struct pipe_surface * +static struct pipe_surface * softpipe_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) @@ -183,3 +183,14 @@ softpipe_get_tex_surface(struct pipe_context *pipe, } return ps; } + + + +void +softpipe_init_texture_funcs( struct softpipe_context *softpipe ) +{ + softpipe->pipe.texture_create = softpipe_texture_create; + softpipe->pipe.texture_release = softpipe_texture_release; + softpipe->pipe.texture_update = softpipe_texture_update; + softpipe->pipe.get_tex_surface = softpipe_get_tex_surface; +} diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 50fc1004274..01b1e28ec5e 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -54,22 +54,8 @@ softpipe_texture(struct pipe_texture *pt) } - -extern struct pipe_texture * -softpipe_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat); - extern void -softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); - -extern void -softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture); - -extern struct pipe_surface * -softpipe_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice); +softpipe_init_texture_funcs( struct softpipe_context *softpipe ); #endif /* SP_TEXTURE */ -- cgit v1.2.3 From ef6c82b0c13573df1aab7acd6f4f9ef9076f421f Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 09:46:10 -0700 Subject: gallium/i915: remove some redundant code --- src/gallium/drivers/i915simple/i915_context.h | 1 - src/gallium/drivers/i915simple/i915_screen.c | 1 - src/gallium/drivers/i915simple/i915_strings.c | 55 +-------------------------- 3 files changed, 1 insertion(+), 56 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 9fb85c122d1..6401112f835 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -317,7 +317,6 @@ void i915_init_surface_functions( struct i915_context *i915 ); void i915_init_state_functions( struct i915_context *i915 ); void i915_init_flush_functions( struct i915_context *i915 ); void i915_init_string_functions( struct i915_context *i915 ); -void i915_init_screen_string_functions(struct pipe_screen *screen); diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 5630440a5ac..f44ff43c994 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -133,7 +133,6 @@ i915_create_screen(struct pipe_winsys *winsys, uint pci_id) i915screen->screen.get_name = i915_get_name; i915screen->screen.get_vendor = i915_get_vendor; - i915_init_screen_string_functions(&i915screen->screen); i915_init_screen_texture_functions(&i915screen->screen); return &i915screen->screen; diff --git a/src/gallium/drivers/i915simple/i915_strings.c b/src/gallium/drivers/i915simple/i915_strings.c index ee62bb2e5de..14da3bcd726 100644 --- a/src/gallium/drivers/i915simple/i915_strings.c +++ b/src/gallium/drivers/i915simple/i915_strings.c @@ -33,55 +33,9 @@ /** XXX temporary screen/pipe duplication here */ -static const char *i915_get_vendor_screen( struct pipe_screen *screen ) -{ - return "Tungsten Graphics, Inc."; -} - static const char *i915_get_vendor( struct pipe_context *pipe ) { - return "Tungsten Graphics, Inc."; -} - - -static const char *i915_get_name_screen( struct pipe_screen *screen ) -{ - struct i915_screen *i915screen = i915_screen(screen); - static char buffer[128]; - const char *chipset; - - switch (i915screen->pci_id) { - case PCI_CHIP_I915_G: - chipset = "915G"; - break; - case PCI_CHIP_I915_GM: - chipset = "915GM"; - break; - case PCI_CHIP_I945_G: - chipset = "945G"; - break; - case PCI_CHIP_I945_GM: - chipset = "945GM"; - break; - case PCI_CHIP_I945_GME: - chipset = "945GME"; - break; - case PCI_CHIP_G33_G: - chipset = "G33"; - break; - case PCI_CHIP_Q35_G: - chipset = "Q35"; - break; - case PCI_CHIP_Q33_G: - chipset = "Q33"; - break; - default: - chipset = "unknown"; - break; - } - - sprintf(buffer, "i915 (chipset: %s)", chipset); - return buffer; + return pipe->screen->get_vendor(pipe->screen); } @@ -97,10 +51,3 @@ i915_init_string_functions(struct i915_context *i915) i915->pipe.get_name = i915_get_name; i915->pipe.get_vendor = i915_get_vendor; } - -void -i915_init_screen_string_functions(struct pipe_screen *screen) -{ - screen->get_name = i915_get_name_screen; - screen->get_vendor = i915_get_vendor_screen; -} -- cgit v1.2.3 From 4f36cf5858a7e53181c3578685675e15fbfcbb82 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 09:47:46 -0700 Subject: gallium: implement pipe_screen for softpipe driver --- src/gallium/drivers/softpipe/Makefile | 1 + src/gallium/drivers/softpipe/sp_context.c | 64 ++-------- src/gallium/drivers/softpipe/sp_screen.c | 141 +++++++++++++++++++++ src/gallium/drivers/softpipe/sp_texture.c | 57 +++++++-- src/gallium/drivers/softpipe/sp_texture.h | 10 +- src/gallium/drivers/softpipe/sp_winsys.h | 7 +- .../winsys/dri/intel/intel_winsys_softpipe.c | 3 +- src/gallium/winsys/xlib/xm_winsys.c | 3 +- 8 files changed, 220 insertions(+), 66 deletions(-) create mode 100644 src/gallium/drivers/softpipe/sp_screen.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 539ffb77f55..f32db35d587 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -27,6 +27,7 @@ C_SOURCES = \ sp_quad_output.c \ sp_quad_stencil.c \ sp_quad_stipple.c \ + sp_screen.c \ sp_state_blend.c \ sp_state_clip.c \ sp_state_derived.c \ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 78acf51433a..4ac1719cbbe 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -143,76 +143,31 @@ static void softpipe_destroy( struct pipe_context *pipe ) } +/* XXX these will go away shortly */ static const char *softpipe_get_name( struct pipe_context *pipe ) { - return "softpipe"; + return pipe->screen->get_name(pipe->screen); } static const char *softpipe_get_vendor( struct pipe_context *pipe ) { - return "Tungsten Graphics, Inc."; + return pipe->screen->get_vendor(pipe->screen); } static int softpipe_get_param(struct pipe_context *pipe, int param) { - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 1; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 12; /* max 2Kx2K */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 12; /* max 2Kx2K */ - default: - return 0; - } + return pipe->screen->get_param(pipe->screen, param); } static float softpipe_get_paramf(struct pipe_context *pipe, int param) { - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 0.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; /* arbitrary */ - - default: - return 0; - } + return pipe->screen->get_paramf(pipe->screen, param); } -struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, - struct softpipe_winsys *softpipe_winsys ) +struct pipe_context * +softpipe_create( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + struct softpipe_winsys *softpipe_winsys ) { struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); uint i; @@ -226,6 +181,7 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; softpipe->pipe.winsys = pipe_winsys; + softpipe->pipe.screen = screen; softpipe->pipe.destroy = softpipe_destroy; /* queries */ diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c new file mode 100644 index 00000000000..cc3b9625805 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -0,0 +1,141 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "sp_texture.h" +#include "sp_winsys.h" + + +static const char * +softpipe_get_vendor(struct pipe_screen *screen) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +softpipe_get_name(struct pipe_screen *screen) +{ + return "softpipe"; +} + + +static int +softpipe_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + + +static float +softpipe_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + default: + return 0; + } +} + + +static void +softpipe_destroy_screen( struct pipe_screen *screen ) +{ + FREE(screen); +} + + +/** + * Create a new pipe_screen object + * Note: we're not presently subclassing pipe_screen (no softpipe_screen). + */ +struct pipe_screen * +softpipe_create_screen(struct pipe_winsys *winsys) +{ + struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen); + + if (!screen) + return NULL; + + screen->winsys = winsys; + + screen->destroy = softpipe_destroy_screen; + + screen->get_name = softpipe_get_name; + screen->get_vendor = softpipe_get_vendor; + screen->get_param = softpipe_get_param; + screen->get_paramf = softpipe_get_paramf; + + softpipe_init_screen_texture_funcs(screen); + + return screen; +} diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 15945219612..f0e8350a4a1 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -80,23 +80,34 @@ softpipe_texture_layout(struct softpipe_texture * spt) } +/* XXX temporary */ static struct pipe_texture * softpipe_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) { + return pipe->screen->texture_create(pipe->screen, templat); +} + + +static struct pipe_texture * +softpipe_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); if (!spt) return NULL; spt->base = *templat; spt->base.refcount = 1; - spt->base.pipe = pipe; + spt->base.pipe = NULL; + spt->base.screen = screen; softpipe_texture_layout(spt); - spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); if (!spt->buffer) { FREE(spt); return NULL; @@ -108,8 +119,17 @@ softpipe_texture_create(struct pipe_context *pipe, } +/* XXX temporary */ static void softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + return pipe->screen->texture_release(pipe->screen, pt); +} + + +static void +softpipe_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -125,7 +145,7 @@ softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); */ - pipe_buffer_reference(pipe->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); FREE(spt); } @@ -150,21 +170,32 @@ softpipe_texture_update(struct pipe_context *pipe, /** * Called via pipe->get_tex_surface() */ +/* XXX temporary */ static struct pipe_surface * softpipe_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) { + return pipe->screen->get_tex_surface(pipe->screen, pt, face, level, zslice); +} + + +static struct pipe_surface * +softpipe_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = softpipe_texture(pt); struct pipe_surface *ps; assert(level <= pt->last_level); - ps = pipe->winsys->surface_alloc(pipe->winsys); + ps = ws->surface_alloc(ws); if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(pipe->winsys, &ps->buffer, spt->buffer); + pipe_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; ps->cpp = pt->cpp; ps->width = pt->width[level]; @@ -176,7 +207,8 @@ softpipe_get_tex_surface(struct pipe_context *pipe, ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * (pt->compressed ? ps->height/4 : ps->height) * ps->width * ps->cpp; - } else { + } + else { assert(face == 0); assert(zslice == 0); } @@ -194,3 +226,12 @@ softpipe_init_texture_funcs( struct softpipe_context *softpipe ) softpipe->pipe.texture_update = softpipe_texture_update; softpipe->pipe.get_tex_surface = softpipe_get_tex_surface; } + + +void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = softpipe_texture_create_screen; + screen->texture_release = softpipe_texture_release_screen; + screen->get_tex_surface = softpipe_get_tex_surface_screen; +} diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 01b1e28ec5e..a7322144e6f 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -29,8 +29,12 @@ #define SP_TEXTURE_H +#include "pipe/p_state.h" + + struct pipe_context; -struct pipe_texture; +struct pipe_screen; +struct softpipe_context; struct softpipe_texture @@ -58,4 +62,8 @@ extern void softpipe_init_texture_funcs( struct softpipe_context *softpipe ); +extern void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen); + + #endif /* SP_TEXTURE */ diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h index d6b379f58c6..80f53548081 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -50,8 +50,13 @@ struct pipe_winsys; struct pipe_context; -struct pipe_context *softpipe_create( struct pipe_winsys *, +struct pipe_context *softpipe_create( struct pipe_screen *, + struct pipe_winsys *, struct softpipe_winsys * ); +struct pipe_screen * +softpipe_create_screen(struct pipe_winsys *); + + #endif /* SP_WINSYS_H */ diff --git a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c index 9e483bdc9f5..0bc2dc40025 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c @@ -68,6 +68,7 @@ intel_create_softpipe( struct intel_context *intel, struct pipe_winsys *winsys ) { struct intel_softpipe_winsys *isws = CALLOC_STRUCT( intel_softpipe_winsys ); + struct pipe_screen *screen = softpipe_create_screen(winsys); /* Fill in this struct with callbacks that softpipe will need to * communicate with the window system, buffer manager, etc. @@ -77,5 +78,5 @@ intel_create_softpipe( struct intel_context *intel, /* Create the softpipe context: */ - return softpipe_create( winsys, &isws->sws ); + return softpipe_create( screen, winsys, &isws->sws ); } diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 8da596d419e..adebe4020a1 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -457,7 +457,8 @@ xmesa_create_pipe_context(XMesaContext xmesa, uint pixelformat) #endif { struct softpipe_winsys *spws = xmesa_get_softpipe_winsys(pixelformat); - pipe = softpipe_create( pws, spws ); + struct pipe_screen *screen = softpipe_create_screen(pws); + pipe = softpipe_create( screen, pws, spws ); if (pipe) pipe->priv = xmesa; -- cgit v1.2.3 From f04736c8be5d30c510e1799ac0c8fa5173516513 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 27 Feb 2008 09:55:17 -0700 Subject: Cell: implement pipe_screen for cell driver --- src/gallium/drivers/cell/ppu/Makefile | 1 + src/gallium/drivers/cell/ppu/cell_context.c | 68 +++---------- src/gallium/drivers/cell/ppu/cell_context.h | 2 +- src/gallium/drivers/cell/ppu/cell_screen.c | 147 ++++++++++++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_screen.h | 41 ++++++++ src/gallium/drivers/cell/ppu/cell_texture.c | 49 ++++++++-- src/gallium/drivers/cell/ppu/cell_texture.h | 4 + src/gallium/winsys/xlib/xm_winsys.c | 6 +- 8 files changed, 254 insertions(+), 64 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_screen.c create mode 100644 src/gallium/drivers/cell/ppu/cell_screen.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 164dde762c0..d38fa6ce073 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -29,6 +29,7 @@ SOURCES = \ cell_state_emit.c \ cell_state_shader.c \ cell_pipe_state.c \ + cell_screen.c \ cell_state_vertex.c \ cell_spu.c \ cell_surface.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 5cbc922cb90..bb1838409fc 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -37,9 +37,12 @@ #include "pipe/p_format.h" #include "pipe/p_util.h" #include "pipe/p_winsys.h" -#include "cell/common.h" +#include "pipe/p_screen.h" + #include "draw/draw_context.h" #include "draw/draw_private.h" + +#include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -76,73 +79,24 @@ cell_is_format_supported( struct pipe_context *pipe, static int cell_get_param(struct pipe_context *pipe, int param) { - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 1; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 12; /* max 2Kx2K */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 12; /* max 2Kx2K */ - default: - return 0; - } + return pipe->screen->get_param(pipe->screen, param); } static float cell_get_paramf(struct pipe_context *pipe, int param) { - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 0.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; /* arbitrary */ - - default: - return 0; - } + return pipe->screen->get_paramf(pipe->screen, param); } - static const char * cell_get_name( struct pipe_context *pipe ) { - return "Cell"; + return pipe->screen->get_name(pipe->screen); } static const char * cell_get_vendor( struct pipe_context *pipe ) { - return "Tungsten Graphics, Inc."; + return pipe->screen->get_vendor(pipe->screen); } @@ -174,7 +128,8 @@ cell_draw_create(struct cell_context *cell) struct pipe_context * -cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) +cell_create_context(struct pipe_screen *screen, + struct cell_winsys *cws) { struct cell_context *cell; uint spu, buf; @@ -187,7 +142,8 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) memset(cell, 0, sizeof(*cell)); cell->winsys = cws; - cell->pipe.winsys = winsys; + cell->pipe.winsys = screen->winsys; + cell->pipe.screen = screen; cell->pipe.destroy = cell_destroy_context; /* queries */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 1433a4925fa..bf27289f3f5 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -128,7 +128,7 @@ cell_context(struct pipe_context *pipe) extern struct pipe_context * -cell_create_context(struct pipe_winsys *ws, struct cell_winsys *cws); +cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws); extern void cell_vertex_shader_queue_flush(struct draw_context *draw); diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c new file mode 100644 index 00000000000..75255c0466a --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -0,0 +1,147 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "cell_screen.h" +#include "cell_texture.h" +#include "cell_winsys.h" + + +static const char * +cell_get_vendor(struct pipe_screen *screen) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +cell_get_name(struct pipe_screen *screen) +{ + return "Cell"; +} + + +static int +cell_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + + +static float +cell_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + default: + return 0; + } +} + + +static void +cell_destroy_screen( struct pipe_screen *screen ) +{ + FREE(screen); +} + + +/** + * Create a new pipe_screen object + * Note: we're not presently subclassing pipe_screen (no cell_screen) but + * that would be the place to put SPU thread/context info... + */ +struct pipe_screen * +cell_create_screen(struct pipe_winsys *winsys) +{ + struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen); + + if (!screen) + return NULL; + + screen->winsys = winsys; + + screen->destroy = cell_destroy_screen; + + screen->get_name = cell_get_name; + screen->get_vendor = cell_get_vendor; + screen->get_param = cell_get_param; + screen->get_paramf = cell_get_paramf; + + cell_init_screen_texture_funcs(screen); + + return screen; +} diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h new file mode 100644 index 00000000000..c7e15889d66 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_screen.h @@ -0,0 +1,41 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_SCREEN_H +#define CELL_SCREEN_H + + +struct pipe_screen; +struct pipe_winsys; + + +extern struct pipe_screen * +cell_create_screen(struct pipe_winsys *winsys); + + +#endif /* CELL_SCREEN_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 0edefa5f056..e6398a85fa5 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -83,18 +83,29 @@ static struct pipe_texture * cell_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) { + return pipe->screen->texture_create(pipe->screen, templat); + +} + +static struct pipe_texture * +cell_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = CALLOC_STRUCT(cell_texture); if (!spt) return NULL; spt->base = *templat; spt->base.refcount = 1; + spt->base.pipe = NULL; + spt->base.screen = screen; cell_texture_layout(spt); - spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); if (!spt->buffer) { FREE(spt); @@ -107,6 +118,14 @@ cell_texture_create(struct pipe_context *pipe, static void cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + return pipe->screen->texture_release(pipe->screen, pt); +} + + +static void +cell_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -122,7 +141,7 @@ cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); */ - pipe_buffer_reference(pipe->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); FREE(spt); } @@ -146,14 +165,24 @@ cell_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) { + return pipe->screen->get_tex_surface(pipe->screen, pt, face, level, zslice); +} + + +static struct pipe_surface * +cell_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = cell_texture(pt); struct pipe_surface *ps; - ps = pipe->winsys->surface_alloc(pipe->winsys); + ps = ws->surface_alloc(ws); if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(pipe->winsys, &ps->buffer, spt->buffer); + pipe_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; ps->cpp = pt->cpp; ps->width = pt->width[level]; @@ -270,3 +299,11 @@ cell_init_texture_functions(struct cell_context *cell) cell->pipe.texture_update = cell_texture_update; cell->pipe.get_tex_surface = cell_get_tex_surface; } + +void +cell_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = cell_texture_create_screen; + screen->texture_release = cell_texture_release_screen; + screen->get_tex_surface = cell_get_tex_surface_screen; +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 824fb3e20fc..fcee069d055 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -68,4 +68,8 @@ extern void cell_init_texture_functions(struct cell_context *cell); +extern void +cell_init_screen_texture_funcs(struct pipe_screen *screen); + + #endif /* CELL_TEXTURE_H */ diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index adebe4020a1..018cf01cd6d 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -36,6 +36,8 @@ #include "glxheader.h" #include "xmesaP.h" +#undef ASSERT + #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" @@ -45,6 +47,7 @@ #ifdef GALLIUM_CELL #include "cell/ppu/cell_context.h" +#include "cell/ppu/cell_screen.h" #include "cell/ppu/cell_winsys.h" #else #define TILE_SIZE 32 /* avoid compilation errors */ @@ -448,7 +451,8 @@ xmesa_create_pipe_context(XMesaContext xmesa, uint pixelformat) #ifdef GALLIUM_CELL if (!getenv("GALLIUM_NOCELL")) { struct cell_winsys *cws = cell_get_winsys(pixelformat); - pipe = cell_create_context(pws, cws); + struct pipe_screen *screen = cell_create_screen(pws); + pipe = cell_create_context(screen, cws); if (pipe) pipe->priv = xmesa; return pipe; -- cgit v1.2.3 From 17188e4d5a3707c134fc97976863f0d8e2f1f5ab Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 27 Feb 2008 10:15:37 -0700 Subject: gallium/i915: hook up screen->get_param() --- src/gallium/drivers/i915simple/i915_context.c | 55 +++-------------------- src/gallium/drivers/i915simple/i915_screen.c | 63 +++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 50 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 8478cd76a54..e89db8a1303 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -36,6 +36,7 @@ #include "pipe/p_defines.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" +#include "pipe/p_screen.h" /** @@ -90,65 +91,19 @@ i915_is_format_supported( struct pipe_context *pipe, } +/* XXX temporary */ static int i915_get_param(struct pipe_context *pipe, int param) { - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 11; /* max 1024x1024 */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 11; /* max 1024x1024 */ - default: - return 0; - } + return pipe->screen->get_param(pipe->screen, param); } +/* XXX temporary */ static float i915_get_paramf(struct pipe_context *pipe, int param) { - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 7.5; - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 4.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - - default: - return 0; - } + return pipe->screen->get_paramf(pipe->screen, param); } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index f44ff43c994..b9b4d6b6fac 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -83,6 +83,67 @@ i915_get_name( struct pipe_screen *pscreen ) } +static int +i915_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +i915_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + static void i915_destroy_screen( struct pipe_screen *screen ) @@ -132,6 +193,8 @@ i915_create_screen(struct pipe_winsys *winsys, uint pci_id) i915screen->screen.get_name = i915_get_name; i915screen->screen.get_vendor = i915_get_vendor; + i915screen->screen.get_param = i915_get_param; + i915screen->screen.get_paramf = i915_get_paramf; i915_init_screen_texture_functions(&i915screen->screen); -- cgit v1.2.3 From 03b5267f52d440b1b357918ed7de2ca948f314e1 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 27 Feb 2008 10:17:57 -0700 Subject: gallium/i965: implement pipe_screen for i965 driver (untested) --- src/gallium/drivers/i965simple/Makefile | 1 + src/gallium/drivers/i965simple/brw_context.c | 70 ++-------- src/gallium/drivers/i965simple/brw_screen.c | 168 ++++++++++++++++++++++++ src/gallium/drivers/i965simple/brw_screen.h | 68 ++++++++++ src/gallium/drivers/i965simple/brw_strings.c | 29 +--- src/gallium/drivers/i965simple/brw_tex_layout.c | 58 ++++++-- src/gallium/drivers/i965simple/brw_tex_layout.h | 32 +++++ src/gallium/drivers/i965simple/brw_winsys.h | 4 +- src/gallium/winsys/xlib/xm_winsys_aub.c | 4 +- 9 files changed, 338 insertions(+), 96 deletions(-) create mode 100644 src/gallium/drivers/i965simple/brw_screen.c create mode 100644 src/gallium/drivers/i965simple/brw_screen.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index cc8580836ce..7b0df0f8508 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -6,6 +6,7 @@ LIBNAME = i965simple C_SOURCES = \ brw_blit.c \ brw_flush.c \ + brw_screen.c \ brw_strings.c \ brw_surface.c \ brw_cc.c \ diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index 6fb840708e5..d90b32d4b40 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -40,15 +40,14 @@ #include "pipe/p_winsys.h" #include "pipe/p_context.h" #include "pipe/p_util.h" +#include "pipe/p_screen.h" -/*************************************** - * Mesa's Driver Functions - ***************************************/ #ifndef BRW_DEBUG int BRW_DEBUG = (0); #endif + static void brw_destroy(struct pipe_context *pipe) { struct brw_context *brw = brw_context(pipe); @@ -56,6 +55,7 @@ static void brw_destroy(struct pipe_context *pipe) FREE(brw); } + static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { @@ -74,64 +74,17 @@ static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, static int brw_get_param(struct pipe_context *pipe, int param) { - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 11; /* max 1024x1024 */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 11; /* max 1024x1024 */ - default: - return 0; - } + return pipe->screen->get_param(pipe->screen, param); } static float brw_get_paramf(struct pipe_context *pipe, int param) { - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 7.5; - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 4.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - - default: - return 0; - } + return pipe->screen->get_paramf(pipe->screen, param); } + static boolean brw_is_format_supported( struct pipe_context *pipe, enum pipe_format format, uint type ) @@ -200,22 +153,23 @@ brw_is_format_supported( struct pipe_context *pipe, -struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys, +struct pipe_context *brw_create(struct pipe_screen *screen, struct brw_winsys *brw_winsys, unsigned pci_id) { struct brw_context *brw; - pipe_winsys->printf(pipe_winsys, - "%s: creating brw_context with pci id 0x%x\n", - __FUNCTION__, pci_id); + screen->winsys->printf(screen->winsys, + "%s: creating brw_context with pci id 0x%x\n", + __FUNCTION__, pci_id); brw = CALLOC_STRUCT(brw_context); if (brw == NULL) return NULL; brw->winsys = brw_winsys; - brw->pipe.winsys = pipe_winsys; + brw->pipe.winsys = screen->winsys; + brw->pipe.screen = screen; brw->pipe.destroy = brw_destroy; brw->pipe.is_format_supported = brw_is_format_supported; diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c new file mode 100644 index 00000000000..7afe070bd8f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -0,0 +1,168 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_tex_layout.h" + + +static const char * +brw_get_vendor( struct pipe_screen *screen ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +brw_get_name( struct pipe_screen *screen ) +{ + static char buffer[128]; + const char *chipset; + + switch (brw_screen(screen)->pci_id) { + case PCI_CHIP_I965_Q: + chipset = "Intel(R) 965Q"; + break; + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_G_1: + chipset = "Intel(R) 965G"; + break; + case PCI_CHIP_I965_GM: + chipset = "Intel(R) 965GM"; + break; + case PCI_CHIP_I965_GME: + chipset = "Intel(R) 965GME/GLE"; + break; + default: + chipset = "unknown"; + break; + } + + sprintf(buffer, "i965 (chipset: %s)", chipset); + return buffer; +} + + +static int +brw_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +brw_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + + +static void +brw_destroy_screen( struct pipe_screen *screen ) +{ + FREE(screen); +} + + +/** + * Create a new brw_screen object + */ +struct pipe_screen * +brw_create_screen(struct pipe_winsys *winsys, uint pci_id) +{ + struct brw_screen *brwscreen = CALLOC_STRUCT(brw_screen); + + if (!brwscreen) + return NULL; + + brwscreen->pci_id = pci_id; + + brwscreen->screen.winsys = winsys; + + brwscreen->screen.destroy = brw_destroy_screen; + + brwscreen->screen.get_name = brw_get_name; + brwscreen->screen.get_vendor = brw_get_vendor; + brwscreen->screen.get_param = brw_get_param; + brwscreen->screen.get_paramf = brw_get_paramf; + + brw_init_screen_texture_funcs(&brwscreen->screen); + + return &brwscreen->screen; +} diff --git a/src/gallium/drivers/i965simple/brw_screen.h b/src/gallium/drivers/i965simple/brw_screen.h new file mode 100644 index 00000000000..d3c70387e61 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_screen.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef BRW_SCREEN_H +#define BRW_SCREEN_H + + +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Subclass of pipe_screen + */ +struct brw_screen +{ + struct pipe_screen screen; + + uint pci_id; +}; + + +/** cast wrapper */ +static INLINE struct brw_screen * +brw_screen(struct pipe_screen *pscreen) +{ + return (struct brw_screen *) pscreen; +} + + +extern struct pipe_screen * +brw_create_screen(struct pipe_winsys *winsys, uint pci_id); + + +#ifdef __cplusplus +} +#endif + +#endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965simple/brw_strings.c b/src/gallium/drivers/i965simple/brw_strings.c index 3d9c50961fa..a8f2ce582af 100644 --- a/src/gallium/drivers/i965simple/brw_strings.c +++ b/src/gallium/drivers/i965simple/brw_strings.c @@ -28,39 +28,18 @@ #include "brw_context.h" #include "brw_reg.h" +#include "pipe/p_screen.h" + static const char *brw_get_vendor( struct pipe_context *pipe ) { - return "Tungsten Graphics, Inc."; + return pipe->screen->get_vendor(pipe->screen); } static const char *brw_get_name( struct pipe_context *pipe ) { - static char buffer[128]; - const char *chipset; - - switch (brw_context(pipe)->pci_id) { - case PCI_CHIP_I965_Q: - chipset = "Intel(R) 965Q"; - break; - case PCI_CHIP_I965_G: - case PCI_CHIP_I965_G_1: - chipset = "Intel(R) 965G"; - break; - case PCI_CHIP_I965_GM: - chipset = "Intel(R) 965GM"; - break; - case PCI_CHIP_I965_GME: - chipset = "Intel(R) 965GME/GLE"; - break; - default: - chipset = "unknown"; - break; - } - - sprintf(buffer, "i965 (chipset: %s)", chipset); - return buffer; + return pipe->screen->get_name(pipe->screen); } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 86ce3d0cc38..9753c50143a 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -71,8 +71,6 @@ static unsigned minify( unsigned d ) } -static boolean brw_miptree_layout(struct pipe_context *, struct brw_texture *); - static void intel_miptree_set_image_offset(struct brw_texture *tex, unsigned level, unsigned img, @@ -199,7 +197,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) } } -static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture *tex) +static boolean brw_miptree_layout(struct brw_texture *tex) { struct pipe_texture *pt = &tex->base; /* XXX: these vary depending on image format: @@ -304,17 +302,26 @@ static struct pipe_texture * brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) { + return pipe->screen->texture_create(pipe->screen, templat); +} + + +static struct pipe_texture * +brw_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct pipe_winsys *ws = screen->winsys; struct brw_texture *tex = CALLOC_STRUCT(brw_texture); if (tex) { tex->base = *templat; tex->base.refcount = 1; - if (brw_miptree_layout(pipe, tex)) - tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); + if (brw_miptree_layout(tex)) + tex->buffer = ws->buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); if (!tex->buffer) { FREE(tex); @@ -328,6 +335,14 @@ brw_texture_create(struct pipe_context *pipe, static void brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + pipe->screen->texture_release(pipe->screen, pt); +} + + +static void +brw_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -337,6 +352,7 @@ brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); */ if (--(*pt)->refcount <= 0) { + struct pipe_winsys *ws = screen->winsys; struct brw_texture *tex = (struct brw_texture *)*pt; uint i; @@ -344,7 +360,7 @@ brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - pipe_buffer_reference(pipe->winsys, &tex->buffer, NULL); + pipe_buffer_reference(ws, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -371,6 +387,16 @@ brw_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) { + return pipe->screen->get_tex_surface(pipe->screen, pt, face, level, zslice); +} + + +static struct pipe_surface * +brw_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; struct brw_texture *tex = (struct brw_texture *)pt; struct pipe_surface *ps; unsigned offset; /* in bytes */ @@ -388,11 +414,11 @@ brw_get_tex_surface(struct pipe_context *pipe, assert(zslice == 0); } - ps = pipe->winsys->surface_alloc(pipe->winsys); + ps = ws->surface_alloc(ws); if (ps) { assert(ps->format); assert(ps->refcount); - pipe_buffer_reference(pipe->winsys, &ps->buffer, tex->buffer); + pipe_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; ps->cpp = pt->cpp; ps->width = pt->width[level]; @@ -412,3 +438,13 @@ brw_init_texture_functions(struct brw_context *brw) brw->pipe.texture_update = brw_texture_update; brw->pipe.get_tex_surface = brw_get_tex_surface; } + + +void +brw_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = brw_texture_create_screen; + screen->texture_release = brw_texture_release_screen; + screen->get_tex_surface = brw_get_tex_surface_screen; +} + diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h index ed49baeef80..a6b6ba81460 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.h +++ b/src/gallium/drivers/i965simple/brw_tex_layout.h @@ -1,12 +1,44 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + + #ifndef BRW_TEX_LAYOUT_H #define BRW_TEX_LAYOUT_H struct brw_context; +struct pipe_screen; extern void brw_init_texture_functions(struct brw_context *brw); +extern void +brw_init_screen_texture_funcs(struct pipe_screen *screen); + #endif diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h index 3523a58614f..b67bd737502 100644 --- a/src/gallium/drivers/i965simple/brw_winsys.h +++ b/src/gallium/drivers/i965simple/brw_winsys.h @@ -53,6 +53,8 @@ struct pipe_buffer; struct pipe_fence_handle; struct pipe_winsys; +struct pipe_screen; + /* The pipe driver currently understands the following chipsets: */ @@ -181,7 +183,7 @@ struct brw_winsys { #define BRW_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) -struct pipe_context *brw_create(struct pipe_winsys *, +struct pipe_context *brw_create(struct pipe_screen *, struct brw_winsys *, unsigned pci_id); diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index dbfd37bda29..d55d8c39eb5 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -40,6 +40,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "i965simple/brw_winsys.h" +#include "i965simple/brw_screen.h" #include "brw_aub.h" #include "xm_winsys_aub.h" @@ -565,6 +566,7 @@ struct pipe_context * xmesa_create_i965simple( struct pipe_winsys *winsys ) { struct aub_brw_winsys *iws = CALLOC_STRUCT( aub_brw_winsys ); + struct pipe_screen *screen = brw_create_screen(winsys, 0/* XXX pci_id */); /* Fill in this struct with callbacks that i965simple will need to * communicate with the window system, buffer manager, etc. @@ -583,7 +585,7 @@ xmesa_create_i965simple( struct pipe_winsys *winsys ) /* Create the i965simple context: */ - return brw_create( winsys, + return brw_create( screen, &iws->winsys, 0 ); } -- cgit v1.2.3 From 8383f798b41df9a305e0a33afe8afa028d5d5dfb Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 11:24:35 -0700 Subject: gallium: start removing pipe_context->get_name/vendor/param/paramf These are now per-screen functions, not per-context. State tracker updated, code in drivers and p_context.h disabled. --- src/gallium/drivers/cell/ppu/cell_context.c | 29 ---------------- src/gallium/drivers/failover/fo_context.c | 3 ++ src/gallium/drivers/i915simple/Makefile | 1 - src/gallium/drivers/i915simple/SConscript | 1 - src/gallium/drivers/i915simple/i915_context.c | 24 ------------- src/gallium/drivers/i965simple/Makefile | 1 - src/gallium/drivers/i965simple/SConscript | 1 - src/gallium/drivers/i965simple/brw_context.c | 17 ---------- src/gallium/drivers/softpipe/sp_context.c | 25 -------------- src/gallium/include/pipe/p_context.h | 3 ++ src/mesa/state_tracker/st_cb_drawpixels.c | 2 +- src/mesa/state_tracker/st_cb_strings.c | 9 ++--- src/mesa/state_tracker/st_extensions.c | 49 ++++++++++++++------------- 13 files changed, 37 insertions(+), 128 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index bb1838409fc..351601473d2 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -77,30 +77,6 @@ cell_is_format_supported( struct pipe_context *pipe, } -static int cell_get_param(struct pipe_context *pipe, int param) -{ - return pipe->screen->get_param(pipe->screen, param); -} - -static float cell_get_paramf(struct pipe_context *pipe, int param) -{ - return pipe->screen->get_paramf(pipe->screen, param); -} - -static const char * -cell_get_name( struct pipe_context *pipe ) -{ - return pipe->screen->get_name(pipe->screen); -} - -static const char * -cell_get_vendor( struct pipe_context *pipe ) -{ - return pipe->screen->get_vendor(pipe->screen); -} - - - static void cell_destroy_context( struct pipe_context *pipe ) { @@ -148,11 +124,6 @@ cell_create_context(struct pipe_screen *screen, /* queries */ cell->pipe.is_format_supported = cell_is_format_supported; - cell->pipe.get_name = cell_get_name; - cell->pipe.get_vendor = cell_get_vendor; - cell->pipe.get_param = cell_get_param; - cell->pipe.get_paramf = cell_get_paramf; - /* state setters */ cell->pipe.set_vertex_buffer = cell_set_vertex_buffer; diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 156f7399b03..d5e54f5d61a 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -117,12 +117,15 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->hw = hw; failover->sw = sw; failover->pipe.winsys = hw->winsys; + failover->pipe.screen = hw->screen; failover->pipe.destroy = failover_destroy; failover->pipe.is_format_supported = hw->is_format_supported; +#if 0 failover->pipe.get_name = hw->get_name; failover->pipe.get_vendor = hw->get_vendor; failover->pipe.get_param = hw->get_param; failover->pipe.get_paramf = hw->get_paramf; +#endif failover->pipe.draw_arrays = failover_draw_arrays; failover->pipe.draw_elements = failover_draw_elements; diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile index 3400747a730..41a61a0020b 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915simple/Makefile @@ -18,7 +18,6 @@ C_SOURCES = \ i915_state_emit.c \ i915_state_sampler.c \ i915_screen.c \ - i915_strings.c \ i915_prim_emit.c \ i915_prim_vbuf.c \ i915_texture.c \ diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915simple/SConscript index 3e1beaea6d8..2366e1247f2 100644 --- a/src/gallium/drivers/i915simple/SConscript +++ b/src/gallium/drivers/i915simple/SConscript @@ -22,7 +22,6 @@ i915simple = env.ConvenienceLibrary( 'i915_state_emit.c', 'i915_state_immediate.c', 'i915_state_sampler.c', - 'i915_strings.c', 'i915_surface.c', 'i915_texture.c', ]) diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index e89db8a1303..42355552de1 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -91,22 +91,6 @@ i915_is_format_supported( struct pipe_context *pipe, } -/* XXX temporary */ -static int -i915_get_param(struct pipe_context *pipe, int param) -{ - return pipe->screen->get_param(pipe->screen, param); -} - - -/* XXX temporary */ -static float -i915_get_paramf(struct pipe_context *pipe, int param) -{ - return pipe->screen->get_paramf(pipe->screen, param); -} - - static void i915_destroy( struct pipe_context *pipe ) { struct i915_context *i915 = i915_context( pipe ); @@ -205,8 +189,6 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, i915->pipe.destroy = i915_destroy; i915->pipe.is_format_supported = i915_is_format_supported; - i915->pipe.get_param = i915_get_param; - i915->pipe.get_paramf = i915_get_paramf; i915->pipe.clear = i915_clear; @@ -229,7 +211,6 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, i915_init_surface_functions(i915); i915_init_state_functions(i915); i915_init_flush_functions(i915); - i915_init_string_functions(i915); i915_init_texture_functions(i915); draw_install_aaline_stage(i915->draw, &i915->pipe); @@ -242,11 +223,6 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, */ i915->batch_start = NULL; - /* - * XXX we could plug GL selection/feedback into the drawing pipeline - * by specifying a different setup/render stage. - */ - return &i915->pipe; } diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index 7b0df0f8508..8589ebdf964 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -7,7 +7,6 @@ C_SOURCES = \ brw_blit.c \ brw_flush.c \ brw_screen.c \ - brw_strings.c \ brw_surface.c \ brw_cc.c \ brw_clip.c \ diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript index 74621de84c9..e0cc78826ec 100644 --- a/src/gallium/drivers/i965simple/SConscript +++ b/src/gallium/drivers/i965simple/SConscript @@ -35,7 +35,6 @@ i965simple = env.ConvenienceLibrary( 'brw_state_cache.c', 'brw_state_pool.c', 'brw_state_upload.c', - 'brw_strings.c', 'brw_surface.c', 'brw_tex_layout.c', 'brw_urb.c', diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index d90b32d4b40..e0a40372867 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -71,20 +71,6 @@ static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, } -static int -brw_get_param(struct pipe_context *pipe, int param) -{ - return pipe->screen->get_param(pipe->screen, param); -} - - -static float -brw_get_paramf(struct pipe_context *pipe, int param) -{ - return pipe->screen->get_paramf(pipe->screen, param); -} - - static boolean brw_is_format_supported( struct pipe_context *pipe, enum pipe_format format, uint type ) @@ -173,15 +159,12 @@ struct pipe_context *brw_create(struct pipe_screen *screen, brw->pipe.destroy = brw_destroy; brw->pipe.is_format_supported = brw_is_format_supported; - brw->pipe.get_param = brw_get_param; - brw->pipe.get_paramf = brw_get_paramf; brw->pipe.clear = brw_clear; brw_init_surface_functions(brw); brw_init_texture_functions(brw); brw_init_state_functions(brw); brw_init_flush_functions(brw); - brw_init_string_functions(brw); brw_init_draw_functions( brw ); diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 4ac1719cbbe..6a88c277aa6 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -143,27 +143,6 @@ static void softpipe_destroy( struct pipe_context *pipe ) } -/* XXX these will go away shortly */ -static const char *softpipe_get_name( struct pipe_context *pipe ) -{ - return pipe->screen->get_name(pipe->screen); -} - -static const char *softpipe_get_vendor( struct pipe_context *pipe ) -{ - return pipe->screen->get_vendor(pipe->screen); -} - -static int softpipe_get_param(struct pipe_context *pipe, int param) -{ - return pipe->screen->get_param(pipe->screen, param); -} - -static float softpipe_get_paramf(struct pipe_context *pipe, int param) -{ - return pipe->screen->get_paramf(pipe->screen, param); -} - struct pipe_context * softpipe_create( struct pipe_screen *screen, struct pipe_winsys *pipe_winsys, @@ -186,10 +165,6 @@ softpipe_create( struct pipe_screen *screen, /* queries */ softpipe->pipe.is_format_supported = softpipe_is_format_supported; - softpipe->pipe.get_name = softpipe_get_name; - softpipe->pipe.get_vendor = softpipe_get_vendor; - softpipe->pipe.get_param = softpipe_get_param; - softpipe->pipe.get_paramf = softpipe_get_paramf; /* state setters */ softpipe->pipe.create_blend_state = softpipe_create_blend_state; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 93fcb1c3e96..38ee95bdadd 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -66,12 +66,15 @@ struct pipe_context { boolean (*is_format_supported)( struct pipe_context *pipe, enum pipe_format format, uint type ); +#if 0 + /* XXX obsolete, moved into pipe_screen */ const char *(*get_name)( struct pipe_context *pipe ); const char *(*get_vendor)( struct pipe_context *pipe ); int (*get_param)( struct pipe_context *pipe, int param ); float (*get_paramf)( struct pipe_context *pipe, int param ); +#endif /* diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index c8376989804..6245ce057f9 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -649,7 +649,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, /* XXX if DrawPixels image is larger than max texture size, break * it up into chunks. */ - maxSize = 1 << (pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); + maxSize = 1 << (pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); assert(width <= maxSize); assert(height <= maxSize); diff --git a/src/mesa/state_tracker/st_cb_strings.c b/src/mesa/state_tracker/st_cb_strings.c index c344df0ff1e..247519ab3de 100644 --- a/src/mesa/state_tracker/st_cb_strings.c +++ b/src/mesa/state_tracker/st_cb_strings.c @@ -35,6 +35,7 @@ #include "main/macros.h" #include "main/version.h" #include "pipe/p_context.h" +#include "pipe/p_screen.h" #include "pipe/p_winsys.h" #include "st_context.h" #include "st_cb_strings.h" @@ -45,11 +46,11 @@ static const GLubyte * st_get_string(GLcontext * ctx, GLenum name) { struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = st->pipe->screen; switch (name) { case GL_VENDOR: { - const char *vendor = pipe->get_vendor( pipe ); + const char *vendor = screen->get_vendor( screen ); const char *tungsten = "Tungsten Graphics, Inc."; /* Tungsten developed the state_tracker module (and much of @@ -68,8 +69,8 @@ st_get_string(GLcontext * ctx, GLenum name) case GL_RENDERER: snprintf(st->renderer, sizeof(st->renderer), "Gallium %s, %s on %s", ST_VERSION_STRING, - pipe->get_name( pipe ), - pipe->winsys->get_name( pipe->winsys )); + screen->get_name( screen ), + screen->winsys->get_name( screen->winsys )); return (GLubyte *) st->renderer; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 97d28d77c4b..99d2a5fb9e9 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -32,6 +32,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "pipe/p_screen.h" #include "st_context.h" #include "st_extensions.h" @@ -64,19 +65,19 @@ static int clamp(int a, int min, int max) */ void st_init_limits(struct st_context *st) { - struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = st->pipe->screen; struct gl_constants *c = &st->ctx->Const; c->MaxTextureLevels - = min(pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_LEVELS), + = min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS), MAX_TEXTURE_LEVELS); c->Max3DTextureLevels - = min(pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_3D_LEVELS), + = min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_3D_LEVELS), MAX_3D_TEXTURE_LEVELS); c->MaxCubeTextureLevels - = min(pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS), + = min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS), MAX_CUBE_TEXTURE_LEVELS); c->MaxTextureRectSize @@ -84,31 +85,31 @@ void st_init_limits(struct st_context *st) c->MaxTextureImageUnits = c->MaxTextureCoordUnits - = min(pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS), + = min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS), MAX_TEXTURE_IMAGE_UNITS); c->MaxDrawBuffers - = clamp(pipe->get_param(pipe, PIPE_CAP_MAX_RENDER_TARGETS), + = clamp(screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS), 1, MAX_DRAW_BUFFERS); c->MaxLineWidth - = max(1.0, pipe->get_paramf(pipe, PIPE_CAP_MAX_LINE_WIDTH)); + = max(1.0, screen->get_paramf(screen, PIPE_CAP_MAX_LINE_WIDTH)); c->MaxLineWidthAA - = max(1.0, pipe->get_paramf(pipe, PIPE_CAP_MAX_LINE_WIDTH_AA)); + = max(1.0, screen->get_paramf(screen, PIPE_CAP_MAX_LINE_WIDTH_AA)); c->MaxPointSize - = max(1.0, pipe->get_paramf(pipe, PIPE_CAP_MAX_POINT_WIDTH)); + = max(1.0, screen->get_paramf(screen, PIPE_CAP_MAX_POINT_WIDTH)); c->MaxPointSizeAA - = max(1.0, pipe->get_paramf(pipe, PIPE_CAP_MAX_POINT_WIDTH_AA)); + = max(1.0, screen->get_paramf(screen, PIPE_CAP_MAX_POINT_WIDTH_AA)); c->MaxTextureMaxAnisotropy - = max(2.0, pipe->get_paramf(pipe, PIPE_CAP_MAX_TEXTURE_ANISOTROPY)); + = max(2.0, screen->get_paramf(screen, PIPE_CAP_MAX_TEXTURE_ANISOTROPY)); c->MaxTextureLodBias - = pipe->get_paramf(pipe, PIPE_CAP_MAX_TEXTURE_LOD_BIAS); + = screen->get_paramf(screen, PIPE_CAP_MAX_TEXTURE_LOD_BIAS); st->bitmap_texcoord_bias - = pipe->get_paramf(pipe, PIPE_CAP_BITMAP_TEXCOORD_BIAS); + = screen->get_paramf(screen, PIPE_CAP_BITMAP_TEXCOORD_BIAS); } @@ -117,7 +118,7 @@ void st_init_limits(struct st_context *st) */ void st_init_extensions(struct st_context *st) { - struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = st->pipe->screen; GLcontext *ctx = st->ctx; /* @@ -163,11 +164,11 @@ void st_init_extensions(struct st_context *st) /* * Extensions that depend on the driver/hardware: */ - if (pipe->get_param(pipe, PIPE_CAP_MAX_RENDER_TARGETS) > 0) { + if (screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS) > 0) { ctx->Extensions.ARB_draw_buffers = GL_TRUE; } - if (pipe->get_param(pipe, PIPE_CAP_GLSL)) { + if (screen->get_param(screen, PIPE_CAP_GLSL)) { ctx->Extensions.ARB_fragment_shader = GL_TRUE; ctx->Extensions.ARB_vertex_shader = GL_TRUE; ctx->Extensions.ARB_shader_objects = GL_TRUE; @@ -175,37 +176,37 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.ARB_shading_language_120 = GL_TRUE; } - if (pipe->get_param(pipe, PIPE_CAP_NPOT_TEXTURES)) { + if (screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES)) { ctx->Extensions.ARB_texture_non_power_of_two = GL_TRUE; ctx->Extensions.NV_texture_rectangle = GL_TRUE; } - if (pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS) > 1) { + if (screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS) > 1) { ctx->Extensions.ARB_multitexture = GL_TRUE; } - if (pipe->get_param(pipe, PIPE_CAP_TWO_SIDED_STENCIL)) { + if (screen->get_param(screen, PIPE_CAP_TWO_SIDED_STENCIL)) { ctx->Extensions.ATI_separate_stencil = GL_TRUE; } - if (pipe->get_param(pipe, PIPE_CAP_S3TC)) { + if (screen->get_param(screen, PIPE_CAP_S3TC)) { ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; } - if (pipe->get_param(pipe, PIPE_CAP_ANISOTROPIC_FILTER)) { + if (screen->get_param(screen, PIPE_CAP_ANISOTROPIC_FILTER)) { ctx->Extensions.EXT_texture_filter_anisotropic = GL_TRUE; } - if (pipe->get_param(pipe, PIPE_CAP_POINT_SPRITE)) { + if (screen->get_param(screen, PIPE_CAP_POINT_SPRITE)) { ctx->Extensions.ARB_point_sprite = GL_TRUE; ctx->Extensions.NV_point_sprite = GL_TRUE; } - if (pipe->get_param(pipe, PIPE_CAP_OCCLUSION_QUERY)) { + if (screen->get_param(screen, PIPE_CAP_OCCLUSION_QUERY)) { ctx->Extensions.ARB_occlusion_query = GL_TRUE; } - if (pipe->get_param(pipe, PIPE_CAP_TEXTURE_SHADOW_MAP)) { + if (screen->get_param(screen, PIPE_CAP_TEXTURE_SHADOW_MAP)) { ctx->Extensions.ARB_depth_texture = GL_TRUE; ctx->Extensions.ARB_shadow = GL_TRUE; ctx->Extensions.EXT_shadow_funcs = GL_TRUE; -- cgit v1.2.3 From 364f8cad0f8f02fd39d9c51ea0774d349121b58d Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 13:58:06 -0700 Subject: gallium: move is_format_supported() to pipe_screen struct --- src/gallium/drivers/cell/ppu/cell_context.c | 23 --------- src/gallium/drivers/cell/ppu/cell_screen.c | 19 ++++++++ src/gallium/drivers/failover/fo_context.c | 2 +- src/gallium/drivers/i915simple/i915_context.c | 55 --------------------- src/gallium/drivers/i915simple/i915_screen.c | 48 +++++++++++++++++++ src/gallium/drivers/i965simple/brw_context.c | 69 --------------------------- src/gallium/drivers/i965simple/brw_screen.c | 67 ++++++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_context.c | 26 ---------- src/gallium/drivers/softpipe/sp_screen.c | 24 ++++++++++ src/gallium/include/pipe/p_context.h | 19 +------- src/mesa/state_tracker/st_cb_drawpixels.c | 5 +- src/mesa/state_tracker/st_format.c | 45 +++++++++-------- src/mesa/state_tracker/st_gen_mipmap.c | 3 +- 13 files changed, 190 insertions(+), 215 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 351601473d2..2301df5ba52 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -57,26 +57,6 @@ -static boolean -cell_is_format_supported( struct pipe_context *pipe, - enum pipe_format format, uint type ) -{ - /*struct cell_context *cell = cell_context( pipe );*/ - - switch (type) { - case PIPE_TEXTURE: - /* cell supports all texture formats, XXX for now anyway */ - return TRUE; - case PIPE_SURFACE: - /* cell supports all (off-screen) surface formats, XXX for now */ - return TRUE; - default: - assert(0); - return FALSE; - } -} - - static void cell_destroy_context( struct pipe_context *pipe ) { @@ -122,9 +102,6 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.screen = screen; cell->pipe.destroy = cell_destroy_context; - /* queries */ - cell->pipe.is_format_supported = cell_is_format_supported; - /* state setters */ cell->pipe.set_vertex_buffer = cell_set_vertex_buffer; cell->pipe.set_vertex_element = cell_set_vertex_element; diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 75255c0466a..124670df255 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -112,6 +112,24 @@ cell_get_paramf(struct pipe_screen *screen, int param) } +static boolean +cell_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, uint type ) +{ + switch (type) { + case PIPE_TEXTURE: + /* cell supports all texture formats, XXX for now anyway */ + return TRUE; + case PIPE_SURFACE: + /* cell supports all (off-screen) surface formats, XXX for now */ + return TRUE; + default: + assert(0); + return FALSE; + } +} + + static void cell_destroy_screen( struct pipe_screen *screen ) { @@ -140,6 +158,7 @@ cell_create_screen(struct pipe_winsys *winsys) screen->get_vendor = cell_get_vendor; screen->get_param = cell_get_param; screen->get_paramf = cell_get_paramf; + screen->is_format_supported = cell_is_format_supported; cell_init_screen_texture_funcs(screen); diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index d5e54f5d61a..f559cc0d479 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -119,8 +119,8 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.winsys = hw->winsys; failover->pipe.screen = hw->screen; failover->pipe.destroy = failover_destroy; - failover->pipe.is_format_supported = hw->is_format_supported; #if 0 + failover->pipe.is_format_supported = hw->is_format_supported; failover->pipe.get_name = hw->get_name; failover->pipe.get_vendor = hw->get_vendor; failover->pipe.get_param = hw->get_param; diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 42355552de1..15ff2360b74 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -39,58 +39,6 @@ #include "pipe/p_screen.h" -/** - * Query format support for creating a texture, drawing surface, etc. - * \param format the format to test - * \param type one of PIPE_TEXTURE, PIPE_SURFACE - */ -static boolean -i915_is_format_supported( struct pipe_context *pipe, - enum pipe_format format, uint type ) -{ - static const enum pipe_format tex_supported[] = { - PIPE_FORMAT_R8G8B8A8_UNORM, - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_U_L8, - PIPE_FORMAT_U_A8, - PIPE_FORMAT_U_I8, - PIPE_FORMAT_U_A8_L8, - PIPE_FORMAT_YCBCR, - PIPE_FORMAT_YCBCR_REV, - PIPE_FORMAT_S8Z24_UNORM, - PIPE_FORMAT_NONE /* list terminator */ - }; - static const enum pipe_format surface_supported[] = { - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_S8Z24_UNORM, - /*PIPE_FORMAT_R16G16B16A16_SNORM,*/ - PIPE_FORMAT_NONE /* list terminator */ - }; - const enum pipe_format *list; - uint i; - - switch (type) { - case PIPE_TEXTURE: - list = tex_supported; - break; - case PIPE_SURFACE: - list = surface_supported; - break; - default: - assert(0); - } - - for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { - if (list[i] == format) - return TRUE; - } - - return FALSE; -} - - static void i915_destroy( struct pipe_context *pipe ) { struct i915_context *i915 = i915_context( pipe ); @@ -101,8 +49,6 @@ static void i915_destroy( struct pipe_context *pipe ) } - - static boolean i915_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, @@ -188,7 +134,6 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, i915->pipe.screen = screen; i915->pipe.destroy = i915_destroy; - i915->pipe.is_format_supported = i915_is_format_supported; i915->pipe.clear = i915_clear; diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index b9b4d6b6fac..8d7bf0b33e0 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -145,6 +145,53 @@ i915_get_paramf(struct pipe_screen *screen, int param) } +static boolean +i915_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, uint type ) +{ + static const enum pipe_format tex_supported[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_U_L8, + PIPE_FORMAT_U_A8, + PIPE_FORMAT_U_I8, + PIPE_FORMAT_U_A8_L8, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format surface_supported[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + /*PIPE_FORMAT_R16G16B16A16_SNORM,*/ + PIPE_FORMAT_NONE /* list terminator */ + }; + const enum pipe_format *list; + uint i; + + switch (type) { + case PIPE_TEXTURE: + list = tex_supported; + break; + case PIPE_SURFACE: + list = surface_supported; + break; + default: + assert(0); + } + + for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { + if (list[i] == format) + return TRUE; + } + + return FALSE; +} + + static void i915_destroy_screen( struct pipe_screen *screen ) { @@ -195,6 +242,7 @@ i915_create_screen(struct pipe_winsys *winsys, uint pci_id) i915screen->screen.get_vendor = i915_get_vendor; i915screen->screen.get_param = i915_get_param; i915screen->screen.get_paramf = i915_get_paramf; + i915screen->screen.is_format_supported = i915_is_format_supported; i915_init_screen_texture_functions(&i915screen->screen); diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index e0a40372867..7c908da6726 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -71,74 +71,6 @@ static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, } -static boolean -brw_is_format_supported( struct pipe_context *pipe, - enum pipe_format format, uint type ) -{ -#if 0 - /* XXX: This is broken -- rewrite if still needed. */ - static const unsigned tex_supported[] = { - PIPE_FORMAT_U_R8_G8_B8_A8, - PIPE_FORMAT_U_A8_R8_G8_B8, - PIPE_FORMAT_U_R5_G6_B5, - PIPE_FORMAT_U_L8, - PIPE_FORMAT_U_A8, - PIPE_FORMAT_U_I8, - PIPE_FORMAT_U_L8_A8, - PIPE_FORMAT_YCBCR, - PIPE_FORMAT_YCBCR_REV, - PIPE_FORMAT_S8_Z24, - }; - - - /* Actually a lot more than this - add later: - */ - static const unsigned render_supported[] = { - PIPE_FORMAT_U_A8_R8_G8_B8, - PIPE_FORMAT_U_R5_G6_B5, - }; - - /* - */ - static const unsigned z_stencil_supported[] = { - PIPE_FORMAT_U_Z16, - PIPE_FORMAT_U_Z32, - PIPE_FORMAT_S8_Z24, - }; - - switch (type) { - case PIPE_RENDER_FORMAT: - *numFormats = Elements(render_supported); - return render_supported; - - case PIPE_TEX_FORMAT: - *numFormats = Elements(tex_supported); - return render_supported; - - case PIPE_Z_STENCIL_FORMAT: - *numFormats = Elements(render_supported); - return render_supported; - - default: - *numFormats = 0; - return NULL; - } -#else - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - return TRUE; - default: - return FALSE; - }; - return FALSE; -#endif -} - - - - struct pipe_context *brw_create(struct pipe_screen *screen, struct brw_winsys *brw_winsys, unsigned pci_id) @@ -158,7 +90,6 @@ struct pipe_context *brw_create(struct pipe_screen *screen, brw->pipe.screen = screen; brw->pipe.destroy = brw_destroy; - brw->pipe.is_format_supported = brw_is_format_supported; brw->pipe.clear = brw_clear; brw_init_surface_functions(brw); diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index 7afe070bd8f..5be369fe521 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -133,6 +133,72 @@ brw_get_paramf(struct pipe_screen *screen, int param) } +static boolean +brw_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, uint type ) +{ +#if 0 + /* XXX: This is broken -- rewrite if still needed. */ + static const unsigned tex_supported[] = { + PIPE_FORMAT_U_R8_G8_B8_A8, + PIPE_FORMAT_U_A8_R8_G8_B8, + PIPE_FORMAT_U_R5_G6_B5, + PIPE_FORMAT_U_L8, + PIPE_FORMAT_U_A8, + PIPE_FORMAT_U_I8, + PIPE_FORMAT_U_L8_A8, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8_Z24, + }; + + + /* Actually a lot more than this - add later: + */ + static const unsigned render_supported[] = { + PIPE_FORMAT_U_A8_R8_G8_B8, + PIPE_FORMAT_U_R5_G6_B5, + }; + + /* + */ + static const unsigned z_stencil_supported[] = { + PIPE_FORMAT_U_Z16, + PIPE_FORMAT_U_Z32, + PIPE_FORMAT_S8_Z24, + }; + + switch (type) { + case PIPE_RENDER_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + case PIPE_TEX_FORMAT: + *numFormats = Elements(tex_supported); + return render_supported; + + case PIPE_Z_STENCIL_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + default: + *numFormats = 0; + return NULL; + } +#else + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + return TRUE; + default: + return FALSE; + }; + return FALSE; +#endif +} + + static void brw_destroy_screen( struct pipe_screen *screen ) { @@ -161,6 +227,7 @@ brw_create_screen(struct pipe_winsys *winsys, uint pci_id) brwscreen->screen.get_vendor = brw_get_vendor; brwscreen->screen.get_param = brw_get_param; brwscreen->screen.get_paramf = brw_get_paramf; + brwscreen->screen.is_format_supported = brw_is_format_supported; brw_init_screen_texture_funcs(&brwscreen->screen); diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 6a88c277aa6..fa16ed94e8d 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -47,29 +47,6 @@ -/** - * Query format support for creating a texture, drawing surface, etc. - * \param format the format to test - * \param type one of PIPE_TEXTURE, PIPE_SURFACE - */ -static boolean -softpipe_is_format_supported( struct pipe_context *pipe, - enum pipe_format format, uint type ) -{ - switch (type) { - case PIPE_TEXTURE: - /* softpipe supports all texture formats */ - return TRUE; - case PIPE_SURFACE: - /* softpipe supports all (off-screen) surface formats */ - return TRUE; - default: - assert(0); - return FALSE; - } -} - - /** * Map any drawing surfaces which aren't already mapped */ @@ -163,9 +140,6 @@ softpipe_create( struct pipe_screen *screen, softpipe->pipe.screen = screen; softpipe->pipe.destroy = softpipe_destroy; - /* queries */ - softpipe->pipe.is_format_supported = softpipe_is_format_supported; - /* state setters */ softpipe->pipe.create_blend_state = softpipe_create_blend_state; softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index cc3b9625805..1850a1ced33 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -107,6 +107,29 @@ softpipe_get_paramf(struct pipe_screen *screen, int param) } +/** + * Query format support for creating a texture, drawing surface, etc. + * \param format the format to test + * \param type one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +softpipe_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, uint type ) +{ + switch (type) { + case PIPE_TEXTURE: + /* softpipe supports all texture formats */ + return TRUE; + case PIPE_SURFACE: + /* softpipe supports all (off-screen) surface formats */ + return TRUE; + default: + assert(0); + return FALSE; + } +} + + static void softpipe_destroy_screen( struct pipe_screen *screen ) { @@ -134,6 +157,7 @@ softpipe_create_screen(struct pipe_winsys *winsys) screen->get_vendor = softpipe_get_vendor; screen->get_param = softpipe_get_param; screen->get_paramf = softpipe_get_paramf; + screen->is_format_supported = softpipe_is_format_supported; softpipe_init_screen_texture_funcs(screen); diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 38ee95bdadd..bb345df153b 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -59,24 +59,6 @@ struct pipe_context { void (*destroy)( struct pipe_context * ); - /* - * Queries - */ - /** type is one of PIPE_SURFACE, PIPE_TEXTURE, etc. */ - boolean (*is_format_supported)( struct pipe_context *pipe, - enum pipe_format format, uint type ); - -#if 0 - /* XXX obsolete, moved into pipe_screen */ - const char *(*get_name)( struct pipe_context *pipe ); - - const char *(*get_vendor)( struct pipe_context *pipe ); - - int (*get_param)( struct pipe_context *pipe, int param ); - float (*get_paramf)( struct pipe_context *pipe, int param ); -#endif - - /* * Drawing. * Return false on fallbacks (temporary??) @@ -210,6 +192,7 @@ struct pipe_context { /* * Texture functions + * XXX these are moving to pipe_screen... */ struct pipe_texture * (*texture_create)(struct pipe_context *pipe, const struct pipe_texture *templat); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 6245ce057f9..0f2c6307dd4 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -993,6 +993,7 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, const GLubyte *bitmap) { struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_surface *surface; uint format = 0, cpp, comp; ubyte *dest; @@ -1000,12 +1001,12 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, int row, col; /* find a texture format we know */ - if (pipe->is_format_supported( pipe, PIPE_FORMAT_U_I8, PIPE_TEXTURE )) { + if (screen->is_format_supported( screen, PIPE_FORMAT_U_I8, PIPE_TEXTURE )) { format = PIPE_FORMAT_U_I8; cpp = 1; comp = 0; } - else if (pipe->is_format_supported( pipe, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_TEXTURE )) { + else if (screen->is_format_supported( screen, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_TEXTURE )) { format = PIPE_FORMAT_A8R8G8B8_UNORM; cpp = 4; comp = 3; /* alpha channel */ /*XXX little-endian dependency */ diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 2a23445ca2b..9aeda65a5c8 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -40,6 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "pipe/p_screen.h" #include "st_context.h" #include "st_format.h" @@ -288,9 +289,10 @@ default_rgba_format(struct pipe_context *pipe, uint type) PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R5G6B5_UNORM }; + struct pipe_screen *screen = pipe->screen; uint i; for (i = 0; i < Elements(colorFormats); i++) { - if (pipe->is_format_supported( pipe, colorFormats[i], type )) { + if (screen->is_format_supported( screen, colorFormats[i], type )) { return colorFormats[i]; } } @@ -304,7 +306,8 @@ default_rgba_format(struct pipe_context *pipe, uint type) static GLuint default_deep_rgba_format(struct pipe_context *pipe, uint type) { - if (pipe->is_format_supported(pipe, PIPE_FORMAT_R16G16B16A16_SNORM, type)) { + struct pipe_screen *screen = pipe->screen; + if (screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_SNORM, type)) { return PIPE_FORMAT_R16G16B16A16_SNORM; } return PIPE_FORMAT_NONE; @@ -323,9 +326,10 @@ default_depth_format(struct pipe_context *pipe, uint type) PIPE_FORMAT_S8Z24_UNORM, PIPE_FORMAT_Z24S8_UNORM }; + struct pipe_screen *screen = pipe->screen; uint i; for (i = 0; i < Elements(zFormats); i++) { - if (pipe->is_format_supported( pipe, zFormats[i], type )) { + if (screen->is_format_supported( screen, zFormats[i], type )) { return zFormats[i]; } } @@ -341,6 +345,7 @@ default_depth_format(struct pipe_context *pipe, uint type) enum pipe_format st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) { + struct pipe_screen *screen = pipe->screen; uint surfType = PIPE_SURFACE; switch (internalFormat) { @@ -359,12 +364,12 @@ st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) case GL_RGBA4: case GL_RGBA2: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_A4R4G4B4_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A4R4G4B4_UNORM, surfType )) return PIPE_FORMAT_A4R4G4B4_UNORM; return default_rgba_format( pipe, surfType ); case GL_RGB5_A1: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_A1R5G5B5_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, surfType )) return PIPE_FORMAT_A1R5G5B5_UNORM; return default_rgba_format( pipe, surfType ); @@ -377,9 +382,9 @@ st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) case GL_RGB5: case GL_RGB4: case GL_R3_G3_B2: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_A1R5G5B5_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, surfType )) return PIPE_FORMAT_A1R5G5B5_UNORM; - if (pipe->is_format_supported( pipe, PIPE_FORMAT_R5G6B5_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_R5G6B5_UNORM, surfType )) return PIPE_FORMAT_R5G6B5_UNORM; return default_rgba_format( pipe, surfType ); @@ -389,7 +394,7 @@ st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) case GL_ALPHA12: case GL_ALPHA16: case GL_COMPRESSED_ALPHA: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_U_A8, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_U_A8, surfType )) return PIPE_FORMAT_U_A8; return default_rgba_format( pipe, surfType ); @@ -400,7 +405,7 @@ st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) case GL_LUMINANCE12: case GL_LUMINANCE16: case GL_COMPRESSED_LUMINANCE: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_U_L8, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_U_L8, surfType )) return PIPE_FORMAT_U_A8; return default_rgba_format( pipe, surfType ); @@ -413,7 +418,7 @@ st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) case GL_LUMINANCE12_ALPHA12: case GL_LUMINANCE16_ALPHA16: case GL_COMPRESSED_LUMINANCE_ALPHA: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_U_A8_L8, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_U_A8_L8, surfType )) return PIPE_FORMAT_U_A8_L8; return default_rgba_format( pipe, surfType ); @@ -423,7 +428,7 @@ st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) case GL_INTENSITY12: case GL_INTENSITY16: case GL_COMPRESSED_INTENSITY: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_U_I8, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_U_I8, surfType )) return PIPE_FORMAT_U_I8; return default_rgba_format( pipe, surfType ); @@ -454,17 +459,17 @@ st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) #endif case GL_DEPTH_COMPONENT16: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_Z16_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z16_UNORM, surfType )) return PIPE_FORMAT_Z16_UNORM; /* fall-through */ case GL_DEPTH_COMPONENT24: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_S8Z24_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, surfType )) return PIPE_FORMAT_S8Z24_UNORM; - if (pipe->is_format_supported( pipe, PIPE_FORMAT_Z24S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, surfType )) return PIPE_FORMAT_Z24S8_UNORM; /* fall-through */ case GL_DEPTH_COMPONENT32: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_Z32_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z32_UNORM, surfType )) return PIPE_FORMAT_Z32_UNORM; /* fall-through */ case GL_DEPTH_COMPONENT: @@ -475,19 +480,19 @@ st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) case GL_STENCIL_INDEX4_EXT: case GL_STENCIL_INDEX8_EXT: case GL_STENCIL_INDEX16_EXT: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_U_S8, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_U_S8, surfType )) return PIPE_FORMAT_U_S8; - if (pipe->is_format_supported( pipe, PIPE_FORMAT_S8Z24_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, surfType )) return PIPE_FORMAT_S8Z24_UNORM; - if (pipe->is_format_supported( pipe, PIPE_FORMAT_Z24S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, surfType )) return PIPE_FORMAT_Z24S8_UNORM; return PIPE_FORMAT_NONE; case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: - if (pipe->is_format_supported( pipe, PIPE_FORMAT_S8Z24_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, surfType )) return PIPE_FORMAT_S8Z24_UNORM; - if (pipe->is_format_supported( pipe, PIPE_FORMAT_Z24S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, surfType )) return PIPE_FORMAT_Z24S8_UNORM; return PIPE_FORMAT_NONE; diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index c9765b20036..2b163106028 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -227,6 +227,7 @@ st_render_mipmap(struct st_context *st, uint baseLevel, uint lastLevel) { struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb; struct pipe_sampler_state sampler; void *sampler_cso; @@ -237,7 +238,7 @@ st_render_mipmap(struct st_context *st, assert(target != GL_TEXTURE_3D); /* not done yet */ /* check if we can render in the texture's format */ - if (!pipe->is_format_supported(pipe, pt->format, PIPE_SURFACE)) { + if (!screen->is_format_supported(screen, pt->format, PIPE_SURFACE)) { return FALSE; } -- cgit v1.2.3 From 6f715dcc219071e574e363a9db4365c9c31ebbd3 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 14:21:12 -0700 Subject: gallium: remove pipe_context->texture_create/release/get_tex_surface() These functions are now per-screen, not per-context. --- src/gallium/auxiliary/draw/draw_aaline.c | 5 ++- src/gallium/auxiliary/draw/draw_pstipple.c | 6 ++- src/gallium/drivers/cell/ppu/cell_texture.c | 30 ------------- src/gallium/drivers/failover/fo_context.c | 4 +- src/gallium/drivers/i915simple/i915_texture.c | 28 ------------ src/gallium/drivers/i965simple/brw_tex_layout.c | 30 ------------- src/gallium/drivers/softpipe/sp_texture.c | 60 ++++++------------------- src/gallium/drivers/softpipe/sp_tile_cache.c | 3 +- src/gallium/include/pipe/p_context.h | 18 +------- src/gallium/include/pipe/p_inlines.h | 18 +++----- src/mesa/state_tracker/st_cb_drawpixels.c | 8 ++-- src/mesa/state_tracker/st_cb_fbo.c | 9 ++-- src/mesa/state_tracker/st_cb_texture.c | 28 ++++++------ src/mesa/state_tracker/st_gen_mipmap.c | 7 +-- src/mesa/state_tracker/st_texture.c | 16 ++++--- 15 files changed, 71 insertions(+), 199 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 73a02a32e42..be6cfd3b6a3 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -362,6 +362,7 @@ static void aaline_create_texture(struct aaline_stage *aaline) { struct pipe_context *pipe = aaline->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_texture texTemp; uint level; @@ -374,7 +375,7 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.depth[0] = 1; texTemp.cpp = 1; - aaline->texture = pipe->texture_create(pipe, &texTemp); + aaline->texture = screen->texture_create(screen, &texTemp); /* Fill in mipmap images. * Basically each level is solid opaque, except for the outermost @@ -388,7 +389,7 @@ aaline_create_texture(struct aaline_stage *aaline) assert(aaline->texture->width[level] == aaline->texture->height[level]); - surface = pipe->get_tex_surface(pipe, aaline->texture, 0, level, 0); + surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0); data = pipe_surface_map(surface); for (i = 0; i < size; i++) { diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 1ab04cd9598..efc88bf038b 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -348,12 +348,13 @@ pstip_update_texture(struct pstip_stage *pstip) { static const uint bit31 = 1 << 31; struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_surface *surface; const uint *stipple = pstip->state.stipple->stipple; uint i, j; ubyte *data; - surface = pipe->get_tex_surface(pipe, pstip->texture, 0, 0, 0); + surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0); data = pipe_surface_map(surface); /* @@ -389,6 +390,7 @@ static void pstip_create_texture(struct pstip_stage *pstip) { struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_texture texTemp; memset(&texTemp, 0, sizeof(texTemp)); @@ -400,7 +402,7 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.depth[0] = 1; texTemp.cpp = 1; - pstip->texture = pipe->texture_create(pipe, &texTemp); + pstip->texture = screen->texture_create(screen, &texTemp); //pstip_update_texture(pstip); } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index e6398a85fa5..28cadad6ed2 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -79,14 +79,6 @@ cell_texture_layout(struct cell_texture * spt) } -static struct pipe_texture * -cell_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) -{ - return pipe->screen->texture_create(pipe->screen, templat); - -} - static struct pipe_texture * cell_texture_create_screen(struct pipe_screen *screen, const struct pipe_texture *templat) @@ -116,13 +108,6 @@ cell_texture_create_screen(struct pipe_screen *screen, } -static void -cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - return pipe->screen->texture_release(pipe->screen, pt); -} - - static void cell_texture_release_screen(struct pipe_screen *screen, struct pipe_texture **pt) @@ -157,18 +142,6 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) } -/** - * Called via pipe->get_tex_surface() - */ -static struct pipe_surface * -cell_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - return pipe->screen->get_tex_surface(pipe->screen, pt, face, level, zslice); -} - - static struct pipe_surface * cell_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, @@ -294,10 +267,7 @@ cell_update_texture_mapping(struct cell_context *cell) void cell_init_texture_functions(struct cell_context *cell) { - cell->pipe.texture_create = cell_texture_create; - cell->pipe.texture_release = cell_texture_release; cell->pipe.texture_update = cell_texture_update; - cell->pipe.get_tex_surface = cell_get_tex_surface; } void diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index f559cc0d479..afc0d7eb1ec 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -143,10 +143,12 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.surface_copy = hw->surface_copy; failover->pipe.surface_fill = hw->surface_fill; +#if 0 failover->pipe.texture_create = hw->texture_create; failover->pipe.texture_release = hw->texture_release; - failover->pipe.texture_update = hw->texture_update; failover->pipe.get_tex_surface = hw->get_tex_surface; +#endif + failover->pipe.texture_update = hw->texture_update; failover->pipe.flush = hw->flush; diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 3c9509dee34..9cdf3418a96 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -511,14 +511,6 @@ i915_texture_create_screen(struct pipe_screen *screen, } -static struct pipe_texture * -i915_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) -{ - return pipe->screen->texture_create(pipe->screen, templat); -} - - static void i915_texture_release_screen(struct pipe_screen *screen, struct pipe_texture **pt) @@ -550,13 +542,6 @@ i915_texture_release_screen(struct pipe_screen *screen, } -static void -i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - i915_texture_release_screen(pipe->screen, pt); -} - - static void i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) { @@ -606,26 +591,13 @@ i915_get_tex_surface_screen(struct pipe_screen *screen, } -static struct pipe_surface * -i915_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - return i915_get_tex_surface_screen(pipe->screen, pt, face, level, zslice); -} - - void i915_init_texture_functions(struct i915_context *i915) { - i915->pipe.texture_create = i915_texture_create; - i915->pipe.texture_release = i915_texture_release; i915->pipe.texture_update = i915_texture_update; - i915->pipe.get_tex_surface = i915_get_tex_surface; } - void i915_init_screen_texture_functions(struct pipe_screen *screen) { diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 9753c50143a..b24ac87c378 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -298,14 +298,6 @@ static boolean brw_miptree_layout(struct brw_texture *tex) } -static struct pipe_texture * -brw_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) -{ - return pipe->screen->texture_create(pipe->screen, templat); -} - - static struct pipe_texture * brw_texture_create_screen(struct pipe_screen *screen, const struct pipe_texture *templat) @@ -333,13 +325,6 @@ brw_texture_create_screen(struct pipe_screen *screen, } -static void -brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - pipe->screen->texture_release(pipe->screen, pt); -} - - static void brw_texture_release_screen(struct pipe_screen *screen, struct pipe_texture **pt) @@ -379,18 +364,6 @@ brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) } -/* - * XXX note: same as code in sp_surface.c - */ -static struct pipe_surface * -brw_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - return pipe->screen->get_tex_surface(pipe->screen, pt, face, level, zslice); -} - - static struct pipe_surface * brw_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, @@ -433,10 +406,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, void brw_init_texture_functions(struct brw_context *brw) { - brw->pipe.texture_create = brw_texture_create; - brw->pipe.texture_release = brw_texture_release; brw->pipe.texture_update = brw_texture_update; - brw->pipe.get_tex_surface = brw_get_tex_surface; } diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index f0e8350a4a1..7c027653135 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -80,15 +80,6 @@ softpipe_texture_layout(struct softpipe_texture * spt) } -/* XXX temporary */ -static struct pipe_texture * -softpipe_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) -{ - return pipe->screen->texture_create(pipe->screen, templat); -} - - static struct pipe_texture * softpipe_texture_create_screen(struct pipe_screen *screen, const struct pipe_texture *templat) @@ -119,14 +110,6 @@ softpipe_texture_create_screen(struct pipe_screen *screen, } -/* XXX temporary */ -static void -softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - return pipe->screen->texture_release(pipe->screen, pt); -} - - static void softpipe_texture_release_screen(struct pipe_screen *screen, struct pipe_texture **pt) @@ -153,33 +136,6 @@ softpipe_texture_release_screen(struct pipe_screen *screen, } -static void -softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); - uint unit; - for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { - if (softpipe->texture[unit] == texture) { - sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); - } - } -} - - -/** - * Called via pipe->get_tex_surface() - */ -/* XXX temporary */ -static struct pipe_surface * -softpipe_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - return pipe->screen->get_tex_surface(pipe->screen, pt, face, level, zslice); -} - - static struct pipe_surface * softpipe_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, @@ -217,14 +173,24 @@ softpipe_get_tex_surface_screen(struct pipe_screen *screen, } +static void +softpipe_texture_update(struct pipe_context *pipe, + struct pipe_texture *texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint unit; + for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { + if (softpipe->texture[unit] == texture) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); + } + } +} + void softpipe_init_texture_funcs( struct softpipe_context *softpipe ) { - softpipe->pipe.texture_create = softpipe_texture_create; - softpipe->pipe.texture_release = softpipe_texture_release; softpipe->pipe.texture_update = softpipe_texture_update; - softpipe->pipe.get_tex_surface = softpipe_get_tex_surface; } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 0ff93c55277..4caf2dd3fc9 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -489,6 +489,7 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, struct softpipe_tile_cache *tc, int x, int y, int z, int face, int level) { + struct pipe_screen *screen = pipe->screen; /* tile pos in framebuffer: */ const int tile_x = x & ~(TILE_SIZE - 1); const int tile_y = y & ~(TILE_SIZE - 1); @@ -514,7 +515,7 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, if (tc->tex_surf_map) pipe_surface_unmap(tc->tex_surf); - tc->tex_surf = pipe->get_tex_surface(pipe, tc->texture, face, level, z); + tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z); tc->tex_surf_map = pipe_surface_map(tc->tex_surf); tc->tex_face = face; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index bb345df153b..d0f25d7d467 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -189,30 +189,16 @@ struct pipe_context { struct pipe_surface *ps, unsigned clearValue); - - /* - * Texture functions - * XXX these are moving to pipe_screen... - */ - struct pipe_texture * (*texture_create)(struct pipe_context *pipe, - const struct pipe_texture *templat); - - void (*texture_release)(struct pipe_context *pipe, - struct pipe_texture **pt); - /** * Called when texture data is changed. * Note: we could pass some hints about which mip levels or cube faces * have changed... + * XXX this may go away - could pass a 'write' flag to get_tex_surface() */ void (*texture_update)(struct pipe_context *pipe, struct pipe_texture *texture); - /** Get a surface which is a "view" into a texture */ - struct pipe_surface *(*get_tex_surface)(struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level, - unsigned zslice); + /* Flush rendering: */ diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index a7e97fcd7db..274f76a383f 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -107,15 +107,9 @@ pipe_texture_reference(struct pipe_texture **ptr, pt->refcount++; if (*ptr) { - struct pipe_context *pipe = (*ptr)->pipe; - /* XXX temporary mess here */ - if (pipe) { - pipe->texture_release(pipe, ptr); - } - else { - struct pipe_screen *screen = (*ptr)->screen; - screen->texture_release(screen, ptr); - } + struct pipe_screen *screen = (*ptr)->screen; + assert(screen); + screen->texture_release(screen, ptr); assert(!*ptr); } @@ -127,10 +121,10 @@ pipe_texture_reference(struct pipe_texture **ptr, static INLINE void pipe_texture_release(struct pipe_texture **ptr) { - struct pipe_context *pipe; + struct pipe_screen *screen; assert(ptr); - pipe = (*ptr)->pipe; - pipe->texture_release(pipe, ptr); + screen = (*ptr)->screen; + screen->texture_release(screen, ptr); *ptr = NULL; } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 0f2c6307dd4..ff236adc5cc 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -456,6 +456,7 @@ make_texture(struct st_context *st, { GLcontext *ctx = st->ctx; struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; const struct gl_texture_format *mformat; struct pipe_texture *pt; enum pipe_format pipeFormat; @@ -493,7 +494,7 @@ make_texture(struct st_context *st, /* we'll do pixel transfer in a fragment shader */ ctx->_ImageTransferState = 0x0; - surface = pipe->get_tex_surface(pipe, pt, 0, 0, 0); + surface = screen->get_tex_surface(screen, pt, 0, 0, 0); /* map texture surface */ dest = pipe_surface_map(surface); @@ -1031,7 +1032,7 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, printf("st_Bitmap (sourcing from PBO not implemented yet)\n"); } - surface = pipe->get_tex_surface(pipe, pt, 0, 0, 0); + surface = screen->get_tex_surface(screen, pt, 0, 0, 0); /* map texture surface */ dest = pipe_surface_map(surface); @@ -1207,6 +1208,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, { struct st_context *st = ctx->st; struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; struct st_renderbuffer *rbRead; struct st_vertex_program *stvp; struct st_fragment_program *stfp; @@ -1248,7 +1250,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, if (!pt) return; - psTex = pipe->get_tex_surface(pipe, pt, 0, 0, 0); + psTex = screen->get_tex_surface(screen, pt, 0, 0, 0); if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { srcy = ctx->DrawBuffer->Height - srcy - height; diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 781425b5469..5384252a8e5 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -307,6 +307,7 @@ st_render_texture(GLcontext *ctx, struct st_renderbuffer *strb; struct gl_renderbuffer *rb; struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_texture *pt; assert(!att->Renderbuffer); @@ -332,10 +333,10 @@ st_render_texture(GLcontext *ctx, rb->Height = pt->height[att->TextureLevel]; /* the renderbuffer's surface is inside the texture */ - strb->surface = pipe->get_tex_surface(pipe, pt, - att->CubeMapFace, - att->TextureLevel, - att->Zoffset); + strb->surface = screen->get_tex_surface(screen, pt, + att->CubeMapFace, + att->TextureLevel, + att->Zoffset); assert(strb->surface); init_renderbuffer_bits(strb, pt->format); diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index f5f956f6ead..1ba31733120 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -144,12 +144,11 @@ st_NewTextureObject(GLcontext * ctx, GLuint name, GLenum target) static void st_DeleteTextureObject(GLcontext *ctx, - struct gl_texture_object *texObj) + struct gl_texture_object *texObj) { struct st_texture_object *stObj = st_texture_object(texObj); - if (stObj->pt) - ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); + pipe_texture_release(&stObj->pt); _mesa_delete_texture_object(ctx, texObj); } @@ -163,7 +162,7 @@ st_FreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage) DBG("%s\n", __FUNCTION__); if (stImage->pt) { - ctx->st->pipe->texture_release(ctx->st->pipe, &stImage->pt); + pipe_texture_release(&stImage->pt); } if (texImage->Data) { @@ -537,7 +536,7 @@ st_TexImage(GLcontext * ctx, * Release any old malloced memory. */ if (stImage->pt) { - ctx->st->pipe->texture_release(ctx->st->pipe, &stImage->pt); + pipe_texture_release(&stImage->pt); assert(!texImage->Data); } else if (texImage->Data) { @@ -556,7 +555,7 @@ st_TexImage(GLcontext * ctx, stImage->face, stImage->level)) { DBG("release it\n"); - ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); + pipe_texture_release(&stObj->pt); assert(!stObj->pt); } @@ -1025,6 +1024,7 @@ fallback_copy_texsubimage(GLcontext *ctx, GLsizei width, GLsizei height) { struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; const uint face = texture_face(target); struct pipe_texture *pt = stImage->pt; struct pipe_surface *src_surf, *dest_surf; @@ -1042,8 +1042,7 @@ fallback_copy_texsubimage(GLcontext *ctx, src_surf = strb->surface; - dest_surf = pipe->get_tex_surface(pipe, pt, - face, level, destZ); + dest_surf = screen->get_tex_surface(screen, pt, face, level, destZ); /* buffer for one row */ data = (GLfloat *) malloc(width * 4 * sizeof(GLfloat)); @@ -1096,6 +1095,7 @@ do_copy_texsubimage(GLcontext *ctx, struct gl_framebuffer *fb = ctx->ReadBuffer; struct st_renderbuffer *strb; struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_surface *dest_surface; uint dest_format, src_format; uint do_flip = FALSE; @@ -1126,8 +1126,8 @@ do_copy_texsubimage(GLcontext *ctx, src_format = strb->surface->format; dest_format = stImage->pt->format; - dest_surface = pipe->get_tex_surface(pipe, stImage->pt, stImage->face, - stImage->level, destZ); + dest_surface = screen->get_tex_surface(screen, stImage->pt, stImage->face, + stImage->level, destZ); if (src_format == dest_format && ctx->_ImageTransferState == 0x0 && @@ -1352,7 +1352,7 @@ copy_image_data_to_texture(struct st_context *st, stImage->face ); - st->pipe->texture_release(st->pipe, &stImage->pt); + pipe_texture_release(&stImage->pt); } else { assert(stImage->base.Data != NULL); @@ -1408,7 +1408,7 @@ st_finalize_texture(GLcontext *ctx, */ if (firstImage->base.Border) { if (stObj->pt) { - ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); + pipe_texture_release(&stObj->pt); } return GL_FALSE; } @@ -1424,7 +1424,7 @@ st_finalize_texture(GLcontext *ctx, firstImage->pt->last_level >= stObj->lastLevel) { if (stObj->pt) - ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); + pipe_texture_release(&stObj->pt); pipe_texture_reference(&stObj->pt, firstImage->pt); } @@ -1450,7 +1450,7 @@ st_finalize_texture(GLcontext *ctx, stObj->pt->depth[0] != firstImage->base.Depth || stObj->pt->cpp != cpp || stObj->pt->compressed != firstImage->base.IsCompressed)) { - ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); + pipe_texture_release(&stObj->pt); } diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 2b163106028..243dc0b1d08 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -276,7 +276,7 @@ st_render_mipmap(struct st_context *st, /* * Setup framebuffer / dest surface */ - fb.cbufs[0] = pipe->get_tex_surface(pipe, pt, face, dstLevel, zslice); + fb.cbufs[0] = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); pipe->set_framebuffer_state(pipe, &fb); /* @@ -325,6 +325,7 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj) { struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_winsys *ws = pipe->winsys; struct pipe_texture *pt = st_get_texobj_texture(texObj); const uint baseLevel = texObj->BaseLevel; @@ -345,8 +346,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, const ubyte *srcData; ubyte *dstData; - srcSurf = pipe->get_tex_surface(pipe, pt, face, srcLevel, zslice); - dstSurf = pipe->get_tex_surface(pipe, pt, face, dstLevel, zslice); + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); srcData = (ubyte *) ws->buffer_map(ws, srcSurf->buffer, PIPE_BUFFER_USAGE_CPU_READ) diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index ad284170e44..c2b0aa8a4aa 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -77,6 +77,7 @@ st_texture_create(struct st_context *st, GLuint compress_byte) { struct pipe_texture pt, *newtex; + struct pipe_screen *screen = st->pipe->screen; assert(target <= PIPE_TEXTURE_CUBE); @@ -96,7 +97,7 @@ st_texture_create(struct st_context *st, pt.compressed = compress_byte ? 1 : 0; pt.cpp = pt.compressed ? compress_byte : st_sizeof_format(format); - newtex = st->pipe->texture_create(st->pipe, &pt); + newtex = screen->texture_create(screen, &pt); assert(!newtex || newtex->refcount == 1); @@ -183,11 +184,12 @@ GLubyte * st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, GLuint zoffset) { + struct pipe_screen *screen = st->pipe->screen; struct pipe_texture *pt = stImage->pt; DBG("%s \n", __FUNCTION__); - stImage->surface = st->pipe->get_tex_surface(st->pipe, pt, stImage->face, - stImage->level, zoffset); + stImage->surface = screen->get_tex_surface(screen, pt, stImage->face, + stImage->level, zoffset); return pipe_surface_map(stImage->surface); } @@ -239,6 +241,7 @@ st_texture_image_data(struct pipe_context *pipe, void *src, GLuint src_row_pitch, GLuint src_image_pitch) { + struct pipe_screen *screen = pipe->screen; GLuint depth = dst->depth[level]; GLuint i; GLuint height = 0; @@ -251,7 +254,7 @@ st_texture_image_data(struct pipe_context *pipe, if(dst->compressed) height /= 4; - dst_surface = pipe->get_tex_surface(pipe, dst, face, level, i); + dst_surface = screen->get_tex_surface(screen, dst, face, level, i); st_surface_data(pipe, dst_surface, 0, 0, /* dstx, dsty */ @@ -275,6 +278,7 @@ st_texture_image_copy(struct pipe_context *pipe, struct pipe_texture *src, GLuint face) { + struct pipe_screen *screen = pipe->screen; GLuint width = dst->width[dstLevel]; GLuint height = dst->height[dstLevel]; GLuint depth = dst->depth[dstLevel]; @@ -299,8 +303,8 @@ st_texture_image_copy(struct pipe_context *pipe, assert(src->width[srcLevel] == width); assert(src->height[srcLevel] == height); - dst_surface = pipe->get_tex_surface(pipe, dst, face, dstLevel, i); - src_surface = pipe->get_tex_surface(pipe, src, face, srcLevel, i); + dst_surface = screen->get_tex_surface(screen, dst, face, dstLevel, i); + src_surface = screen->get_tex_surface(screen, src, face, srcLevel, i); pipe->surface_copy(pipe, FALSE, -- cgit v1.2.3 From cc5ffd762227345d0a5bf9e9356dd83a8b2a8b33 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 14:28:17 -0700 Subject: gallium: remove obsolete *_strings.c files --- src/gallium/drivers/i915simple/i915_strings.c | 53 --------------------------- src/gallium/drivers/i965simple/brw_strings.c | 51 -------------------------- 2 files changed, 104 deletions(-) delete mode 100644 src/gallium/drivers/i915simple/i915_strings.c delete mode 100644 src/gallium/drivers/i965simple/brw_strings.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_strings.c b/src/gallium/drivers/i915simple/i915_strings.c deleted file mode 100644 index 14da3bcd726..00000000000 --- a/src/gallium/drivers/i915simple/i915_strings.c +++ /dev/null @@ -1,53 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "i915_context.h" -#include "i915_screen.h" -#include "i915_reg.h" - - -/** XXX temporary screen/pipe duplication here */ - - -static const char *i915_get_vendor( struct pipe_context *pipe ) -{ - return pipe->screen->get_vendor(pipe->screen); -} - - -static const char *i915_get_name( struct pipe_context *pipe ) -{ - return pipe->screen->get_name(pipe->screen); -} - - -void -i915_init_string_functions(struct i915_context *i915) -{ - i915->pipe.get_name = i915_get_name; - i915->pipe.get_vendor = i915_get_vendor; -} diff --git a/src/gallium/drivers/i965simple/brw_strings.c b/src/gallium/drivers/i965simple/brw_strings.c deleted file mode 100644 index a8f2ce582af..00000000000 --- a/src/gallium/drivers/i965simple/brw_strings.c +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "brw_context.h" -#include "brw_reg.h" - -#include "pipe/p_screen.h" - - -static const char *brw_get_vendor( struct pipe_context *pipe ) -{ - return pipe->screen->get_vendor(pipe->screen); -} - - -static const char *brw_get_name( struct pipe_context *pipe ) -{ - return pipe->screen->get_name(pipe->screen); -} - - -void -brw_init_string_functions(struct brw_context *brw) -{ - brw->pipe.get_name = brw_get_name; - brw->pipe.get_vendor = brw_get_vendor; -} -- cgit v1.2.3 From 1c50ea2cd9ab8752793c99b4a7a2a6656bdde1ac Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Wed, 27 Feb 2008 13:40:23 -0800 Subject: cell: Use unified data cache for textures too --- src/gallium/drivers/cell/spu/spu_main.c | 2 + src/gallium/drivers/cell/spu/spu_main.h | 3 +- src/gallium/drivers/cell/spu/spu_texture.c | 184 +++++++++++------------------ 3 files changed, 72 insertions(+), 117 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index cc4bafdb3ac..59300028d4d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -286,6 +286,8 @@ cmd_state_texture(const struct cell_command_texture *texture) { spu.texture.width, spu.texture.height, 0.0, 0.0}; spu.tex_size_mask = (vector unsigned int) { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; + spu.tex_size_x_mask = spu_splats(spu.texture.width - 1); + spu.tex_size_y_mask = spu_splats(spu.texture.height - 1); } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index d14f1abbe74..a13edd17029 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -107,6 +107,8 @@ struct spu_global vector float tex_size; vector unsigned int tex_size_mask; /**< == int(size - 1) */ + vector unsigned int tex_size_x_mask; /**< == int(size - 1) */ + vector unsigned int tex_size_y_mask; /**< == int(size - 1) */ vector float (*sample_texture)(vector float texcoord); @@ -130,7 +132,6 @@ extern boolean Debug; #define TAG_INDEX_BUFFER 16 #define TAG_BATCH_BUFFER 17 #define TAG_MISC 18 -#define TAG_TEXTURE_TILE 19 #define TAG_DCACHE0 20 #define TAG_DCACHE1 21 #define TAG_DCACHE2 22 diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 3962aaa4a9b..67eb08196a4 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -31,19 +31,7 @@ #include "spu_texture.h" #include "spu_tile.h" #include "spu_colorpack.h" - - -/** - * Number of texture tiles to cache. - * Note that this will probably be the largest consumer of SPU local store/ - * memory for this driver! - */ -#define CACHE_SIZE 16 - -static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB; - -static vector unsigned int tex_tile_xy[CACHE_SIZE]; - +#include "spu_dcache.h" /** @@ -52,78 +40,60 @@ static vector unsigned int tex_tile_xy[CACHE_SIZE]; void invalidate_tex_cache(void) { - /* XXX memset? */ - uint i; - for (i = 0; i < CACHE_SIZE; i++) { - tex_tile_xy[i] = ((vector unsigned int) { ~0U, ~0U, ~0U, ~0U }); - } + spu_dcache_mark_dirty((unsigned) spu.texture.start, + 4 * spu.texture.width * spu.texture.height); } -/** - * Return the cache pos/index which corresponds to tile (tx,ty) - */ -static INLINE uint -cache_pos(vector unsigned int txty) +static uint +get_texel(vec_uint4 coordinate) { - uint pos = (spu_extract(txty,0) + spu_extract(txty,1) * 4) % CACHE_SIZE; - return pos; + vec_uint4 tmp; + unsigned x = spu_extract(coordinate, 0); + unsigned y = spu_extract(coordinate, 1); + const unsigned tiles_per_row = spu.texture.width / TILE_SIZE; + unsigned tile_offset = sizeof(tile_t) * ((y / TILE_SIZE * tiles_per_row) + + (x / TILE_SIZE)); + unsigned texel_offset = 4 * (((y % TILE_SIZE) * TILE_SIZE) + + (x % TILE_SIZE)); + + spu_dcache_fetch_unaligned((qword *) & tmp, + spu.texture.start + tile_offset + texel_offset, + 4); + return spu_extract(tmp, 0); } -/** - * Make sure the tile for texel (i,j) is present, return its position/index - * in the cache. - */ -static uint -get_tex_tile(vector unsigned int ij) +static void +get_four_texels(vec_uint4 x, vec_uint4 y, vec_uint4 *texels) { - /* tile address: tx,ty */ - const vector unsigned int txty = spu_rlmask(ij, -5); /* divide by 32 */ - const uint pos = cache_pos(txty); - - if ((spu_extract(tex_tile_xy[pos], 0) != spu_extract(txty, 0)) || - (spu_extract(tex_tile_xy[pos], 1) != spu_extract(txty, 1))) { - - /* texture cache miss, fetch tile from main memory */ - const uint tiles_per_row = spu.texture.width / TILE_SIZE; - const uint bytes_per_tile = sizeof(tile_t); - const void *src = (const ubyte *) spu.texture.start - + (spu_extract(txty,1) * tiles_per_row + spu_extract(txty,0)) * bytes_per_tile; - - printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n", - spu.init.id, - spu_extract(txty,0), - spu_extract(txty,1), - pos, - spu_extract(tex_tile_xy[pos],0), - spu_extract(tex_tile_xy[pos],1)); - - ASSERT_ALIGN16(tex_tiles[pos].ui); - ASSERT_ALIGN16(src); - - mfc_get(tex_tiles[pos].ui, /* dest */ - (unsigned int) src, - bytes_per_tile, /* size */ - TAG_TEXTURE_TILE, - 0, /* tid */ - 0 /* rid */); - - wait_on_mask(1 << TAG_TEXTURE_TILE); - - tex_tile_xy[pos] = txty; - } - else { -#if 0 - printf("SPU %u: tex cache HIT at %d, %d\n", - spu.init.id, tx, ty); -#endif - } - - return pos; + const unsigned texture_ea = (uintptr_t) spu.texture.start; + vec_uint4 tile_x = spu_rlmask(x, -5); + vec_uint4 tile_y = spu_rlmask(y, -5); + const qword offset_x = si_andi((qword) x, 0x1f); + const qword offset_y = si_andi((qword) y, 0x1f); + + const qword tiles_per_row = (qword) spu_splats(spu.texture.width / TILE_SIZE); + const qword tile_size = (qword) spu_splats(sizeof(tile_t)); + + qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); + tile_offset = si_mpy((qword) tile_offset, tile_size); + + qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x); + texel_offset = si_mpyui(texel_offset, 4); + + vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); + + spu_dcache_fetch_unaligned((qword *) & texels[0], + texture_ea + spu_extract(offset, 0), 4); + spu_dcache_fetch_unaligned((qword *) & texels[1], + texture_ea + spu_extract(offset, 1), 4); + spu_dcache_fetch_unaligned((qword *) & texels[2], + texture_ea + spu_extract(offset, 2), 4); + spu_dcache_fetch_unaligned((qword *) & texels[3], + texture_ea + spu_extract(offset, 3), 4); } - /** * Get texture sample at texcoord. * XXX this is extremely primitive for now. @@ -134,9 +104,7 @@ sample_texture_nearest(vector float texcoord) vector float tc = spu_mul(texcoord, spu.tex_size); vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */ - vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */ - uint pos = get_tex_tile(itc); - uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)]; + uint texel = get_texel(itc); return spu_unpack_A8R8G8B8(texel); } @@ -144,49 +112,33 @@ sample_texture_nearest(vector float texcoord) vector float sample_texture_bilinear(vector float texcoord) { - static const vector unsigned int offset10 = {1, 0, 0, 0}; - static const vector unsigned int offset01 = {0, 1, 0, 0}; + static const vec_uint4 offset_x = {0, 0, 1, 1}; + static const vec_uint4 offset_y = {0, 1, 0, 1}; vector float tc = spu_mul(texcoord, spu.tex_size); tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ /* integer texcoords S,T: */ - vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */ - vector unsigned int itc01 = spu_add(itc00, offset01); - vector unsigned int itc10 = spu_add(itc00, offset10); - vector unsigned int itc11 = spu_add(itc10, offset01); - - /* mask (GL_REPEAT) */ - itc00 = spu_and(itc00, spu.tex_size_mask); - itc01 = spu_and(itc01, spu.tex_size_mask); - itc10 = spu_and(itc10, spu.tex_size_mask); - itc11 = spu_and(itc11, spu.tex_size_mask); - - /* intra tile addr */ - vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1); - vector unsigned int ij01 = spu_and(itc01, TILE_SIZE-1); - vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1); - vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1); - - /* get tile cache positions */ - uint pos00 = get_tex_tile(itc00); - uint pos01, pos10, pos11; - if ((spu_extract(ij00, 0) < TILE_SIZE-1) && - (spu_extract(ij00, 1) < TILE_SIZE-1)) { - /* all texels are in the same tile */ - pos01 = pos10 = pos11 = pos00; - } - else { - pos01 = get_tex_tile(itc01); - pos10 = get_tex_tile(itc10); - pos11 = get_tex_tile(itc11); - } - - /* get texels from tiles and convert to float[4] */ - vector float texel00 = spu_unpack_A8R8G8B8(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); - vector float texel01 = spu_unpack_A8R8G8B8(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]); - vector float texel10 = spu_unpack_A8R8G8B8(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); - vector float texel11 = spu_unpack_A8R8G8B8(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]); + vec_uint4 itc = spu_convtu(tc, 0); /* convert to int */ + + vec_uint4 texels[4]; + + vec_uint4 x = spu_splats(spu_extract(itc, 0)); + vec_uint4 y = spu_splats(spu_extract(itc, 1)); + + x = spu_add(x, offset_x); + y = spu_add(y, offset_y); + + x = spu_and(x, spu.tex_size_x_mask); + y = spu_and(y, spu.tex_size_y_mask); + + get_four_texels(x, y, texels); + + vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0)); + vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0)); + vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0)); + vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0)); + /* Compute weighting factors in [0,1] * Multiply texcoord by 1024, AND with 1023, convert back to float. -- cgit v1.2.3 From c66ec5c7a2966df3e3456dfca3eb17c294b30dd5 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 15:06:55 -0700 Subject: gallium: remove uses_kill field from softpipe_shader --- src/gallium/drivers/softpipe/sp_quad.c | 2 +- src/gallium/drivers/softpipe/sp_state.h | 2 -- src/gallium/drivers/softpipe/sp_state_fs.c | 3 --- 3 files changed, 1 insertion(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 0aaf94021f8..8603c1a3677 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -60,7 +60,7 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && - !sp->fs->uses_kill && + !sp->fs->info.uses_kill && !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index b1070e185a2..895976467c5 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -66,8 +66,6 @@ struct sp_fragment_shader { struct tgsi_shader_info info; - boolean uses_kill; - void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers); diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 2715dca0f09..eb641ed321d 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -64,9 +64,6 @@ softpipe_create_fs_state(struct pipe_context *pipe, /* get/save the summary info for this shader */ tgsi_scan_shader(templ->tokens, &state->info); - /* convenience field */ - state->uses_kill = (state->info.opcode_count[TGSI_OPCODE_KIL] || - state->info.opcode_count[TGSI_OPCODE_KILP]); return state; } -- cgit v1.2.3 From 80a9b5e1d962c17530e3bcb34c0d1ac4aae8fa7f Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 15:11:12 -0700 Subject: gallium: remove unnecessary tgsi_interp_coef decl --- src/gallium/drivers/softpipe/sp_state.h | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 895976467c5..3943d4ed2b5 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -53,7 +53,6 @@ struct tgsi_sampler; -struct tgsi_interp_coef; struct tgsi_exec_machine; -- cgit v1.2.3 From 0e1e1f12f47d5b1d49f68930b05eadf1143e1396 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 15:45:41 -0700 Subject: gallium/i915: remove unneeded assignment --- src/gallium/drivers/i915simple/i915_texture.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 9cdf3418a96..ef5adff550e 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -491,7 +491,6 @@ i915_texture_create_screen(struct pipe_screen *screen, tex->base = *templat; tex->base.refcount = 1; - tex->base.pipe = NULL; tex->base.screen = screen; if (i915screen->is_i945 ? i945_miptree_layout(tex) : -- cgit v1.2.3 From 681b78fc60b2e60cf9f84802932bf9d2defd28c2 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 15:48:00 -0700 Subject: gallium: remove unnecessary assignment --- src/gallium/drivers/softpipe/sp_texture.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 7c027653135..c605ed925b2 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -91,7 +91,6 @@ softpipe_texture_create_screen(struct pipe_screen *screen, spt->base = *templat; spt->base.refcount = 1; - spt->base.pipe = NULL; spt->base.screen = screen; softpipe_texture_layout(spt); -- cgit v1.2.3 From 6b9a7eb460fe0a9c958b837f2ed49c2d4e303ebc Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 16:03:40 -0700 Subject: gallium: remove dependencies on pipe_shader_state's semantic info --- src/gallium/drivers/softpipe/sp_prim_setup.c | 4 ++-- src/gallium/drivers/softpipe/sp_quad_fs.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 17284539b00..5c8b0bf69cc 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -1163,13 +1163,13 @@ static void setup_begin( struct draw_stage *stage ) { struct setup_stage *setup = setup_stage(stage); struct softpipe_context *sp = setup->softpipe; - const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + const struct sp_fragment_shader *fs = &setup->softpipe->fs; if (sp->dirty) { softpipe_update_derived(sp); } - setup->quad.nr_attrs = fs->num_inputs; + setup->quad.nr_attrs = fs->info.num_inputs; sp->quad.first->begin(sp->quad.first); diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 1794fb5a616..1fbb2e38c4e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -148,7 +148,7 @@ static void shade_begin(struct quad_stage *qs) /* find output slots for depth, color */ qss->colorOutSlot = -1; qss->depthOutSlot = -1; - for (i = 0; i < qss->stage.softpipe->fs->shader.num_outputs; i++) { + for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: qss->depthOutSlot = i; -- cgit v1.2.3 From ea7e86dd4d1e7dbef2642da73bb1980723ae49ef Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 16:05:16 -0700 Subject: gallium/i965: remove dependencies on pipe_shader_state's semantic info The brw_shader_info struct is rendundant and could be removed... --- src/gallium/drivers/i965simple/brw_context.h | 8 ++++++-- src/gallium/drivers/i965simple/brw_draw_upload.c | 2 +- src/gallium/drivers/i965simple/brw_sf.c | 2 +- src/gallium/drivers/i965simple/brw_shader_info.c | 5 +++-- src/gallium/drivers/i965simple/brw_state.c | 9 +++++++-- src/gallium/drivers/i965simple/brw_wm_decl.c | 8 ++++---- 6 files changed, 22 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 65664d853dd..fbc89c889bb 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -38,6 +38,8 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" + #include "brw_structs.h" #include "brw_winsys.h" @@ -203,7 +205,8 @@ struct brw_shader_info { struct brw_vertex_program { struct pipe_shader_state program; - struct brw_shader_info info; + struct tgsi_shader_info info; + struct brw_shader_info info2; /* XXX get rid of this */ int id; }; @@ -211,7 +214,8 @@ struct brw_vertex_program { struct brw_fragment_program { struct pipe_shader_state program; - struct brw_shader_info info; + struct tgsi_shader_info info; + struct brw_shader_info info2; /* XXX get rid of this */ boolean UsesDepth; boolean UsesKill; diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c index aa85d93866a..9c0c78c2369 100644 --- a/src/gallium/drivers/i965simple/brw_draw_upload.c +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -256,7 +256,7 @@ boolean brw_upload_vertex_elements( struct brw_context *brw ) struct brw_vertex_element_packet vep; unsigned i; - unsigned nr_enabled = brw->attribs.VertexProgram->program.num_inputs; + unsigned nr_enabled = brw->attribs.VertexProgram->info.num_inputs; memset(&vep, 0, sizeof(vep)); diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c index 7c83b81c858..54ce5ed9f16 100644 --- a/src/gallium/drivers/i965simple/brw_sf.c +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -133,7 +133,7 @@ static void upload_sf_prog( struct brw_context *brw ) key.vp_output_count = brw->vs.prog_data->outputs_written; /* BRW_NEW_FS */ - key.fp_input_count = brw->attribs.FragmentProgram->info.nr_regs[TGSI_FILE_INPUT]; + key.fp_input_count = brw->attribs.FragmentProgram->info2.nr_regs[TGSI_FILE_INPUT]; /* BRW_NEW_REDUCED_PRIMITIVE */ diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index a762a870fe9..e7e063dead3 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -6,8 +6,9 @@ #include "tgsi/util/tgsi_parse.h" - - +/** + * XXX try to get rid of this. See tgsi_scan_shader() and tgsi_shader_info. + */ void brw_shader_info(const struct tgsi_token *tokens, struct brw_shader_info *info ) { diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 7466fdc403d..6e464652002 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -175,8 +175,10 @@ static void * brw_create_fs_state(struct pipe_context *pipe, brw_fp->program = *shader; brw_fp->id = brw_context(pipe)->program_id++; + tgsi_scan_shader(shader->tokens, &brw_fp->info); + brw_shader_info(shader->tokens, - &brw_fp->info); + &brw_fp->info2); tgsi_dump(shader->tokens, 0); @@ -211,8 +213,11 @@ static void *brw_create_vs_state(struct pipe_context *pipe, */ brw_vp->program = *shader; brw_vp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_vp->info); + brw_shader_info(shader->tokens, - &brw_vp->info); + &brw_vp->info2); tgsi_dump(shader->tokens, 0); diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index 97418a52e7f..afea042bf0a 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -259,7 +259,7 @@ static void prealloc_reg(struct brw_wm_compile *c) /* Then a copy of our part of the CURBE entry: */ { - int nr_constants = c->fp->info.nr_regs[TGSI_FILE_CONSTANT]; + int nr_constants = c->fp->info2.nr_regs[TGSI_FILE_CONSTANT]; int index = 0; c->prog_data.max_const = 4*nr_constants; @@ -282,7 +282,7 @@ static void prealloc_reg(struct brw_wm_compile *c) /* Next we receive the plane coefficients for parameter * interpolation: */ - for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) { + for (i = 0; i < c->fp->info2.nr_regs[TGSI_FILE_INPUT]; i++) { c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); c->reg_index += 2; } @@ -302,11 +302,11 @@ static void prealloc_reg(struct brw_wm_compile *c) /* Now allocate room for the interpolated inputs and staging * registers for the outputs: */ - for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) + for (i = 0; i < c->fp->info2.nr_regs[TGSI_FILE_INPUT]; i++) for (j = 0; j < 4; j++) c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); - for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_OUTPUT]; i++) + for (i = 0; i < c->fp->info2.nr_regs[TGSI_FILE_OUTPUT]; i++) for (j = 0; j < 4; j++) c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); -- cgit v1.2.3 From bad54d0b4dbe62aed6fad1d2725f7fe52a987440 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 16:09:17 -0700 Subject: gallium/i965: remove UsesDepth, UsesKill - use tgsi_shader_info instead --- src/gallium/drivers/i965simple/brw_context.h | 2 -- src/gallium/drivers/i965simple/brw_wm.c | 4 ++-- src/gallium/drivers/i965simple/brw_wm_state.c | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index fbc89c889bb..170d50c5c37 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -218,8 +218,6 @@ struct brw_fragment_program { struct brw_shader_info info2; /* XXX get rid of this */ boolean UsesDepth; - boolean UsesKill; - boolean ComputesDepth; int id; }; diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c index 539b1707444..1c4b5b5ede2 100644 --- a/src/gallium/drivers/i965simple/brw_wm.c +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -94,11 +94,11 @@ static void brw_wm_populate_key( struct brw_context *brw, /* Build the index for table lookup */ /* BRW_NEW_DEPTH_STENCIL */ - if (fp->UsesKill || + if (fp->info.uses_kill || brw->attribs.DepthStencil->alpha.enabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->ComputesDepth) + if (fp->info.writes_z) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; if (brw->attribs.DepthStencil->depth.enabled) diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c index 5ccd4888423..f3aa36b07f8 100644 --- a/src/gallium/drivers/i965simple/brw_wm_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_state.c @@ -117,11 +117,11 @@ static void upload_wm_unit(struct brw_context *brw ) if (fp->UsesDepth) wm.wm5.program_uses_depth = 1; /* as far as we can tell */ - if (fp->ComputesDepth) + if (fp->info.writes_z) wm.wm5.program_computes_depth = 1; /* BRW_NEW_ALPHA_TEST */ - if (fp->UsesKill || + if (fp->info.uses_kill || brw->attribs.DepthStencil->alpha.enabled) wm.wm5.program_uses_killpixel = 1; -- cgit v1.2.3 From 7ba1afb03308685eb07d6b88184906ac42f60c2b Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 16:11:14 -0700 Subject: gallium/i965: added const to silence warnings --- src/gallium/drivers/i965simple/brw_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 170d50c5c37..642db5b5a26 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -504,7 +504,7 @@ struct brw_context /* Arrays with buffer objects to copy non-bufferobj arrays into * for upload: */ - struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; + const struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; -- cgit v1.2.3 From f504d87240542016213569b5da89e251adebc31d Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 16:11:26 -0700 Subject: gallium/i965: silence warnings --- src/gallium/drivers/i965simple/brw_surface.c | 1 + src/gallium/drivers/i965simple/brw_vs_emit.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index dc4846d39fc..c99a91dcf76 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -164,6 +164,7 @@ brw_surface_fill(struct pipe_context *pipe, void brw_init_surface_functions(struct brw_context *brw) { + (void) brw_surface_data; /* silence warning */ brw->pipe.surface_copy = brw_surface_copy; brw->pipe.surface_fill = brw_surface_fill; } diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 05df4860eda..33bbdc95c68 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -1051,7 +1051,7 @@ static void process_instruction(struct brw_vs_compile *c, { struct brw_reg args[3], dst; struct brw_compile *p = &c->func; - struct brw_indirect stack_index = brw_indirect(0, 0); + /*struct brw_indirect stack_index = brw_indirect(0, 0);*/ unsigned i; unsigned index; unsigned file; -- cgit v1.2.3 From dacf91fe587a777eed95b9767bc6b4ccdc7de71c Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 16:22:08 -0700 Subject: gallium/i965: remove brw_shader_info struct The info it contained is now found in tgsi_shader_info. Added a few assertions to catch potential misunderstandings about register counts vs. highest register index used. --- src/gallium/drivers/i965simple/Makefile | 1 - src/gallium/drivers/i965simple/SConscript | 1 - src/gallium/drivers/i965simple/brw_context.h | 13 +------------ src/gallium/drivers/i965simple/brw_sf.c | 2 +- src/gallium/drivers/i965simple/brw_shader_info.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 5 ++++- src/gallium/drivers/i965simple/brw_state.h | 7 ------- src/gallium/drivers/i965simple/brw_wm_decl.c | 18 ++++++++++++++---- 8 files changed, 21 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index 8589ebdf964..e97146e57ca 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -30,7 +30,6 @@ C_SOURCES = \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ - brw_shader_info.c \ brw_state.c \ brw_state_batch.c \ brw_state_cache.c \ diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript index e0cc78826ec..c0825c4de37 100644 --- a/src/gallium/drivers/i965simple/SConscript +++ b/src/gallium/drivers/i965simple/SConscript @@ -29,7 +29,6 @@ i965simple = env.ConvenienceLibrary( 'brw_sf.c', 'brw_sf_emit.c', 'brw_sf_state.c', - 'brw_shader_info.c', 'brw_state.c', 'brw_state_batch.c', 'brw_state_cache.c', diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 642db5b5a26..4da3a8cffcf 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -197,33 +197,22 @@ struct brw_state_flags { }; -struct brw_shader_info { - int nr_regs[8]; /* TGSI_FILE_* */ -}; - - - struct brw_vertex_program { struct pipe_shader_state program; struct tgsi_shader_info info; - struct brw_shader_info info2; /* XXX get rid of this */ int id; }; - struct brw_fragment_program { struct pipe_shader_state program; struct tgsi_shader_info info; - struct brw_shader_info info2; /* XXX get rid of this */ - boolean UsesDepth; + boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ int id; }; - - struct pipe_setup_linkage { struct { unsigned vp_output:5; diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c index 54ce5ed9f16..c3b815a82be 100644 --- a/src/gallium/drivers/i965simple/brw_sf.c +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -133,7 +133,7 @@ static void upload_sf_prog( struct brw_context *brw ) key.vp_output_count = brw->vs.prog_data->outputs_written; /* BRW_NEW_FS */ - key.fp_input_count = brw->attribs.FragmentProgram->info2.nr_regs[TGSI_FILE_INPUT]; + key.fp_input_count = brw->attribs.FragmentProgram->info.file_max[TGSI_FILE_INPUT] + 1; /* BRW_NEW_REDUCED_PRIMITIVE */ diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index e7e063dead3..f4694a4433b 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -7,7 +7,7 @@ /** - * XXX try to get rid of this. See tgsi_scan_shader() and tgsi_shader_info. + * XXX this obsolete new and no longer compiled. */ void brw_shader_info(const struct tgsi_token *tokens, struct brw_shader_info *info ) diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 6e464652002..6744a8aa4f7 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -177,8 +177,10 @@ static void * brw_create_fs_state(struct pipe_context *pipe, tgsi_scan_shader(shader->tokens, &brw_fp->info); +#if 0 brw_shader_info(shader->tokens, &brw_fp->info2); +#endif tgsi_dump(shader->tokens, 0); @@ -216,9 +218,10 @@ static void *brw_create_vs_state(struct pipe_context *pipe, tgsi_scan_shader(shader->tokens, &brw_vp->info); +#if 0 brw_shader_info(shader->tokens, &brw_vp->info2); - +#endif tgsi_dump(shader->tokens, 0); return (void *)brw_vp; diff --git a/src/gallium/drivers/i965simple/brw_state.h b/src/gallium/drivers/i965simple/brw_state.h index 258e9a556e2..de0a6371b84 100644 --- a/src/gallium/drivers/i965simple/brw_state.h +++ b/src/gallium/drivers/i965simple/brw_state.h @@ -148,11 +148,4 @@ void brw_invalidate_pools( struct brw_context *brw ); void brw_clear_batch_cache_flush( struct brw_context *brw ); -/* brw_shader_info.c - */ - -void brw_shader_info(const struct tgsi_token *tokens, - struct brw_shader_info *info ); - - #endif diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index afea042bf0a..b50d768062a 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -259,9 +259,12 @@ static void prealloc_reg(struct brw_wm_compile *c) /* Then a copy of our part of the CURBE entry: */ { - int nr_constants = c->fp->info2.nr_regs[TGSI_FILE_CONSTANT]; + int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1; int index = 0; + /* XXX number of constants, or highest numbered constant? */ + assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]); + c->prog_data.max_const = 4*nr_constants; for (i = 0; i < nr_constants; i++) { for (j = 0; j < 4; j++, index++) @@ -282,7 +285,8 @@ static void prealloc_reg(struct brw_wm_compile *c) /* Next we receive the plane coefficients for parameter * interpolation: */ - for (i = 0; i < c->fp->info2.nr_regs[TGSI_FILE_INPUT]; i++) { + assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs); + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) { c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); c->reg_index += 2; } @@ -302,11 +306,17 @@ static void prealloc_reg(struct brw_wm_compile *c) /* Now allocate room for the interpolated inputs and staging * registers for the outputs: */ - for (i = 0; i < c->fp->info2.nr_regs[TGSI_FILE_INPUT]; i++) + /* XXX do we want to loop over the _number_ of inputs/outputs or loop + * to the highest input/output index that's used? + * Probably the same, actually. + */ + assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs); + assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs); + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) for (j = 0; j < 4; j++) c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); - for (i = 0; i < c->fp->info2.nr_regs[TGSI_FILE_OUTPUT]; i++) + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++) for (j = 0; j < 4; j++) c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); -- cgit v1.2.3 From d612b6fa9b5674e001755265e37924815646ad1a Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 27 Feb 2008 16:17:15 -0700 Subject: cell: fix minor get_tex_surface() breakage --- src/gallium/drivers/cell/ppu/cell_texture.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 28cadad6ed2..bf6f6d3058c 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -210,6 +210,7 @@ static void cell_tile_texture(struct cell_context *cell, struct cell_texture *texture) { + struct pipe_screen *screen = cell->pipe.screen; uint face = 0, level = 0, zslice = 0; struct pipe_surface *surf; const uint w = texture->base.width[0], h = texture->base.height[0]; @@ -221,7 +222,7 @@ cell_tile_texture(struct cell_context *cell, assert(w % TILE_SIZE == 0); assert(h % TILE_SIZE == 0); - surf = cell_get_tex_surface(&cell->pipe, &texture->base, face, level, zslice); + surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice); ASSERT(surf); src = (const uint *) pipe_surface_map(surf); -- cgit v1.2.3 From 46da2f42a8e1bd88086cc17afc58738956d8b699 Mon Sep 17 00:00:00 2001 From: Brian <brian@poulsbo.localnet.net> Date: Wed, 27 Feb 2008 17:21:01 -0700 Subject: gallium/i965: added const to silence warning --- src/gallium/drivers/i965simple/brw_vs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_vs.h b/src/gallium/drivers/i965simple/brw_vs.h index 0e58f043b02..070f9dfcaef 100644 --- a/src/gallium/drivers/i965simple/brw_vs.h +++ b/src/gallium/drivers/i965simple/brw_vs.h @@ -52,7 +52,7 @@ struct brw_vs_compile { struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; - struct brw_vertex_program *vp; + const struct brw_vertex_program *vp; unsigned nr_inputs; -- cgit v1.2.3 From 627efcaa8009bb7ed6a7f266f8122df800bb2706 Mon Sep 17 00:00:00 2001 From: Brian <brian@poulsbo.localnet.net> Date: Wed, 27 Feb 2008 17:21:29 -0700 Subject: gallium/i965: remove more dependencies on pipe_shader_state semantic info --- src/gallium/drivers/i965simple/brw_vs.c | 4 ++-- src/gallium/drivers/i965simple/brw_vs_emit.c | 4 ++-- src/gallium/drivers/i965simple/brw_wm_decl.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_vs.c b/src/gallium/drivers/i965simple/brw_vs.c index 738c6346d5e..92327e896db 100644 --- a/src/gallium/drivers/i965simple/brw_vs.c +++ b/src/gallium/drivers/i965simple/brw_vs.c @@ -50,8 +50,8 @@ static void do_vs_prog( struct brw_context *brw, brw_init_compile(&c.func); c.vp = vp; - c.prog_data.outputs_written = vp->program.num_outputs; - c.prog_data.inputs_read = vp->program.num_inputs; + c.prog_data.outputs_written = vp->info.num_outputs; + c.prog_data.inputs_read = vp->info.num_inputs; #if 0 if (c.key.copy_edgeflag) { diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 33bbdc95c68..9020fcc0012 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -86,7 +86,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c, /* Allocate input regs: */ - c->nr_inputs = c->vp->program.num_inputs; + c->nr_inputs = c->vp->info.num_inputs; for (i = 0; i < c->nr_inputs; i++) { c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); reg++; @@ -99,7 +99,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c, c->nr_outputs = 0; c->first_output = reg; mrf = 4; - for (i = 0; i < c->vp->program.num_outputs; i++) { + for (i = 0; i < c->vp->info.num_outputs; i++) { c->nr_outputs++; #if 0 if (i == VERT_RESULT_HPOS) { diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index b50d768062a..74ccfd494a4 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -292,7 +292,7 @@ static void prealloc_reg(struct brw_wm_compile *c) } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = (c->fp->program.num_inputs + 1) * 2; + c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2; c->prog_data.curb_read_length = nr_curbe_regs; /* That's the end of the payload, now we can start allocating registers. -- cgit v1.2.3 From 1774b177b858f9f87d00e54b0bf00e9634e375e9 Mon Sep 17 00:00:00 2001 From: Brian <brian@i915.localnet.net> Date: Wed, 27 Feb 2008 18:46:54 -0700 Subject: gallium: added draw_num_vs_outputs() to query number of post-transform vertex attribs --- src/gallium/auxiliary/draw/draw_context.c | 13 +++++++++++++ src/gallium/auxiliary/draw/draw_context.h | 4 ++++ src/gallium/drivers/softpipe/sp_state_derived.c | 4 ++-- 3 files changed, 19 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 64ada8ce047..3500c34811c 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -281,6 +281,19 @@ draw_find_vs_output(struct draw_context *draw, } +/** + * Return number of vertex shader outputs. + */ +uint +draw_num_vs_outputs(struct draw_context *draw) +{ + uint count = draw->vertex_shader->info.num_outputs; + if (draw->extra_vp_outputs.slot >= 0) + count++; + return count; +} + + /** * Allocate space for temporary post-transform vertices, such as for clipping. */ diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index d6685f479b1..99bfef55f43 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -112,6 +112,10 @@ int draw_find_vs_output(struct draw_context *draw, uint semantic_name, uint semantic_index); +uint +draw_num_vs_outputs(struct draw_context *draw); + + /* * Vertex shader functions diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index aa6e3291167..eafbaed4b94 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -61,7 +61,6 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ - const struct pipe_shader_state *vs = &softpipe->vs->shader; const struct sp_fragment_shader *spfs = softpipe->fs; const enum interp_mode colorInterp = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; @@ -74,7 +73,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; vinfo_vbuf->num_attribs = 0; draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); - vinfo_vbuf->size = 4 * vs->num_outputs + /* size in dwords or floats */ + vinfo_vbuf->size = 4 * draw_num_vs_outputs(softpipe->draw) + sizeof(struct vertex_header) / 4; } -- cgit v1.2.3 From 5c0a089a5d13baa7a427b70852223990da5f175c Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 28 Feb 2008 13:52:30 +0900 Subject: gallium: Remove extra level of indirecttion. --- src/gallium/drivers/softpipe/sp_prim_setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 5c8b0bf69cc..2feee5c485e 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -1163,7 +1163,7 @@ static void setup_begin( struct draw_stage *stage ) { struct setup_stage *setup = setup_stage(stage); struct softpipe_context *sp = setup->softpipe; - const struct sp_fragment_shader *fs = &setup->softpipe->fs; + const struct sp_fragment_shader *fs = setup->softpipe->fs; if (sp->dirty) { softpipe_update_derived(sp); -- cgit v1.2.3 From 626b8d177b5fdacfac70c09216c02b1c833b91ea Mon Sep 17 00:00:00 2001 From: Michel Dänzer <michel@tungstengraphics.com> Date: Thu, 28 Feb 2008 09:07:38 +0000 Subject: Make sure struct pipe_screen is declared. --- src/gallium/drivers/softpipe/sp_winsys.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h index 80f53548081..fc372dba27b 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -46,6 +46,7 @@ struct softpipe_winsys { }; +struct pipe_screen; struct pipe_winsys; struct pipe_context; -- cgit v1.2.3 From 6144c2bd65974f4e5b74936e26451a1ad75cb349 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 28 Feb 2008 17:57:54 -0700 Subject: cell: remove obsolete texture field --- src/gallium/drivers/cell/ppu/cell_texture.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index bf6f6d3058c..e235421107e 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -90,7 +90,6 @@ cell_texture_create_screen(struct pipe_screen *screen, spt->base = *templat; spt->base.refcount = 1; - spt->base.pipe = NULL; spt->base.screen = screen; cell_texture_layout(spt); -- cgit v1.2.3 From ebe3b34ad225e320a09bb4069ce4d24808386327 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 28 Feb 2008 18:02:05 -0700 Subject: cell: convert all points/lines to tris for the time being Allows more programs to look correct. We'll want native points/lines someday. --- src/gallium/drivers/cell/ppu/cell_context.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 2301df5ba52..ccbbd1d331c 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -128,6 +128,10 @@ cell_create_context(struct pipe_screen *screen, cell_init_vbuf(cell); draw_set_rasterize_stage(cell->draw, cell->vbuf); + /* convert all points/lines to tris for the time being */ + draw_wide_point_threshold(cell->draw, 0.0); + draw_wide_line_threshold(cell->draw, 0.0); + /* * SPU stuff */ -- cgit v1.2.3 From 84cc07dc89c0ebce4ad55b4b3684d4420a202683 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 29 Feb 2008 15:03:57 +1100 Subject: nouveau: implement pipe_screen Untested on NV3x/NV5x. Quite possibly broken. --- src/gallium/drivers/nouveau/nouveau_winsys.h | 15 +- src/gallium/drivers/nv30/Makefile | 1 + src/gallium/drivers/nv30/nv30_context.c | 86 +----------- src/gallium/drivers/nv30/nv30_context.h | 2 +- src/gallium/drivers/nv30/nv30_miptree.c | 47 ++++++- src/gallium/drivers/nv30/nv30_screen.c | 151 +++++++++++++++++++++ src/gallium/drivers/nv30/nv30_screen.h | 20 +++ src/gallium/drivers/nv30/nv30_surface.c | 72 ---------- src/gallium/drivers/nv40/Makefile | 1 + src/gallium/drivers/nv40/nv40_context.c | 85 +----------- src/gallium/drivers/nv40/nv40_context.h | 2 + src/gallium/drivers/nv40/nv40_miptree.c | 25 ++-- src/gallium/drivers/nv40/nv40_screen.c | 151 +++++++++++++++++++++ src/gallium/drivers/nv40/nv40_screen.h | 20 +++ src/gallium/drivers/nv40/nv40_surface.c | 41 ------ src/gallium/drivers/nv50/Makefile | 1 + src/gallium/drivers/nv50/nv50_context.c | 91 +------------ src/gallium/drivers/nv50/nv50_context.h | 2 + src/gallium/drivers/nv50/nv50_miptree.c | 29 +++- src/gallium/drivers/nv50/nv50_screen.c | 117 ++++++++++++++++ src/gallium/drivers/nv50/nv50_screen.h | 20 +++ src/gallium/drivers/nv50/nv50_surface.c | 10 -- src/gallium/winsys/dri/nouveau/nouveau_winsys.c | 16 ++- .../winsys/dri/nouveau/nouveau_winsys_softpipe.c | 30 ++-- 24 files changed, 623 insertions(+), 412 deletions(-) create mode 100644 src/gallium/drivers/nv30/nv30_screen.c create mode 100644 src/gallium/drivers/nv30/nv30_screen.h create mode 100644 src/gallium/drivers/nv40/nv40_screen.c create mode 100644 src/gallium/drivers/nv40/nv40_screen.h create mode 100644 src/gallium/drivers/nv50/nv50_screen.c create mode 100644 src/gallium/drivers/nv50/nv50_screen.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index b5e470cfaa4..98d95e94a58 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -49,13 +49,22 @@ struct nouveau_winsys { unsigned, unsigned, unsigned, unsigned, unsigned); }; +extern struct pipe_screen * +nv30_screen_create(struct pipe_winsys *ws, unsigned chipset); + extern struct pipe_context * -nv30_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); +nv30_create(struct pipe_screen *, struct nouveau_winsys *); + +extern struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, unsigned chipset); extern struct pipe_context * -nv40_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); +nv40_create(struct pipe_screen *, struct nouveau_winsys *); + +extern struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, unsigned chipset); extern struct pipe_context * -nv50_create(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); +nv50_create(struct pipe_screen *, struct nouveau_winsys *); #endif diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index b7c252fc986..3f80fb87c9b 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -11,6 +11,7 @@ DRIVER_SOURCES = \ nv30_fragtex.c \ nv30_miptree.c \ nv30_query.c \ + nv30_screen.c \ nv30_state.c \ nv30_state_emit.c \ nv30_surface.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index e9afeb80176..b8452e23b16 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -4,80 +4,7 @@ #include "pipe/p_util.h" #include "nv30_context.h" - -static const char * -nv30_get_name(struct pipe_context *pipe) -{ - struct nv30_context *nv30 = nv30_context(pipe); - static char buffer[128]; - - snprintf(buffer, sizeof(buffer), "NV%02X", nv30->chipset); - return buffer; -} - -static const char * -nv30_get_vendor(struct pipe_context *pipe) -{ - return "nouveau"; -} - -static int -nv30_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 16; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 2; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv30_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - case PIPE_CAP_BITMAP_TEXCOORD_BIAS: - return 0.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} +#include "nv30_screen.h" static void nv30_flush(struct pipe_context *pipe, unsigned flags) @@ -338,9 +265,10 @@ nv30_init_hwctx(struct nv30_context *nv30, int rankine_class) #define NV35TCL_CHIPSET_3X_MASK 0x000001e0 struct pipe_context * -nv30_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, - unsigned chipset) +nv30_create(struct pipe_screen *screen, struct nouveau_winsys *nvws) { + struct pipe_winsys *pipe_winsys = screen->winsys; + unsigned chipset = nv30_screen(screen)->chipset; struct nv30_context *nv30; int rankine_class = 0, ret; @@ -404,12 +332,9 @@ nv30_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, /* Pipe context setup */ nv30->pipe.winsys = pipe_winsys; + nv30->pipe.screen = screen; nv30->pipe.destroy = nv30_destroy; - nv30->pipe.get_name = nv30_get_name; - nv30->pipe.get_vendor = nv30_get_vendor; - nv30->pipe.get_param = nv30_get_param; - nv30->pipe.get_paramf = nv30_get_paramf; nv30->pipe.draw_arrays = nv30_draw_arrays; nv30->pipe.draw_elements = nv30_draw_elements; @@ -420,7 +345,6 @@ nv30_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, nv30_init_query_functions(nv30); nv30_init_surface_functions(nv30); nv30_init_state_functions(nv30); - nv30_init_miptree_functions(nv30); nv30->draw = draw_create(); assert(nv30->draw); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index d6d16ee8686..c63847a087c 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -91,7 +91,7 @@ nv30_context(struct pipe_context *pipe) extern void nv30_init_state_functions(struct nv30_context *nv30); extern void nv30_init_surface_functions(struct nv30_context *nv30); -extern void nv30_init_miptree_functions(struct nv30_context *nv30); +extern void nv30_init_miptree_functions(struct pipe_screen *screen); extern void nv30_init_query_functions(struct nv30_context *nv30); /* nv30_draw.c */ diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 5fb89f4cfdc..23bcef08ebc 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -4,6 +4,7 @@ #include "pipe/p_inlines.h" #include "nv30_context.h" +#include "nv30_screen.h" static void nv30_miptree_layout(struct nv30_miptree *nv30mt) @@ -54,9 +55,9 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) } static void -nv30_miptree_create(struct pipe_context *pipe, struct pipe_texture **pt) +nv30_miptree_create(struct pipe_screen *screen, struct pipe_texture **pt) { - struct pipe_winsys *ws = pipe->winsys; + struct pipe_winsys *ws = screen->winsys; struct nv30_miptree *nv30mt; nv30mt = realloc(*pt, sizeof(struct nv30_miptree)); @@ -77,9 +78,9 @@ nv30_miptree_create(struct pipe_context *pipe, struct pipe_texture **pt) } static void -nv30_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) +nv30_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) { - struct pipe_winsys *ws = pipe->winsys; + struct pipe_winsys *ws = screen->winsys; struct pipe_texture *mt = *pt; *pt = NULL; @@ -96,10 +97,42 @@ nv30_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) } } +static struct pipe_surface * +nv30_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + pipe_buffer_reference(ws, &ps->buffer, nv30mt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = nv30mt->level[level].pitch / ps->cpp; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv30mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = nv30mt->level[level].image_offset[zslice]; + } else { + ps->offset = nv30mt->level[level].image_offset[0]; + } + + return ps; +} void -nv30_init_miptree_functions(struct nv30_context *nv30) +nv30_init_miptree_functions(struct pipe_screen *screen) { - nv30->pipe.texture_create = nv30_miptree_create; - nv30->pipe.texture_release = nv30_miptree_release; + struct nv30_screen *nv30screen = nv30_screen(screen); + + nv30screen->screen.texture_create = nv30_miptree_create; + nv30screen->screen.texture_release = nv30_miptree_release; + nv30screen->screen.get_tex_surface = nv30_miptree_surface_get; } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c new file mode 100644 index 00000000000..6d64025528f --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -0,0 +1,151 @@ +#include "pipe/p_screen.h" +#include "pipe/p_util.h" + +#include "nv30_context.h" +#include "nv30_screen.h" + +static const char * +nv30_screen_get_name(struct pipe_screen *screen) +{ + struct nv30_screen *nv30screen = nv30_screen(screen); + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", nv30screen->chipset); + return buffer; +} + +static const char * +nv30_screen_get_vendor(struct pipe_screen *screen) +{ + return "nouveau"; +} + +static int +nv30_screen_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 2; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv30_screen_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + case PIPE_CAP_BITMAP_TEXCOORD_BIAS: + return 0.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv30_screen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, uint type) +{ + switch (type) { + case PIPE_SURFACE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + break; + case PIPE_TEXTURE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + } + break; + default: + assert(0); + }; + + return FALSE; +} + +static void +nv30_screen_destroy(struct pipe_screen *screen) +{ + FREE(screen); +} + +struct pipe_screen * +nv30_screen_create(struct pipe_winsys *winsys, unsigned chipset) +{ + struct nv30_screen *nv30screen = CALLOC_STRUCT(nv30_screen); + + if (!nv30screen) + return NULL; + + nv30screen->chipset = chipset; + + nv30screen->screen.winsys = winsys; + + nv30screen->screen.destroy = nv30_screen_destroy; + + nv30screen->screen.get_name = nv30_screen_get_name; + nv30screen->screen.get_vendor = nv30_screen_get_vendor; + nv30screen->screen.get_param = nv30_screen_get_param; + nv30screen->screen.get_paramf = nv30_screen_get_paramf; + nv30screen->screen.is_format_supported = + nv30_screen_is_format_supported; + + nv30_init_miptree_functions(&nv30screen->screen); + return &nv30screen->screen; +} + diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h new file mode 100644 index 00000000000..e55242fbf7f --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_screen.h @@ -0,0 +1,20 @@ +#ifndef __NV30_SCREEN_H__ +#define __NV30_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv30_screen { + struct pipe_screen screen; + unsigned chipset; +}; + +static INLINE struct nv30_screen * +nv30_screen(struct pipe_screen *screen) +{ + return (struct nv30_screen *)screen; +} + +extern struct pipe_screen * +nv30_screen_create(struct pipe_winsys *winsys, unsigned chipset); + +#endif diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c index 974965679f5..b20a3dd4c15 100644 --- a/src/gallium/drivers/nv30/nv30_surface.c +++ b/src/gallium/drivers/nv30/nv30_surface.c @@ -33,76 +33,6 @@ #include "pipe/p_inlines.h" #include "util/p_tile.h" -static boolean -nv30_surface_format_supported(struct pipe_context *pipe, - enum pipe_format format, uint type) -{ - switch (type) { - case PIPE_SURFACE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - break; - case PIPE_TEXTURE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: - case PIPE_FORMAT_U_A8_L8: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return TRUE; - default: - break; - } - break; - default: - assert(0); - }; - - return FALSE; -} - -static struct pipe_surface * -nv30_get_tex_surface(struct pipe_context *pipe, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct pipe_winsys *ws = pipe->winsys; - struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; - struct pipe_surface *ps; - - ps = ws->surface_alloc(ws); - if (!ps) - return NULL; - pipe_buffer_reference(ws, &ps->buffer, nv30mt->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = nv30mt->level[level].pitch / ps->cpp; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ps->offset = nv30mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ps->offset = nv30mt->level[level].image_offset[zslice]; - } else { - ps->offset = nv30mt->level[level].image_offset[0]; - } - - return ps; -} - static void nv30_surface_copy(struct pipe_context *pipe, unsigned do_flip, struct pipe_surface *dest, unsigned destx, unsigned desty, @@ -130,8 +60,6 @@ nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, void nv30_init_surface_functions(struct nv30_context *nv30) { - nv30->pipe.is_format_supported = nv30_surface_format_supported; - nv30->pipe.get_tex_surface = nv30_get_tex_surface; nv30->pipe.surface_copy = nv30_surface_copy; nv30->pipe.surface_fill = nv30_surface_fill; } diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index fd002b54e78..3369a21574f 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -11,6 +11,7 @@ DRIVER_SOURCES = \ nv40_fragtex.c \ nv40_miptree.c \ nv40_query.c \ + nv40_screen.c \ nv40_state.c \ nv40_state_blend.c \ nv40_state_clip.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 8b5cc693de0..a7f64c6e9e5 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -4,85 +4,12 @@ #include "pipe/p_util.h" #include "nv40_context.h" +#include "nv40_screen.h" #define NV4X_GRCLASS4097_CHIPSETS 0x00000baf #define NV4X_GRCLASS4497_CHIPSETS 0x00005450 #define NV6X_GRCLASS4497_CHIPSETS 0x00000088 -static const char * -nv40_get_name(struct pipe_context *pipe) -{ - struct nv40_context *nv40 = nv40_context(pipe); - static char buffer[128]; - - snprintf(buffer, sizeof(buffer), "NV%02X", nv40->chipset); - return buffer; -} - -static const char * -nv40_get_vendor(struct pipe_context *pipe) -{ - return "nouveau"; -} - -static int -nv40_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 16; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 4; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv40_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - case PIPE_CAP_BITMAP_TEXCOORD_BIAS: - return 0.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - static void nv40_flush(struct pipe_context *pipe, unsigned flags) { @@ -269,10 +196,11 @@ nv40_destroy(struct pipe_context *pipe) } struct pipe_context * -nv40_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, - unsigned chipset) +nv40_create(struct pipe_screen *pscreen, struct nouveau_winsys *nvws) { + struct pipe_winsys *ws = pscreen->winsys; struct nv40_context *nv40; + unsigned chipset = nv40_screen(pscreen)->chipset; nv40 = CALLOC(1, sizeof(struct nv40_context)); if (!nv40) @@ -288,11 +216,8 @@ nv40_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, nv40->nvws = nvws; nv40->pipe.winsys = ws; + nv40->pipe.screen = pscreen; nv40->pipe.destroy = nv40_destroy; - nv40->pipe.get_name = nv40_get_name; - nv40->pipe.get_vendor = nv40_get_vendor; - nv40->pipe.get_param = nv40_get_param; - nv40->pipe.get_paramf = nv40_get_paramf; nv40->pipe.draw_arrays = nv40_draw_arrays; nv40->pipe.draw_elements = nv40_draw_elements; nv40->pipe.clear = nv40_clear; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 16cc053ad9f..3ddfbd43f60 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -178,6 +178,8 @@ extern void nv40_init_surface_functions(struct nv40_context *nv40); extern void nv40_init_miptree_functions(struct nv40_context *nv40); extern void nv40_init_query_functions(struct nv40_context *nv40); +extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); + /* nv40_draw.c */ extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 5e1c7ade31b..94ba05b7104 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -54,9 +54,9 @@ nv40_miptree_layout(struct nv40_miptree *nv40mt) } static struct pipe_texture * -nv40_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) +nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { - struct pipe_winsys *ws = pipe->winsys; + struct pipe_winsys *ws = pscreen->winsys; struct nv40_miptree *mt; mt = MALLOC(sizeof(struct nv40_miptree)); @@ -64,6 +64,8 @@ nv40_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) return NULL; mt->base = *pt; mt->base.refcount = 1; + mt->base.screen = pscreen; + nv40_miptree_layout(mt); mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, @@ -77,9 +79,9 @@ nv40_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) } static void -nv40_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) +nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) { - struct pipe_winsys *ws = pipe->winsys; + struct pipe_winsys *ws = pscreen->winsys; struct pipe_texture *mt = *pt; *pt = NULL; @@ -102,10 +104,10 @@ nv40_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt) } static struct pipe_surface * -nv40_miptree_surface(struct pipe_context *pipe, struct pipe_texture *pt, +nv40_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) { - struct pipe_winsys *ws = pipe->winsys; + struct pipe_winsys *ws = pscreen->winsys; struct nv40_miptree *nv40mt = (struct nv40_miptree *)pt; struct pipe_surface *ps; @@ -134,9 +136,14 @@ nv40_miptree_surface(struct pipe_context *pipe, struct pipe_texture *pt, void nv40_init_miptree_functions(struct nv40_context *nv40) { - nv40->pipe.texture_create = nv40_miptree_create; - nv40->pipe.texture_release = nv40_miptree_release; nv40->pipe.texture_update = nv40_miptree_update; - nv40->pipe.get_tex_surface = nv40_miptree_surface; +} + +void +nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv40_miptree_create; + pscreen->texture_release = nv40_miptree_release; + pscreen->get_tex_surface = nv40_miptree_surface; } diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c new file mode 100644 index 00000000000..1941598c641 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -0,0 +1,151 @@ +#include "pipe/p_screen.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +static const char * +nv40_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", screen->chipset); + return buffer; +} + +static const char * +nv40_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv40_screen_get_param(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + case PIPE_CAP_BITMAP_TEXCOORD_BIAS: + return 0.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv40_screen_surface_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, uint type) +{ + switch (type) { + case PIPE_SURFACE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + break; + case PIPE_TEXTURE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + } + break; + default: + assert(0); + }; + + return FALSE; +} + +static void +nv40_screen_destroy(struct pipe_screen *pscreen) +{ + FREE(pscreen); +} + +struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, unsigned chipset) +{ + struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); + + if (!screen) + return NULL; + + screen->chipset = chipset; + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv40_screen_destroy; + + screen->pipe.get_name = nv40_screen_get_name; + screen->pipe.get_vendor = nv40_screen_get_vendor; + screen->pipe.get_param = nv40_screen_get_param; + screen->pipe.get_paramf = nv40_screen_get_paramf; + + screen->pipe.is_format_supported = nv40_screen_surface_format_supported; + + nv40_screen_init_miptree_functions(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h new file mode 100644 index 00000000000..b30a6c5ad57 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -0,0 +1,20 @@ +#ifndef __NV40_SCREEN_H__ +#define __NV40_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv40_screen { + struct pipe_screen pipe; + unsigned chipset; +}; + +static INLINE struct nv40_screen * +nv40_screen(struct pipe_screen *screen) +{ + return (struct nv40_screen *)screen; +} + +extern struct pipe_screen * +nv40_screen_create(struct pipe_winsys *winsys, unsigned chipset); + +#endif diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c index df5d7abdbfc..e8a60116964 100644 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -33,46 +33,6 @@ #include "pipe/p_inlines.h" #include "util/p_tile.h" -static boolean -nv40_surface_format_supported(struct pipe_context *pipe, - enum pipe_format format, uint type) -{ - switch (type) { - case PIPE_SURFACE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - break; - case PIPE_TEXTURE: - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: - case PIPE_FORMAT_U_A8_L8: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return TRUE; - default: - break; - } - break; - default: - assert(0); - }; - - return FALSE; -} - static void nv40_surface_copy(struct pipe_context *pipe, unsigned do_flip, struct pipe_surface *dest, unsigned destx, unsigned desty, @@ -100,7 +60,6 @@ nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, void nv40_init_surface_functions(struct nv40_context *nv40) { - nv40->pipe.is_format_supported = nv40_surface_format_supported; nv40->pipe.surface_copy = nv40_surface_copy; nv40->pipe.surface_fill = nv40_surface_fill; } diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile index 68eb49ff2a3..1c0b82887a1 100644 --- a/src/gallium/drivers/nv50/Makefile +++ b/src/gallium/drivers/nv50/Makefile @@ -9,6 +9,7 @@ DRIVER_SOURCES = \ nv50_draw.c \ nv50_miptree.c \ nv50_query.c \ + nv50_screen.c \ nv50_state.c \ nv50_surface.c \ nv50_vbo.c diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 3c5a54bfd38..98022809a6c 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -4,85 +4,7 @@ #include "pipe/p_util.h" #include "nv50_context.h" - -static boolean -nv50_is_format_supported(struct pipe_context *pipe, enum pipe_format format, - uint type) -{ - return FALSE; -} - -static const char * -nv50_get_name(struct pipe_context *pipe) -{ - struct nv50_context *nv50 = (struct nv50_context *)pipe; - static char buffer[128]; - - snprintf(buffer, sizeof(buffer), "NV%02X", nv50->chipset); - return buffer; -} - -static const char * -nv50_get_vendor(struct pipe_context *pipe) -{ - return "nouveau"; -} - -static int -nv50_get_param(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 32; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_S3TC: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 8; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 0; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv50_get_paramf(struct pipe_context *pipe, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} +#include "nv50_screen.h" static void nv50_flush(struct pipe_context *pipe, unsigned flags) @@ -134,9 +56,10 @@ nv50_init_hwctx(struct nv50_context *nv50, int tesla_class) #define GRCLASS5097_CHIPSETS 0x00000000 #define GRCLASS8297_CHIPSETS 0x00000010 struct pipe_context * -nv50_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, - unsigned chipset) +nv50_create(struct pipe_screen *pscreen, struct nouveau_winsys *nvws) { + struct pipe_winsys *pipe_winsys = pscreen->winsys; + unsigned chipset = nv50_screen(pscreen)->chipset; struct nv50_context *nv50; int tesla_class, ret; @@ -173,13 +96,9 @@ nv50_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, } nv50->pipe.winsys = pipe_winsys; + nv50->pipe.screen = pscreen; nv50->pipe.destroy = nv50_destroy; - nv50->pipe.is_format_supported = nv50_is_format_supported; - nv50->pipe.get_name = nv50_get_name; - nv50->pipe.get_vendor = nv50_get_vendor; - nv50->pipe.get_param = nv50_get_param; - nv50->pipe.get_paramf = nv50_get_paramf; nv50->pipe.draw_arrays = nv50_draw_arrays; nv50->pipe.draw_elements = nv50_draw_elements; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index b99254f6191..a529bf3c3e3 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -38,6 +38,8 @@ extern void nv50_init_surface_functions(struct nv50_context *nv50); extern void nv50_init_state_functions(struct nv50_context *nv50); extern void nv50_init_query_functions(struct nv50_context *nv50); +extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen); + /* nv50_draw.c */ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 0c034ed4387..720d33fda91 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -1,25 +1,46 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" +#include "pipe/p_screen.h" #include "nv50_context.h" static struct pipe_texture * -nv50_miptree_create(struct pipe_context *pipe, const struct pipe_texture *pt) +nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { NOUVEAU_ERR("unimplemented\n"); return NULL; } static void -nv50_miptree_release(struct pipe_context *pipe, struct pipe_texture **pt) +nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) { NOUVEAU_ERR("unimplemented\n"); } +static struct pipe_surface * +nv50_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + NOUVEAU_ERR("unimplemented\n"); + return NULL; +} + +void +nv50_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv50_miptree_create; + pscreen->texture_release = nv50_miptree_release; + pscreen->get_tex_surface = nv50_miptree_surface; +} + +static void +nv50_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt) +{ +} + void nv50_init_miptree_functions(struct nv50_context *nv50) { - nv50->pipe.texture_create = nv50_miptree_create; - nv50->pipe.texture_release = nv50_miptree_release; + nv50->pipe.texture_update = nv50_miptree_update; } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c new file mode 100644 index 00000000000..8bf82eb0bc0 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -0,0 +1,117 @@ +#include "pipe/p_screen.h" +#include "pipe/p_util.h" + +#include "nv50_context.h" +#include "nv50_screen.h" + +static boolean +nv50_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, uint type) +{ + return FALSE; +} + +static const char * +nv50_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", screen->chipset); + return buffer; +} + +static const char * +nv50_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv50_screen_get_param(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 32; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv50_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static void +nv50_screen_destroy(struct pipe_screen *pscreen) +{ + FREE(pscreen); +} + +struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, unsigned chipset) +{ + struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); + + if (!screen) + return NULL; + + screen->chipset = chipset; + + screen->pipe.winsys = ws; + + screen->pipe.destroy = nv50_screen_destroy; + + screen->pipe.get_name = nv50_screen_get_name; + screen->pipe.get_vendor = nv50_screen_get_vendor; + screen->pipe.get_param = nv50_screen_get_param; + screen->pipe.get_paramf = nv50_screen_get_paramf; + + screen->pipe.is_format_supported = nv50_screen_is_format_supported; + + nv50_screen_init_miptree_functions(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h new file mode 100644 index 00000000000..45ebbb8051b --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -0,0 +1,20 @@ +#ifndef __NV50_SCREEN_H__ +#define __NV50_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv50_screen { + struct pipe_screen pipe; + unsigned chipset; +}; + +static INLINE struct nv50_screen * +nv50_screen(struct pipe_screen *screen) +{ + return (struct nv50_screen *)screen; +} + +extern struct pipe_screen * +nv50_screen_create(struct pipe_winsys *winsys, unsigned chipset); + +#endif diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index ca92ff02b89..39cf675a57c 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -33,15 +33,6 @@ #include "pipe/p_inlines.h" #include "util/p_tile.h" -static struct pipe_surface * -nv50_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - NOUVEAU_ERR("unimplemented\n"); - return NULL; -} - static void nv50_surface_copy(struct pipe_context *pipe, unsigned flip, struct pipe_surface *dest, unsigned destx, unsigned desty, @@ -69,7 +60,6 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, void nv50_init_surface_functions(struct nv50_context *nv50) { - nv50->pipe.get_tex_surface = nv50_get_tex_surface; nv50->pipe.surface_copy = nv50_surface_copy; nv50->pipe.surface_fill = nv50_surface_fill; } diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c index 2ca05d84c60..1d758e29e75 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c @@ -72,23 +72,29 @@ struct pipe_context * nouveau_pipe_create(struct nouveau_context *nv) { struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys); - struct pipe_context *(*hw_create)(struct pipe_winsys *, - struct nouveau_winsys *, - unsigned); + struct pipe_screen *(*hws_create)(struct pipe_winsys *, + unsigned chipset); + struct pipe_context *(*hw_create)(struct pipe_screen *, + struct nouveau_winsys *); + struct pipe_winsys *ws; + struct pipe_screen *pscreen; if (!nvws) return NULL; switch (nv->chipset & 0xf0) { case 0x30: + hws_create = nv30_screen_create; hw_create = nv30_create; break; case 0x40: case 0x60: + hws_create = nv40_screen_create; hw_create = nv40_create; break; case 0x50: case 0x80: + hws_create = nv50_screen_create; hw_create = nv50_create; break; default: @@ -119,6 +125,8 @@ nouveau_pipe_create(struct nouveau_context *nv) nvws->surface_copy = nouveau_pipe_surface_copy; nvws->surface_fill = nouveau_pipe_surface_fill; - return hw_create(nouveau_create_pipe_winsys(nv), nvws, nv->chipset); + ws = nouveau_create_pipe_winsys(nv); + pscreen = hws_create(ws, nv->chipset); + return hw_create(pscreen, nvws); } diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c index 0e1b4273d1e..704f6c77506 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c @@ -61,23 +61,25 @@ nouveau_is_format_supported(struct softpipe_winsys *sws, uint format) return FALSE; } - - struct pipe_context * nouveau_create_softpipe(struct nouveau_context *nv) { - struct nouveau_softpipe_winsys *nvsws; - - nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys); - - /* Fill in this struct with callbacks that softpipe will need to - * communicate with the window system, buffer manager, etc. - */ - nvsws->sws.is_format_supported = nouveau_is_format_supported; - nvsws->nv = nv; + struct nouveau_softpipe_winsys *nvsws; + struct pipe_screen *pscreen; + struct pipe_winsys *ws; + + ws = nouveau_create_pipe_winsys(nv); + if (!ws) + return NULL; + pscreen = softpipe_create_screen(ws); + + nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys); + if (!nvsws) + return NULL; + + nvsws->sws.is_format_supported = nouveau_is_format_supported; + nvsws->nv = nv; - /* Create the softpipe context: - */ - return softpipe_create(nouveau_create_pipe_winsys(nv), &nvsws->sws); + return softpipe_create(pscreen, ws, &nvsws->sws); } -- cgit v1.2.3 From baaae562f02563c5966b857c61b3eae7341950e3 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 29 Feb 2008 22:54:40 +1100 Subject: nouveau: hand nouveau_winsys in with pipe_screen init --- src/gallium/drivers/nouveau/nouveau_winsys.h | 15 +++++++++------ src/gallium/drivers/nv30/nv30_context.c | 3 ++- src/gallium/drivers/nv30/nv30_screen.c | 4 +++- src/gallium/drivers/nv30/nv30_screen.h | 5 ++--- src/gallium/drivers/nv40/nv40_context.c | 3 ++- src/gallium/drivers/nv40/nv40_screen.c | 4 +++- src/gallium/drivers/nv40/nv40_screen.h | 5 ++--- src/gallium/drivers/nv50/nv50_context.c | 3 ++- src/gallium/drivers/nv50/nv50_screen.c | 4 +++- src/gallium/drivers/nv50/nv50_screen.h | 5 ++--- src/gallium/winsys/dri/nouveau/nouveau_winsys.c | 8 ++++---- 11 files changed, 34 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 98d95e94a58..11ca7e80ddb 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -50,21 +50,24 @@ struct nouveau_winsys { }; extern struct pipe_screen * -nv30_screen_create(struct pipe_winsys *ws, unsigned chipset); +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, + unsigned chipset); extern struct pipe_context * -nv30_create(struct pipe_screen *, struct nouveau_winsys *); +nv30_create(struct pipe_screen *); extern struct pipe_screen * -nv40_screen_create(struct pipe_winsys *ws, unsigned chipset); +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, + unsigned chipset); extern struct pipe_context * -nv40_create(struct pipe_screen *, struct nouveau_winsys *); +nv40_create(struct pipe_screen *); extern struct pipe_screen * -nv50_screen_create(struct pipe_winsys *ws, unsigned chipset); +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, + unsigned chipset); extern struct pipe_context * -nv50_create(struct pipe_screen *, struct nouveau_winsys *); +nv50_create(struct pipe_screen *); #endif diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index b8452e23b16..522fb132269 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -265,9 +265,10 @@ nv30_init_hwctx(struct nv30_context *nv30, int rankine_class) #define NV35TCL_CHIPSET_3X_MASK 0x000001e0 struct pipe_context * -nv30_create(struct pipe_screen *screen, struct nouveau_winsys *nvws) +nv30_create(struct pipe_screen *screen) { struct pipe_winsys *pipe_winsys = screen->winsys; + struct nouveau_winsys *nvws = nv30_screen(screen)->nvws; unsigned chipset = nv30_screen(screen)->chipset; struct nv30_context *nv30; int rankine_class = 0, ret; diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 6d64025528f..39f2ac1af5c 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -125,7 +125,8 @@ nv30_screen_destroy(struct pipe_screen *screen) } struct pipe_screen * -nv30_screen_create(struct pipe_winsys *winsys, unsigned chipset) +nv30_screen_create(struct pipe_winsys *winsys, struct nouveau_winsys *nvws, + unsigned chipset) { struct nv30_screen *nv30screen = CALLOC_STRUCT(nv30_screen); @@ -133,6 +134,7 @@ nv30_screen_create(struct pipe_winsys *winsys, unsigned chipset) return NULL; nv30screen->chipset = chipset; + nv30screen->nvws = nvws; nv30screen->screen.winsys = winsys; diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h index e55242fbf7f..f878f81e11f 100644 --- a/src/gallium/drivers/nv30/nv30_screen.h +++ b/src/gallium/drivers/nv30/nv30_screen.h @@ -5,6 +5,8 @@ struct nv30_screen { struct pipe_screen screen; + + struct nouveau_winsys *nvws; unsigned chipset; }; @@ -14,7 +16,4 @@ nv30_screen(struct pipe_screen *screen) return (struct nv30_screen *)screen; } -extern struct pipe_screen * -nv30_screen_create(struct pipe_winsys *winsys, unsigned chipset); - #endif diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index a7f64c6e9e5..679c2ddc6b5 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -196,11 +196,12 @@ nv40_destroy(struct pipe_context *pipe) } struct pipe_context * -nv40_create(struct pipe_screen *pscreen, struct nouveau_winsys *nvws) +nv40_create(struct pipe_screen *pscreen) { struct pipe_winsys *ws = pscreen->winsys; struct nv40_context *nv40; unsigned chipset = nv40_screen(pscreen)->chipset; + struct nouveau_winsys *nvws = nv40_screen(pscreen)->nvws; nv40 = CALLOC(1, sizeof(struct nv40_context)); if (!nv40) diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 1941598c641..66e84b68908 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -125,7 +125,8 @@ nv40_screen_destroy(struct pipe_screen *pscreen) } struct pipe_screen * -nv40_screen_create(struct pipe_winsys *ws, unsigned chipset) +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, + unsigned chipset) { struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); @@ -133,6 +134,7 @@ nv40_screen_create(struct pipe_winsys *ws, unsigned chipset) return NULL; screen->chipset = chipset; + screen->nvws = nvws; screen->pipe.winsys = ws; screen->pipe.destroy = nv40_screen_destroy; diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h index b30a6c5ad57..88b8fed26cf 100644 --- a/src/gallium/drivers/nv40/nv40_screen.h +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -5,6 +5,8 @@ struct nv40_screen { struct pipe_screen pipe; + + struct nouveau_winsys *nvws; unsigned chipset; }; @@ -14,7 +16,4 @@ nv40_screen(struct pipe_screen *screen) return (struct nv40_screen *)screen; } -extern struct pipe_screen * -nv40_screen_create(struct pipe_winsys *winsys, unsigned chipset); - #endif diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 98022809a6c..e5054e34f6b 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -56,9 +56,10 @@ nv50_init_hwctx(struct nv50_context *nv50, int tesla_class) #define GRCLASS5097_CHIPSETS 0x00000000 #define GRCLASS8297_CHIPSETS 0x00000010 struct pipe_context * -nv50_create(struct pipe_screen *pscreen, struct nouveau_winsys *nvws) +nv50_create(struct pipe_screen *pscreen) { struct pipe_winsys *pipe_winsys = pscreen->winsys; + struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws; unsigned chipset = nv50_screen(pscreen)->chipset; struct nv50_context *nv50; int tesla_class, ret; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 8bf82eb0bc0..f091779e3b0 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -90,7 +90,8 @@ nv50_screen_destroy(struct pipe_screen *pscreen) } struct pipe_screen * -nv50_screen_create(struct pipe_winsys *ws, unsigned chipset) +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, + unsigned chipset) { struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); @@ -98,6 +99,7 @@ nv50_screen_create(struct pipe_winsys *ws, unsigned chipset) return NULL; screen->chipset = chipset; + screen->nvws = nvws; screen->pipe.winsys = ws; diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 45ebbb8051b..d664816a031 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -5,6 +5,8 @@ struct nv50_screen { struct pipe_screen pipe; + + struct nouveau_winsys *nvws; unsigned chipset; }; @@ -14,7 +16,4 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } -extern struct pipe_screen * -nv50_screen_create(struct pipe_winsys *winsys, unsigned chipset); - #endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c index 1d758e29e75..529f5771812 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c @@ -73,9 +73,9 @@ nouveau_pipe_create(struct nouveau_context *nv) { struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys); struct pipe_screen *(*hws_create)(struct pipe_winsys *, + struct nouveau_winsys *, unsigned chipset); - struct pipe_context *(*hw_create)(struct pipe_screen *, - struct nouveau_winsys *); + struct pipe_context *(*hw_create)(struct pipe_screen *); struct pipe_winsys *ws; struct pipe_screen *pscreen; @@ -126,7 +126,7 @@ nouveau_pipe_create(struct nouveau_context *nv) nvws->surface_fill = nouveau_pipe_surface_fill; ws = nouveau_create_pipe_winsys(nv); - pscreen = hws_create(ws, nv->chipset); - return hw_create(pscreen, nvws); + pscreen = hws_create(ws, nvws, nv->chipset); + return hw_create(pscreen); } -- cgit v1.2.3 From 17f6db9d0197657cd753249ef60355c6fd983032 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 29 Feb 2008 23:08:01 +1100 Subject: nv40: move "channel context" stuff into nv40_screen --- src/gallium/drivers/nv40/nv40_context.c | 168 +------------------------ src/gallium/drivers/nv40/nv40_context.h | 29 +---- src/gallium/drivers/nv40/nv40_fragprog.c | 4 +- src/gallium/drivers/nv40/nv40_fragtex.c | 6 +- src/gallium/drivers/nv40/nv40_query.c | 10 +- src/gallium/drivers/nv40/nv40_screen.c | 128 ++++++++++++++++++- src/gallium/drivers/nv40/nv40_screen.h | 15 +++ src/gallium/drivers/nv40/nv40_state.c | 16 +-- src/gallium/drivers/nv40/nv40_state_blend.c | 2 +- src/gallium/drivers/nv40/nv40_state_emit.c | 4 +- src/gallium/drivers/nv40/nv40_state_fb.c | 34 ++--- src/gallium/drivers/nv40/nv40_state_scissor.c | 2 +- src/gallium/drivers/nv40/nv40_state_stipple.c | 2 +- src/gallium/drivers/nv40/nv40_state_viewport.c | 2 +- src/gallium/drivers/nv40/nv40_vbo.c | 10 +- src/gallium/drivers/nv40/nv40_vertprog.c | 8 +- 16 files changed, 203 insertions(+), 237 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 679c2ddc6b5..084829ce281 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -6,10 +6,6 @@ #include "nv40_context.h" #include "nv40_screen.h" -#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf -#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 -#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 - static void nv40_flush(struct pipe_context *pipe, unsigned flags) { @@ -24,7 +20,7 @@ nv40_flush(struct pipe_context *pipe, unsigned flags) } if (flags & PIPE_FLUSH_WAIT) { - nvws->notifier_reset(nv40->hw->sync, 0); + nvws->notifier_reset(nv40->screen->sync, 0); BEGIN_RING(curie, 0x104, 1); OUT_RING (0); BEGIN_RING(curie, 0x100, 1); @@ -34,149 +30,7 @@ nv40_flush(struct pipe_context *pipe, unsigned flags) FIRE_RING(); if (flags & PIPE_FLUSH_WAIT) - nvws->notifier_wait(nv40->hw->sync, 0, 0, 2000); -} - -static void -nv40_channel_takedown(struct nv40_channel_context *cnv40) -{ - struct nouveau_winsys *nvws = cnv40->nvws; - - nvws->res_free(&cnv40->vp_exec_heap); - nvws->res_free(&cnv40->vp_data_heap); - nvws->res_free(&cnv40->query_heap); - nvws->notifier_free(&cnv40->query); - nvws->notifier_free(&cnv40->sync); - nvws->grobj_free(&cnv40->curie); - free(cnv40); -} - -static struct nv40_channel_context * -nv40_channel_init(struct pipe_winsys *ws, struct nouveau_winsys *nvws, - unsigned chipset) -{ - struct nv40_channel_context *cnv40 = NULL; - struct nouveau_stateobj *so; - unsigned curie_class = 0; - int ret; - - switch (chipset & 0xf0) { - case 0x40: - if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) - curie_class = NV40TCL; - else - if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) - curie_class = NV44TCL; - break; - case 0x60: - if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) - curie_class = NV44TCL; - break; - default: - break; - } - - if (!curie_class) { - NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset); - return NULL; - } - - cnv40 = CALLOC(1, sizeof(struct nv40_channel_context)); - if (!cnv40) - return NULL; - cnv40->chipset = chipset; - cnv40->nvws = nvws; - - /* Notifier for sync purposes */ - ret = nvws->notifier_alloc(nvws, 1, &cnv40->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv40_channel_takedown(cnv40); - return NULL; - } - - /* Query objects */ - ret = nvws->notifier_alloc(nvws, 32, &cnv40->query); - if (ret) { - NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv40_channel_takedown(cnv40); - return NULL; - } - - ret = nvws->res_init(&cnv40->query_heap, 0, 32); - if (ret) { - NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv40_channel_takedown(cnv40); - return NULL; - } - - /* Vtxprog resources */ - if (nvws->res_init(&cnv40->vp_exec_heap, 0, 512) || - nvws->res_init(&cnv40->vp_data_heap, 0, 256)) { - nv40_channel_takedown(cnv40); - return NULL; - } - - /* 3D object */ - ret = nvws->grobj_alloc(nvws, curie_class, &cnv40->curie); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - /* Static curie initialisation */ - so = so_new(128, 0); - so_method(so, cnv40->curie, NV40TCL_DMA_NOTIFY, 1); - so_data (so, cnv40->sync->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_TEXTURE0, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->gart->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_COLOR1, 1); - so_data (so, nvws->channel->vram->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_COLOR0, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->vram->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_VTXBUF0, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->gart->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_FENCE, 2); - so_data (so, 0); - so_data (so, cnv40->query->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_UNK01AC, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->vram->handle); - so_method(so, cnv40->curie, NV40TCL_DMA_COLOR2, 2); - so_data (so, nvws->channel->vram->handle); - so_data (so, nvws->channel->vram->handle); - - so_method(so, cnv40->curie, 0x1ea4, 3); - so_data (so, 0x00000010); - so_data (so, 0x01000100); - so_data (so, 0xff800006); - - /* vtxprog output routing */ - so_method(so, cnv40->curie, 0x1fc4, 1); - so_data (so, 0x06144321); - so_method(so, cnv40->curie, 0x1fc8, 2); - so_data (so, 0xedcba987); - so_data (so, 0x00000021); - so_method(so, cnv40->curie, 0x1fd0, 1); - so_data (so, 0x00171615); - so_method(so, cnv40->curie, 0x1fd4, 1); - so_data (so, 0x001b1a19); - - so_method(so, cnv40->curie, 0x1ef8, 1); - so_data (so, 0x0020ffff); - so_method(so, cnv40->curie, 0x1d64, 1); - so_data (so, 0x00d30000); - so_method(so, cnv40->curie, 0x1e94, 1); - so_data (so, 0x00000001); - - so_emit(nvws, so); - so_ref(NULL, &so); - nvws->push_flush(nvws->channel, 0); - - return cnv40; + nvws->notifier_wait(nv40->screen->sync, 0, 0, 2000); } static void @@ -186,32 +40,22 @@ nv40_destroy(struct pipe_context *pipe) if (nv40->draw) draw_destroy(nv40->draw); - - if (nv40->hw) { - if (--nv40->hw->refcount == 0) - nv40_channel_takedown(nv40->hw); - } - free(nv40); } struct pipe_context * nv40_create(struct pipe_screen *pscreen) { + struct nv40_screen *screen = nv40_screen(pscreen); struct pipe_winsys *ws = pscreen->winsys; struct nv40_context *nv40; - unsigned chipset = nv40_screen(pscreen)->chipset; - struct nouveau_winsys *nvws = nv40_screen(pscreen)->nvws; + unsigned chipset = screen->chipset; + struct nouveau_winsys *nvws = screen->nvws; nv40 = CALLOC(1, sizeof(struct nv40_context)); if (!nv40) return NULL; - - nv40->hw = nv40_channel_init(ws, nvws, chipset); - if (!nv40->hw) { - nv40_destroy(&nv40->pipe); - return NULL; - } + nv40->screen = screen; nv40->chipset = chipset; nv40->nvws = nvws; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 3ddfbd43f60..3b669594dc3 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -11,7 +11,7 @@ #include "nouveau/nouveau_gldefs.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv40_channel_context *ctx = nv40->hw + struct nv40_screen *ctx = nv40->screen #include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" @@ -59,6 +59,8 @@ enum nv40_state_index { NV40_STATE_MAX = 33 }; +#include "nv40_screen.h" + #define NV40_NEW_BLEND (1 << 0) #define NV40_NEW_RAST (1 << 1) #define NV40_NEW_ZSA (1 << 2) @@ -76,28 +78,6 @@ enum nv40_state_index { #define NV40_FALLBACK_TNL (1 << 0) #define NV40_FALLBACK_RAST (1 << 1) -struct nv40_channel_context { - struct nouveau_winsys *nvws; - unsigned refcount; - - unsigned chipset; - - /* HW graphics objects */ - struct nouveau_grobj *curie; - struct nouveau_notifier *sync; - - /* Query object resources */ - struct nouveau_notifier *query; - struct nouveau_resource *query_heap; - - /* Vtxprog resources */ - struct nouveau_resource *vp_exec_heap; - struct nouveau_resource *vp_data_heap; - - /* Current 3D state of channel */ - struct nouveau_stateobj *state[NV40_STATE_MAX]; -}; - struct nv40_rasterizer_state { struct pipe_rasterizer_state pipe; struct nouveau_stateobj *so; @@ -125,9 +105,10 @@ struct nv40_state { struct nv40_context { struct pipe_context pipe; + struct nouveau_winsys *nvws; + struct nv40_screen *screen; - struct nv40_channel_context *hw; struct draw_context *draw; int chipset; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 2a8abb32a30..3c4ea7e99ed 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -810,11 +810,11 @@ nv40_fragprog_validate(struct nv40_context *nv40) nv40_fragprog_upload(nv40, fp); so = so_new(4, 1); - so_method(so, nv40->hw->curie, NV40TCL_FP_ADDRESS, 1); + so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); - so_method(so, nv40->hw->curie, NV40TCL_FP_CONTROL, 1); + so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1); so_data (so, fp->fp_control); so_ref(so, &fp->so); diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 6be8378c087..436f954ceca 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -106,7 +106,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txs = tf->swizzle; so = so_new(16, 2); - so_method(so, nv40->hw->curie, NV40TCL_TEX_OFFSET(unit), 8); + so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); @@ -117,7 +117,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | pt->height[0]); so_data (so, ps->bcol); - so_method(so, nv40->hw->curie, NV40TCL_TEX_SIZE1(unit), 1); + so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); return so; @@ -137,7 +137,7 @@ nv40_fragtex_validate(struct nv40_context *nv40) samplers &= ~(1 << unit); so = so_new(2, 0); - so_method(so, nv40->hw->curie, NV40TCL_TEX_ENABLE(unit), 1); + so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index 8bca2788b93..03178456246 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -45,9 +45,9 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - if (nv40->nvws->res_alloc(nv40->hw->query_heap, 1, NULL, &q->object)) + if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object)) assert(0); - nv40->nvws->notifier_reset(nv40->hw->query, q->object->start); + nv40->nvws->notifier_reset(nv40->screen->query, q->object->start); BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); OUT_RING (1); @@ -82,17 +82,17 @@ nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!q->ready) { unsigned status; - status = nvws->notifier_status(nv40->hw->query, + status = nvws->notifier_status(nv40->screen->query, q->object->start); if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { if (wait == FALSE) return FALSE; - nvws->notifier_wait(nv40->hw->query, q->object->start, + nvws->notifier_wait(nv40->screen->query, q->object->start, NV_NOTIFY_STATE_STATUS_COMPLETED, 0); } - q->result = nvws->notifier_retval(nv40->hw->query, + q->result = nvws->notifier_retval(nv40->screen->query, q->object->start); q->ready = TRUE; nvws->res_free(&q->object); diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 66e84b68908..268ca83ce0d 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -4,6 +4,10 @@ #include "nv40_context.h" #include "nv40_screen.h" +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 + static const char * nv40_screen_get_name(struct pipe_screen *pscreen) { @@ -121,6 +125,16 @@ nv40_screen_surface_format_supported(struct pipe_screen *pscreen, static void nv40_screen_destroy(struct pipe_screen *pscreen) { + struct nv40_screen *screen = nv40_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->res_free(&screen->vp_exec_heap); + nvws->res_free(&screen->vp_data_heap); + nvws->res_free(&screen->query_heap); + nvws->notifier_free(&screen->query); + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->curie); + FREE(pscreen); } @@ -129,13 +143,125 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, unsigned chipset) { struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); + struct nouveau_stateobj *so; + unsigned curie_class; + int ret; if (!screen) return NULL; - screen->chipset = chipset; screen->nvws = nvws; + /* 3D object */ + switch (chipset & 0xf0) { + case 0x40: + if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV40TCL; + else + if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + case 0x60: + if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + default: + break; + } + + if (!curie_class) { + NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, curie_class, &screen->curie); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Query objects */ + ret = nvws->notifier_alloc(nvws, 32, &screen->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->res_init(&screen->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&screen->vp_exec_heap, 0, 512) || + nvws->res_init(&screen->vp_data_heap, 0, 256)) { + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static curie initialisation */ + so = so_new(128, 0); + so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, screen->query->handle); + so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + + so_method(so, screen->curie, 0x1ea4, 3); + so_data (so, 0x00000010); + so_data (so, 0x01000100); + so_data (so, 0xff800006); + + /* vtxprog output routing */ + so_method(so, screen->curie, 0x1fc4, 1); + so_data (so, 0x06144321); + so_method(so, screen->curie, 0x1fc8, 2); + so_data (so, 0xedcba987); + so_data (so, 0x00000021); + so_method(so, screen->curie, 0x1fd0, 1); + so_data (so, 0x00171615); + so_method(so, screen->curie, 0x1fd4, 1); + so_data (so, 0x001b1a19); + + so_method(so, screen->curie, 0x1ef8, 1); + so_data (so, 0x0020ffff); + so_method(so, screen->curie, 0x1d64, 1); + so_data (so, 0x00d30000); + so_method(so, screen->curie, 0x1e94, 1); + so_data (so, 0x00000001); + + so_emit(nvws, so); + so_ref(NULL, &so); + nvws->push_flush(nvws->channel, 0); + screen->pipe.winsys = ws; screen->pipe.destroy = nv40_screen_destroy; diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h index 88b8fed26cf..9f9668dbb6b 100644 --- a/src/gallium/drivers/nv40/nv40_screen.h +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -8,6 +8,21 @@ struct nv40_screen { struct nouveau_winsys *nvws; unsigned chipset; + + /* HW graphics objects */ + struct nouveau_grobj *curie; + struct nouveau_notifier *sync; + + /* Query object resources */ + struct nouveau_notifier *query; + struct nouveau_resource *query_heap; + + /* Vtxprog resources */ + struct nouveau_resource *vp_exec_heap; + struct nouveau_resource *vp_data_heap; + + /* Current 3D state of channel */ + struct nouveau_stateobj *state[NV40_STATE_MAX]; }; static INLINE struct nv40_screen * diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 24335fbc44d..caa2f9df0c6 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -10,7 +10,7 @@ nv40_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_grobj *curie = nv40->hw->curie; + struct nouveau_grobj *curie = nv40->screen->curie; struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); struct nouveau_stateobj *so = so_new(16, 0); @@ -286,7 +286,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, struct nv40_context *nv40 = nv40_context(pipe); struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); struct nouveau_stateobj *so = so_new(32, 0); - struct nouveau_grobj *curie = nv40->hw->curie; + struct nouveau_grobj *curie = nv40->screen->curie; /*XXX: ignored: * light_twoside @@ -420,18 +420,18 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); struct nouveau_stateobj *so = so_new(32, 0); - so_method(so, nv40->hw->curie, NV40TCL_DEPTH_FUNC, 3); + so_method(so, nv40->screen->curie, NV40TCL_DEPTH_FUNC, 3); so_data (so, nvgl_comparison_op(cso->depth.func)); so_data (so, cso->depth.writemask ? 1 : 0); so_data (so, cso->depth.enabled ? 1 : 0); - so_method(so, nv40->hw->curie, NV40TCL_ALPHA_TEST_ENABLE, 3); + so_method(so, nv40->screen->curie, NV40TCL_ALPHA_TEST_ENABLE, 3); so_data (so, cso->alpha.enabled ? 1 : 0); so_data (so, nvgl_comparison_op(cso->alpha.func)); so_data (so, float_to_ubyte(cso->alpha.ref)); if (cso->stencil[0].enabled) { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); + so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, cso->stencil[0].enabled ? 1 : 0); so_data (so, cso->stencil[0].write_mask); so_data (so, nvgl_comparison_op(cso->stencil[0].func)); @@ -441,12 +441,12 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); } else { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); + so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_BACK_ENABLE, 8); + so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_ENABLE, 8); so_data (so, cso->stencil[1].enabled ? 1 : 0); so_data (so, cso->stencil[1].write_mask); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); @@ -456,7 +456,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); } else { - so_method(so, nv40->hw->curie, NV40TCL_STENCIL_BACK_ENABLE, 1); + so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c index dd09830aa3d..95e6d7394f4 100644 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -21,7 +21,7 @@ nv40_state_blend_colour_validate(struct nv40_context *nv40) struct nouveau_stateobj *so = so_new(2, 0); struct pipe_blend_color *bcol = &nv40->blend_colour; - so_method(so, nv40->hw->curie, NV40TCL_BLEND_COLOR, 1); + so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | (float_to_ubyte(bcol->color[0]) << 16) | (float_to_ubyte(bcol->color[1]) << 8) | diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index bb2ce0f7221..221503617c3 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -67,8 +67,8 @@ nv40_state_emit(struct nv40_context *nv40) while (state->dirty) { unsigned idx = ffsll(state->dirty) - 1; - so_ref (state->hw[idx], &nv40->hw->state[idx]); - so_emit(nv40->nvws, nv40->hw->state[idx]); + so_ref (state->hw[idx], &nv40->screen->state[idx]); + so_emit(nv40->nvws, nv40->screen->state[idx]); state->dirty &= ~(1ULL << idx); } diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 3d0ab920030..71795ab1824 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -72,73 +72,73 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) } if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR0, 1); + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1); so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR0_PITCH, 2); + so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2); so_data (so, rt[0]->pitch * rt[0]->cpp); so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR1, 1); + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1); so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR1_OFFSET, 2); + so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2); so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_data (so, rt[1]->pitch * rt[1]->cpp); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR2, 1); + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1); so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR2_OFFSET, 1); + so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1); so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_COLOR2_PITCH, 1); + so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1); so_data (so, rt[2]->pitch * rt[2]->cpp); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_COLOR3, 1); + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1); so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_COLOR3_OFFSET, 1); + so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1); so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_COLOR3_PITCH, 1); + so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1); so_data (so, rt[3]->pitch * rt[3]->cpp); } if (zeta_format) { - so_method(so, nv40->hw->curie, NV40TCL_DMA_ZETA, 1); + so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1); so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); - so_method(so, nv40->hw->curie, NV40TCL_ZETA_OFFSET, 1); + so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1); so_reloc (so, zeta->buffer, zeta->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv40->hw->curie, NV40TCL_ZETA_PITCH, 1); + so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1); so_data (so, zeta->pitch * zeta->cpp); } - so_method(so, nv40->hw->curie, NV40TCL_RT_ENABLE, 1); + so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1); so_data (so, rt_enable); - so_method(so, nv40->hw->curie, NV40TCL_RT_HORIZ, 3); + so_method(so, nv40->screen->curie, NV40TCL_RT_HORIZ, 3); so_data (so, (w << 16) | 0); so_data (so, (h << 16) | 0); so_data (so, rt_format); - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_HORIZ, 2); + so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_HORIZ, 2); so_data (so, (w << 16) | 0); so_data (so, (h << 16) | 0); - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); so_data (so, ((w - 1) << 16) | 0); so_data (so, ((h - 1) << 16) | 0); diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index 09ffc49f965..9e9eadc5116 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -12,7 +12,7 @@ nv40_state_scissor_validate(struct nv40_context *nv40) return FALSE; so = so_new(3, 0); - so_method(so, nv40->hw->curie, NV40TCL_SCISSOR_HORIZ, 2); + so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); if (rast->scissor) { so_data (so, ((s->maxx - s->minx) << 16) | s->minx); so_data (so, ((s->maxy - s->miny) << 16) | s->miny); diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c index 001c396d747..b51024ad9b2 100644 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -4,7 +4,7 @@ static boolean nv40_state_stipple_validate(struct nv40_context *nv40) { struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; - struct nouveau_grobj *curie = nv40->hw->curie; + struct nouveau_grobj *curie = nv40->screen->curie; struct nouveau_stateobj *so; if (nv40->state.hw[NV40_STATE_STIPPLE] && diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 9616be5052a..3a325339072 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -6,7 +6,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40) struct nouveau_stateobj *so = so_new(9, 0); struct pipe_viewport_state *vpt = &nv40->viewport; - so_method(so, nv40->hw->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); so_data (so, fui(vpt->translate[0])); so_data (so, fui(vpt->translate[1])); so_data (so, fui(vpt->translate[2])); diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 1653ebf2a7e..bedc8c6d4ef 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -52,7 +52,7 @@ nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, } /* No support for 8bit indices, no support at all on 0x4497 chips */ - if (nv40->hw->curie->grclass == NV44TCL || ib_size == 1) + if (nv40->screen->curie->grclass == NV44TCL || ib_size == 1) return FALSE; switch (ib_size) { @@ -365,9 +365,9 @@ nv40_vbo_validate(struct nv40_context *nv40) num_hw++; vtxbuf = so_new(20, 18); - so_method(vtxbuf, nv40->hw->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); + so_method(vtxbuf, nv40->screen->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); vtxfmt = so_new(17, 0); - so_method(vtxfmt, nv40->hw->curie, NV40TCL_VTXFMT(0), num_hw); + so_method(vtxfmt, nv40->screen->curie, NV40TCL_VTXFMT(0), num_hw); inputs = vp->ir; for (hw = 0; hw < num_hw; hw++) { @@ -399,13 +399,13 @@ nv40_vbo_validate(struct nv40_context *nv40) } if (ib) { - so_method(vtxbuf, nv40->hw->curie, NV40TCL_IDXBUF_ADDRESS, 2); + so_method(vtxbuf, nv40->screen->curie, NV40TCL_IDXBUF_ADDRESS, 2); so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, 0, NV40TCL_IDXBUF_FORMAT_DMA1); } - so_method(vtxbuf, nv40->hw->curie, 0x1710, 1); + so_method(vtxbuf, nv40->screen->curie, 0x1710, 1); so_data (vtxbuf, 0); so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index d3ed57b1998..5b7a343e55d 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -655,7 +655,7 @@ nv40_vertprog_validate(struct nv40_context *nv40) check_gpu_resources: /* Allocate hw vtxprog exec slots */ if (!vp->exec) { - struct nouveau_resource *heap = nv40->hw->vp_exec_heap; + struct nouveau_resource *heap = nv40->screen->vp_exec_heap; struct nouveau_stateobj *so; uint vplen = vp->nr_insns; @@ -672,9 +672,9 @@ check_gpu_resources: } so = so_new(5, 0); - so_method(so, nv40->hw->curie, NV40TCL_VP_START_FROM_ID, 1); + so_method(so, nv40->screen->curie, NV40TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); - so_method(so, nv40->hw->curie, NV40TCL_VP_ATTRIB_EN, 2); + so_method(so, nv40->screen->curie, NV40TCL_VP_ATTRIB_EN, 2); so_data (so, vp->ir); so_data (so, vp->or); so_ref(so, &vp->so); @@ -684,7 +684,7 @@ check_gpu_resources: /* Allocate hw vtxprog const slots */ if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nv40->hw->vp_data_heap; + struct nouveau_resource *heap = nv40->screen->vp_data_heap; if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { while (heap->next && heap->size < vp->nr_consts) { -- cgit v1.2.3 From 4d22330837278574c7f06bdc9e12ffffb659f43d Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Fri, 29 Feb 2008 10:32:25 +0100 Subject: scons: List sp_screen.c. --- src/gallium/drivers/softpipe/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 4c1a6d5df0b..88c21ee3b0f 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -28,6 +28,7 @@ softpipe = env.ConvenienceLibrary( 'sp_quad_stencil.c', 'sp_quad_stipple.c', 'sp_query.c', + 'sp_screen.c', 'sp_state_blend.c', 'sp_state_clip.c', 'sp_state_derived.c', -- cgit v1.2.3 From b560ed2444383b9634786fe742b8cb6f5cdfc781 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 2 Mar 2008 14:56:42 +1100 Subject: nouveau: enable multi-context/single-channel support for nv40 --- src/gallium/drivers/nouveau/nouveau_winsys.h | 6 +-- src/gallium/drivers/nv30/nv30_context.c | 2 +- src/gallium/drivers/nv40/nv40_context.c | 3 +- src/gallium/drivers/nv40/nv40_context.h | 1 + src/gallium/drivers/nv40/nv40_screen.h | 2 + src/gallium/drivers/nv40/nv40_state_emit.c | 10 ++++ src/gallium/drivers/nv50/nv50_context.c | 2 +- src/gallium/winsys/dri/nouveau/nouveau_context.c | 63 +++++++++++++++++++++--- src/gallium/winsys/dri/nouveau/nouveau_context.h | 8 +++ src/gallium/winsys/dri/nouveau/nouveau_screen.h | 2 + src/gallium/winsys/dri/nouveau/nouveau_winsys.c | 10 ++-- 11 files changed, 94 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 11ca7e80ddb..44c8bb919bb 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -54,20 +54,20 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, unsigned chipset); extern struct pipe_context * -nv30_create(struct pipe_screen *); +nv30_create(struct pipe_screen *, unsigned pctx_id); extern struct pipe_screen * nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, unsigned chipset); extern struct pipe_context * -nv40_create(struct pipe_screen *); +nv40_create(struct pipe_screen *, unsigned pctx_id); extern struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, unsigned chipset); extern struct pipe_context * -nv50_create(struct pipe_screen *); +nv50_create(struct pipe_screen *, unsigned pctx_id); #endif diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 522fb132269..28d3f057cc4 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -265,7 +265,7 @@ nv30_init_hwctx(struct nv30_context *nv30, int rankine_class) #define NV35TCL_CHIPSET_3X_MASK 0x000001e0 struct pipe_context * -nv30_create(struct pipe_screen *screen) +nv30_create(struct pipe_screen *screen, unsigned pctx_id) { struct pipe_winsys *pipe_winsys = screen->winsys; struct nouveau_winsys *nvws = nv30_screen(screen)->nvws; diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 084829ce281..203c843a013 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -44,7 +44,7 @@ nv40_destroy(struct pipe_context *pipe) } struct pipe_context * -nv40_create(struct pipe_screen *pscreen) +nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) { struct nv40_screen *screen = nv40_screen(pscreen); struct pipe_winsys *ws = pscreen->winsys; @@ -56,6 +56,7 @@ nv40_create(struct pipe_screen *pscreen) if (!nv40) return NULL; nv40->screen = screen; + nv40->pctx_id = pctx_id; nv40->chipset = chipset; nv40->nvws = nvws; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 3b669594dc3..e118776306b 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -108,6 +108,7 @@ struct nv40_context { struct nouveau_winsys *nvws; struct nv40_screen *screen; + unsigned pctx_id; struct draw_context *draw; diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h index 9f9668dbb6b..3ea78aadfd9 100644 --- a/src/gallium/drivers/nv40/nv40_screen.h +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -9,6 +9,8 @@ struct nv40_screen { struct nouveau_winsys *nvws; unsigned chipset; + unsigned cur_pctx; + /* HW graphics objects */ struct nouveau_grobj *curie; struct nouveau_notifier *sync; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 221503617c3..a95e2472e2b 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -62,8 +62,18 @@ static void nv40_state_emit(struct nv40_context *nv40) { struct nv40_state *state = &nv40->state; + struct nv40_screen *screen = nv40->screen; unsigned i, samplers; + if (nv40->pctx_id != screen->cur_pctx) { + for (i = 0; i < NV40_STATE_MAX; i++) { + if (screen->state[i] != state->hw[i] && state->hw[i]) + state->dirty |= (1ULL << i); + } + + screen->cur_pctx = nv40->pctx_id; + } + while (state->dirty) { unsigned idx = ffsll(state->dirty) - 1; diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index e5054e34f6b..c937b8de6d4 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -56,7 +56,7 @@ nv50_init_hwctx(struct nv50_context *nv50, int tesla_class) #define GRCLASS5097_CHIPSETS 0x00000000 #define GRCLASS8297_CHIPSETS 0x00000010 struct pipe_context * -nv50_create(struct pipe_screen *pscreen) +nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) { struct pipe_winsys *pipe_winsys = pscreen->winsys; struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c index 7915afae226..dc852c9f498 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.c @@ -4,6 +4,7 @@ #include "utils.h" #include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" #include "pipe/p_defines.h" #include "pipe/p_context.h" @@ -101,7 +102,8 @@ nouveau_context_create(const __GLcontextModes *glVis, struct nouveau_device_priv *nvdev; struct pipe_context *pipe = NULL; struct st_context *st_share = NULL; - int ret; + struct nouveau_channel_context *nvc = NULL; + int i, ret; if (sharedContextPrivate) { st_share = ((struct nouveau_context *)sharedContextPrivate)->st; @@ -163,12 +165,56 @@ nouveau_context_create(const __GLcontextModes *glVis, nv->frontbuffer = fb_surf; } - nv->nvc = nouveau_channel_context_create(&nvdev->base, nv->chipset); - if (!nv->nvc) { - NOUVEAU_ERR("Failed initialising GPU channel context\n"); - return GL_FALSE; + /* Attempt to share a single channel between multiple contexts from + * a single process. + */ + nvc = nv_screen->nvc; + if (!nvc && st_share) { + struct nouveau_context *snv = st_share->pipe->priv; + if (snv) { + nvc = snv->nvc; + } + } + + /*XXX: temporary - disable multi-context/single-channel on non-NV4x */ + switch (nv->chipset & 0xf0) { + case 0x40: + case 0x60: + break; + default: + nvc = NULL; + break; + } + + if (!nvc) { + nvc = nouveau_channel_context_create(&nvdev->base, nv->chipset); + if (!nvc) { + NOUVEAU_ERR("Failed initialising GPU context\n"); + return GL_FALSE; + } + nv_screen->nvc = nvc; + } + + nvc->refcount++; + nv->nvc = nvc; + + /* Find a free slot for a pipe context, allocate a new one if needed */ + nv->pctx_id = -1; + for (i = 0; i < nvc->nr_pctx; i++) { + if (nvc->pctx[i] == NULL) { + nv->pctx_id = i; + break; + } } + if (nv->pctx_id < 0) { + nv->pctx_id = nvc->nr_pctx++; + nvc->pctx = + realloc(nvc->pctx, + sizeof(struct pipe_context *) * nvc->nr_pctx); + } + + /* Create pipe */ if (nv->chipset < 0x50) ret = nouveau_surface_init_nv04(nv); else @@ -201,13 +247,18 @@ void nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) { struct nouveau_context *nv = driContextPriv->driverPrivate; + struct nouveau_channel_context *nvc = nv->nvc; assert(nv); st_flush(nv->st, PIPE_FLUSH_WAIT); st_destroy_context(nv->st); - nouveau_channel_context_destroy(nv->nvc); + if (nv->pctx_id >= 0) { + nvc->pctx[nv->pctx_id] = NULL; + if (--nvc->refcount <= 0) + nouveau_channel_context_destroy(nvc); + } free(nv); } diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.h b/src/gallium/winsys/dri/nouveau/nouveau_context.h index 736c8d8bef3..92f551855a4 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.h @@ -14,6 +14,13 @@ struct nouveau_framebuffer { }; struct nouveau_channel_context { + struct pipe_screen *pscreen; + int refcount; + + unsigned cur_pctx; + unsigned nr_pctx; + struct pipe_context **pctx; + unsigned chipset; struct nouveau_channel *channel; @@ -51,6 +58,7 @@ struct nouveau_context { /* Hardware context */ struct nouveau_channel_context *nvc; + int pctx_id; /* pipe_surface accel */ struct pipe_surface *surf_src, *surf_dst; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.h b/src/gallium/winsys/dri/nouveau/nouveau_screen.h index 019823bd44d..e9da2026904 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_screen.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_screen.h @@ -14,6 +14,8 @@ struct nouveau_screen { uint32_t front_pitch; uint32_t front_cpp; uint32_t front_height; + + void *nvc; }; #endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c index 50d7549b1b7..87619bdcfb4 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c @@ -70,11 +70,12 @@ nouveau_pipe_emit_reloc(struct nouveau_channel *chan, void *ptr, struct pipe_context * nouveau_pipe_create(struct nouveau_context *nv) { + struct nouveau_channel_context *nvc = nv->nvc; struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys); struct pipe_screen *(*hws_create)(struct pipe_winsys *, struct nouveau_winsys *, unsigned chipset); - struct pipe_context *(*hw_create)(struct pipe_screen *); + struct pipe_context *(*hw_create)(struct pipe_screen *, unsigned); struct pipe_winsys *ws; struct pipe_screen *pscreen; @@ -125,7 +126,10 @@ nouveau_pipe_create(struct nouveau_context *nv) nvws->surface_fill = nouveau_pipe_surface_fill; ws = nouveau_create_pipe_winsys(nv); - pscreen = hws_create(ws, nvws, nv->chipset); - return hw_create(pscreen); + + if (!nvc->pscreen) + nvc->pscreen = hws_create(ws, nvws, nv->chipset); + nvc->pctx[nv->pctx_id] = hw_create(nvc->pscreen, nv->pctx_id); + return nvc->pctx[nv->pctx_id]; } -- cgit v1.2.3 From 59d4b7cc626704dbbd9c817019ec2dd9183322ad Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 2 Mar 2008 15:28:24 +1100 Subject: nv40: fix segv when app "skips" texture units. --- src/gallium/drivers/nv40/nv40_state_emit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index a95e2472e2b..9f268640e00 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -67,7 +67,7 @@ nv40_state_emit(struct nv40_context *nv40) if (nv40->pctx_id != screen->cur_pctx) { for (i = 0; i < NV40_STATE_MAX; i++) { - if (screen->state[i] != state->hw[i] && state->hw[i]) + if (state->hw[i] && screen->state[i] != state->hw[i]) state->dirty |= (1ULL << i); } @@ -84,6 +84,8 @@ nv40_state_emit(struct nv40_context *nv40) so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]); for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGTEX0+i]); samplers &= ~(1ULL << i); -- cgit v1.2.3 From 57b8711aebdce9bc21bf3311c50dbfb0f9ad6d42 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 2 Mar 2008 16:48:15 +1100 Subject: nv40: nuke debug --- src/gallium/drivers/nv40/nv40_fragprog.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 3c4ea7e99ed..d981a02a639 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -416,8 +416,6 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, ai = fsrc->SrcRegister.Index; src[i] = tgsi_src(fpc, fsrc); } else { - NOUVEAU_MSG("extra src attr %d\n", - fsrc->SrcRegister.Index); src[i] = temp(fpc); arith(fpc, 0, MOV, src[i], MASK_ALL, tgsi_src(fpc, fsrc), none, none); -- cgit v1.2.3 From 1de15ad83e5a6902ac57212a3df63bb9b829bc20 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 3 Mar 2008 00:01:44 +1100 Subject: nv40: re-do vtxbuf format code --- src/gallium/drivers/nv40/nv40_vbo.c | 79 +++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 29 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index bedc8c6d4ef..f16afc23b84 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -9,34 +9,53 @@ #include "nouveau/nouveau_pushbuf.h" static INLINE int -nv40_vbo_ncomp(uint format) +nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) { - int ncomp = 0; - - if (pf_size_x(format)) ncomp++; - if (pf_size_y(format)) ncomp++; - if (pf_size_z(format)) ncomp++; - if (pf_size_w(format)) ncomp++; - - return ncomp; -} - -static INLINE int -nv40_vbo_type(uint format) -{ - switch (pf_type(format)) { - case PIPE_FORMAT_TYPE_FLOAT: - return NV40TCL_VTXFMT_TYPE_FLOAT; - case PIPE_FORMAT_TYPE_UNORM: - return NV40TCL_VTXFMT_TYPE_UBYTE; + char fs[128]; + + switch (pipe) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *fmt = NV40TCL_VTXFMT_TYPE_FLOAT; + break; + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + *fmt = NV40TCL_VTXFMT_TYPE_UBYTE; + break; default: - { - char fs[128]; - pf_sprint_name(fs, format); + pf_sprint_name(fs, pipe); NOUVEAU_ERR("Unknown format %s\n", fs); - return NV40TCL_VTXFMT_TYPE_FLOAT; + return 1; } + + switch (pipe) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32_FLOAT: + *ncomp = 1; + break; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + *ncomp = 2; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + *ncomp = 3; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *ncomp = 4; + break; + default: + pf_sprint_name(fs, pipe); + NOUVEAU_ERR("Unknown format %s\n", fs); + return 1; } + + return 0; } static boolean @@ -82,11 +101,11 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib, struct pipe_vertex_buffer *vb) { struct pipe_winsys *ws = nv40->pipe.winsys; - int type, ncomp; + unsigned type, ncomp; void *map; - type = nv40_vbo_type(ve->src_format); - ncomp = nv40_vbo_ncomp(ve->src_format); + if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + return FALSE; map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); map += vb->buffer_offset + ve->src_offset; @@ -373,6 +392,7 @@ nv40_vbo_validate(struct nv40_context *nv40) for (hw = 0; hw < num_hw; hw++) { struct pipe_vertex_element *ve; struct pipe_vertex_buffer *vb; + unsigned type, ncomp; if (!(inputs & (1 << hw))) { so_data(vtxbuf, 0); @@ -389,13 +409,14 @@ nv40_vbo_validate(struct nv40_context *nv40) continue; } + if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + assert(0); + so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, 0, NV40TCL_VTXBUF_ADDRESS_DMA1); so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) | - (nv40_vbo_ncomp(ve->src_format) << - NV40TCL_VTXFMT_SIZE_SHIFT) | - nv40_vbo_type(ve->src_format))); + (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type)); } if (ib) { -- cgit v1.2.3 From 4528287e040415c2071012d02f20979ff995c754 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Wed, 5 Mar 2008 10:50:14 +0100 Subject: gallium: michel's patch to rework texture/sampler binding interface Bind all the samplers/textures at once rather than piecemeal. This is easier for drivers to understand. --- src/gallium/auxiliary/draw/draw_aaline.c | 54 +- src/gallium/auxiliary/draw/draw_pstipple.c | 51 +- src/gallium/drivers/failover/fo_context.h | 7 +- src/gallium/drivers/failover/fo_state.c | 59 +- src/gallium/drivers/failover/fo_state_emit.c | 18 +- src/gallium/drivers/i915simple/i915_context.h | 675 +++++----- src/gallium/drivers/i915simple/i915_state.c | 46 +- src/gallium/drivers/i915simple/i915_state_emit.c | 6 - .../drivers/i915simple/i915_state_sampler.c | 6 +- src/gallium/drivers/i965simple/brw_context.h | 1365 ++++++++++---------- src/gallium/drivers/i965simple/brw_state.c | 896 ++++++------- .../drivers/i965simple/brw_wm_sampler_state.c | 3 +- .../drivers/i965simple/brw_wm_surface_state.c | 2 +- src/gallium/drivers/softpipe/sp_context.c | 486 +++---- src/gallium/drivers/softpipe/sp_context.h | 3 + src/gallium/drivers/softpipe/sp_quad_fs.c | 417 +++--- src/gallium/drivers/softpipe/sp_state.h | 390 +++--- src/gallium/drivers/softpipe/sp_state_sampler.c | 56 +- src/gallium/drivers/softpipe/sp_texture.c | 404 +++--- src/gallium/include/pipe/p_context.h | 8 +- src/mesa/state_tracker/st_atom_sampler.c | 16 +- src/mesa/state_tracker/st_atom_texture.c | 15 +- src/mesa/state_tracker/st_cb_drawpixels.c | 11 +- src/mesa/state_tracker/st_context.h | 5 +- src/mesa/state_tracker/st_gen_mipmap.c | 11 +- 25 files changed, 2561 insertions(+), 2449 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 7660e56fe66..3ec73b0800c 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -78,7 +78,8 @@ struct aaline_stage void *sampler_cso; struct pipe_texture *texture; - uint sampler_unit; + uint num_samplers; + uint num_textures; /* @@ -98,11 +99,10 @@ struct aaline_stage void (*driver_bind_fs_state)(struct pipe_context *, void *); void (*driver_delete_fs_state)(struct pipe_context *, void *); - void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); - - void (*driver_set_sampler_texture)(struct pipe_context *, - unsigned sampler, - struct pipe_texture *); + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, + void **); + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); struct pipe_context *pipe; }; @@ -607,6 +607,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) auto struct aaline_stage *aaline = aaline_stage(stage); struct draw_context *draw = stage->draw; struct pipe_context *pipe = aaline->pipe; + uint num = MAX2(aaline->num_textures, aaline->num_samplers); assert(draw->rasterizer->line_smooth); @@ -624,8 +625,11 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) */ bind_aaline_fragment_shader(aaline); - aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, aaline->sampler_cso); - aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, aaline->texture); + aaline->state.sampler[num] = aaline->sampler_cso; + aaline->state.texture[num] = aaline->texture; + + aaline->driver_bind_sampler_states(pipe, num + 1, aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, num + 1, aaline->state.texture); /* now really draw first line */ stage->line = aaline_line; @@ -647,10 +651,10 @@ aaline_flush(struct draw_stage *stage, unsigned flags) aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); /* XXX restore original texture, sampler state */ - aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, - aaline->state.sampler[aaline->sampler_unit]); - aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, - aaline->state.texture[aaline->sampler_unit]); + aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, + aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, aaline->num_textures, + aaline->state.texture); draw->extra_vp_outputs.slot = 0; } @@ -745,26 +749,28 @@ aaline_delete_fs_state(struct pipe_context *pipe, void *fs) static void -aaline_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +aaline_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); /* save current */ - aaline->state.sampler[unit] = sampler; + memcpy(aaline->state.sampler, sampler, num * sizeof(void *)); + aaline->num_samplers = num; /* pass-through */ - aaline->driver_bind_sampler_state(aaline->pipe, unit, sampler); + aaline->driver_bind_sampler_states(aaline->pipe, num, sampler); } static void -aaline_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, struct pipe_texture *texture) +aaline_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); /* save current */ - aaline->state.texture[sampler] = texture; + memcpy(aaline->state.texture, texture, num * sizeof(struct pipe_texture *)); + aaline->num_textures = num; /* pass-through */ - aaline->driver_set_sampler_texture(aaline->pipe, sampler, texture); + aaline->driver_set_sampler_textures(aaline->pipe, num, texture); } @@ -798,14 +804,14 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) aaline->driver_bind_fs_state = pipe->bind_fs_state; aaline->driver_delete_fs_state = pipe->delete_fs_state; - aaline->driver_bind_sampler_state = pipe->bind_sampler_state; - aaline->driver_set_sampler_texture = pipe->set_sampler_texture; + aaline->driver_bind_sampler_states = pipe->bind_sampler_states; + aaline->driver_set_sampler_textures = pipe->set_sampler_textures; /* override the driver's functions */ pipe->create_fs_state = aaline_create_fs_state; pipe->bind_fs_state = aaline_bind_fs_state; pipe->delete_fs_state = aaline_delete_fs_state; - pipe->bind_sampler_state = aaline_bind_sampler_state; - pipe->set_sampler_texture = aaline_set_sampler_texture; + pipe->bind_sampler_states = aaline_bind_sampler_states; + pipe->set_sampler_textures = aaline_set_sampler_textures; } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 2cfeb813b30..894b136f2c3 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -68,7 +68,8 @@ struct pstip_stage void *sampler_cso; struct pipe_texture *texture; - uint sampler_unit; + uint num_samplers; + uint num_textures; /* * Currently bound state @@ -88,11 +89,10 @@ struct pstip_stage void (*driver_bind_fs_state)(struct pipe_context *, void *); void (*driver_delete_fs_state)(struct pipe_context *, void *); - void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); - void (*driver_set_sampler_texture)(struct pipe_context *, - unsigned sampler, - struct pipe_texture *); + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); void (*driver_set_polygon_stipple)(struct pipe_context *, const struct pipe_poly_stipple *); @@ -328,8 +328,6 @@ generate_pstip_fs(struct pstip_stage *pstip) tgsi_dump(pstip_fs.tokens, 0); #endif - pstip->sampler_unit = transform.maxSampler + 1; - #if 1 /* XXX remove */ if (transform.wincoordInput < 0) { pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; @@ -486,6 +484,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) struct pstip_stage *pstip = pstip_stage(stage); struct draw_context *draw = stage->draw; struct pipe_context *pipe = pstip->pipe; + uint num = MAX2(pstip->num_textures, pstip->num_samplers); assert(draw->rasterizer->poly_stipple_enable); @@ -494,8 +493,8 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) */ bind_pstip_fragment_shader(pstip); - pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, pstip->sampler_cso); - pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, pstip->texture); + pstip->driver_bind_sampler_states(pipe, num + 1, pstip->state.sampler); + pstip->driver_set_sampler_textures(pipe, num + 1, pstip->state.texture); /* now really draw first line */ stage->tri = passthrough_tri; @@ -517,10 +516,10 @@ pstip_flush(struct draw_stage *stage, unsigned flags) pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); /* XXX restore original texture, sampler state */ - pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, - pstip->state.sampler[pstip->sampler_unit]); - pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, - pstip->state.texture[pstip->sampler_unit]); + pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, + pstip->state.sampler); + pstip->driver_set_sampler_textures(pipe, pstip->num_textures, + pstip->state.texture); } @@ -613,26 +612,28 @@ pstip_delete_fs_state(struct pipe_context *pipe, void *fs) static void -pstip_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +pstip_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); /* save current */ - pstip->state.sampler[unit] = sampler; + memcpy(pstip->state.sampler, sampler, num * sizeof(void *)); + pstip->num_samplers = num; /* pass-through */ - pstip->driver_bind_sampler_state(pstip->pipe, unit, sampler); + pstip->driver_bind_sampler_states(pstip->pipe, num, sampler); } static void -pstip_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, struct pipe_texture *texture) +pstip_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); /* save current */ - pstip->state.texture[sampler] = texture; + memcpy(pstip->state.texture, texture, num * sizeof(struct pipe_texture *)); + pstip->num_textures = num; /* pass-through */ - pstip->driver_set_sampler_texture(pstip->pipe, sampler, texture); + pstip->driver_set_sampler_textures(pstip->pipe, num, texture); } @@ -682,8 +683,8 @@ draw_install_pstipple_stage(struct draw_context *draw, pstip->driver_bind_fs_state = pipe->bind_fs_state; pstip->driver_delete_fs_state = pipe->delete_fs_state; - pstip->driver_bind_sampler_state = pipe->bind_sampler_state; - pstip->driver_set_sampler_texture = pipe->set_sampler_texture; + pstip->driver_bind_sampler_states = pipe->bind_sampler_states; + pstip->driver_set_sampler_textures = pipe->set_sampler_textures; pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; /* override the driver's functions */ @@ -691,7 +692,7 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->bind_fs_state = pstip_bind_fs_state; pipe->delete_fs_state = pstip_delete_fs_state; - pipe->bind_sampler_state = pstip_bind_sampler_state; - pipe->set_sampler_texture = pstip_set_sampler_texture; + pipe->bind_sampler_states = pstip_bind_sampler_states; + pipe->set_sampler_textures = pstip_set_sampler_textures; pipe->set_polygon_stipple = pstip_set_polygon_stipple; } diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 1dc87291c9f..8f3ad3ee798 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -87,12 +87,15 @@ struct failover_context { struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + void *sw_sampler_state[PIPE_MAX_SAMPLERS]; + void *hw_sampler_state[PIPE_MAX_SAMPLERS]; + unsigned dirty; - unsigned dirty_sampler; - unsigned dirty_texture; unsigned dirty_vertex_buffer; unsigned dirty_vertex_element; + unsigned num_samplers; + unsigned num_textures; unsigned mode; struct pipe_context *hw; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 0fc5568da11..11eec2714eb 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -28,6 +28,8 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ +#include "pipe/p_inlines.h" + #include "fo_context.h" @@ -322,18 +324,27 @@ failover_create_sampler_state(struct pipe_context *pipe, } static void -failover_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +failover_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct failover_context *failover = failover_context(pipe); struct fo_state *state = (struct fo_state*)sampler; - failover->sampler[unit] = state; + uint i; + assert(num <= PIPE_MAX_SAMPLERS); + /* Check for no-op */ + if (num == failover->num_samplers && + !memcmp(failover->sampler, sampler, num * sizeof(void *))) + return; + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + failover->sw_sampler_state[i] = i < num ? state[i].sw_state : NULL; + failover->hw_sampler_state[i] = i < num ? state[i].hw_state : NULL; + } failover->dirty |= FO_NEW_SAMPLER; - failover->dirty_sampler |= (1<<unit); - failover->sw->bind_sampler_state(failover->sw, unit, - state->sw_state); - failover->hw->bind_sampler_state(failover->hw, unit, - state->hw_state); + failover->num_samplers = num; + failover->sw->bind_sampler_states(failover->sw, num, + failover->sw_sampler_state); + failover->hw->bind_sampler_states(failover->hw, num, + failover->hw_sampler_state); } static void @@ -351,17 +362,29 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) static void -failover_set_sampler_texture(struct pipe_context *pipe, - unsigned unit, - struct pipe_texture *texture) +failover_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) { struct failover_context *failover = failover_context(pipe); - - failover->texture[unit] = texture; + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == failover->num_textures && + !memcmp(failover->texture, texture, num * sizeof(struct pipe_texture *))) + return; + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &failover->texture[i], + texture[i]); + for (i = num; i < failover->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &failover->texture[i], + NULL); failover->dirty |= FO_NEW_TEXTURE; - failover->dirty_texture |= (1<<unit); - failover->sw->set_sampler_texture( failover->sw, unit, texture ); - failover->hw->set_sampler_texture( failover->hw, unit, texture ); + failover->num_textures = num; + failover->sw->set_sampler_textures( failover->sw, num, texture ); + failover->hw->set_sampler_textures( failover->hw, num, texture ); } @@ -429,7 +452,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.bind_blend_state = failover_bind_blend_state; failover->pipe.delete_blend_state = failover_delete_blend_state; failover->pipe.create_sampler_state = failover_create_sampler_state; - failover->pipe.bind_sampler_state = failover_bind_sampler_state; + failover->pipe.bind_sampler_states = failover_bind_sampler_states; failover->pipe.delete_sampler_state = failover_delete_sampler_state; failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; @@ -449,7 +472,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; failover->pipe.set_scissor_state = failover_set_scissor_state; - failover->pipe.set_sampler_texture = failover_set_sampler_texture; + failover->pipe.set_sampler_textures = failover_set_sampler_textures; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffer = failover_set_vertex_buffer; failover->pipe.set_vertex_element = failover_set_vertex_element; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index c663dd49476..3de931e04eb 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -94,21 +94,13 @@ failover_state_emit( struct failover_context *failover ) failover->sw->set_viewport_state( failover->sw, &failover->viewport ); if (failover->dirty & FO_NEW_SAMPLER) { - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - if (failover->dirty_sampler & (1<<i)) { - failover->sw->bind_sampler_state( failover->sw, i, - failover->sampler[i]->sw_state ); - } - } + failover->sw->bind_sampler_states( failover->sw, failover->num_samplers, + failover->sw_sampler_state ); } if (failover->dirty & FO_NEW_TEXTURE) { - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - if (failover->dirty_texture & (1<<i)) { - failover->sw->set_sampler_texture( failover->sw, i, - failover->texture[i] ); - } - } + failover->sw->set_sampler_textures( failover->sw, failover->num_textures, + failover->texture ); } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { @@ -132,6 +124,4 @@ failover_state_emit( struct failover_context *failover ) failover->dirty = 0; failover->dirty_vertex_element = 0; failover->dirty_vertex_buffer = 0; - failover->dirty_texture = 0; - failover->dirty_sampler = 0; } diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 6401112f835..746f18ba38d 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -1,336 +1,339 @@ - /************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_CONTEXT_H -#define I915_CONTEXT_H - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "draw/draw_vertex.h" - -#include "tgsi/util/tgsi_scan.h" - - -#define I915_TEX_UNITS 8 - -#define I915_DYNAMIC_MODES4 0 -#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ -#define I915_DYNAMIC_DEPTHSCALE_1 2 -#define I915_DYNAMIC_IAB 3 -#define I915_DYNAMIC_BC_0 4 /* just the header */ -#define I915_DYNAMIC_BC_1 5 -#define I915_DYNAMIC_BFO_0 6 -#define I915_DYNAMIC_BFO_1 7 -#define I915_DYNAMIC_STP_0 8 -#define I915_DYNAMIC_STP_1 9 -#define I915_DYNAMIC_SC_ENA_0 10 -#define I915_DYNAMIC_SC_RECT_0 11 -#define I915_DYNAMIC_SC_RECT_1 12 -#define I915_DYNAMIC_SC_RECT_2 13 -#define I915_MAX_DYNAMIC 14 - - -#define I915_IMMEDIATE_S0 0 -#define I915_IMMEDIATE_S1 1 -#define I915_IMMEDIATE_S2 2 -#define I915_IMMEDIATE_S3 3 -#define I915_IMMEDIATE_S4 4 -#define I915_IMMEDIATE_S5 5 -#define I915_IMMEDIATE_S6 6 -#define I915_IMMEDIATE_S7 7 -#define I915_MAX_IMMEDIATE 8 - -/* These must mach the order of LI0_STATE_* bits, as they will be used - * to generate hardware packets: - */ -#define I915_CACHE_STATIC 0 -#define I915_CACHE_DYNAMIC 1 /* handled specially */ -#define I915_CACHE_SAMPLER 2 -#define I915_CACHE_MAP 3 -#define I915_CACHE_PROGRAM 4 -#define I915_CACHE_CONSTANTS 5 -#define I915_MAX_CACHE 6 - -#define I915_MAX_CONSTANT 32 - - -/** See constant_flags[] below */ -#define I915_CONSTFLAG_USER 0x1f - - -/** - * Subclass of pipe_shader_state - */ -struct i915_fragment_shader -{ - struct pipe_shader_state state; - - struct tgsi_shader_info info; - - uint *program; - uint program_len; - - /** - * constants introduced during translation. - * These are placed at the end of the constant buffer and grow toward - * the beginning (eg: slot 31, 30 29, ...) - * User-provided constants start at 0. - * This allows both types of constants to co-exist (until there's too many) - * and doesn't require regenerating/changing the fragment program to - * shuffle constants around. - */ - uint num_constants; - float constants[I915_MAX_CONSTANT][4]; - - /** - * Status of each constant - * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding - * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) - * Else, the bitmask indicates which components are occupied by immediates. - */ - ubyte constant_flags[I915_MAX_CONSTANT]; -}; - - -struct i915_cache_context; - -/* Use to calculate differences between state emitted to hardware and - * current driver-calculated state. - */ -struct i915_state -{ - unsigned immediate[I915_MAX_IMMEDIATE]; - unsigned dynamic[I915_MAX_DYNAMIC]; - - float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; - /** number of constants passed in through a constant buffer */ - uint num_user_constants[PIPE_SHADER_TYPES]; - - /* texture sampler state */ - unsigned sampler[I915_TEX_UNITS][3]; - unsigned sampler_enable_flags; - unsigned sampler_enable_nr; - - /* texture image buffers */ - unsigned texbuffer[I915_TEX_UNITS][2]; - - /** Describes the current hardware vertex layout */ - struct vertex_info vertex_info; - - unsigned id; /* track lost context events */ -}; - -struct i915_blend_state { - unsigned iab; - unsigned modes4; - unsigned LIS5; - unsigned LIS6; -}; - -struct i915_depth_stencil_state { - unsigned stencil_modes4; - unsigned bfo[2]; - unsigned stencil_LIS5; - unsigned depth_LIS6; -}; - -struct i915_rasterizer_state { - int light_twoside : 1; - unsigned st; - enum interp_mode color_interp; - - unsigned LIS4; - unsigned LIS7; - unsigned sc[1]; - - const struct pipe_rasterizer_state *templ; - - union { float f; unsigned u; } ds[2]; -}; - -struct i915_sampler_state { - unsigned state[3]; - const struct pipe_sampler_state *templ; -}; - - -struct i915_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -struct i915_context -{ - struct pipe_context pipe; - struct i915_winsys *winsys; - struct draw_context *draw; - - /* The most recent drawing state as set by the driver: - */ - const struct i915_blend_state *blend; - const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - const struct i915_depth_stencil_state *depth_stencil; - const struct i915_rasterizer_state *rasterizer; - - struct i915_fragment_shader *fs; - - struct pipe_blend_color blend_color; - struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; - struct pipe_framebuffer_state framebuffer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct i915_texture *texture[PIPE_MAX_SAMPLERS]; - struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - - unsigned dirty; - - unsigned *batch_start; - - /** Vertex buffer */ - struct pipe_buffer *vbo; - - struct i915_state current; - unsigned hardware_dirty; - - unsigned debug; -}; - -/* A flag for each state_tracker state object: - */ -#define I915_NEW_VIEWPORT 0x1 -#define I915_NEW_RASTERIZER 0x2 -#define I915_NEW_FS 0x4 -#define I915_NEW_BLEND 0x8 -#define I915_NEW_CLIP 0x10 -#define I915_NEW_SCISSOR 0x20 -#define I915_NEW_STIPPLE 0x40 -#define I915_NEW_FRAMEBUFFER 0x80 -#define I915_NEW_ALPHA_TEST 0x100 -#define I915_NEW_DEPTH_STENCIL 0x200 -#define I915_NEW_SAMPLER 0x400 -#define I915_NEW_TEXTURE 0x800 -#define I915_NEW_CONSTANTS 0x1000 -#define I915_NEW_VBO 0x2000 -#define I915_NEW_VS 0x4000 - - -/* Driver's internally generated state flags: - */ -#define I915_NEW_VERTEX_FORMAT 0x10000 - - -/* Dirty flags for hardware emit - */ -#define I915_HW_STATIC (1<<I915_CACHE_STATIC) -#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC) -#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER) -#define I915_HW_MAP (1<<I915_CACHE_MAP) -#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) -#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) -#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) -#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) - - -/*********************************************************************** - * i915_prim_emit.c: - */ -struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); - - -/*********************************************************************** - * i915_prim_vbuf.c: - */ -struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); - - -/*********************************************************************** - * i915_state_emit.c: - */ -void i915_emit_hardware_state(struct i915_context *i915 ); - - - -/*********************************************************************** - * i915_clear.c: - */ -void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue); - - -/*********************************************************************** - * i915_surface.c: - */ -void i915_init_surface_functions( struct i915_context *i915 ); - -void i915_init_state_functions( struct i915_context *i915 ); -void i915_init_flush_functions( struct i915_context *i915 ); -void i915_init_string_functions( struct i915_context *i915 ); - - - - -/*********************************************************************** - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static INLINE struct i915_context * -i915_context( struct pipe_context *pipe ) -{ - return (struct i915_context *)pipe; -} - - - -#endif + /************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_CONTEXT_H +#define I915_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "tgsi/util/tgsi_scan.h" + + +#define I915_TEX_UNITS 8 + +#define I915_DYNAMIC_MODES4 0 +#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ +#define I915_DYNAMIC_DEPTHSCALE_1 2 +#define I915_DYNAMIC_IAB 3 +#define I915_DYNAMIC_BC_0 4 /* just the header */ +#define I915_DYNAMIC_BC_1 5 +#define I915_DYNAMIC_BFO_0 6 +#define I915_DYNAMIC_BFO_1 7 +#define I915_DYNAMIC_STP_0 8 +#define I915_DYNAMIC_STP_1 9 +#define I915_DYNAMIC_SC_ENA_0 10 +#define I915_DYNAMIC_SC_RECT_0 11 +#define I915_DYNAMIC_SC_RECT_1 12 +#define I915_DYNAMIC_SC_RECT_2 13 +#define I915_MAX_DYNAMIC 14 + + +#define I915_IMMEDIATE_S0 0 +#define I915_IMMEDIATE_S1 1 +#define I915_IMMEDIATE_S2 2 +#define I915_IMMEDIATE_S3 3 +#define I915_IMMEDIATE_S4 4 +#define I915_IMMEDIATE_S5 5 +#define I915_IMMEDIATE_S6 6 +#define I915_IMMEDIATE_S7 7 +#define I915_MAX_IMMEDIATE 8 + +/* These must mach the order of LI0_STATE_* bits, as they will be used + * to generate hardware packets: + */ +#define I915_CACHE_STATIC 0 +#define I915_CACHE_DYNAMIC 1 /* handled specially */ +#define I915_CACHE_SAMPLER 2 +#define I915_CACHE_MAP 3 +#define I915_CACHE_PROGRAM 4 +#define I915_CACHE_CONSTANTS 5 +#define I915_MAX_CACHE 6 + +#define I915_MAX_CONSTANT 32 + + +/** See constant_flags[] below */ +#define I915_CONSTFLAG_USER 0x1f + + +/** + * Subclass of pipe_shader_state + */ +struct i915_fragment_shader +{ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + + uint *program; + uint program_len; + + /** + * constants introduced during translation. + * These are placed at the end of the constant buffer and grow toward + * the beginning (eg: slot 31, 30 29, ...) + * User-provided constants start at 0. + * This allows both types of constants to co-exist (until there's too many) + * and doesn't require regenerating/changing the fragment program to + * shuffle constants around. + */ + uint num_constants; + float constants[I915_MAX_CONSTANT][4]; + + /** + * Status of each constant + * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding + * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) + * Else, the bitmask indicates which components are occupied by immediates. + */ + ubyte constant_flags[I915_MAX_CONSTANT]; +}; + + +struct i915_cache_context; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. + */ +struct i915_state +{ + unsigned immediate[I915_MAX_IMMEDIATE]; + unsigned dynamic[I915_MAX_DYNAMIC]; + + float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; + /** number of constants passed in through a constant buffer */ + uint num_user_constants[PIPE_SHADER_TYPES]; + + /* texture sampler state */ + unsigned sampler[I915_TEX_UNITS][3]; + unsigned sampler_enable_flags; + unsigned sampler_enable_nr; + + /* texture image buffers */ + unsigned texbuffer[I915_TEX_UNITS][2]; + + /** Describes the current hardware vertex layout */ + struct vertex_info vertex_info; + + unsigned id; /* track lost context events */ +}; + +struct i915_blend_state { + unsigned iab; + unsigned modes4; + unsigned LIS5; + unsigned LIS6; +}; + +struct i915_depth_stencil_state { + unsigned stencil_modes4; + unsigned bfo[2]; + unsigned stencil_LIS5; + unsigned depth_LIS6; +}; + +struct i915_rasterizer_state { + int light_twoside : 1; + unsigned st; + enum interp_mode color_interp; + + unsigned LIS4; + unsigned LIS7; + unsigned sc[1]; + + const struct pipe_rasterizer_state *templ; + + union { float f; unsigned u; } ds[2]; +}; + +struct i915_sampler_state { + unsigned state[3]; + const struct pipe_sampler_state *templ; +}; + + +struct i915_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +struct i915_context +{ + struct pipe_context pipe; + struct i915_winsys *winsys; + struct draw_context *draw; + + /* The most recent drawing state as set by the driver: + */ + const struct i915_blend_state *blend; + const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct i915_depth_stencil_state *depth_stencil; + const struct i915_rasterizer_state *rasterizer; + + struct i915_fragment_shader *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct i915_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + + unsigned *batch_start; + + /** Vertex buffer */ + struct pipe_buffer *vbo; + + struct i915_state current; + unsigned hardware_dirty; + + unsigned debug; +}; + +/* A flag for each state_tracker state object: + */ +#define I915_NEW_VIEWPORT 0x1 +#define I915_NEW_RASTERIZER 0x2 +#define I915_NEW_FS 0x4 +#define I915_NEW_BLEND 0x8 +#define I915_NEW_CLIP 0x10 +#define I915_NEW_SCISSOR 0x20 +#define I915_NEW_STIPPLE 0x40 +#define I915_NEW_FRAMEBUFFER 0x80 +#define I915_NEW_ALPHA_TEST 0x100 +#define I915_NEW_DEPTH_STENCIL 0x200 +#define I915_NEW_SAMPLER 0x400 +#define I915_NEW_TEXTURE 0x800 +#define I915_NEW_CONSTANTS 0x1000 +#define I915_NEW_VBO 0x2000 +#define I915_NEW_VS 0x4000 + + +/* Driver's internally generated state flags: + */ +#define I915_NEW_VERTEX_FORMAT 0x10000 + + +/* Dirty flags for hardware emit + */ +#define I915_HW_STATIC (1<<I915_CACHE_STATIC) +#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC) +#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER) +#define I915_HW_MAP (1<<I915_CACHE_MAP) +#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) +#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) +#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) +#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) + + +/*********************************************************************** + * i915_prim_emit.c: + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_prim_vbuf.c: + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_state_emit.c: + */ +void i915_emit_hardware_state(struct i915_context *i915 ); + + + +/*********************************************************************** + * i915_clear.c: + */ +void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + +/*********************************************************************** + * i915_surface.c: + */ +void i915_init_surface_functions( struct i915_context *i915 ); + +void i915_init_state_functions( struct i915_context *i915 ); +void i915_init_flush_functions( struct i915_context *i915 ); +void i915_init_string_functions( struct i915_context *i915 ); + + + + +/*********************************************************************** + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct i915_context * +i915_context( struct pipe_context *pipe ) +{ + return (struct i915_context *)pipe; +} + + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 27af46bea03..24143243d3d 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -269,13 +269,22 @@ i915_create_sampler_state(struct pipe_context *pipe, return cso; } -static void i915_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +static void i915_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct i915_context *i915 = i915_context(pipe); - assert(unit < PIPE_MAX_SAMPLERS); - i915->sampler[unit] = (const struct i915_sampler_state*)sampler; + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_samplers && + !memcmp(i915->sampler, sampler, num * sizeof(void *))) + return; + + memcpy(i915->sampler, sampler, num * sizeof(void *)); + memset(&i915->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * sizeof(void *)); + + i915->num_samplers = num; i915->dirty |= I915_NEW_SAMPLER; } @@ -526,14 +535,29 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, } -static void i915_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, - struct pipe_texture *texture) +static void i915_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) { struct i915_context *i915 = i915_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_textures && + !memcmp(i915->texture, texture, num * sizeof(struct pipe_texture *))) + return; + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + texture[i]); + + for (i = num; i < i915->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + NULL); - pipe_texture_reference((struct pipe_texture **) &i915->texture[sampler], - texture); + i915->num_textures = num; i915->dirty |= I915_NEW_TEXTURE; } @@ -691,7 +715,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->pipe.delete_blend_state = i915_delete_blend_state; i915->pipe.create_sampler_state = i915_create_sampler_state; - i915->pipe.bind_sampler_state = i915_bind_sampler_state; + i915->pipe.bind_sampler_states = i915_bind_sampler_states; i915->pipe.delete_sampler_state = i915_delete_sampler_state; i915->pipe.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; @@ -715,7 +739,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->pipe.set_polygon_stipple = i915_set_polygon_stipple; i915->pipe.set_scissor_state = i915_set_scissor_state; - i915->pipe.set_sampler_texture = i915_set_sampler_texture; + i915->pipe.set_sampler_textures = i915_set_sampler_textures; i915->pipe.set_viewport_state = i915_set_viewport_state; i915->pipe.set_vertex_buffer = i915_set_vertex_buffer; i915->pipe.set_vertex_element = i915_set_vertex_element; diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 6bbaac4e34c..a7498d22b7e 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -267,12 +267,6 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) { - /* XXX: we were refering to sampler state - * (current.sampler_enable_nr) below, but only checking - * I915_HW_MAP above. Should probably calculate the enabled - * flags separately - but there will be further rework of - * state so perhaps not necessary yet. - */ const uint nr = i915->current.sampler_enable_nr; if (nr) { const uint enabled = i915->current.sampler_enable_flags; diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 9c1a5bbbd65..9dbb1b1b238 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -106,7 +106,8 @@ void i915_update_samplers( struct i915_context *i915 ) i915->current.sampler_enable_nr = 0; i915->current.sampler_enable_flags = 0x0; - for (unit = 0; unit < I915_TEX_UNITS; unit++) { + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { /* determine unit enable/disable by looking for a bound texture */ /* could also examine the fragment program? */ if (i915->texture[unit]) { @@ -219,7 +220,8 @@ i915_update_textures(struct i915_context *i915) { uint unit; - for (unit = 0; unit < I915_TEX_UNITS; unit++) { + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { /* determine unit enable/disable by looking for a bound texture */ /* could also examine the fragment program? */ if (i915->texture[unit]) { diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 4da3a8cffcf..b83a13c3b6b 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -1,681 +1,684 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRWCONTEXT_INC -#define BRWCONTEXT_INC - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "tgsi/util/tgsi_scan.h" - -#include "brw_structs.h" -#include "brw_winsys.h" - - -/* Glossary: - * - * URB - uniform resource buffer. A mid-sized buffer which is - * partitioned between the fixed function units and used for passing - * values (vertices, primitives, constants) between them. - * - * CURBE - constant URB entry. An urb region (entry) used to hold - * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. - * - * VUE - vertex URB entry. An urb entry holding a vertex and usually - * a vertex header. The header contains control information and - * things like primitive type, Begin/end flags and clip codes. - * - * PUE - primitive URB entry. An urb entry produced by the setup (SF) - * unit holding rasterization and interpolation parameters. - * - * GRF - general register file. One of several register files - * addressable by programmed threads. The inputs (r0, payload, curbe, - * urb) of the thread are preloaded to this area before the thread is - * spawned. The registers are individually 8 dwords wide and suitable - * for general usage. Registers holding thread input values are not - * special and may be overwritten. - * - * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous - * MRF registers. All program output is via these messages. URB - * entries are populated by sending a message to the shared URB - * function containing the new data, together with a control word, - * often an unmodified copy of R0. - * - * R0 - GRF register 0. Typically holds control information used when - * sending messages to other threads. - * - * EU or GEN4 EU: The name of the programmable subsystem of the - * i965 hardware. Threads are executed by the EU, the registers - * described above are part of the EU architecture. - * - * Fixed function units: - * - * CS - Command streamer. Notional first unit, little software - * interaction. Holds the URB entries used for constant data, ie the - * CURBEs. - * - * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of - * this unit is responsible for pulling vertices out of vertex buffers - * in vram and injecting them into the processing pipe as VUEs. If - * enabled, it first passes them to a VS thread which is a good place - * for the driver to implement any active vertex shader. - * - * GS - Geometry Shader. This corresponds to a new DX10 concept. If - * enabled, incoming strips etc are passed to GS threads in individual - * line/triangle/point units. The GS thread may perform arbitary - * computation and emit whatever primtives with whatever vertices it - * chooses. This makes GS an excellent place to implement GL's - * unfilled polygon modes, though of course it is capable of much - * more. Additionally, GS is used to translate away primitives not - * handled by latter units, including Quads and Lineloops. - * - * CS - Clipper. Mesa's clipping algorithms are imported to run on - * this unit. The fixed function part performs cliptesting against - * the 6 fixed clipplanes and makes descisions on whether or not the - * incoming primitive needs to be passed to a thread for clipping. - * User clip planes are handled via cooperation with the VS thread. - * - * SF - Strips Fans or Setup: Triangles are prepared for - * rasterization. Interpolation coefficients are calculated. - * Flatshading and two-side lighting usually performed here. - * - * WM - Windower. Interpolation of vertex attributes performed here. - * Fragment shader implemented here. SIMD aspects of EU taken full - * advantage of, as pixels are processed in blocks of 16. - * - * CC - Color Calculator. No EU threads associated with this unit. - * Handles blending and (presumably) depth and stencil testing. - */ - -#define BRW_MAX_CURBE (32*16) - -struct brw_context; -struct brw_winsys; - - -/* Raised when we receive new state across the pipe interface: - */ -#define BRW_NEW_VIEWPORT 0x1 -#define BRW_NEW_RASTERIZER 0x2 -#define BRW_NEW_FS 0x4 -#define BRW_NEW_BLEND 0x8 -#define BRW_NEW_CLIP 0x10 -#define BRW_NEW_SCISSOR 0x20 -#define BRW_NEW_STIPPLE 0x40 -#define BRW_NEW_FRAMEBUFFER 0x80 -#define BRW_NEW_ALPHA_TEST 0x100 -#define BRW_NEW_DEPTH_STENCIL 0x200 -#define BRW_NEW_SAMPLER 0x400 -#define BRW_NEW_TEXTURE 0x800 -#define BRW_NEW_CONSTANTS 0x1000 -#define BRW_NEW_VBO 0x2000 -#define BRW_NEW_VS 0x4000 - -/* Raised for other internal events: - */ -#define BRW_NEW_URB_FENCE 0x10000 -#define BRW_NEW_PSP 0x20000 -#define BRW_NEW_CURBE_OFFSETS 0x40000 -#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 -#define BRW_NEW_PRIMITIVE 0x100000 -#define BRW_NEW_SCENE 0x200000 -#define BRW_NEW_SF_LINKAGE 0x400000 - -extern int BRW_DEBUG; - -#define DEBUG_TEXTURE 0x1 -#define DEBUG_STATE 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_PRIMS 0x8 -#define DEBUG_VERTS 0x10 -#define DEBUG_FALLBACKS 0x20 -#define DEBUG_VERBOSE 0x40 -#define DEBUG_DRI 0x80 -#define DEBUG_DMA 0x100 -#define DEBUG_SANITY 0x200 -#define DEBUG_SYNC 0x400 -#define DEBUG_SLEEP 0x800 -#define DEBUG_PIXEL 0x1000 -#define DEBUG_STATS 0x2000 -#define DEBUG_TILE 0x4000 -#define DEBUG_SINGLE_THREAD 0x8000 -#define DEBUG_WM 0x10000 -#define DEBUG_URB 0x20000 -#define DEBUG_VS 0x40000 -#define DEBUG_BATCH 0x80000 -#define DEBUG_BUFMGR 0x100000 -#define DEBUG_BLIT 0x200000 -#define DEBUG_REGION 0x400000 -#define DEBUG_MIPTREE 0x800000 - -#define DBG(...) do { \ - if (BRW_DEBUG & FILE_DEBUG_FLAG) \ - brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ -} while(0) - -#define PRINT(...) do { \ - brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ -} while(0) - -struct brw_state_flags { - unsigned cache; - unsigned brw; -}; - - -struct brw_vertex_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - int id; -}; - - -struct brw_fragment_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - - boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ - int id; -}; - - -struct pipe_setup_linkage { - struct { - unsigned vp_output:5; - unsigned interp_mode:4; - unsigned bf_vp_output:5; - } fp_input[PIPE_MAX_SHADER_INPUTS]; - - unsigned fp_input_count:5; - unsigned max_vp_output:5; -}; - - - -struct brw_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ - -struct brw_wm_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - - unsigned first_curbe_grf; - unsigned total_grf; - unsigned total_scratch; - - /* Internally generated constants for the CURBE. These are loaded - * ahead of the data from the constant buffer. - */ - const float internal_const[8]; - unsigned nr_internal_consts; - unsigned max_const; - - boolean error; -}; - -struct brw_sf_prog_data { - unsigned urb_read_length; - unsigned total_grf; - - /* Each vertex may have upto 12 attributes, 4 components each, - * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 - * rows. - * - * Actually we use 4 for each, so call it 12 rows. - */ - unsigned urb_entry_size; -}; - -struct brw_clip_prog_data { - unsigned curb_read_length; /* user planes? */ - unsigned clip_mode; - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_gs_prog_data { - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_vs_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - unsigned total_grf; - unsigned outputs_written; - - unsigned inputs_read; - - unsigned max_const; - - float imm_buf[PIPE_MAX_CONSTANT][4]; - unsigned num_imm; - unsigned num_consts; - - /* Used for calculating urb partitions: - */ - unsigned urb_entry_size; -}; - - -#define BRW_MAX_TEX_UNIT 8 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 - -/* Create a fixed sized struct for caching binding tables: - */ -struct brw_surface_binding_table { - unsigned surf_ss_offset[BRW_WM_MAX_SURF]; -}; - - -struct brw_cache; - -struct brw_mem_pool { - struct pipe_buffer *buffer; - - unsigned size; - unsigned offset; /* offset of first free byte */ - - struct brw_context *brw; -}; - -struct brw_cache_item { - unsigned hash; - unsigned key_size; /* for variable-sized keys */ - const void *key; - - unsigned offset; /* offset within pool's buffer */ - unsigned data_size; - - struct brw_cache_item *next; -}; - - - -struct brw_cache { - unsigned id; - - const char *name; - - struct brw_context *brw; - struct brw_mem_pool *pool; - - struct brw_cache_item **items; - unsigned size, n_items; - - unsigned key_size; /* for fixed-size keys */ - unsigned aux_size; - - unsigned last_addr; /* offset of active item */ -}; - - - - -/* Considered adding a member to this struct to document which flags - * an update might raise so that ordering of the state atoms can be - * checked or derived at runtime. Dropped the idea in favor of having - * a debug mode where the state is monitored for flags which are - * raised that have already been tested against. - */ -struct brw_tracked_state { - struct brw_state_flags dirty; - void (*update)( struct brw_context *brw ); -}; - - -/* Flags for brw->state.cache. - */ -#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) -#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) -#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) -#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) -#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) -#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) -#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) -#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) -#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) -#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) -#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) -#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) -#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) -#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) -#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) -#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) -#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) -#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) - - - - -enum brw_mempool_id { - BRW_GS_POOL, - BRW_SS_POOL, - BRW_MAX_POOL -}; - - -struct brw_cached_batch_item { - struct header *header; - unsigned sz; - struct brw_cached_batch_item *next; -}; - - - -/* Protect against a future where PIPE_ATTRIB_MAX > 32. Wouldn't life - * be easier if C allowed arrays of packed elements? - */ -#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32) - - - - -struct brw_vertex_info { - unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */ - unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */ -}; - - - - - -struct brw_context -{ - struct pipe_context pipe; - struct brw_winsys *winsys; - - unsigned primitive; - unsigned reduced_primitive; - - boolean emit_state_always; - - struct { - struct brw_state_flags dirty; - } state; - - - struct { - const struct pipe_blend_state *Blend; - const struct pipe_depth_stencil_alpha_state *DepthStencil; - const struct pipe_poly_stipple *PolygonStipple; - const struct pipe_rasterizer_state *Raster; - const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; - const struct brw_vertex_program *VertexProgram; - const struct brw_fragment_program *FragmentProgram; - - struct pipe_clip_state Clip; - struct pipe_blend_color BlendColor; - struct pipe_scissor_state Scissor; - struct pipe_viewport_state Viewport; - struct pipe_framebuffer_state FrameBuffer; - - const struct pipe_constant_buffer *Constants[2]; - const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; - } attribs; - - struct brw_mem_pool pool[BRW_MAX_POOL]; - struct brw_cache cache[BRW_MAX_CACHE]; - struct brw_cached_batch_item *cached_batch_items; - - struct { - - /* Arrays with buffer objects to copy non-bufferobj arrays into - * for upload: - */ - const struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; - - struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; - -#define BRW_NR_UPLOAD_BUFS 17 -#define BRW_UPLOAD_INIT_SIZE (128*1024) - - /* Summary of size and varying of active arrays, so we can check - * for changes to this state: - */ - struct brw_vertex_info info; - } vb; - - - unsigned hardware_dirty; - unsigned dirty; - unsigned pci_id; - /* BRW_NEW_URB_ALLOCATIONS: - */ - struct { - unsigned vsize; /* vertex size plus header in urb registers */ - unsigned csize; /* constant buffer size in urb registers */ - unsigned sfsize; /* setup data size in urb registers */ - - boolean constrained; - - unsigned nr_vs_entries; - unsigned nr_gs_entries; - unsigned nr_clip_entries; - unsigned nr_sf_entries; - unsigned nr_cs_entries; - -/* unsigned vs_size; */ -/* unsigned gs_size; */ -/* unsigned clip_size; */ -/* unsigned sf_size; */ -/* unsigned cs_size; */ - - unsigned vs_start; - unsigned gs_start; - unsigned clip_start; - unsigned sf_start; - unsigned cs_start; - } urb; - - - /* BRW_NEW_CURBE_OFFSETS: - */ - struct { - unsigned wm_start; - unsigned wm_size; - unsigned clip_start; - unsigned clip_size; - unsigned vs_start; - unsigned vs_size; - unsigned total_size; - - unsigned gs_offset; - - float *last_buf; - unsigned last_bufsz; - } curbe; - - struct { - struct brw_vs_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } vs; - - struct { - struct brw_gs_prog_data *prog_data; - - boolean prog_active; - unsigned prog_gs_offset; - unsigned state_gs_offset; - } gs; - - struct { - struct brw_clip_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } clip; - - - struct { - struct brw_sf_prog_data *prog_data; - - struct pipe_setup_linkage linkage; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } sf; - - struct { - struct brw_wm_prog_data *prog_data; - -// struct brw_wm_compiler *compile_data; - - - /** - * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER - * cache - */ - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; - - unsigned render_surf; - unsigned nr_surfaces; - - unsigned max_threads; - struct pipe_buffer *scratch_buffer; - unsigned scratch_buffer_size; - - unsigned sampler_count; - unsigned sampler_gs_offset; - - struct brw_surface_binding_table bind; - unsigned bind_ss_offset; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } wm; - - - struct { - unsigned vp_gs_offset; - unsigned state_gs_offset; - } cc; - - - /* Used to give every program string a unique id - */ - unsigned program_id; -}; - - -#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) - - -/*====================================================================== - * brw_vtbl.c - */ -void brw_do_flush( struct brw_context *brw, - unsigned flags ); - - -/*====================================================================== - * brw_state.c - */ -void brw_validate_state(struct brw_context *brw); -void brw_init_state(struct brw_context *brw); -void brw_destroy_state(struct brw_context *brw); - - -/*====================================================================== - * brw_tex.c - */ -void brwUpdateTextureState( struct brw_context *brw ); - - -/* brw_urb.c - */ -void brw_upload_urb_fence(struct brw_context *brw); - -void brw_upload_constant_buffer_state(struct brw_context *brw); - -void brw_init_surface_functions(struct brw_context *brw); -void brw_init_state_functions(struct brw_context *brw); -void brw_init_flush_functions(struct brw_context *brw); -void brw_init_string_functions(struct brw_context *brw); - -/*====================================================================== - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static inline struct brw_context * -brw_context( struct pipe_context *ctx ) -{ - return (struct brw_context *)ctx; -} - -#endif - +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "tgsi/util/tgsi_scan.h" + +#include "brw_structs.h" +#include "brw_winsys.h" + + +/* Glossary: + * + * URB - uniform resource buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawining a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contigous + * MRF registers. All program output is via these messages. URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitary + * computation and emit whatever primtives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by latter units, including Quads and Lineloops. + * + * CS - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes descisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_MAX_CURBE (32*16) + +struct brw_context; +struct brw_winsys; + + +/* Raised when we receive new state across the pipe interface: + */ +#define BRW_NEW_VIEWPORT 0x1 +#define BRW_NEW_RASTERIZER 0x2 +#define BRW_NEW_FS 0x4 +#define BRW_NEW_BLEND 0x8 +#define BRW_NEW_CLIP 0x10 +#define BRW_NEW_SCISSOR 0x20 +#define BRW_NEW_STIPPLE 0x40 +#define BRW_NEW_FRAMEBUFFER 0x80 +#define BRW_NEW_ALPHA_TEST 0x100 +#define BRW_NEW_DEPTH_STENCIL 0x200 +#define BRW_NEW_SAMPLER 0x400 +#define BRW_NEW_TEXTURE 0x800 +#define BRW_NEW_CONSTANTS 0x1000 +#define BRW_NEW_VBO 0x2000 +#define BRW_NEW_VS 0x4000 + +/* Raised for other internal events: + */ +#define BRW_NEW_URB_FENCE 0x10000 +#define BRW_NEW_PSP 0x20000 +#define BRW_NEW_CURBE_OFFSETS 0x40000 +#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 +#define BRW_NEW_PRIMITIVE 0x100000 +#define BRW_NEW_SCENE 0x200000 +#define BRW_NEW_SF_LINKAGE 0x400000 + +extern int BRW_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_PRIMS 0x8 +#define DEBUG_VERTS 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_DRI 0x80 +#define DEBUG_DMA 0x100 +#define DEBUG_SANITY 0x200 +#define DEBUG_SYNC 0x400 +#define DEBUG_SLEEP 0x800 +#define DEBUG_PIXEL 0x1000 +#define DEBUG_STATS 0x2000 +#define DEBUG_TILE 0x4000 +#define DEBUG_SINGLE_THREAD 0x8000 +#define DEBUG_WM 0x10000 +#define DEBUG_URB 0x20000 +#define DEBUG_VS 0x40000 +#define DEBUG_BATCH 0x80000 +#define DEBUG_BUFMGR 0x100000 +#define DEBUG_BLIT 0x200000 +#define DEBUG_REGION 0x400000 +#define DEBUG_MIPTREE 0x800000 + +#define DBG(...) do { \ + if (BRW_DEBUG & FILE_DEBUG_FLAG) \ + brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ +} while(0) + +#define PRINT(...) do { \ + brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ +} while(0) + +struct brw_state_flags { + unsigned cache; + unsigned brw; +}; + + +struct brw_vertex_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + int id; +}; + + +struct brw_fragment_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + + boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ + int id; +}; + + +struct pipe_setup_linkage { + struct { + unsigned vp_output:5; + unsigned interp_mode:4; + unsigned bf_vp_output:5; + } fp_input[PIPE_MAX_SHADER_INPUTS]; + + unsigned fp_input_count:5; + unsigned max_vp_output:5; +}; + + + +struct brw_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + +struct brw_wm_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + + unsigned first_curbe_grf; + unsigned total_grf; + unsigned total_scratch; + + /* Internally generated constants for the CURBE. These are loaded + * ahead of the data from the constant buffer. + */ + const float internal_const[8]; + unsigned nr_internal_consts; + unsigned max_const; + + boolean error; +}; + +struct brw_sf_prog_data { + unsigned urb_read_length; + unsigned total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. + */ + unsigned urb_entry_size; +}; + +struct brw_clip_prog_data { + unsigned curb_read_length; /* user planes? */ + unsigned clip_mode; + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_gs_prog_data { + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_vs_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + unsigned total_grf; + unsigned outputs_written; + + unsigned inputs_read; + + unsigned max_const; + + float imm_buf[PIPE_MAX_CONSTANT][4]; + unsigned num_imm; + unsigned num_consts; + + /* Used for calculating urb partitions: + */ + unsigned urb_entry_size; +}; + + +#define BRW_MAX_TEX_UNIT 8 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 + +/* Create a fixed sized struct for caching binding tables: + */ +struct brw_surface_binding_table { + unsigned surf_ss_offset[BRW_WM_MAX_SURF]; +}; + + +struct brw_cache; + +struct brw_mem_pool { + struct pipe_buffer *buffer; + + unsigned size; + unsigned offset; /* offset of first free byte */ + + struct brw_context *brw; +}; + +struct brw_cache_item { + unsigned hash; + unsigned key_size; /* for variable-sized keys */ + const void *key; + + unsigned offset; /* offset within pool's buffer */ + unsigned data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + unsigned id; + + const char *name; + + struct brw_context *brw; + struct brw_mem_pool *pool; + + struct brw_cache_item **items; + unsigned size, n_items; + + unsigned key_size; /* for fixed-size keys */ + unsigned aux_size; + + unsigned last_addr; /* offset of active item */ +}; + + + + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime. Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. + */ +struct brw_tracked_state { + struct brw_state_flags dirty; + void (*update)( struct brw_context *brw ); +}; + + +/* Flags for brw->state.cache. + */ +#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) +#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) +#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) +#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) +#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) +#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) +#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) +#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) +#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) +#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) +#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) +#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) +#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) +#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) +#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) +#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) +#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) +#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) + + + + +enum brw_mempool_id { + BRW_GS_POOL, + BRW_SS_POOL, + BRW_MAX_POOL +}; + + +struct brw_cached_batch_item { + struct header *header; + unsigned sz; + struct brw_cached_batch_item *next; +}; + + + +/* Protect against a future where PIPE_ATTRIB_MAX > 32. Wouldn't life + * be easier if C allowed arrays of packed elements? + */ +#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32) + + + + +struct brw_vertex_info { + unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */ + unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */ +}; + + + + + +struct brw_context +{ + struct pipe_context pipe; + struct brw_winsys *winsys; + + unsigned primitive; + unsigned reduced_primitive; + + boolean emit_state_always; + + struct { + struct brw_state_flags dirty; + } state; + + + struct { + const struct pipe_blend_state *Blend; + const struct pipe_depth_stencil_alpha_state *DepthStencil; + const struct pipe_poly_stipple *PolygonStipple; + const struct pipe_rasterizer_state *Raster; + const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; + const struct brw_vertex_program *VertexProgram; + const struct brw_fragment_program *FragmentProgram; + + struct pipe_clip_state Clip; + struct pipe_blend_color BlendColor; + struct pipe_scissor_state Scissor; + struct pipe_viewport_state Viewport; + struct pipe_framebuffer_state FrameBuffer; + + const struct pipe_constant_buffer *Constants[2]; + const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; + } attribs; + + unsigned num_samplers; + unsigned num_textures; + + struct brw_mem_pool pool[BRW_MAX_POOL]; + struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cached_batch_item *cached_batch_items; + + struct { + + /* Arrays with buffer objects to copy non-bufferobj arrays into + * for upload: + */ + const struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; + + struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + + /* Summary of size and varying of active arrays, so we can check + * for changes to this state: + */ + struct brw_vertex_info info; + } vb; + + + unsigned hardware_dirty; + unsigned dirty; + unsigned pci_id; + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + unsigned vsize; /* vertex size plus header in urb registers */ + unsigned csize; /* constant buffer size in urb registers */ + unsigned sfsize; /* setup data size in urb registers */ + + boolean constrained; + + unsigned nr_vs_entries; + unsigned nr_gs_entries; + unsigned nr_clip_entries; + unsigned nr_sf_entries; + unsigned nr_cs_entries; + +/* unsigned vs_size; */ +/* unsigned gs_size; */ +/* unsigned clip_size; */ +/* unsigned sf_size; */ +/* unsigned cs_size; */ + + unsigned vs_start; + unsigned gs_start; + unsigned clip_start; + unsigned sf_start; + unsigned cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + unsigned wm_start; + unsigned wm_size; + unsigned clip_start; + unsigned clip_size; + unsigned vs_start; + unsigned vs_size; + unsigned total_size; + + unsigned gs_offset; + + float *last_buf; + unsigned last_bufsz; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + boolean prog_active; + unsigned prog_gs_offset; + unsigned state_gs_offset; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + struct pipe_setup_linkage linkage; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + +// struct brw_wm_compiler *compile_data; + + + /** + * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER + * cache + */ + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + unsigned render_surf; + unsigned nr_surfaces; + + unsigned max_threads; + struct pipe_buffer *scratch_buffer; + unsigned scratch_buffer_size; + + unsigned sampler_count; + unsigned sampler_gs_offset; + + struct brw_surface_binding_table bind; + unsigned bind_ss_offset; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } wm; + + + struct { + unsigned vp_gs_offset; + unsigned state_gs_offset; + } cc; + + + /* Used to give every program string a unique id + */ + unsigned program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) + + +/*====================================================================== + * brw_vtbl.c + */ +void brw_do_flush( struct brw_context *brw, + unsigned flags ); + + +/*====================================================================== + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + + +/*====================================================================== + * brw_tex.c + */ +void brwUpdateTextureState( struct brw_context *brw ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +void brw_upload_constant_buffer_state(struct brw_context *brw); + +void brw_init_surface_functions(struct brw_context *brw); +void brw_init_state_functions(struct brw_context *brw); +void brw_init_flush_functions(struct brw_context *brw); +void brw_init_string_functions(struct brw_context *brw); + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static inline struct brw_context * +brw_context( struct pipe_context *ctx ) +{ + return (struct brw_context *)ctx; +} + +#endif + diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 6744a8aa4f7..f5efe9fc06d 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -1,434 +1,462 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Zack Rusin <zack@tungstengraphics.com> - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "pipe/p_winsys.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_dump.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_draw.h" - - -#define DUP( TYPE, VAL ) \ -do { \ - struct TYPE *x = malloc(sizeof(*x)); \ - memcpy(x, VAL, sizeof(*x) ); \ - return x; \ -} while (0) - -/************************************************************************ - * Blend - */ -static void * -brw_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - DUP( pipe_blend_state, blend ); -} - -static void brw_bind_blend_state(struct pipe_context *pipe, - void *blend) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Blend = (struct pipe_blend_state*)blend; - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - - -static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - free(blend); -} - -static void brw_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.BlendColor = *blend_color; - - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - -/************************************************************************ - * Sampler - */ - -static void * -brw_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - DUP( pipe_sampler_state, sampler ); -} - -static void brw_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Samplers[unit] = sampler; - brw->state.dirty.brw |= BRW_NEW_SAMPLER; -} - -static void brw_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - free(sampler); -} - - -/************************************************************************ - * Depth stencil - */ - -static void * -brw_create_depth_stencil_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) -{ - DUP( pipe_depth_stencil_alpha_state, depth_stencil ); -} - -static void brw_bind_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; - - brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; -} - -static void brw_delete_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - free(depth_stencil); -} - -/************************************************************************ - * Scissor - */ -static void brw_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct brw_context *brw = brw_context(pipe); - - memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); - brw->state.dirty.brw |= BRW_NEW_SCISSOR; -} - - -/************************************************************************ - * Stipple - */ - -static void brw_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ -} - - -/************************************************************************ - * Fragment shader - */ - -static void * brw_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); - - /* XXX: Do I have to duplicate the tokens as well?? - */ - brw_fp->program = *shader; - brw_fp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_fp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_fp->info2); -#endif - - tgsi_dump(shader->tokens, 0); - - - return (void *)brw_fp; -} - -static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; - brw->state.dirty.brw |= BRW_NEW_FS; -} - -static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) -{ - FREE(shader); -} - - -/************************************************************************ - * Vertex shader and other TNL state - */ - -static void *brw_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); - - /* XXX: Do I have to duplicate the tokens as well?? - */ - brw_vp->program = *shader; - brw_vp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_vp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_vp->info2); -#endif - tgsi_dump(shader->tokens, 0); - - return (void *)brw_vp; -} - -static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; - brw->state.dirty.brw |= BRW_NEW_VS; - - debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); -} - -static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) -{ - FREE(shader); -} - - -static void brw_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Clip = *clip; -} - - -static void brw_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Viewport = *viewport; /* struct copy */ - brw->state.dirty.brw |= BRW_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - //draw_set_viewport_state(brw->draw, viewport); -} - - -static void brw_set_vertex_buffer( struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer ) -{ - struct brw_context *brw = brw_context(pipe); - brw->vb.vbo_array[index] = buffer; -} - -static void brw_set_vertex_element(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *element) -{ - /* flush ? */ - struct brw_context *brw = brw_context(pipe); - - assert(index < PIPE_ATTRIB_MAX); - struct brw_vertex_element_state el; - memset(&el, 0, sizeof(el)); - - el.ve0.src_offset = element->src_offset; - el.ve0.src_format = brw_translate_surface_format(element->src_format); - el.ve0.valid = 1; - el.ve0.vertex_buffer_index = element->vertex_buffer_index; - - el.ve1.dst_offset = index * 4; - - el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; - - switch (element->nr_components) { - case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } - - brw->vb.inputs[index] = el; -} - - - -/************************************************************************ - * Constant buffers - */ - -static void brw_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf) -{ - struct brw_context *brw = brw_context(pipe); - - assert(buf == 0 || index == 0); - - brw->attribs.Constants[shader] = buf; - brw->state.dirty.brw |= BRW_NEW_CONSTANTS; -} - - -/************************************************************************ - * Texture surfaces - */ - - -static void brw_set_sampler_texture(struct pipe_context *pipe, - unsigned unit, - struct pipe_texture *texture) -{ - struct brw_context *brw = brw_context(pipe); - - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[unit], - texture); - - brw->state.dirty.brw |= BRW_NEW_TEXTURE; -} - - -/************************************************************************ - * Render targets, etc - */ - -static void brw_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FrameBuffer = *fb; /* struct copy */ - - brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; -} - - - -/************************************************************************ - * Rasterizer state - */ - -static void * -brw_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - DUP(pipe_rasterizer_state, rasterizer); -} - -static void brw_bind_rasterizer_state( struct pipe_context *pipe, - void *setup ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; - - /* Also pass-through to draw module: - */ - //draw_set_rasterizer_state(brw->draw, setup); - - brw->state.dirty.brw |= BRW_NEW_RASTERIZER; -} - -static void brw_delete_rasterizer_state(struct pipe_context *pipe, - void *setup) -{ - free(setup); -} - - - -void -brw_init_state_functions( struct brw_context *brw ) -{ - brw->pipe.create_blend_state = brw_create_blend_state; - brw->pipe.bind_blend_state = brw_bind_blend_state; - brw->pipe.delete_blend_state = brw_delete_blend_state; - - brw->pipe.create_sampler_state = brw_create_sampler_state; - brw->pipe.bind_sampler_state = brw_bind_sampler_state; - brw->pipe.delete_sampler_state = brw_delete_sampler_state; - - brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; - brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; - brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; - - brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; - brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; - brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; - brw->pipe.create_fs_state = brw_create_fs_state; - brw->pipe.bind_fs_state = brw_bind_fs_state; - brw->pipe.delete_fs_state = brw_delete_fs_state; - brw->pipe.create_vs_state = brw_create_vs_state; - brw->pipe.bind_vs_state = brw_bind_vs_state; - brw->pipe.delete_vs_state = brw_delete_vs_state; - - brw->pipe.set_blend_color = brw_set_blend_color; - brw->pipe.set_clip_state = brw_set_clip_state; - brw->pipe.set_constant_buffer = brw_set_constant_buffer; - brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; - -// brw->pipe.set_feedback_state = brw_set_feedback_state; -// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; - - brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; - brw->pipe.set_scissor_state = brw_set_scissor_state; - brw->pipe.set_sampler_texture = brw_set_sampler_texture; - brw->pipe.set_viewport_state = brw_set_viewport_state; - brw->pipe.set_vertex_buffer = brw_set_vertex_buffer; - brw->pipe.set_vertex_element = brw_set_vertex_element; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Zack Rusin <zack@tungstengraphics.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_dump.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_draw.h" + + +#define DUP( TYPE, VAL ) \ +do { \ + struct TYPE *x = malloc(sizeof(*x)); \ + memcpy(x, VAL, sizeof(*x) ); \ + return x; \ +} while (0) + +/************************************************************************ + * Blend + */ +static void * +brw_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + DUP( pipe_blend_state, blend ); +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Blend = (struct pipe_blend_state*)blend; + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + + +static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + free(blend); +} + +static void brw_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.BlendColor = *blend_color; + + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + +/************************************************************************ + * Sampler + */ + +static void * +brw_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + DUP( pipe_sampler_state, sampler ); +} + +static void brw_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct brw_context *brw = brw_context(pipe); + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_samplers && + !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) + return; + + memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); + memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * + sizeof(void *)); + + brw->num_samplers = num; + + brw->state.dirty.brw |= BRW_NEW_SAMPLER; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + free(sampler); +} + + +/************************************************************************ + * Depth stencil + */ + +static void * +brw_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + DUP( pipe_depth_stencil_alpha_state, depth_stencil ); +} + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + free(depth_stencil); +} + +/************************************************************************ + * Scissor + */ +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); + brw->state.dirty.brw |= BRW_NEW_SCISSOR; +} + + +/************************************************************************ + * Stipple + */ + +static void brw_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + +/************************************************************************ + * Fragment shader + */ + +static void * brw_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); + + /* XXX: Do I have to duplicate the tokens as well?? + */ + brw_fp->program = *shader; + brw_fp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_fp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_fp->info2); +#endif + + tgsi_dump(shader->tokens, 0); + + + return (void *)brw_fp; +} + +static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; + brw->state.dirty.brw |= BRW_NEW_FS; +} + +static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + FREE(shader); +} + + +/************************************************************************ + * Vertex shader and other TNL state + */ + +static void *brw_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); + + /* XXX: Do I have to duplicate the tokens as well?? + */ + brw_vp->program = *shader; + brw_vp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_vp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_vp->info2); +#endif + tgsi_dump(shader->tokens, 0); + + return (void *)brw_vp; +} + +static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; + brw->state.dirty.brw |= BRW_NEW_VS; + + debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); +} + +static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + FREE(shader); +} + + +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Clip = *clip; +} + + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Viewport = *viewport; /* struct copy */ + brw->state.dirty.brw |= BRW_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + //draw_set_viewport_state(brw->draw, viewport); +} + + +static void brw_set_vertex_buffer( struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer ) +{ + struct brw_context *brw = brw_context(pipe); + brw->vb.vbo_array[index] = buffer; +} + +static void brw_set_vertex_element(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *element) +{ + /* flush ? */ + struct brw_context *brw = brw_context(pipe); + + assert(index < PIPE_ATTRIB_MAX); + struct brw_vertex_element_state el; + memset(&el, 0, sizeof(el)); + + el.ve0.src_offset = element->src_offset; + el.ve0.src_format = brw_translate_surface_format(element->src_format); + el.ve0.valid = 1; + el.ve0.vertex_buffer_index = element->vertex_buffer_index; + + el.ve1.dst_offset = index * 4; + + el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + + switch (element->nr_components) { + case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; + case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; + case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + brw->vb.inputs[index] = el; +} + + + +/************************************************************************ + * Constant buffers + */ + +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + + assert(buf == 0 || index == 0); + + brw->attribs.Constants[shader] = buf; + brw->state.dirty.brw |= BRW_NEW_CONSTANTS; +} + + +/************************************************************************ + * Texture surfaces + */ + + +static void brw_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct brw_context *brw = brw_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_textures && + !memcmp(brw->attribs.Texture, texture, num * + sizeof(struct pipe_texture *))) + return; + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + texture[i]); + + for (i = num; i < brw->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + NULL); + + brw->num_textures = num; + + brw->state.dirty.brw |= BRW_NEW_TEXTURE; +} + + +/************************************************************************ + * Render targets, etc + */ + +static void brw_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FrameBuffer = *fb; /* struct copy */ + + brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; +} + + + +/************************************************************************ + * Rasterizer state + */ + +static void * +brw_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + DUP(pipe_rasterizer_state, rasterizer); +} + +static void brw_bind_rasterizer_state( struct pipe_context *pipe, + void *setup ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; + + /* Also pass-through to draw module: + */ + //draw_set_rasterizer_state(brw->draw, setup); + + brw->state.dirty.brw |= BRW_NEW_RASTERIZER; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + free(setup); +} + + + +void +brw_init_state_functions( struct brw_context *brw ) +{ + brw->pipe.create_blend_state = brw_create_blend_state; + brw->pipe.bind_blend_state = brw_bind_blend_state; + brw->pipe.delete_blend_state = brw_delete_blend_state; + + brw->pipe.create_sampler_state = brw_create_sampler_state; + brw->pipe.bind_sampler_states = brw_bind_sampler_states; + brw->pipe.delete_sampler_state = brw_delete_sampler_state; + + brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; + + brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; + brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; + brw->pipe.create_fs_state = brw_create_fs_state; + brw->pipe.bind_fs_state = brw_bind_fs_state; + brw->pipe.delete_fs_state = brw_delete_fs_state; + brw->pipe.create_vs_state = brw_create_vs_state; + brw->pipe.bind_vs_state = brw_bind_vs_state; + brw->pipe.delete_vs_state = brw_delete_vs_state; + + brw->pipe.set_blend_color = brw_set_blend_color; + brw->pipe.set_clip_state = brw_set_clip_state; + brw->pipe.set_constant_buffer = brw_set_constant_buffer; + brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; + +// brw->pipe.set_feedback_state = brw_set_feedback_state; +// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; + + brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; + brw->pipe.set_scissor_state = brw_set_scissor_state; + brw->pipe.set_sampler_textures = brw_set_sampler_textures; + brw->pipe.set_viewport_state = brw_set_viewport_state; + brw->pipe.set_vertex_buffer = brw_set_vertex_buffer; + brw->pipe.set_vertex_element = brw_set_vertex_element; +} diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c index de42ffc5b1e..ff5ba7e7c7a 100644 --- a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -235,7 +235,8 @@ static void upload_wm_samplers(struct brw_context *brw) unsigned sampler_count = 0; /* BRW_NEW_SAMPLER */ - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + for (unit = 0; unit < brw->num_textures && unit < brw->num_samplers; + unit++) { /* determine unit enable/disable by looking for a bound texture */ if (brw->attribs.Texture[unit]) { const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c index d16d919bce9..853c743ccf4 100644 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -237,7 +237,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) /* BRW_NEW_TEXTURE */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + for (i = 0; i < brw->num_textures && i < brw->num_samplers; i++) { const struct brw_texture *texUnit = brw->attribs.Texture[i]; if (texUnit && diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index fa16ed94e8d..316ae552b8e 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -1,243 +1,243 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Author: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "draw/draw_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_util.h" -#include "sp_clear.h" -#include "sp_context.h" -#include "sp_flush.h" -#include "sp_prim_setup.h" -#include "sp_prim_vbuf.h" -#include "sp_state.h" -#include "sp_surface.h" -#include "sp_tile_cache.h" -#include "sp_texture.h" -#include "sp_winsys.h" -#include "sp_query.h" - - - -/** - * Map any drawing surfaces which aren't already mapped - */ -void -softpipe_map_surfaces(struct softpipe_context *sp) -{ - unsigned i; - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { - sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); - } - - sp_tile_cache_map_surfaces(sp->zsbuf_cache); -} - - -/** - * Unmap any mapped drawing surfaces - */ -void -softpipe_unmap_surfaces(struct softpipe_context *sp) -{ - uint i; - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) - sp_flush_tile_cache(sp, sp->cbuf_cache[i]); - sp_flush_tile_cache(sp, sp->zsbuf_cache); - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { - sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); - } - sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); -} - - -static void softpipe_destroy( struct pipe_context *pipe ) -{ - struct softpipe_context *softpipe = softpipe_context( pipe ); - struct pipe_winsys *ws = pipe->winsys; - uint i; - - draw_destroy( softpipe->draw ); - - softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); - softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); - softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); - softpipe->quad.coverage->destroy( softpipe->quad.coverage ); - softpipe->quad.bufloop->destroy( softpipe->quad.bufloop ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); - softpipe->quad.colormask->destroy( softpipe->quad.colormask ); - softpipe->quad.output->destroy( softpipe->quad.output ); - - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - sp_destroy_tile_cache(softpipe->cbuf_cache[i]); - sp_destroy_tile_cache(softpipe->zsbuf_cache); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - sp_destroy_tile_cache(softpipe->tex_cache[i]); - - for (i = 0; i < Elements(softpipe->constants); i++) { - if (softpipe->constants[i].buffer) { - pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); - } - } - - FREE( softpipe ); -} - - -struct pipe_context * -softpipe_create( struct pipe_screen *screen, - struct pipe_winsys *pipe_winsys, - struct softpipe_winsys *softpipe_winsys ) -{ - struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); - uint i; - -#if defined(__i386__) || defined(__386__) - softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; -#else - softpipe->use_sse = FALSE; -#endif - - softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; - - softpipe->pipe.winsys = pipe_winsys; - softpipe->pipe.screen = screen; - softpipe->pipe.destroy = softpipe_destroy; - - /* state setters */ - softpipe->pipe.create_blend_state = softpipe_create_blend_state; - softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; - softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; - - softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; - softpipe->pipe.bind_sampler_state = softpipe_bind_sampler_state; - softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; - - softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; - softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; - softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; - - softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; - softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; - softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; - - softpipe->pipe.create_fs_state = softpipe_create_fs_state; - softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; - softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; - - softpipe->pipe.create_vs_state = softpipe_create_vs_state; - softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; - softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; - - softpipe->pipe.set_blend_color = softpipe_set_blend_color; - softpipe->pipe.set_clip_state = softpipe_set_clip_state; - softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; - softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; - softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; - softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; - softpipe->pipe.set_sampler_texture = softpipe_set_sampler_texture; - softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; - - softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; - softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; - - softpipe->pipe.draw_arrays = softpipe_draw_arrays; - softpipe->pipe.draw_elements = softpipe_draw_elements; - - softpipe->pipe.clear = softpipe_clear; - softpipe->pipe.flush = softpipe_flush; - - softpipe_init_query_funcs( softpipe ); - softpipe_init_texture_funcs( softpipe ); - - /* - * Alloc caches for accessing drawing surfaces and textures. - * Must be before quad stage setup! - */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - softpipe->cbuf_cache[i] = sp_create_tile_cache(); - softpipe->zsbuf_cache = sp_create_tile_cache(); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tile_cache(); - - - /* setup quad rendering stages */ - softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); - softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); - softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); - softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe); - softpipe->quad.blend = sp_quad_blend_stage(softpipe); - softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); - softpipe->quad.output = sp_quad_output_stage(softpipe); - - softpipe->winsys = softpipe_winsys; - - /* - * Create drawing context and plug our rendering stage into it. - */ - softpipe->draw = draw_create(); - assert(softpipe->draw); - softpipe->setup = sp_draw_render_stage(softpipe); - - if (GETENV( "SP_VBUF" ) != NULL) { - sp_init_vbuf(softpipe); - } - else { - draw_set_rasterize_stage(softpipe->draw, softpipe->setup); - } - - /* plug in AA line/point stages */ - draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); - draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); - -#if USE_DRAW_STAGE_PSTIPPLE - /* Do polygon stipple w/ texture map + frag prog? */ - draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); -#endif - - sp_init_surface_functions(softpipe); - - return &softpipe->pipe; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_flush.h" +#include "sp_prim_setup.h" +#include "sp_prim_vbuf.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_query.h" + + + +/** + * Map any drawing surfaces which aren't already mapped + */ +void +softpipe_map_surfaces(struct softpipe_context *sp) +{ + unsigned i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); + } + + sp_tile_cache_map_surfaces(sp->zsbuf_cache); +} + + +/** + * Unmap any mapped drawing surfaces + */ +void +softpipe_unmap_surfaces(struct softpipe_context *sp) +{ + uint i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); + } + sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); +} + + +static void softpipe_destroy( struct pipe_context *pipe ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct pipe_winsys *ws = pipe->winsys; + uint i; + + draw_destroy( softpipe->draw ); + + softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); + softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); + softpipe->quad.shade->destroy( softpipe->quad.shade ); + softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); + softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); + softpipe->quad.coverage->destroy( softpipe->quad.coverage ); + softpipe->quad.bufloop->destroy( softpipe->quad.bufloop ); + softpipe->quad.blend->destroy( softpipe->quad.blend ); + softpipe->quad.colormask->destroy( softpipe->quad.colormask ); + softpipe->quad.output->destroy( softpipe->quad.output ); + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + sp_destroy_tile_cache(softpipe->cbuf_cache[i]); + sp_destroy_tile_cache(softpipe->zsbuf_cache); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + sp_destroy_tile_cache(softpipe->tex_cache[i]); + + for (i = 0; i < Elements(softpipe->constants); i++) { + if (softpipe->constants[i].buffer) { + pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + } + } + + FREE( softpipe ); +} + + +struct pipe_context * +softpipe_create( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + struct softpipe_winsys *softpipe_winsys ) +{ + struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); + uint i; + +#if defined(__i386__) || defined(__386__) + softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; +#else + softpipe->use_sse = FALSE; +#endif + + softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; + + softpipe->pipe.winsys = pipe_winsys; + softpipe->pipe.screen = screen; + softpipe->pipe.destroy = softpipe_destroy; + + /* state setters */ + softpipe->pipe.create_blend_state = softpipe_create_blend_state; + softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; + softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; + + softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; + softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; + + softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; + softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; + softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; + + softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; + softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; + softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; + + softpipe->pipe.create_fs_state = softpipe_create_fs_state; + softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; + softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; + + softpipe->pipe.create_vs_state = softpipe_create_vs_state; + softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; + softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + + softpipe->pipe.set_blend_color = softpipe_set_blend_color; + softpipe->pipe.set_clip_state = softpipe_set_clip_state; + softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; + softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; + softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; + softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; + softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; + + softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; + softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; + + softpipe->pipe.draw_arrays = softpipe_draw_arrays; + softpipe->pipe.draw_elements = softpipe_draw_elements; + + softpipe->pipe.clear = softpipe_clear; + softpipe->pipe.flush = softpipe_flush; + + softpipe_init_query_funcs( softpipe ); + softpipe_init_texture_funcs( softpipe ); + + /* + * Alloc caches for accessing drawing surfaces and textures. + * Must be before quad stage setup! + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + softpipe->cbuf_cache[i] = sp_create_tile_cache(); + softpipe->zsbuf_cache = sp_create_tile_cache(); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + softpipe->tex_cache[i] = sp_create_tile_cache(); + + + /* setup quad rendering stages */ + softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad.shade = sp_quad_shade_stage(softpipe); + softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe); + softpipe->quad.blend = sp_quad_blend_stage(softpipe); + softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad.output = sp_quad_output_stage(softpipe); + + softpipe->winsys = softpipe_winsys; + + /* + * Create drawing context and plug our rendering stage into it. + */ + softpipe->draw = draw_create(); + assert(softpipe->draw); + softpipe->setup = sp_draw_render_stage(softpipe); + + if (GETENV( "SP_VBUF" ) != NULL) { + sp_init_vbuf(softpipe); + } + else { + draw_set_rasterize_stage(softpipe->draw, softpipe->setup); + } + + /* plug in AA line/point stages */ + draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); + draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); + +#if USE_DRAW_STAGE_PSTIPPLE + /* Do polygon stipple w/ texture map + frag prog? */ + draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); +#endif + + sp_init_surface_functions(softpipe); + + return &softpipe->pipe; +} diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index feeafc70846..19e6cfaf02e 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -81,6 +81,9 @@ struct softpipe_context { struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; unsigned dirty; + unsigned num_samplers; + unsigned num_textures; + /* Counter for occlusion queries. Note this supports overlapping * queries. */ diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 1fbb2e38c4e..9198198db3d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -1,208 +1,209 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" - -#include "sp_context.h" -#include "sp_state.h" -#include "sp_headers.h" -#include "sp_quad.h" -#include "sp_texture.h" -#include "sp_tex_sample.h" - - -struct quad_shade_stage -{ - struct quad_stage stage; - struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; - struct tgsi_exec_machine machine; - struct tgsi_exec_vector *inputs, *outputs; - int colorOutSlot, depthOutSlot; -}; - - -/** cast wrapper */ -static INLINE struct quad_shade_stage * -quad_shade_stage(struct quad_stage *qs) -{ - return (struct quad_shade_stage *) qs; -} - - - -/** - * Execute fragment shader for the four fragments in the quad. - */ -static void -shade_quad( - struct quad_stage *qs, - struct quad_header *quad ) -{ - struct quad_shade_stage *qss = quad_shade_stage( qs ); - struct softpipe_context *softpipe = qs->softpipe; - struct tgsi_exec_machine *machine = &qss->machine; - - /* Consts do not require 16 byte alignment. */ - machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; - - machine->InterpCoefs = quad->coef; - - /* run shader */ - quad->mask &= softpipe->fs->run( softpipe->fs, - &qss->machine, - quad ); - - /* store result color */ - if (qss->colorOutSlot >= 0) { - /* XXX need to handle multiple color outputs someday */ - assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] - == TGSI_SEMANTIC_COLOR); - memcpy( - quad->outputs.color, - &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], - sizeof( quad->outputs.color ) ); - } - - /* - * XXX the following code for updating quad->outputs.depth - * isn't really needed if we did early z testing. - */ - - /* store result Z */ - if (qss->depthOutSlot >= 0) { - /* output[slot] is new Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; - } - } - else { - /* copy input Z (which was interpolated by the executor) to output Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; - /* XXX not sure the above line is always correct. The following - * might be better: - quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; - */ - } - } - - /* shader may cull fragments */ - if( quad->mask ) { - qs->next->run( qs->next, quad ); - } -} - -/** - * Per-primitive (or per-begin?) setup - */ -static void shade_begin(struct quad_stage *qs) -{ - struct quad_shade_stage *qss = quad_shade_stage(qs); - struct softpipe_context *softpipe = qs->softpipe; - unsigned i; - - /* set TGSI sampler state that varies */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - qss->samplers[i].state = softpipe->sampler[i]; - qss->samplers[i].texture = softpipe->texture[i]; - } - - /* find output slots for depth, color */ - qss->colorOutSlot = -1; - qss->depthOutSlot = -1; - for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { - switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - qss->depthOutSlot = i; - break; - case TGSI_SEMANTIC_COLOR: - qss->colorOutSlot = i; - break; - } - } - - softpipe->fs->prepare( softpipe->fs, - &qss->machine, - qss->samplers ); - - qs->next->begin(qs->next); -} - - -static void shade_destroy(struct quad_stage *qs) -{ - struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; - - tgsi_exec_machine_free_data(&qss->machine); - FREE( qss->inputs ); - FREE( qss->outputs ); - FREE( qs ); -} - - -struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) -{ - struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); - uint i; - - /* allocate storage for program inputs/outputs, aligned to 16 bytes */ - qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); - qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); - qss->machine.Inputs = align16(qss->inputs); - qss->machine.Outputs = align16(qss->outputs); - - qss->stage.softpipe = softpipe; - qss->stage.begin = shade_begin; - qss->stage.run = shade_quad; - qss->stage.destroy = shade_destroy; - - /* set TGSI sampler state that's constant */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - assert(softpipe->tex_cache[i]); - qss->samplers[i].get_samples = sp_get_samples; - qss->samplers[i].pipe = &softpipe->pipe; - qss->samplers[i].cache = softpipe->tex_cache[i]; - } - - tgsi_exec_machine_init( &qss->machine ); - - return &qss->stage; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_texture.h" +#include "sp_tex_sample.h" + + +struct quad_shade_stage +{ + struct quad_stage stage; + struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; + struct tgsi_exec_machine machine; + struct tgsi_exec_vector *inputs, *outputs; + int colorOutSlot, depthOutSlot; +}; + + +/** cast wrapper */ +static INLINE struct quad_shade_stage * +quad_shade_stage(struct quad_stage *qs) +{ + return (struct quad_shade_stage *) qs; +} + + + +/** + * Execute fragment shader for the four fragments in the quad. + */ +static void +shade_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + struct quad_shade_stage *qss = quad_shade_stage( qs ); + struct softpipe_context *softpipe = qs->softpipe; + struct tgsi_exec_machine *machine = &qss->machine; + + /* Consts do not require 16 byte alignment. */ + machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + + machine->InterpCoefs = quad->coef; + + /* run shader */ + quad->mask &= softpipe->fs->run( softpipe->fs, + &qss->machine, + quad ); + + /* store result color */ + if (qss->colorOutSlot >= 0) { + /* XXX need to handle multiple color outputs someday */ + assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] + == TGSI_SEMANTIC_COLOR); + memcpy( + quad->outputs.color, + &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], + sizeof( quad->outputs.color ) ); + } + + /* + * XXX the following code for updating quad->outputs.depth + * isn't really needed if we did early z testing. + */ + + /* store result Z */ + if (qss->depthOutSlot >= 0) { + /* output[slot] is new Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; + } + } + else { + /* copy input Z (which was interpolated by the executor) to output Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; + /* XXX not sure the above line is always correct. The following + * might be better: + quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; + */ + } + } + + /* shader may cull fragments */ + if( quad->mask ) { + qs->next->run( qs->next, quad ); + } +} + +/** + * Per-primitive (or per-begin?) setup + */ +static void shade_begin(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + unsigned i; + unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers); + + /* set TGSI sampler state that varies */ + for (i = 0; i < num; i++) { + qss->samplers[i].state = softpipe->sampler[i]; + qss->samplers[i].texture = softpipe->texture[i]; + } + + /* find output slots for depth, color */ + qss->colorOutSlot = -1; + qss->depthOutSlot = -1; + for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { + switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + qss->depthOutSlot = i; + break; + case TGSI_SEMANTIC_COLOR: + qss->colorOutSlot = i; + break; + } + } + + softpipe->fs->prepare( softpipe->fs, + &qss->machine, + qss->samplers ); + + qs->next->begin(qs->next); +} + + +static void shade_destroy(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; + + tgsi_exec_machine_free_data(&qss->machine); + FREE( qss->inputs ); + FREE( qss->outputs ); + FREE( qs ); +} + + +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) +{ + struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); + uint i; + + /* allocate storage for program inputs/outputs, aligned to 16 bytes */ + qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); + qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); + qss->machine.Inputs = align16(qss->inputs); + qss->machine.Outputs = align16(qss->outputs); + + qss->stage.softpipe = softpipe; + qss->stage.begin = shade_begin; + qss->stage.run = shade_quad; + qss->stage.destroy = shade_destroy; + + /* set TGSI sampler state that's constant */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + assert(softpipe->tex_cache[i]); + qss->samplers[i].get_samples = sp_get_samples; + qss->samplers[i].pipe = &softpipe->pipe; + qss->samplers[i].cache = softpipe->tex_cache[i]; + } + + tgsi_exec_machine_init( &qss->machine ); + + return &qss->stage; +} diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 3943d4ed2b5..45d159143f7 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -1,195 +1,195 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#ifndef SP_STATE_H -#define SP_STATE_H - -#include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" - - -#define SP_NEW_VIEWPORT 0x1 -#define SP_NEW_RASTERIZER 0x2 -#define SP_NEW_FS 0x4 -#define SP_NEW_BLEND 0x8 -#define SP_NEW_CLIP 0x10 -#define SP_NEW_SCISSOR 0x20 -#define SP_NEW_STIPPLE 0x40 -#define SP_NEW_FRAMEBUFFER 0x80 -#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 -#define SP_NEW_CONSTANTS 0x200 -#define SP_NEW_SAMPLER 0x400 -#define SP_NEW_TEXTURE 0x800 -#define SP_NEW_VERTEX 0x1000 -#define SP_NEW_VS 0x2000 -#define SP_NEW_QUERY 0x4000 - - -struct tgsi_sampler; -struct tgsi_exec_machine; - - -/** Subclass of pipe_shader_state (though it doesn't really need to be). - * - * This is starting to look an awful lot like a quad pipeline stage... - */ -struct sp_fragment_shader { - struct pipe_shader_state shader; - - struct tgsi_shader_info info; - - void (*prepare)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct tgsi_sampler *samplers); - - /* Run the shader - this interface will get cleaned up in the - * future: - */ - unsigned (*run)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct quad_header *quad ); - - - void (*delete)( struct sp_fragment_shader * ); -}; - -struct vertex_info; - -/** Subclass of pipe_shader_state */ -struct sp_vertex_shader { - struct pipe_shader_state shader; - struct draw_vertex_shader *draw_data; -}; - - - -void * -softpipe_create_blend_state(struct pipe_context *, - const struct pipe_blend_state *); -void softpipe_bind_blend_state(struct pipe_context *, - void *); -void softpipe_delete_blend_state(struct pipe_context *, - void *); - -void * -softpipe_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); -void softpipe_bind_sampler_state(struct pipe_context *, unsigned, void *); -void softpipe_delete_sampler_state(struct pipe_context *, void *); - -void * -softpipe_create_depth_stencil_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); -void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); -void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); - -void * -softpipe_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void softpipe_bind_rasterizer_state(struct pipe_context *, void *); -void softpipe_delete_rasterizer_state(struct pipe_context *, void *); - -void softpipe_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - -void softpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - -void softpipe_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void softpipe_set_constant_buffer(struct pipe_context *, - uint shader, uint index, - const struct pipe_constant_buffer *buf); - -void *softpipe_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_fs_state(struct pipe_context *, void *); -void softpipe_delete_fs_state(struct pipe_context *, void *); -void *softpipe_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_vs_state(struct pipe_context *, void *); -void softpipe_delete_vs_state(struct pipe_context *, void *); - -void softpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - -void softpipe_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); - -void softpipe_set_sampler_texture( struct pipe_context *, - unsigned unit, - struct pipe_texture * ); - -void softpipe_set_viewport_state( struct pipe_context *, - const struct pipe_viewport_state * ); - -void softpipe_set_vertex_element(struct pipe_context *, - unsigned index, - const struct pipe_vertex_element *); - -void softpipe_set_vertex_buffer(struct pipe_context *, - unsigned index, - const struct pipe_vertex_buffer *); - - -void softpipe_update_derived( struct softpipe_context *softpipe ); - - -boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -boolean softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); - - -void -softpipe_map_surfaces(struct softpipe_context *sp); - -void -softpipe_unmap_surfaces(struct softpipe_context *sp); - -void -softpipe_map_texture_surfaces(struct softpipe_context *sp); - -void -softpipe_unmap_texture_surfaces(struct softpipe_context *sp); - - -struct vertex_info * -softpipe_get_vertex_info(struct softpipe_context *softpipe); - -struct vertex_info * -softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); - - -#endif +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_STATE_H +#define SP_STATE_H + +#include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" + + +#define SP_NEW_VIEWPORT 0x1 +#define SP_NEW_RASTERIZER 0x2 +#define SP_NEW_FS 0x4 +#define SP_NEW_BLEND 0x8 +#define SP_NEW_CLIP 0x10 +#define SP_NEW_SCISSOR 0x20 +#define SP_NEW_STIPPLE 0x40 +#define SP_NEW_FRAMEBUFFER 0x80 +#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 +#define SP_NEW_CONSTANTS 0x200 +#define SP_NEW_SAMPLER 0x400 +#define SP_NEW_TEXTURE 0x800 +#define SP_NEW_VERTEX 0x1000 +#define SP_NEW_VS 0x2000 +#define SP_NEW_QUERY 0x4000 + + +struct tgsi_sampler; +struct tgsi_exec_machine; + + +/** Subclass of pipe_shader_state (though it doesn't really need to be). + * + * This is starting to look an awful lot like a quad pipeline stage... + */ +struct sp_fragment_shader { + struct pipe_shader_state shader; + + struct tgsi_shader_info info; + + void (*prepare)( const struct sp_fragment_shader *shader, + struct tgsi_exec_machine *machine, + struct tgsi_sampler *samplers); + + /* Run the shader - this interface will get cleaned up in the + * future: + */ + unsigned (*run)( const struct sp_fragment_shader *shader, + struct tgsi_exec_machine *machine, + struct quad_header *quad ); + + + void (*delete)( struct sp_fragment_shader * ); +}; + +struct vertex_info; + +/** Subclass of pipe_shader_state */ +struct sp_vertex_shader { + struct pipe_shader_state shader; + struct draw_vertex_shader *draw_data; +}; + + + +void * +softpipe_create_blend_state(struct pipe_context *, + const struct pipe_blend_state *); +void softpipe_bind_blend_state(struct pipe_context *, + void *); +void softpipe_delete_blend_state(struct pipe_context *, + void *); + +void * +softpipe_create_sampler_state(struct pipe_context *, + const struct pipe_sampler_state *); +void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void softpipe_delete_sampler_state(struct pipe_context *, void *); + +void * +softpipe_create_depth_stencil_state(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); +void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); +void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); + +void * +softpipe_create_rasterizer_state(struct pipe_context *, + const struct pipe_rasterizer_state *); +void softpipe_bind_rasterizer_state(struct pipe_context *, void *); +void softpipe_delete_rasterizer_state(struct pipe_context *, void *); + +void softpipe_set_framebuffer_state( struct pipe_context *, + const struct pipe_framebuffer_state * ); + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ); + +void softpipe_set_clip_state( struct pipe_context *, + const struct pipe_clip_state * ); + +void softpipe_set_constant_buffer(struct pipe_context *, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + +void *softpipe_create_fs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_fs_state(struct pipe_context *, void *); +void softpipe_delete_fs_state(struct pipe_context *, void *); +void *softpipe_create_vs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_vs_state(struct pipe_context *, void *); +void softpipe_delete_vs_state(struct pipe_context *, void *); + +void softpipe_set_polygon_stipple( struct pipe_context *, + const struct pipe_poly_stipple * ); + +void softpipe_set_scissor_state( struct pipe_context *, + const struct pipe_scissor_state * ); + +void softpipe_set_sampler_textures( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); + +void softpipe_set_viewport_state( struct pipe_context *, + const struct pipe_viewport_state * ); + +void softpipe_set_vertex_element(struct pipe_context *, + unsigned index, + const struct pipe_vertex_element *); + +void softpipe_set_vertex_buffer(struct pipe_context *, + unsigned index, + const struct pipe_vertex_buffer *); + + +void softpipe_update_derived( struct softpipe_context *softpipe ); + + +boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +boolean softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); + + +void +softpipe_map_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_surfaces(struct softpipe_context *sp); + +void +softpipe_map_texture_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_texture_surfaces(struct softpipe_context *sp); + + +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe); + +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); + + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 1d6dd17d1d9..7cf85b9207b 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -50,45 +50,67 @@ softpipe_create_sampler_state(struct pipe_context *pipe, return mem_dup(sampler, sizeof(*sampler)); } + void -softpipe_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +softpipe_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct softpipe_context *softpipe = softpipe_context(pipe); + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == softpipe->num_samplers && + !memcmp(softpipe->sampler, sampler, num * sizeof(void *))) + return; + draw_flush(softpipe->draw); - assert(unit < PIPE_MAX_SAMPLERS); - softpipe->sampler[unit] = (struct pipe_sampler_state *)sampler; + memcpy(softpipe->sampler, sampler, num * sizeof(void *)); + memset(&softpipe->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * + sizeof(void *)); + + softpipe->num_samplers = num; softpipe->dirty |= SP_NEW_SAMPLER; } void -softpipe_delete_sampler_state(struct pipe_context *pipe, - void *sampler) +softpipe_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { - FREE( sampler ); -} + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + assert(num <= PIPE_MAX_SAMPLERS); -void -softpipe_set_sampler_texture(struct pipe_context *pipe, - unsigned unit, - struct pipe_texture *texture) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); + /* Check for no-op */ + if (num == softpipe->num_textures && + !memcmp(softpipe->texture, texture, num * sizeof(struct pipe_texture *))) + return; draw_flush(softpipe->draw); - assert(unit < PIPE_MAX_SAMPLERS); - pipe_texture_reference(&softpipe->texture[unit], texture); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? texture[i] : NULL; + + pipe_texture_reference(&softpipe->texture[i], tex); + sp_tile_cache_set_texture(pipe, softpipe->tex_cache[i], tex); + } - sp_tile_cache_set_texture(pipe, softpipe->tex_cache[unit], texture); + softpipe->num_textures = num; softpipe->dirty |= SP_NEW_TEXTURE; } +void +softpipe_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index c605ed925b2..bbf2d80308e 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -1,202 +1,202 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * Michel Dänzer <michel@tungstengraphics.com> - */ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_util.h" -#include "pipe/p_winsys.h" - -#include "sp_context.h" -#include "sp_state.h" -#include "sp_texture.h" -#include "sp_tile_cache.h" - - -/* Simple, maximally packed layout. - */ - -static unsigned minify( unsigned d ) -{ - return MAX2(1, d>>1); -} - - -static void -softpipe_texture_layout(struct softpipe_texture * spt) -{ - struct pipe_texture *pt = &spt->base; - unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; - - spt->buffer_size = 0; - - for (level = 0; level <= pt->last_level; level++) { - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; - - spt->level_offset[level] = spt->buffer_size; - - spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - width * pt->cpp; - - width = minify(width); - height = minify(height); - depth = minify(depth); - } -} - - -static struct pipe_texture * -softpipe_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) -{ - struct pipe_winsys *ws = screen->winsys; - struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); - if (!spt) - return NULL; - - spt->base = *templat; - spt->base.refcount = 1; - spt->base.screen = screen; - - softpipe_texture_layout(spt); - - spt->buffer = ws->buffer_create(ws, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); - if (!spt->buffer) { - FREE(spt); - return NULL; - } - - assert(spt->base.refcount == 1); - - return &spt->base; -} - - -static void -softpipe_texture_release_screen(struct pipe_screen *screen, - struct pipe_texture **pt) -{ - if (!*pt) - return; - - /* - DBG("%s %p refcount will be %d\n", - __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); - */ - if (--(*pt)->refcount <= 0) { - struct softpipe_texture *spt = softpipe_texture(*pt); - - /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); - */ - - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); - - FREE(spt); - } - *pt = NULL; -} - - -static struct pipe_surface * -softpipe_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct pipe_winsys *ws = screen->winsys; - struct softpipe_texture *spt = softpipe_texture(pt); - struct pipe_surface *ps; - - assert(level <= pt->last_level); - - ps = ws->surface_alloc(ws); - if (ps) { - assert(ps->refcount); - assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = ps->width; - ps->offset = spt->level_offset[level]; - - if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { - ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * - (pt->compressed ? ps->height/4 : ps->height) * - ps->width * ps->cpp; - } - else { - assert(face == 0); - assert(zslice == 0); - } - } - return ps; -} - - -static void -softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); - uint unit; - for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { - if (softpipe->texture[unit] == texture) { - sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); - } - } -} - - -void -softpipe_init_texture_funcs( struct softpipe_context *softpipe ) -{ - softpipe->pipe.texture_update = softpipe_texture_update; -} - - -void -softpipe_init_screen_texture_funcs(struct pipe_screen *screen) -{ - screen->texture_create = softpipe_texture_create_screen; - screen->texture_release = softpipe_texture_release_screen; - screen->get_tex_surface = softpipe_get_tex_surface_screen; -} +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" + + +/* Simple, maximally packed layout. + */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static void +softpipe_texture_layout(struct softpipe_texture * spt) +{ + struct pipe_texture *pt = &spt->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + spt->buffer_size = 0; + + for (level = 0; level <= pt->last_level; level++) { + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + + spt->level_offset[level] = spt->buffer_size; + + spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + width * pt->cpp; + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + + +static struct pipe_texture * +softpipe_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct pipe_winsys *ws = screen->winsys; + struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *templat; + spt->base.refcount = 1; + spt->base.screen = screen; + + softpipe_texture_layout(spt); + + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); + if (!spt->buffer) { + FREE(spt); + return NULL; + } + + assert(spt->base.refcount == 1); + + return &spt->base; +} + + +static void +softpipe_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct softpipe_texture *spt = softpipe_texture(*pt); + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); + */ + + pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); + + FREE(spt); + } + *pt = NULL; +} + + +static struct pipe_surface * +softpipe_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; + struct softpipe_texture *spt = softpipe_texture(pt); + struct pipe_surface *ps; + + assert(level <= pt->last_level); + + ps = ws->surface_alloc(ws); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(ws, &ps->buffer, spt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = ps->width; + ps->offset = spt->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + (pt->compressed ? ps->height/4 : ps->height) * + ps->width * ps->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} + + +static void +softpipe_texture_update(struct pipe_context *pipe, + struct pipe_texture *texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint unit; + for (unit = 0; unit < softpipe->num_textures; unit++) { + if (softpipe->texture[unit] == texture) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); + } + } +} + + +void +softpipe_init_texture_funcs( struct softpipe_context *softpipe ) +{ + softpipe->pipe.texture_update = softpipe_texture_update; +} + + +void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = softpipe_texture_create_screen; + screen->texture_release = softpipe_texture_release_screen; + screen->get_tex_surface = softpipe_get_tex_surface_screen; +} diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 1501b52f3e5..b64948f0f3b 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -100,7 +100,7 @@ struct pipe_context { void * (*create_sampler_state)(struct pipe_context *, const struct pipe_sampler_state *); - void (*bind_sampler_state)(struct pipe_context *, unsigned unit, void *); + void (*bind_sampler_states)(struct pipe_context *, unsigned num, void **); void (*delete_sampler_state)(struct pipe_context *, void *); void * (*create_rasterizer_state)(struct pipe_context *, @@ -148,9 +148,9 @@ struct pipe_context { /* Currently a sampler is constrained to sample from a single texture: */ - void (*set_sampler_texture)( struct pipe_context *, - unsigned sampler, - struct pipe_texture * ); + void (*set_sampler_textures)( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); void (*set_viewport_state)( struct pipe_context *, const struct pipe_viewport_state * ); diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 92263cb6881..1000f98ffc0 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -120,10 +120,11 @@ update_samplers(struct st_context *st) const struct st_fragment_program *fs = st->fp; GLuint su; + st->state.num_samplers = 0; + /* loop over sampler units (aka tex image units) */ for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) { struct pipe_sampler_state sampler; - const struct cso_sampler *cso; memset(&sampler, 0, sizeof(sampler)); @@ -168,17 +169,16 @@ update_samplers(struct st_context *st) = st_compare_func_to_pipe(texobj->CompareFunc); } + st->state.num_samplers = su + 1; + /* XXX more sampler state here */ } - cso = st_cached_sampler_state(st, &sampler); - - if (cso != st->state.sampler[su]) { - /* state has changed */ - st->state.sampler[su] = cso; - st->pipe->bind_sampler_state(st->pipe, su, cso->data); - } + st->state.sampler[su] = st_cached_sampler_state(st, &sampler)->data; } + + st->pipe->bind_sampler_states(st->pipe, st->state.num_samplers, + st->state.sampler); } diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 697d2cdfb44..e53a897637c 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -37,6 +37,7 @@ #include "st_texture.h" #include "st_cb_texture.h" #include "pipe/p_context.h" +#include "pipe/p_inlines.h" /** @@ -51,6 +52,8 @@ update_textures(struct st_context *st) struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current; GLuint unit; + st->state.num_textures = 0; + for (unit = 0; unit < st->ctx->Const.MaxTextureCoordUnits; unit++) { const GLuint su = fprog->Base.SamplerUnits[unit]; struct gl_texture_object *texObj = st->ctx->Texture.Unit[su]._Current; @@ -62,6 +65,8 @@ update_textures(struct st_context *st) retval = st_finalize_texture(st->ctx, st->pipe, texObj, &flush); /* XXX retval indicates whether there's a texture border */ + + st->state.num_textures = unit + 1; } /* XXX: need to ensure that textures are unbound/removed from @@ -70,18 +75,16 @@ update_textures(struct st_context *st) */ pt = st_get_stobj_texture(stObj); - - if (st->state.sampler_texture[unit] != pt) { - st->state.sampler_texture[unit] = pt; - st->pipe->set_sampler_texture(st->pipe, unit, pt); - } + pipe_texture_reference(&st->state.sampler_texture[unit], pt); if (stObj && stObj->dirtyData) { st->pipe->texture_update(st->pipe, pt); stObj->dirtyData = GL_FALSE; } - } + + st->pipe->set_sampler_textures(st->pipe, st->state.num_textures, + st->state.sampler_texture); } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 65c9fda9cb6..dee4c4132a4 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -641,7 +641,6 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, const GLfloat *color, GLboolean invertTex) { - const GLuint unit = 0; struct pipe_context *pipe = ctx->st->pipe; GLfloat x0, y0, x1, y1; GLuint maxSize; @@ -684,7 +683,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; sampler.normalized_coords = 1; cso = st_cached_sampler_state(ctx->st, &sampler); - pipe->bind_sampler_state(pipe, unit, cso->data); + pipe->bind_sampler_states(pipe, 1, (void**)&cso->data); } /* viewport state: viewport matching window dims */ @@ -705,7 +704,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, /* texture state: */ { - pipe->set_sampler_texture(pipe, unit, pt); + pipe->set_sampler_textures(pipe, 1, &pt); } /* Compute window coords (y=0=bottom) with pixel zoom. @@ -727,8 +726,10 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, pipe->bind_rasterizer_state(pipe, ctx->st->state.rasterizer->data); pipe->bind_fs_state(pipe, ctx->st->state.fs->data); pipe->bind_vs_state(pipe, ctx->st->state.vs->cso->data); - pipe->set_sampler_texture(pipe, unit, ctx->st->state.sampler_texture[unit]); - pipe->bind_sampler_state(pipe, unit, ctx->st->state.sampler[unit]->data); + pipe->set_sampler_textures(pipe, ctx->st->state.num_textures, + ctx->st->state.sampler_texture); + pipe->bind_sampler_states(pipe, ctx->st->state.num_samplers, + ctx->st->state.sampler); pipe->set_viewport_state(pipe, &ctx->st->state.viewport); } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 1fbf9721e79..897a5109b7e 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -76,7 +76,7 @@ struct st_context struct { const struct cso_alpha_test *alpha_test; const struct cso_blend *blend; - const struct cso_sampler *sampler[PIPE_MAX_SAMPLERS]; + void *sampler[PIPE_MAX_SAMPLERS]; const struct cso_depth_stencil_alpha *depth_stencil; const struct cso_rasterizer *rasterizer; const struct cso_fragment_shader *fs; @@ -90,6 +90,9 @@ struct st_context struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; + + GLuint num_samplers; + GLuint num_textures; } state; struct { diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 841d77abbc7..3723e26d45d 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -284,7 +284,7 @@ st_render_mipmap(struct st_context *st, */ sampler.min_lod = sampler.max_lod = srcLevel; sampler_cso = pipe->create_sampler_state(pipe, &sampler); - pipe->bind_sampler_state(pipe, 0, sampler_cso); + pipe->bind_sampler_states(pipe, 1, &sampler_cso); simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); @@ -293,7 +293,7 @@ st_render_mipmap(struct st_context *st, * the right mipmap level. */ /*pt->first_level = srcLevel;*/ - pipe->set_sampler_texture(pipe, 0, pt); + pipe->set_sampler_textures(pipe, 1, &pt); draw_quad(st->ctx); @@ -310,9 +310,10 @@ st_render_mipmap(struct st_context *st, pipe->bind_fs_state(pipe, st->state.fs->data); if (st->state.vs) pipe->bind_vs_state(pipe, st->state.vs->cso->data); - if (st->state.sampler[0]) - pipe->bind_sampler_state(pipe, 0, st->state.sampler[0]->data); - pipe->set_sampler_texture(pipe, 0, st->state.sampler_texture[0]); + pipe->bind_sampler_states(pipe, st->state.num_samplers, + st->state.sampler); + pipe->set_sampler_textures(pipe, st->state.num_textures, + st->state.sampler_texture); pipe->set_viewport_state(pipe, &st->state.viewport); return TRUE; -- cgit v1.2.3 From 82f22d9e147ed55c2ca513ebc2d069e197d36ea8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 6 Mar 2008 11:52:25 +1100 Subject: nv30: a couple of vtxprog fixes --- src/gallium/drivers/nv30/nv30_context.c | 2 +- src/gallium/drivers/nv30/nv30_vertprog.c | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 28d3f057cc4..0124f9af0fc 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -319,7 +319,7 @@ nv30_create(struct pipe_screen *screen, unsigned pctx_id) } /* Vtxprog resources */ - if (nvws->res_init(&nv30->vertprog.exec_heap, 0, 512) || + if (nvws->res_init(&nv30->vertprog.exec_heap, 0, 256) || nvws->res_init(&nv30->vertprog.data_heap, 0, 256)) { nv30_destroy(&nv30->pipe); return NULL; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 4a8269d5dd7..548b6907d02 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -225,6 +225,18 @@ nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, // hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; // hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); + if (dst.type == NV30SR_OUTPUT) { + if (slot) + hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + } else { + if (slot) + hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + } + emit_dst(vpc, hw, slot, dst); emit_src(vpc, hw, 0, s0); emit_src(vpc, hw, 1, s1); @@ -752,7 +764,7 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) } #endif BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (/*vp->exec->start*/0); + OUT_RING (vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); OUT_RINGp (vp->insns[i].data, 4); @@ -760,8 +772,7 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) } BEGIN_RING(rankine, NV34TCL_VP_START_FROM_ID, 1); -// OUT_RING (vp->exec->start); - OUT_RING (0); + OUT_RING (vp->exec->start); nv30->vertprog.active = vp; } -- cgit v1.2.3 From ae0e047ba4e05d25d6e0b9b0574e36c7e8ccd510 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 10 Mar 2008 14:27:22 +1100 Subject: nv30: put the card into vtxprog mode + small cleanups/fixes --- src/gallium/drivers/nv30/nv30_context.c | 122 ++----------------------------- src/gallium/drivers/nv30/nv30_vertprog.c | 5 ++ 2 files changed, 10 insertions(+), 117 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 0124f9af0fc..b3906e28e3f 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -133,129 +133,17 @@ nv30_init_hwctx(struct nv30_context *nv30, int rankine_class) BEGIN_RING(rankine, NV34TCL_RC_ENABLE, 1); OUT_RING (0); - /* Attempt to setup a known state.. Probably missing a heap of - * stuff here.. - */ - BEGIN_RING(rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(rankine, NV34TCL_STENCIL_BACK_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(rankine, NV34TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(rankine, NV34TCL_DEPTH_WRITE_ENABLE, 2); - OUT_RING (0); /* wr disable */ - OUT_RING (0); /* test disable */ - BEGIN_RING(rankine, NV34TCL_COLOR_MASK, 1); - OUT_RING (0x01010101); /* TR,TR,TR,TR */ - BEGIN_RING(rankine, NV34TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_ENABLE, 5); - OUT_RING (0); /* Blend enable */ - OUT_RING (0); /* Blend src */ - OUT_RING (0); /* Blend dst */ - OUT_RING (0x00000000); /* Blend colour */ - OUT_RING (0x8006); /* FUNC_ADD */ - BEGIN_RING(rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); - OUT_RING (0); - OUT_RING (0x1503 /*GL_COPY*/); - BEGIN_RING(rankine, NV34TCL_DITHER_ENABLE, 1); - OUT_RING (1); - BEGIN_RING(rankine, NV34TCL_SHADE_MODEL, 1); - OUT_RING (0x1d01 /*GL_SMOOTH*/); - BEGIN_RING(rankine, NV34TCL_POLYGON_OFFSET_FACTOR,2); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - BEGIN_RING(rankine, NV34TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (0x1b02 /*GL_FILL*/); - OUT_RING (0x1b02 /*GL_FILL*/); - /* - Disable texture units - * - Set fragprog to MOVR result.color, fragment.color */ - for (i=0;i<16;i++) { - BEGIN_RING(rankine, - NV34TCL_TX_ENABLE(i), 1); - OUT_RING (0); - } - /* Polygon stipple */ - BEGIN_RING(rankine, - NV34TCL_POLYGON_STIPPLE_PATTERN(0), 0x20); - for (i=0;i<0x20;i++) - OUT_RING (0xFFFFFFFF); - - int w=4096; - int h=4096; - int pitch=4096*4; - BEGIN_RING(rankine, NV34TCL_RT_HORIZ, 5); - OUT_RING (w<<16); - OUT_RING (h<<16); - OUT_RING (0x148); /* format */ - OUT_RING (pitch << 16 | pitch); - OUT_RING (0x0); - BEGIN_RING(rankine, 0x0a00, 2); - OUT_RING ((w<<16) | 0); - OUT_RING ((h<<16) | 0); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING ((w-1)<<16); - OUT_RING ((h-1)<<16); - BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2); - OUT_RING (w<<16); - OUT_RING (h<<16); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_HORIZ, 2); - OUT_RING (w<<16); - OUT_RING (h<<16); - - BEGIN_RING(rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - - BEGIN_RING(rankine, NV34TCL_MODELVIEW_MATRIX(0), 16); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - - BEGIN_RING(rankine, NV34TCL_PROJECTION_MATRIX(0), 16); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); + BEGIN_RING(rankine, NV34TCL_DEPTH_RANGE_NEAR, 2); OUT_RINGf (0.0); OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - - BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2); - OUT_RING (4096<<16); - OUT_RING (4096<<16); BEGIN_RING(rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); OUT_RING (0xffff0000); + /* enables use of vp rather than fixed-function somehow */ + BEGIN_RING(rankine, 0x1e94, 1); + OUT_RING (0x13); + FIRE_RING (); return TRUE; } diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 548b6907d02..96bc4b5ef9a 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -193,6 +193,11 @@ emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + + /*XXX: no way this is entirely correct, someone needs to + * figure out what exactly it is. + */ + hw[3] |= 0x800; break; default: assert(0); -- cgit v1.2.3 From b721bc8792f6add71dede11924d7060bbce72f0e Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 10 Mar 2008 13:04:13 +0000 Subject: gallium: WinCE portability fixes. --- src/gallium/auxiliary/draw/draw_unfilled.c | 2 +- src/gallium/auxiliary/util/p_debug.c | 8 ++++++++ src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 2 +- src/gallium/drivers/softpipe/sp_quad_blend.c | 6 +++--- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 2 +- src/gallium/include/pipe/p_util.h | 10 ++++++++++ 6 files changed, 24 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_unfilled.c index 4d718d514c6..b07860cd9ea 100644 --- a/src/gallium/auxiliary/draw/draw_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_unfilled.c @@ -129,7 +129,7 @@ static void unfilled_tri( struct draw_stage *stage, points( stage, header ); break; default: - abort(); + assert(0); } } diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 7b3c0309565..6255d716a61 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -57,11 +57,15 @@ int rpl_vsnprintf(char *, size_t, const char *, va_list); void debug_vprintf(const char *format, va_list ap) { #ifdef WIN32 +#ifndef WINCE /* EngDebugPrint does not handle float point arguments, so we need to use * our own vsnprintf implementation */ char buf[512 + 1]; rpl_vsnprintf(buf, sizeof(buf), format, ap); rpl_EngDebugPrint("%s", buf); +#else + /* TODO: Implement debug print for WINCE */ +#endif #else vfprintf(stderr, format, ap); #endif @@ -80,7 +84,11 @@ void debug_printf(const char *format, ...) static INLINE void debug_abort(void) { #ifdef WIN32 +#ifndef WINCE EngDebugBreak(); +#else + _asm {int 3}; +#endif #else abort(); #endif diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 4ffeac35e16..318916fe301 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -72,7 +72,7 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) passMask = MASK_ALL; break; default: - abort(); + assert(0); } quad->mask &= passMask; diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 17f3ecd0b83..0b79cfa1ed2 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -392,7 +392,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); /* to do */ break; default: - abort(); + assert(0); } /* @@ -464,7 +464,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) } break; default: - abort(); + assert(0); } @@ -716,7 +716,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ break; default: - abort(); + assert(0); } /* pass blended quad to next stage */ diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index a9a0754f279..a1859f98832 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -185,7 +185,7 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) zmask = MASK_ALL; break; default: - abort(); + assert(0); } quad->mask &= zmask; diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 3b32ba1d248..ef36ce75f7d 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -54,7 +54,12 @@ EngFreeMem( static INLINE void * MALLOC( unsigned size ) { +#ifdef WINCE + /* TODO: Need to abstract this */ + return malloc( size ); +#else return EngAllocMem( 0, size, 'D3AG' ); +#endif } static INLINE void * @@ -71,7 +76,12 @@ static INLINE void FREE( void *ptr ) { if( ptr ) { +#ifdef WINCE + /* TODO: Need to abstract this */ + free( ptr ); +#else EngFreeMem( ptr ); +#endif } } -- cgit v1.2.3 From 5b82d551b7a4954b24059585fea207f3a250ec0f Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 10 Mar 2008 16:43:36 -0600 Subject: cell: sync up with sampler/texture state-setting changes --- src/gallium/drivers/cell/ppu/cell_context.h | 2 ++ src/gallium/drivers/cell/ppu/cell_pipe_state.c | 32 ++++++++++++++++++-------- src/gallium/drivers/cell/ppu/cell_state_emit.c | 6 +++-- 3 files changed, 28 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index bf27289f3f5..c568922cbd4 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -63,6 +63,7 @@ struct cell_context const struct pipe_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + uint num_samplers; const struct pipe_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; const struct cell_vertex_shader_state *vs; @@ -75,6 +76,7 @@ struct cell_context struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct cell_texture *texture[PIPE_MAX_SAMPLERS]; + uint num_textures; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 075e0a0c471..025ed3bbbfe 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -210,15 +210,19 @@ cell_create_sampler_state(struct pipe_context *pipe, static void -cell_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +cell_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **samplers) { struct cell_context *cell = cell_context(pipe); draw_flush(cell->draw); assert(unit < PIPE_MAX_SAMPLERS); - cell->sampler[unit] = (struct pipe_sampler_state *)sampler; + + memcpy(cell->sampler, samplers, num * sizeof(void *)); + memset(&cell->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * + sizeof(void *)); + cell->num_samplers = num; cell->dirty |= CELL_NEW_SAMPLER; } @@ -234,16 +238,24 @@ cell_delete_sampler_state(struct pipe_context *pipe, static void -cell_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, - struct pipe_texture *texture) +cell_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct cell_context *cell = cell_context(pipe); + uint i; + + /* Check for no-op */ + if (num == cell->num_textures && + !memcmp(cell->texture, texture, num * sizeof(struct pipe_texture *))) + return; draw_flush(cell->draw); - pipe_texture_reference((struct pipe_texture **) &cell->texture[sampler], - texture); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? texture[i] : NULL; + + pipe_texture_reference((struct pipe_texture **) &cell->texture[i], tex); + } cell_update_texture_mapping(cell); @@ -300,10 +312,10 @@ cell_init_state_functions(struct cell_context *cell) cell->pipe.delete_blend_state = cell_delete_blend_state; cell->pipe.create_sampler_state = cell_create_sampler_state; - cell->pipe.bind_sampler_state = cell_bind_sampler_state; + cell->pipe.bind_sampler_states = cell_bind_sampler_states; cell->pipe.delete_sampler_state = cell_delete_sampler_state; - cell->pipe.set_sampler_texture = cell_set_sampler_texture; + cell->pipe.set_sampler_textures = cell_set_sampler_textures; cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 49c0d130c52..670eb26bdd0 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -77,8 +77,10 @@ cell_emit_state(struct cell_context *cell) } if (cell->dirty & CELL_NEW_SAMPLER) { - emit_state_cmd(cell, CELL_CMD_STATE_SAMPLER, - cell->sampler[0], sizeof(struct pipe_sampler_state)); + if (cell->sampler[0]) { + emit_state_cmd(cell, CELL_CMD_STATE_SAMPLER, + cell->sampler[0], sizeof(struct pipe_sampler_state)); + } } if (cell->dirty & CELL_NEW_TEXTURE) { -- cgit v1.2.3 From b1525662b330ca8b4cdd930775f3642bfec3b58f Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Mon, 10 Mar 2008 16:28:54 -0700 Subject: Move SPE register allocator to rtasm code Move the register allocator to a common location. There is more code on the way that will make use of this interface. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 47 +++++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 16 ++++ src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 101 +++++++---------------- 3 files changed, 92 insertions(+), 72 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 95a2d6fcbbe..a996218ce72 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -306,6 +306,11 @@ void spe_init_func(struct spe_function *p, unsigned code_size) { p->store = align_malloc(code_size, 16); p->csr = p->store; + + /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. + */ + p->regs[0] = ~7; + p->regs[1] = (1U << (80 - 64)) - 1; } @@ -317,6 +322,48 @@ void spe_release_func(struct spe_function *p) } +int spe_allocate_available_register(struct spe_function *p) +{ + unsigned i; + for (i = 0; i < 128; i++) { + const uint64_t mask = (1ULL << (i % 128)); + const unsigned idx = i / 128; + + if ((p->regs[idx] & mask) != 0) { + p->regs[idx] &= ~mask; + return i; + } + } + + return -1; +} + + +int spe_allocate_register(struct spe_function *p, int reg) +{ + const unsigned idx = reg / 128; + const unsigned bit = reg % 128; + + assert((p->regs[idx] & (1ULL << bit)) != 0); + + p->regs[idx] &= ~(1ULL << bit); + return reg; +} + + +void spe_release_register(struct spe_function *p, int reg) +{ + const unsigned idx = reg / 128; + const unsigned bit = reg % 128; + + assert((p->regs[idx] & (1ULL << bit)) == 0); + + p->regs[idx] |= (1ULL << bit); +} + + + + void spe_bi(struct spe_function *p, unsigned rA, int d, int e) { emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 10ce44b3a08..5a1eb1ed8de 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -39,11 +39,27 @@ struct spe_function { uint32_t *store; uint32_t *csr; const char *fn; + + /** + * Mask of used / unused registers + * + * Each set bit corresponds to an available register. Each cleared bit + * corresponds to an allocated register. + * + * \sa + * spe_allocate_register, spe_allocate_available_register, + * spe_release_register + */ + uint64_t regs[2]; }; extern void spe_init_func(struct spe_function *p, unsigned code_size); extern void spe_release_func(struct spe_function *p); +extern int spe_allocate_available_register(struct spe_function *p); +extern int spe_allocate_register(struct spe_function *p, int reg); +extern void spe_release_register(struct spe_function *p, int reg); + #endif /* RTASM_PPC_SPE_H */ #ifndef EMIT_ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 9cf74bab477..4828a8023bd 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -33,46 +33,11 @@ #include "cell_context.h" #include "rtasm/rtasm_ppc_spe.h" -typedef uint64_t register_mask; - -int allocate_available_register(register_mask *m) -{ - unsigned i; - for (i = 0; i < 64; i++) { - const uint64_t mask = (1ULL << i); - - if ((m[0] & mask) != 0) { - m[0] &= ~mask; - return i; - } - } - - return -1; -} - - -int allocate_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) != 0); - - m[0] &= ~(1ULL << reg); - return reg; -} - - -void release_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) == 0); - - m[0] |= (1ULL << reg); -} - /** * Emit a 4x4 matrix transpose operation * * \param p Function that the transpose operation is to be appended to - * \param m Live register mask * \param row0 Register containing row 0 of the source matrix * \param row1 Register containing row 1 of the source matrix * \param row2 Register containing row 2 of the source matrix @@ -91,15 +56,15 @@ void release_register(register_mask *m, unsigned reg) * This function requires that four temporary are available on entry. */ static void -emit_matrix_transpose(struct spe_function *p, register_mask *m, +emit_matrix_transpose(struct spe_function *p, unsigned row0, unsigned row1, unsigned row2, unsigned row3, unsigned dest_ptr, unsigned shuf_ptr, unsigned count) { - int shuf_hi = allocate_available_register(m); - int shuf_lo = allocate_available_register(m); - int t1 = allocate_available_register(m); - int t2 = allocate_available_register(m); + int shuf_hi = spe_allocate_available_register(p); + int shuf_lo = spe_allocate_available_register(p); + int t1 = spe_allocate_available_register(p); + int t2 = spe_allocate_available_register(p); int t3; int t4; int col0; @@ -169,19 +134,19 @@ emit_matrix_transpose(struct spe_function *p, register_mask *m, /* Release all of the temporary registers used. */ - release_register(m, col0); - release_register(m, col1); - release_register(m, col2); - release_register(m, col3); - release_register(m, shuf_hi); - release_register(m, shuf_lo); - release_register(m, t2); - release_register(m, t4); + spe_release_register(p, col0); + spe_release_register(p, col1); + spe_release_register(p, col2); + spe_release_register(p, col3); + spe_release_register(p, shuf_hi); + spe_release_register(p, shuf_lo); + spe_release_register(p, t2); + spe_release_register(p, t4); } static void -emit_fetch(struct spe_function *p, register_mask *m, +emit_fetch(struct spe_function *p, unsigned in_ptr, unsigned *offset, unsigned out_ptr, unsigned shuf_ptr, enum pipe_format format) @@ -191,11 +156,11 @@ emit_fetch(struct spe_function *p, register_mask *m, const unsigned type = pf_type(format); const unsigned bytes = pf_size_x(format); - int v0 = allocate_available_register(m); - int v1 = allocate_available_register(m); - int v2 = allocate_available_register(m); - int v3 = allocate_available_register(m); - int tmp = allocate_available_register(m); + int v0 = spe_allocate_available_register(p); + int v1 = spe_allocate_available_register(p); + int v2 = spe_allocate_available_register(p); + int v3 = spe_allocate_available_register(p); + int tmp = spe_allocate_available_register(p); int float_zero = -1; int float_one = -1; float scale_signed = 0.0; @@ -260,19 +225,19 @@ emit_fetch(struct spe_function *p, register_mask *m, if (count < 4) { - float_one = allocate_available_register(m); + float_one = spe_allocate_available_register(p); spe_il(p, float_one, 1); spe_cuflt(p, float_one, float_one, 0); if (count < 3) { - float_zero = allocate_available_register(m); + float_zero = spe_allocate_available_register(p); spe_il(p, float_zero, 0); } } - release_register(m, tmp); + spe_release_register(p, tmp); - emit_matrix_transpose(p, m, v0, v1, v2, v3, out_ptr, shuf_ptr, count); + emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); switch (count) { case 1: @@ -284,11 +249,11 @@ emit_fetch(struct spe_function *p, register_mask *m, } if (float_zero != -1) { - release_register(m, float_zero); + spe_release_register(p, float_zero); } if (float_one != -1) { - release_register(m, float_one); + spe_release_register(p, float_one); } } @@ -297,7 +262,6 @@ void cell_update_vertex_fetch(struct draw_context *draw) { struct cell_context *const cell = (struct cell_context *) draw->driver_private; - register_mask m = ~0; struct spe_function *p = &cell->attrib_fetch; unsigned function_index[PIPE_ATTRIB_MAX]; unsigned unique_attr_formats; @@ -338,18 +302,11 @@ void cell_update_vertex_fetch(struct draw_context *draw) spe_init_func(p, 136 * unique_attr_formats); - /* Registers 0, 1, and 2 are reserved by the ABI. - */ - allocate_register(&m, 0); - allocate_register(&m, 1); - allocate_register(&m, 2); - - /* Allocate registers for the function's input parameters. */ - out_ptr = allocate_register(&m, 3); - in_ptr = allocate_register(&m, 4); - shuf_ptr = allocate_register(&m, 5); + out_ptr = spe_allocate_register(p, 3); + in_ptr = spe_allocate_register(p, 4); + shuf_ptr = spe_allocate_register(p, 5); /* Generate code for the individual attribute fetch functions. @@ -362,7 +319,7 @@ void cell_update_vertex_fetch(struct draw_context *draw) - (void *) p->store); offset = 0; - emit_fetch(p, & m, in_ptr, &offset, out_ptr, shuf_ptr, + emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, draw->vertex_element[i].src_format); spe_bi(p, 0, 0, 0); -- cgit v1.2.3 From 1fb3c94f03e07a80bb7a93777d4fef5173da71ca Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 12 Mar 2008 02:20:40 +1100 Subject: nv50: some forgotten changes --- src/gallium/drivers/nv50/nv50_draw.c | 45 +++++++++++++++--------- src/gallium/drivers/nv50/nv50_screen.c | 2 ++ src/gallium/winsys/dri/nouveau/nouveau_context.c | 2 +- 3 files changed, 32 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c index c6ed6838c6a..790408c6df8 100644 --- a/src/gallium/drivers/nv50/nv50_draw.c +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -3,53 +3,66 @@ #include "nv50_context.h" -struct nv50_draw_stage { - struct draw_stage draw; +struct nv50_render_stage { + struct draw_stage stage; struct nv50_context *nv50; }; +static INLINE struct nv50_render_stage * +nv50_render_stage(struct draw_stage *stage) +{ + return (struct nv50_render_stage *)stage; +} + static void -nv50_draw_point(struct draw_stage *draw, struct prim_header *prim) +nv50_render_point(struct draw_stage *stage, struct prim_header *prim) { NOUVEAU_ERR("\n"); } static void -nv50_draw_line(struct draw_stage *draw, struct prim_header *prim) +nv50_render_line(struct draw_stage *stage, struct prim_header *prim) { NOUVEAU_ERR("\n"); } static void -nv50_draw_tri(struct draw_stage *draw, struct prim_header *prim) +nv50_render_tri(struct draw_stage *stage, struct prim_header *prim) { NOUVEAU_ERR("\n"); } static void -nv50_draw_flush(struct draw_stage *draw, unsigned flags) +nv50_render_flush(struct draw_stage *stage, unsigned flags) { } static void -nv50_draw_reset_stipple_counter(struct draw_stage *draw) +nv50_render_reset_stipple_counter(struct draw_stage *stage) { NOUVEAU_ERR("\n"); } +static void +nv50_render_destroy(struct draw_stage *stage) +{ + free(stage); +} + struct draw_stage * nv50_draw_render_stage(struct nv50_context *nv50) { - struct nv50_draw_stage *nv50draw = CALLOC_STRUCT(nv50_draw_stage); + struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage); - nv50draw->nv50 = nv50; - nv50draw->draw.draw = nv50->draw; - nv50draw->draw.point = nv50_draw_point; - nv50draw->draw.line = nv50_draw_line; - nv50draw->draw.tri = nv50_draw_tri; - nv50draw->draw.flush = nv50_draw_flush; - nv50draw->draw.reset_stipple_counter = nv50_draw_reset_stipple_counter; + rs->nv50 = nv50; + rs->stage.draw = nv50->draw; + rs->stage.destroy = nv50_render_destroy; + rs->stage.point = nv50_render_point; + rs->stage.line = nv50_render_line; + rs->stage.tri = nv50_render_tri; + rs->stage.flush = nv50_render_flush; + rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter; - return &nv50draw->draw; + return &rs->stage; } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index f091779e3b0..77ceb678f21 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -77,6 +77,8 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, int param) return 16.0; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 4.0; + case PIPE_CAP_BITMAP_TEXCOORD_BIAS: + return 0.0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0.0; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c index 2e54729aa97..8dac08a5d21 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.c @@ -72,7 +72,7 @@ nouveau_channel_context_create(struct nouveau_device *nvdev, unsigned chipset) return NULL; } - switch (chipset) { + switch (chipset & 0xf0) { case 0x50: case 0x80: ret = nouveau_surface_channel_create_nv50(nvc); -- cgit v1.2.3 From b2e48f848496d5e315e536688c8c33dfb1fab7eb Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 12 Mar 2008 02:39:13 +1100 Subject: nv50: convert to hwctx-in-screen as nv40 is --- src/gallium/drivers/nv50/nv50_context.c | 61 ++++----------------------------- src/gallium/drivers/nv50/nv50_context.h | 11 +++--- src/gallium/drivers/nv50/nv50_screen.c | 49 +++++++++++++++++++++++++- src/gallium/drivers/nv50/nv50_screen.h | 5 +++ src/gallium/drivers/nv50/nv50_surface.c | 4 +-- 5 files changed, 67 insertions(+), 63 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index c937b8de6d4..980d066c844 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -10,10 +10,11 @@ static void nv50_flush(struct pipe_context *pipe, unsigned flags) { struct nv50_context *nv50 = (struct nv50_context *)pipe; - struct nouveau_winsys *nvws = nv50->nvws; + struct nv50_screen *screen = nv50->screen; + struct nouveau_winsys *nvws = screen->nvws; if (flags & PIPE_FLUSH_WAIT) { - nvws->notifier_reset(nv50->sync, 0); + nvws->notifier_reset(screen->sync, 0); BEGIN_RING(tesla, 0x104, 1); OUT_RING (0); BEGIN_RING(tesla, 0x100, 1); @@ -23,7 +24,7 @@ nv50_flush(struct pipe_context *pipe, unsigned flags) FIRE_RING(); if (flags & PIPE_FLUSH_WAIT) - nvws->notifier_wait(nv50->sync, 0, 0, 2000); + nvws->notifier_wait(screen->sync, 0, 0, 2000); } static void @@ -35,66 +36,18 @@ nv50_destroy(struct pipe_context *pipe) free(nv50); } -static boolean -nv50_init_hwctx(struct nv50_context *nv50, int tesla_class) -{ - struct nouveau_winsys *nvws = nv50->nvws; - int ret; - - if ((ret = nvws->grobj_alloc(nvws, tesla_class, &nv50->tesla))) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - BEGIN_RING(tesla, NV50TCL_DMA_NOTIFY, 1); - OUT_RING (nv50->sync->handle); - - FIRE_RING (); - return TRUE; -} - -#define GRCLASS5097_CHIPSETS 0x00000000 -#define GRCLASS8297_CHIPSETS 0x00000010 struct pipe_context * nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) { struct pipe_winsys *pipe_winsys = pscreen->winsys; - struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws; - unsigned chipset = nv50_screen(pscreen)->chipset; + struct nv50_screen *screen = nv50_screen(pscreen); struct nv50_context *nv50; - int tesla_class, ret; - - if ((chipset & 0xf0) != 0x50 && (chipset & 0xf0) != 0x80) { - NOUVEAU_ERR("Not a G8x chipset\n"); - return NULL; - } - - if (GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f))) { - tesla_class = 0x5097; - } else - if (GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) { - tesla_class = 0x8297; - } else { - NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); - return NULL; - } nv50 = CALLOC_STRUCT(nv50_context); if (!nv50) return NULL; - nv50->chipset = chipset; - nv50->nvws = nvws; - - if ((ret = nvws->notifier_alloc(nvws, 1, &nv50->sync))) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - free(nv50); - return NULL; - } - - if (!nv50_init_hwctx(nv50, tesla_class)) { - free(nv50); - return NULL; - } + nv50->screen = screen; + nv50->pctx_id = pctx_id; nv50->pipe.winsys = pipe_winsys; nv50->pipe.screen = pscreen; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index a529bf3c3e3..e2656bd539b 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -11,10 +11,11 @@ #include "nouveau/nouveau_gldefs.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv50_context *ctx = nv50 + struct nv50_screen *ctx = nv50->screen #include "nouveau/nouveau_push.h" #include "nv50_state.h" +#include "nv50_screen.h" #define NOUVEAU_ERR(fmt, args...) \ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); @@ -23,13 +24,11 @@ struct nv50_context { struct pipe_context pipe; - struct nouveau_winsys *nvws; - struct draw_context *draw; + struct nv50_screen *screen; + unsigned pctx_id; - int chipset; - struct nouveau_grobj *tesla; - struct nouveau_notifier *sync; + struct draw_context *draw; }; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 77ceb678f21..f8fd11dc5ff 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -4,6 +4,11 @@ #include "nv50_context.h" #include "nv50_screen.h" +#include "nouveau/nouveau_stateobj.h" + +#define GRCLASS5097_CHIPSETS 0x00000000 +#define GRCLASS8297_CHIPSETS 0x00000010 + static boolean nv50_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, uint type) @@ -96,13 +101,55 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, unsigned chipset) { struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); + struct nouveau_stateobj *so; + unsigned tesla_class = 0, ret; if (!screen) return NULL; - screen->chipset = chipset; screen->nvws = nvws; + /* 3D object */ + if ((chipset & 0xf0) != 0x50 && (chipset & 0xf0) != 0x80) { + NOUVEAU_ERR("Not a G8x chipset\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + if (GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f))) { + tesla_class = 0x5097; + } else + if (GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) { + tesla_class = 0x8297; + } else { + NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, tesla_class, &screen->tesla); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* Sync notifier */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static tesla init */ + so = so_new(128, 0); + so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_emit(nvws, so); + so_ref(NULL, &so); + nvws->push_flush(nvws->channel, 0); + screen->pipe.winsys = ws; screen->pipe.destroy = nv50_screen_destroy; diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index d664816a031..6e4120d6693 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -8,6 +8,11 @@ struct nv50_screen { struct nouveau_winsys *nvws; unsigned chipset; + + unsigned cur_pctx; + + struct nouveau_grobj *tesla; + struct nouveau_notifier *sync; }; static INLINE struct nv50_screen * diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 39cf675a57c..4e294198b07 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -40,7 +40,7 @@ nv50_surface_copy(struct pipe_context *pipe, unsigned flip, unsigned width, unsigned height) { struct nv50_context *nv50 = (struct nv50_context *)pipe; - struct nouveau_winsys *nvws = nv50->nvws; + struct nouveau_winsys *nvws = nv50->screen->nvws; nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, width, height); @@ -52,7 +52,7 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, unsigned height, unsigned value) { struct nv50_context *nv50 = (struct nv50_context *)pipe; - struct nouveau_winsys *nvws = nv50->nvws; + struct nouveau_winsys *nvws = nv50->screen->nvws; nvws->surface_fill(nvws, dest, destx, desty, width, height, value); } -- cgit v1.2.3 From 3250bacd2411d3f1af50135599380b2140238535 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 12 Mar 2008 02:56:10 +1100 Subject: nv50: create blend stateobj --- src/gallium/drivers/nv50/nv50_context.h | 15 ++++++++ src/gallium/drivers/nv50/nv50_screen.c | 10 ++++++ src/gallium/drivers/nv50/nv50_state.c | 62 ++++++++++++++++++++++++++++++++- 3 files changed, 86 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index e2656bd539b..6096818d40d 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -22,6 +22,13 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +#define NV50_NEW_BLEND (1 << 0) + +struct nv50_blend_stateobj { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + struct nv50_context { struct pipe_context pipe; @@ -29,8 +36,16 @@ struct nv50_context { unsigned pctx_id; struct draw_context *draw; + + unsigned dirty; + struct nv50_blend_stateobj *blend; }; +static INLINE struct nv50_context * +nv50_context(struct pipe_context *pipe) +{ + return (struct nv50_context *)pipe; +} extern void nv50_init_miptree_functions(struct nv50_context *nv50); extern void nv50_init_surface_functions(struct nv50_context *nv50); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index f8fd11dc5ff..62c23c790ca 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -103,6 +103,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); struct nouveau_stateobj *so; unsigned tesla_class = 0, ret; + int i; if (!screen) return NULL; @@ -146,6 +147,15 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, so = so_new(128, 0); so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); + so_method(so, screen->tesla, NV50TCL_DMA_IN_MEMORY0(0), + NV50TCL_DMA_IN_MEMORY0__SIZE); + for (i = 0; i < NV50TCL_DMA_IN_MEMORY0__SIZE; i++) + so_data(so, nvws->channel->vram->handle); + so_method(so, screen->tesla, NV50TCL_DMA_IN_MEMORY1(0), + NV50TCL_DMA_IN_MEMORY1__SIZE); + for (i = 0; i < NV50TCL_DMA_IN_MEMORY1__SIZE; i++) + so_data(so, nvws->channel->vram->handle); + so_emit(nvws, so); so_ref(NULL, &so); nvws->push_flush(nvws->channel, 0); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 99dcab51b26..7a01100fd74 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -5,21 +5,81 @@ #include "nv50_context.h" #include "nv50_state.h" +#include "nouveau/nouveau_stateobj.h" + static void * nv50_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) { - return NULL; + struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); + unsigned cmask = 0, i; + + /*XXX ignored: + * - dither + */ + + if (cso->blend_enable == 0) { + so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); + for (i = 0; i < 8; i++) + so_data(so, 0); + } else { + so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); + for (i = 0; i < 8; i++) + so_data(so, 1); + so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5); + so_data (so, nvgl_blend_eqn(cso->rgb_func)); + so_data (so, nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->rgb_dst_factor)); + so_data (so, nvgl_blend_eqn(cso->alpha_func)); + so_data (so, nvgl_blend_func(cso->alpha_src_factor)); + so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1); + so_data (so, nvgl_blend_func(cso->alpha_dst_factor)); + } + + if (cso->logicop_enable == 0 ) { + so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } else { + so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } + + if (cso->colormask & PIPE_MASK_R) + cmask |= (1 << 0); + if (cso->colormask & PIPE_MASK_G) + cmask |= (1 << 4); + if (cso->colormask & PIPE_MASK_B) + cmask |= (1 << 8); + if (cso->colormask & PIPE_MASK_A) + cmask |= (1 << 12); + so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8); + for (i = 0; i < 8; i++) + so_data(so, cmask); + + bso->pipe = *cso; + so_ref(so, &bso->so); + return (void *)bso; } static void nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend = hwcso; + nv50->dirty |= NV50_NEW_BLEND; } static void nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) { + struct nv50_blend_stateobj *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); } static void * -- cgit v1.2.3 From 9c29512154992f95c11939615ddcbef185c6a96c Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 12 Mar 2008 02:59:20 +1100 Subject: nv50: crappy state validate/emit function Just for testing stateobjs to make sure they don't hang the engine. --- src/gallium/drivers/nv50/Makefile | 1 + src/gallium/drivers/nv50/nv50_context.h | 2 ++ src/gallium/drivers/nv50/nv50_state_validate.c | 14 ++++++++++++++ src/gallium/drivers/nv50/nv50_vbo.c | 8 ++++++++ 4 files changed, 25 insertions(+) create mode 100644 src/gallium/drivers/nv50/nv50_state_validate.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile index 1c0b82887a1..a62a4d961bc 100644 --- a/src/gallium/drivers/nv50/Makefile +++ b/src/gallium/drivers/nv50/Makefile @@ -11,6 +11,7 @@ DRIVER_SOURCES = \ nv50_query.c \ nv50_screen.c \ nv50_state.c \ + nv50_state_validate.c \ nv50_surface.c \ nv50_vbo.c diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 6096818d40d..98b9aba0796 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -70,4 +70,6 @@ extern boolean nv50_draw_elements(struct pipe_context *pipe, extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue); +extern boolean nv50_state_validate(struct nv50_context *nv50); + #endif diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c new file mode 100644 index 00000000000..a89d1526c58 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -0,0 +1,14 @@ +#include "nv50_context.h" +#include "nouveau/nouveau_stateobj.h" + +boolean +nv50_state_validate(struct nv50_context *nv50) +{ + struct nouveau_winsys *nvws = nv50->screen->nvws; + + if (nv50->dirty & NV50_NEW_BLEND) + so_emit(nvws, nv50->blend->so); + + return TRUE; +} + diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 6c0dc23a439..b01ce1d42c5 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -9,6 +9,10 @@ boolean nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50_state_validate(nv50); + NOUVEAU_ERR("unimplemented\n"); return TRUE; } @@ -18,6 +22,10 @@ nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50_state_validate(nv50); + NOUVEAU_ERR("unimplemented\n"); return TRUE; } -- cgit v1.2.3 From 06bd7d78b979df66915b161157f2b6b1c09ad285 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 12 Mar 2008 03:41:05 +1100 Subject: nv50: depth_stencil_alpha stateobj --- src/gallium/drivers/nv50/nv50_context.h | 9 +++- src/gallium/drivers/nv50/nv50_state.c | 70 +++++++++++++++++++++++++- src/gallium/drivers/nv50/nv50_state_validate.c | 3 ++ 3 files changed, 80 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 98b9aba0796..93027648940 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -22,13 +22,19 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); -#define NV50_NEW_BLEND (1 << 0) +#define NV50_NEW_BLEND (1 << 0) +#define NV50_NEW_ZSA (1 << 1) struct nv50_blend_stateobj { struct pipe_blend_state pipe; struct nouveau_stateobj *so; }; +struct nv50_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + struct nv50_context { struct pipe_context pipe; @@ -39,6 +45,7 @@ struct nv50_context { unsigned dirty; struct nv50_blend_stateobj *blend; + struct nv50_zsa_stateobj *zsa; }; static INLINE struct nv50_context * diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 7a01100fd74..8fd4f774e27 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -127,17 +127,85 @@ static void * nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *cso) { - return NULL; + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); + struct nouveau_stateobj *so = so_new(64, 0); + + so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); + so_data (so, cso->depth.writemask ? 1 : 0); + if (cso->depth.enabled) { + so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1); + so_data (so, nvgl_comparison_op(cso->depth.func)); + } else { + so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[0].enabled) { + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 5); + so_data (so, 1); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 3); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].write_mask); + so_data (so, cso->stencil[0].value_mask); + } else { + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 8); + so_data (so, 1); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].write_mask); + so_data (so, cso->stencil[1].value_mask); + } else { + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + if (cso->alpha.enabled) { + so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_ALPHA_TEST_REF, 2); + so_data (so, fui(cso->alpha.ref)); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + } else { + so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); + so_data (so, 0); + } + + zsa->pipe = *cso; + so_ref(so, &zsa->so); + return (void *)zsa; } static void nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->zsa = hwcso; + nv50->dirty |= NV50_NEW_ZSA; } static void nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) { + struct nv50_zsa_stateobj *zsa = hwcso; + + so_ref(NULL, &zsa->so); + FREE(zsa); } static void * diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index a89d1526c58..786987c76ad 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -8,7 +8,10 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & NV50_NEW_BLEND) so_emit(nvws, nv50->blend->so); + if (nv50->dirty & NV50_NEW_ZSA) + so_emit(nvws, nv50->zsa->so); + nv50->dirty = 0; return TRUE; } -- cgit v1.2.3 From 2fee5f76483feb301546b24c26eea699732ffb57 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 12 Mar 2008 03:54:53 +1100 Subject: nv50: scissor/viewport/blend colour/stipple --- src/gallium/drivers/nv50/nv50_context.h | 12 +++++-- src/gallium/drivers/nv50/nv50_state.c | 16 +++++++++ src/gallium/drivers/nv50/nv50_state_validate.c | 49 ++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 93027648940..406ad2ca634 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -22,8 +22,12 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); -#define NV50_NEW_BLEND (1 << 0) -#define NV50_NEW_ZSA (1 << 1) +#define NV50_NEW_BLEND (1 << 0) +#define NV50_NEW_ZSA (1 << 1) +#define NV50_NEW_BLEND_COLOUR (1 << 2) +#define NV50_NEW_STIPPLE (1 << 3) +#define NV50_NEW_SCISSOR (1 << 4) +#define NV50_NEW_VIEWPORT (1 << 5) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -46,6 +50,10 @@ struct nv50_context { unsigned dirty; struct nv50_blend_stateobj *blend; struct nv50_zsa_stateobj *zsa; + struct pipe_blend_color blend_colour; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; }; static INLINE struct nv50_context * diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 8fd4f774e27..40e23f46094 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -246,6 +246,10 @@ static void nv50_set_blend_color(struct pipe_context *pipe, const struct pipe_blend_color *bcol) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend_colour = *bcol; + nv50->dirty |= NV50_NEW_BLEND_COLOUR; } static void @@ -270,18 +274,30 @@ static void nv50_set_polygon_stipple(struct pipe_context *pipe, const struct pipe_poly_stipple *stipple) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->stipple = *stipple; + nv50->dirty |= NV50_NEW_STIPPLE; } static void nv50_set_scissor_state(struct pipe_context *pipe, const struct pipe_scissor_state *s) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->scissor = *s; + nv50->dirty |= NV50_NEW_SCISSOR; } static void nv50_set_viewport_state(struct pipe_context *pipe, const struct pipe_viewport_state *vpt) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->viewport = *vpt; + nv50->dirty |= NV50_NEW_VIEWPORT; } static void diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 786987c76ad..8920b6f49b0 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -5,12 +5,61 @@ boolean nv50_state_validate(struct nv50_context *nv50) { struct nouveau_winsys *nvws = nv50->screen->nvws; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + unsigned i; if (nv50->dirty & NV50_NEW_BLEND) so_emit(nvws, nv50->blend->so); + if (nv50->dirty & NV50_NEW_ZSA) so_emit(nvws, nv50->zsa->so); + if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { + so = so_new(5, 0); + so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 8); + so_data (so, fui(nv50->blend_colour.color[3])); + so_data (so, fui(nv50->blend_colour.color[0])); + so_data (so, fui(nv50->blend_colour.color[1])); + so_data (so, fui(nv50->blend_colour.color[2])); + so_emit(nvws, so); + so_ref(NULL, &so); + } + + if (nv50->dirty & NV50_NEW_STIPPLE) { + so = so_new(33, 0); + so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv50->stipple.stipple[i]); + so_emit(nvws, so); + so_ref(NULL, &so); + } + + if (nv50->dirty & NV50_NEW_SCISSOR) { + so = so_new(3, 0); + so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2); + so_data (so, (nv50->scissor.maxx << 16) | + nv50->scissor.minx); + so_data (so, (nv50->scissor.maxy << 16) | + nv50->scissor.miny); + so_emit(nvws, so); + so_ref(NULL, &so); + } + + if (nv50->dirty & NV50_NEW_VIEWPORT) { + so = so_new(8, 0); + so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3); + so_data (so, fui(nv50->viewport.translate[0])); + so_data (so, fui(nv50->viewport.translate[1])); + so_data (so, fui(nv50->viewport.translate[2])); + so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); + so_data (so, fui(nv50->viewport.scale[0])); + so_data (so, fui(nv50->viewport.scale[1])); + so_data (so, fui(nv50->viewport.scale[2])); + so_emit(nvws, so); + so_ref(NULL, &so); + } + nv50->dirty = 0; return TRUE; } -- cgit v1.2.3 From cd85dc1e5dfa37cb9bee696e5e18332e3f1d65a1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 12 Mar 2008 04:29:58 +1100 Subject: nv50: rasterizer stateobj --- src/gallium/drivers/nv50/nv50_context.h | 7 ++ src/gallium/drivers/nv50/nv50_state.c | 120 ++++++++++++++++++++++++- src/gallium/drivers/nv50/nv50_state_validate.c | 3 + 3 files changed, 129 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 406ad2ca634..d839cf701eb 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -28,6 +28,7 @@ #define NV50_NEW_STIPPLE (1 << 3) #define NV50_NEW_SCISSOR (1 << 4) #define NV50_NEW_VIEWPORT (1 << 5) +#define NV50_NEW_RASTERIZER (1 << 6) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -39,6 +40,11 @@ struct nv50_zsa_stateobj { struct nouveau_stateobj *so; }; +struct nv50_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + struct nv50_context { struct pipe_context pipe; @@ -50,6 +56,7 @@ struct nv50_context { unsigned dirty; struct nv50_blend_stateobj *blend; struct nv50_zsa_stateobj *zsa; + struct nv50_rasterizer_stateobj *rasterizer; struct pipe_blend_color blend_colour; struct pipe_poly_stipple stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 40e23f46094..088df9e6756 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -110,17 +110,135 @@ static void * nv50_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { - return NULL; + struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_rasterizer_stateobj *rso = + CALLOC_STRUCT(nv50_rasterizer_stateobj); + unsigned i; + + /*XXX: ignored + * - light_twosize + * - point_smooth + * - multisample + * - point_sprite / sprite_coord_mode + */ + + so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : + NV50TCL_SHADE_MODEL_SMOOTH); + + so_method(so, tesla, NV50TCL_LINE_WIDTH, 1); + so_data (so, fui(cso->line_width)); + so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1); + so_data (so, cso->line_smooth ? 1 : 0); + if (cso->line_stipple_enable) { + so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + } else { + so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, tesla, NV50TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + } + so_data(so, cso->poly_smooth ? 1 : 0); + + so_method(so, tesla, NV50TCL_CULL_FACE_ENABLE, 3); + so_data (so, cso->cull_mode != PIPE_WINDING_NONE); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, NV50TCL_FRONT_FACE_CCW); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV50TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + } + } else { + so_data(so, NV50TCL_FRONT_FACE_CW); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV50TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + } + } + + so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); + so_data (so, fui(cso->offset_scale)); + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); + so_data (so, fui(cso->offset_units * 2)); + } + + rso->pipe = *cso; + so_ref(so, &rso->so); + return (void *)rso; } static void nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->rasterizer = hwcso; + nv50->dirty |= NV50_NEW_RASTERIZER; } static void nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) { + struct nv50_rasterizer_stateobj *rso = hwcso; + + so_ref(NULL, &rso->so); + FREE(rso); } static void * diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 8920b6f49b0..8921bfaf887 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -15,6 +15,9 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & NV50_NEW_ZSA) so_emit(nvws, nv50->zsa->so); + if (nv50->dirty & NV50_NEW_RASTERIZER) + so_emit(nvws, nv50->rasterizer->so); + if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { so = so_new(5, 0); so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 8); -- cgit v1.2.3 From cd9ed05aec9d1d9614973165fd13647ba2e1b8c7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 12 Mar 2008 04:50:53 +1100 Subject: nv50: start on fb state --- src/gallium/drivers/nv50/nv50_context.h | 2 + src/gallium/drivers/nv50/nv50_state.c | 4 ++ src/gallium/drivers/nv50/nv50_state_validate.c | 93 ++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index d839cf701eb..f532fa6bfb3 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -29,6 +29,7 @@ #define NV50_NEW_SCISSOR (1 << 4) #define NV50_NEW_VIEWPORT (1 << 5) #define NV50_NEW_RASTERIZER (1 << 6) +#define NV50_NEW_FRAMEBUFFER (1 << 7) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -61,6 +62,7 @@ struct nv50_context { struct pipe_poly_stipple stipple; struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; }; static INLINE struct nv50_context * diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 088df9e6756..aa65fd482e4 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -386,6 +386,10 @@ static void nv50_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->framebuffer = *fb; + nv50->dirty |= NV50_NEW_FRAMEBUFFER; } static void diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 8921bfaf887..68d419f9fc8 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -1,6 +1,96 @@ #include "nv50_context.h" #include "nouveau/nouveau_stateobj.h" +static void +nv50_state_validate_fb(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so = so_new(128, 18); + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + unsigned i, w, h, gw = 0; + + for (i = 0; i < fb->num_cbufs; i++) { + if (!gw) { + w = fb->cbufs[i]->width; + h = fb->cbufs[i]->height; + gw = 1; + } else { + assert(w != fb->cbufs[i]->width); + assert(h != fb->cbufs[i]->height); + } + + so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2); + so_data (so, fb->cbufs[i]->width); + so_data (so, fb->cbufs[i]->height); + + so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); + so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); + switch (fb->cbufs[i]->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + so_data(so, 0xcf); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + so_data(so, 0xe8); + break; + default: + { + char fmt[128]; + pf_sprint_name(fmt, fb->cbufs[i]->format); + NOUVEAU_ERR("AIIII unknown format %s\n", fmt); + } + so_data(so, 0xe6); + break; + } + so_data(so, 0x00000040); + so_data(so, 0x00000000); + } + + if (fb->zsbuf) { + if (!gw) { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + gw = 1; + } else { + assert(w != fb->zsbuf->width); + assert(h != fb->zsbuf->height); + } + + so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); + so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); + switch (fb->zsbuf->format) { + case PIPE_FORMAT_Z24S8_UNORM: + so_data(so, 0x16); + break; + default: + { + char fmt[128]; + pf_sprint_name(fmt, fb->zsbuf->format); + NOUVEAU_ERR("AIIII unknown format %s\n", fmt); + } + so_data(so, 0x16); + break; + } + so_data(so, 0x00000040); + so_data(so, 0x00000000); + } + + so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); + so_data (so, w << 16); + so_data (so, h << 16); + so_method(so, tesla, 0xff0, 2); + so_data (so, w << 16); + so_data (so, h << 16); + + so_emit(nv50->screen->nvws, so); + so_ref(NULL, &so); +} + boolean nv50_state_validate(struct nv50_context *nv50) { @@ -9,6 +99,9 @@ nv50_state_validate(struct nv50_context *nv50) struct nouveau_stateobj *so; unsigned i; + if (nv50->dirty & NV50_NEW_FRAMEBUFFER) + nv50_state_validate_fb(nv50); + if (nv50->dirty & NV50_NEW_BLEND) so_emit(nvws, nv50->blend->so); -- cgit v1.2.3 From 169912b71a4242389301890ef303046d49ce71df Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Tue, 11 Mar 2008 19:22:02 +0100 Subject: nv30: silence some warnings --- src/gallium/drivers/nv30/nv30_fragprog.c | 2 +- src/gallium/drivers/nv30/nv30_state.c | 2 +- src/gallium/drivers/nv30/nv30_vbo.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 09ad555c324..0a2ba04f958 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -378,7 +378,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, { const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); struct nv30_sreg src[3], dst, tmp; - int mask, sat, unit; + int mask, sat, unit = 0; int ai = -1, ci = -1; int i; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index aa3fe7837e4..319d53fccac 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -538,7 +538,7 @@ nv30_set_framebuffer_state(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct pipe_surface *rt[4], *zeta = NULL; - uint32_t rt_enable, rt_format, w, h; + uint32_t rt_enable, rt_format, w = 0, h = 0; int i, colour_format = 0, zeta_format = 0; rt_enable = 0; diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index 9e00cdac3fb..a62462f7bc5 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -102,7 +102,7 @@ nv30_vbo_arrays_update(struct nv30_context *nv30) { struct nv30_vertex_program *vp = nv30->vertprog.active; uint32_t inputs, vtxfmt[16]; - int hw, num_hw; + int hw, num_hw = 0; nv30->vb_enable = 0; -- cgit v1.2.3 From 21ff00306131cd5598f95285badaaabc98021e11 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 11 Mar 2008 23:51:27 +0000 Subject: gallium: Silence MSVC warnings. --- src/gallium/auxiliary/util/u_snprintf.c | 10 +++------- src/gallium/drivers/i915simple/i915_state.c | 7 +++++-- src/gallium/drivers/softpipe/sp_state_sampler.c | 8 +++++--- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c index 5e19a2ddb2a..61c20b48f7e 100644 --- a/src/gallium/auxiliary/util/u_snprintf.c +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -241,7 +241,6 @@ static void *mymemcpy(void *, void *, size_t); #endif /* !HAVE_VASPRINTF */ #if !HAVE_VSNPRINTF -#include <errno.h> /* For ERANGE and errno. */ #include <limits.h> /* For *_MAX. */ #if HAVE_INTTYPES_H #include <inttypes.h> /* For intmax_t (if not defined in <stdint.h>). */ @@ -445,8 +444,6 @@ static UINTMAX_T cast(LDOUBLE); static UINTMAX_T myround(LDOUBLE); static LDOUBLE mypow10(int); -extern int errno; - int rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) { @@ -747,7 +744,7 @@ rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) goto out; break; case 'c': - cvalue = va_arg(args, int); + cvalue = (unsigned char)va_arg(args, int); OUTCHAR(str, len, size, cvalue); break; case 's': @@ -844,7 +841,6 @@ out: str[size - 1] = '\0'; if (overflow || len >= INT_MAX) { - errno = overflow ? EOVERFLOW : ERANGE; return -1; } return (int)len; @@ -1106,7 +1102,7 @@ again: * Factor of ten with the number of digits needed for the fractional * part. For example, if the precision is 3, the mask will be 1000. */ - mask = mypow10(precision); + mask = (UINTMAX_T)mypow10(precision); /* * We "cheat" by converting the fractional part to integer by * multiplying by a factor of ten. @@ -1358,7 +1354,7 @@ cast(LDOUBLE value) if (value >= UINTMAX_MAX) return UINTMAX_MAX; - result = value; + result = (UINTMAX_T)value; /* * At least on NetBSD/sparc64 3.0.2 and 4.99.30, casting long double to * an integer type converts e.g. 1.9 to 2 instead of 1 (which violates diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 24143243d3d..d9ab483bfca 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -273,6 +273,7 @@ static void i915_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { struct i915_context *i915 = i915_context(pipe); + unsigned i; assert(num <= PIPE_MAX_SAMPLERS); @@ -281,8 +282,10 @@ static void i915_bind_sampler_states(struct pipe_context *pipe, !memcmp(i915->sampler, sampler, num * sizeof(void *))) return; - memcpy(i915->sampler, sampler, num * sizeof(void *)); - memset(&i915->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * sizeof(void *)); + for (i = 0; i < num; ++i) + i915->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + i915->sampler[i] = NULL; i915->num_samplers = num; diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 7cf85b9207b..033288a0aa3 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -56,6 +56,7 @@ softpipe_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned i; assert(num <= PIPE_MAX_SAMPLERS); @@ -66,9 +67,10 @@ softpipe_bind_sampler_states(struct pipe_context *pipe, draw_flush(softpipe->draw); - memcpy(softpipe->sampler, sampler, num * sizeof(void *)); - memset(&softpipe->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * - sizeof(void *)); + for (i = 0; i < num; ++i) + softpipe->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + softpipe->sampler[i] = NULL; softpipe->num_samplers = num; -- cgit v1.2.3 From 339e7ec6805e6de8794514c0a935081b5d36d38f Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 11 Mar 2008 18:54:31 -0600 Subject: gallium: rework CSO-related code in state tracker Use the code in cso_context.c rather than st_cache.c. Basically, binding of state objects now goes through the CSO module. But Vertex/fragment shaders go through pipe->bind_fs/vs_state() since they're not cached by the CSO module at this time. Also, update softpipe driver to handle NULL state objects in various places. This happens during context destruction. May need to update other drivers... --- src/gallium/auxiliary/draw/draw_aaline.c | 3 +- src/gallium/auxiliary/draw/draw_aapoint.c | 3 +- src/gallium/auxiliary/draw/draw_pstipple.c | 3 +- src/gallium/drivers/softpipe/sp_state_fs.c | 3 +- src/mesa/sources | 1 - src/mesa/state_tracker/st_atom_blend.c | 52 ++++++-------- src/mesa/state_tracker/st_atom_depth.c | 60 +++++++--------- src/mesa/state_tracker/st_atom_rasterizer.c | 108 ++++++++++++++-------------- src/mesa/state_tracker/st_atom_sampler.c | 43 ++++++----- src/mesa/state_tracker/st_atom_shader.c | 28 ++------ src/mesa/state_tracker/st_cb_accum.c | 1 - src/mesa/state_tracker/st_cb_clear.c | 47 ++++++------ src/mesa/state_tracker/st_cb_drawpixels.c | 64 +++++++++-------- src/mesa/state_tracker/st_cb_program.c | 5 +- src/mesa/state_tracker/st_context.c | 9 ++- src/mesa/state_tracker/st_context.h | 19 ++--- src/mesa/state_tracker/st_debug.c | 6 +- src/mesa/state_tracker/st_draw.c | 13 ++-- src/mesa/state_tracker/st_gen_mipmap.c | 34 ++++----- src/mesa/state_tracker/st_mesa_to_tgsi.c | 5 +- src/mesa/state_tracker/st_mesa_to_tgsi.h | 2 +- src/mesa/state_tracker/st_program.c | 59 +++++++++------ src/mesa/state_tracker/st_program.h | 22 ++---- 23 files changed, 292 insertions(+), 298 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 3ec73b0800c..6b1e640ae90 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -733,7 +733,8 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ aaline->fs = aafs; /* pass-through */ - aaline->driver_bind_fs_state(aaline->pipe, aafs->driver_fs); + aaline->driver_bind_fs_state(aaline->pipe, + (aafs ? aafs->driver_fs : NULL)); } diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 70f696475fc..99e9e9fe342 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -800,7 +800,8 @@ aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ aapoint->fs = aafs; /* pass-through */ - aapoint->driver_bind_fs_state(aapoint->pipe, aafs->driver_fs); + aapoint->driver_bind_fs_state(aapoint->pipe, + (aafs ? aafs->driver_fs : NULL)); } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index b3e52dc1c79..ed50d0805a7 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -595,7 +595,8 @@ pstip_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ pstip->fs = aafs; /* pass-through */ - pstip->driver_bind_fs_state(pstip->pipe, aafs->driver_fs); + pstip->driver_bind_fs_state(pstip->pipe, + (aafs ? aafs->driver_fs : NULL)); } diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index eb641ed321d..4eefd1d61f5 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -120,7 +120,8 @@ softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) softpipe->vs = (const struct sp_vertex_shader *)vs; - draw_bind_vertex_shader(softpipe->draw, softpipe->vs->draw_data); + draw_bind_vertex_shader(softpipe->draw, + (softpipe->vs ? softpipe->vs->draw_data : NULL)); softpipe->dirty |= SP_NEW_VS; } diff --git a/src/mesa/sources b/src/mesa/sources index f0bf7b31fbc..e3d5f228493 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -184,7 +184,6 @@ STATETRACKER_SOURCES = \ state_tracker/st_cb_readpixels.c \ state_tracker/st_cb_strings.c \ state_tracker/st_cb_texture.c \ - state_tracker/st_cache.c \ state_tracker/st_context.c \ state_tracker/st_debug.c \ state_tracker/st_draw.c \ diff --git a/src/mesa/state_tracker/st_atom_blend.c b/src/mesa/state_tracker/st_atom_blend.c index 2a9d2091536..6c13fc81415 100644 --- a/src/mesa/state_tracker/st_atom_blend.c +++ b/src/mesa/state_tracker/st_atom_blend.c @@ -33,11 +33,11 @@ #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" /** @@ -155,44 +155,43 @@ translate_logicop(GLenum logicop) static void update_blend( struct st_context *st ) { - struct pipe_blend_state blend; - const struct cso_blend *cso; + struct pipe_blend_state *blend = &st->state.blend; - memset(&blend, 0, sizeof(blend)); + memset(blend, 0, sizeof(*blend)); if (st->ctx->Color.ColorLogicOpEnabled || (st->ctx->Color.BlendEnabled && st->ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) { /* logicop enabled */ - blend.logicop_enable = 1; - blend.logicop_func = translate_logicop(st->ctx->Color.LogicOp); + blend->logicop_enable = 1; + blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp); } else if (st->ctx->Color.BlendEnabled) { /* blending enabled */ - blend.blend_enable = 1; + blend->blend_enable = 1; - blend.rgb_func = translate_blend(st->ctx->Color.BlendEquationRGB); + blend->rgb_func = translate_blend(st->ctx->Color.BlendEquationRGB); if (st->ctx->Color.BlendEquationRGB == GL_MIN || st->ctx->Color.BlendEquationRGB == GL_MAX) { /* Min/max are special */ - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend->rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rgb_dst_factor = PIPE_BLENDFACTOR_ONE; } else { - blend.rgb_src_factor = translate_blend(st->ctx->Color.BlendSrcRGB); - blend.rgb_dst_factor = translate_blend(st->ctx->Color.BlendDstRGB); + blend->rgb_src_factor = translate_blend(st->ctx->Color.BlendSrcRGB); + blend->rgb_dst_factor = translate_blend(st->ctx->Color.BlendDstRGB); } - blend.alpha_func = translate_blend(st->ctx->Color.BlendEquationA); + blend->alpha_func = translate_blend(st->ctx->Color.BlendEquationA); if (st->ctx->Color.BlendEquationA == GL_MIN || st->ctx->Color.BlendEquationA == GL_MAX) { /* Min/max are special */ - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend->alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->alpha_dst_factor = PIPE_BLENDFACTOR_ONE; } else { - blend.alpha_src_factor = translate_blend(st->ctx->Color.BlendSrcA); - blend.alpha_dst_factor = translate_blend(st->ctx->Color.BlendDstA); + blend->alpha_src_factor = translate_blend(st->ctx->Color.BlendSrcA); + blend->alpha_dst_factor = translate_blend(st->ctx->Color.BlendDstA); } } else { @@ -201,25 +200,18 @@ update_blend( struct st_context *st ) /* Colormask - maybe reverse these bits? */ if (st->ctx->Color.ColorMask[0]) - blend.colormask |= PIPE_MASK_R; + blend->colormask |= PIPE_MASK_R; if (st->ctx->Color.ColorMask[1]) - blend.colormask |= PIPE_MASK_G; + blend->colormask |= PIPE_MASK_G; if (st->ctx->Color.ColorMask[2]) - blend.colormask |= PIPE_MASK_B; + blend->colormask |= PIPE_MASK_B; if (st->ctx->Color.ColorMask[3]) - blend.colormask |= PIPE_MASK_A; + blend->colormask |= PIPE_MASK_A; if (st->ctx->Color.DitherFlag) - blend.dither = 1; + blend->dither = 1; - cso = st_cached_blend_state(st, &blend); - - if (st->state.blend != cso) { - /* state has changed */ - st->state.blend = cso; - /* bind new state */ - st->pipe->bind_blend_state(st->pipe, cso->data); - } + cso_set_blend(st->cso_context, blend); if (memcmp(st->ctx->Color.BlendColor, &st->state.blend_color, 4 * sizeof(GLfloat)) != 0) { /* state has changed */ diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c index 7aecdbfbcc0..827ad3b548b 100644 --- a/src/mesa/state_tracker/st_atom_depth.c +++ b/src/mesa/state_tracker/st_atom_depth.c @@ -34,10 +34,10 @@ #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" /** @@ -93,53 +93,47 @@ gl_stencil_op_to_pipe(GLenum func) static void update_depth_stencil_alpha(struct st_context *st) { - struct pipe_depth_stencil_alpha_state depth_stencil; - const struct cso_depth_stencil_alpha *cso; + struct pipe_depth_stencil_alpha_state *dsa = &st->state.depth_stencil; - memset(&depth_stencil, 0, sizeof(depth_stencil)); + memset(dsa, 0, sizeof(*dsa)); - depth_stencil.depth.enabled = st->ctx->Depth.Test; - depth_stencil.depth.writemask = st->ctx->Depth.Mask; - depth_stencil.depth.func = st_compare_func_to_pipe(st->ctx->Depth.Func); + dsa->depth.enabled = st->ctx->Depth.Test; + dsa->depth.writemask = st->ctx->Depth.Mask; + dsa->depth.func = st_compare_func_to_pipe(st->ctx->Depth.Func); if (st->ctx->Query.CurrentOcclusionObject && st->ctx->Query.CurrentOcclusionObject->Active) - depth_stencil.depth.occlusion_count = 1; + dsa->depth.occlusion_count = 1; if (st->ctx->Stencil.Enabled) { - depth_stencil.stencil[0].enabled = 1; - depth_stencil.stencil[0].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[0]); - depth_stencil.stencil[0].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[0]); - depth_stencil.stencil[0].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[0]); - depth_stencil.stencil[0].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[0]); - depth_stencil.stencil[0].ref_value = st->ctx->Stencil.Ref[0] & 0xff; - depth_stencil.stencil[0].value_mask = st->ctx->Stencil.ValueMask[0] & 0xff; - depth_stencil.stencil[0].write_mask = st->ctx->Stencil.WriteMask[0] & 0xff; + dsa->stencil[0].enabled = 1; + dsa->stencil[0].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[0]); + dsa->stencil[0].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[0]); + dsa->stencil[0].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[0]); + dsa->stencil[0].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[0]); + dsa->stencil[0].ref_value = st->ctx->Stencil.Ref[0] & 0xff; + dsa->stencil[0].value_mask = st->ctx->Stencil.ValueMask[0] & 0xff; + dsa->stencil[0].write_mask = st->ctx->Stencil.WriteMask[0] & 0xff; if (st->ctx->Stencil.TestTwoSide) { - depth_stencil.stencil[1].enabled = 1; - depth_stencil.stencil[1].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[1]); - depth_stencil.stencil[1].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[1]); - depth_stencil.stencil[1].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[1]); - depth_stencil.stencil[1].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[1]); - depth_stencil.stencil[1].ref_value = st->ctx->Stencil.Ref[1] & 0xff; - depth_stencil.stencil[1].value_mask = st->ctx->Stencil.ValueMask[1] & 0xff; - depth_stencil.stencil[1].write_mask = st->ctx->Stencil.WriteMask[1] & 0xff; + dsa->stencil[1].enabled = 1; + dsa->stencil[1].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[1]); + dsa->stencil[1].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[1]); + dsa->stencil[1].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[1]); + dsa->stencil[1].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[1]); + dsa->stencil[1].ref_value = st->ctx->Stencil.Ref[1] & 0xff; + dsa->stencil[1].value_mask = st->ctx->Stencil.ValueMask[1] & 0xff; + dsa->stencil[1].write_mask = st->ctx->Stencil.WriteMask[1] & 0xff; } } if (st->ctx->Color.AlphaEnabled) { - depth_stencil.alpha.enabled = 1; - depth_stencil.alpha.func = st_compare_func_to_pipe(st->ctx->Color.AlphaFunc); - depth_stencil.alpha.ref = st->ctx->Color.AlphaRef; + dsa->alpha.enabled = 1; + dsa->alpha.func = st_compare_func_to_pipe(st->ctx->Color.AlphaFunc); + dsa->alpha.ref = st->ctx->Color.AlphaRef; } - cso = st_cached_depth_stencil_alpha_state(st, &depth_stencil); - if (st->state.depth_stencil != cso) { - /* state has changed */ - st->state.depth_stencil = cso; - st->pipe->bind_depth_stencil_alpha_state(st->pipe, cso->data); /* bind new state */ - } + cso_set_depth_stencil_alpha(st->cso_context, dsa); } diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 229839d8b2c..77cef9236bb 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -32,10 +32,11 @@ #include "main/macros.h" #include "st_context.h" -#include "st_cache.h" +#include "st_atom.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "st_atom.h" +#include "cso_cache/cso_context.h" + static GLuint translate_fill( GLenum mode ) { @@ -72,22 +73,21 @@ static GLboolean get_offset_flag( GLuint fill_mode, static void update_raster_state( struct st_context *st ) { GLcontext *ctx = st->ctx; - struct pipe_rasterizer_state raster; - const struct cso_rasterizer *cso; + struct pipe_rasterizer_state *raster = &st->state.rasterizer; const struct gl_vertex_program *vertProg = ctx->VertexProgram._Current; uint i; - memset(&raster, 0, sizeof(raster)); + memset(raster, 0, sizeof(*raster)); - raster.origin_lower_left = 1; /* Always true for OpenGL */ + raster->origin_lower_left = 1; /* Always true for OpenGL */ /* _NEW_POLYGON, _NEW_BUFFERS */ { if (ctx->Polygon.FrontFace == GL_CCW) - raster.front_winding = PIPE_WINDING_CCW; + raster->front_winding = PIPE_WINDING_CCW; else - raster.front_winding = PIPE_WINDING_CW; + raster->front_winding = PIPE_WINDING_CW; /* XXX * I think the intention here is that user-created framebuffer objects @@ -96,13 +96,13 @@ static void update_raster_state( struct st_context *st ) * But this is an implementation/driver-specific artifact - remove... */ if (ctx->DrawBuffer && ctx->DrawBuffer->Name != 0) - raster.front_winding ^= PIPE_WINDING_BOTH; + raster->front_winding ^= PIPE_WINDING_BOTH; } /* _NEW_LIGHT */ if (ctx->Light.ShadeModel == GL_FLAT) - raster.flatshade = 1; + raster->flatshade = 1; /* _NEW_LIGHT | _NEW_PROGRAM * @@ -113,28 +113,28 @@ static void update_raster_state( struct st_context *st ) if (ctx->VertexProgram._Current) { if (ctx->VertexProgram._Enabled) { /* user-defined program */ - raster.light_twoside = ctx->VertexProgram.TwoSideEnabled; + raster->light_twoside = ctx->VertexProgram.TwoSideEnabled; } else { /* TNL-generated program */ - raster.light_twoside = ctx->Light.Enabled && ctx->Light.Model.TwoSide; + raster->light_twoside = ctx->Light.Enabled && ctx->Light.Model.TwoSide; } } else if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { - raster.light_twoside = 1; + raster->light_twoside = 1; } /* _NEW_POLYGON */ if (ctx->Polygon.CullFlag) { if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) { - raster.cull_mode = PIPE_WINDING_BOTH; + raster->cull_mode = PIPE_WINDING_BOTH; } else if (ctx->Polygon.CullFaceMode == GL_FRONT) { - raster.cull_mode = raster.front_winding; + raster->cull_mode = raster->front_winding; } else { - raster.cull_mode = raster.front_winding ^ PIPE_WINDING_BOTH; + raster->cull_mode = raster->front_winding ^ PIPE_WINDING_BOTH; } } @@ -144,23 +144,23 @@ static void update_raster_state( struct st_context *st ) GLuint fill_front = translate_fill( ctx->Polygon.FrontMode ); GLuint fill_back = translate_fill( ctx->Polygon.BackMode ); - if (raster.front_winding == PIPE_WINDING_CW) { - raster.fill_cw = fill_front; - raster.fill_ccw = fill_back; + if (raster->front_winding == PIPE_WINDING_CW) { + raster->fill_cw = fill_front; + raster->fill_ccw = fill_back; } else { - raster.fill_cw = fill_back; - raster.fill_ccw = fill_front; + raster->fill_cw = fill_back; + raster->fill_ccw = fill_front; } /* Simplify when culling is active: */ - if (raster.cull_mode & PIPE_WINDING_CW) { - raster.fill_cw = raster.fill_ccw; + if (raster->cull_mode & PIPE_WINDING_CW) { + raster->fill_cw = raster->fill_ccw; } - if (raster.cull_mode & PIPE_WINDING_CCW) { - raster.fill_ccw = raster.fill_cw; + if (raster->cull_mode & PIPE_WINDING_CCW) { + raster->fill_ccw = raster->fill_cw; } } @@ -168,95 +168,91 @@ static void update_raster_state( struct st_context *st ) */ if (ctx->Polygon.OffsetUnits != 0.0 || ctx->Polygon.OffsetFactor != 0.0) { - raster.offset_cw = get_offset_flag( raster.fill_cw, &ctx->Polygon ); - raster.offset_ccw = get_offset_flag( raster.fill_ccw, &ctx->Polygon ); - raster.offset_units = ctx->Polygon.OffsetUnits; - raster.offset_scale = ctx->Polygon.OffsetFactor; + raster->offset_cw = get_offset_flag( raster->fill_cw, &ctx->Polygon ); + raster->offset_ccw = get_offset_flag( raster->fill_ccw, &ctx->Polygon ); + raster->offset_units = ctx->Polygon.OffsetUnits; + raster->offset_scale = ctx->Polygon.OffsetFactor; } if (ctx->Polygon.SmoothFlag) - raster.poly_smooth = 1; + raster->poly_smooth = 1; if (ctx->Polygon.StippleFlag) - raster.poly_stipple_enable = 1; + raster->poly_stipple_enable = 1; /* _NEW_BUFFERS, _NEW_POLYGON */ - if (raster.fill_cw != PIPE_POLYGON_MODE_FILL || - raster.fill_ccw != PIPE_POLYGON_MODE_FILL) + if (raster->fill_cw != PIPE_POLYGON_MODE_FILL || + raster->fill_ccw != PIPE_POLYGON_MODE_FILL) { GLfloat mrd = (ctx->DrawBuffer ? ctx->DrawBuffer->_MRD : 1.0); - raster.offset_units = ctx->Polygon.OffsetFactor * mrd; - raster.offset_scale = (ctx->Polygon.OffsetUnits * mrd * + raster->offset_units = ctx->Polygon.OffsetFactor * mrd; + raster->offset_scale = (ctx->Polygon.OffsetUnits * mrd * st->polygon_offset_scale); } /* _NEW_POINT */ - raster.point_size = ctx->Point.Size; - raster.point_smooth = ctx->Point.SmoothFlag; - raster.point_sprite = ctx->Point.PointSprite; + raster->point_size = ctx->Point.Size; + raster->point_smooth = ctx->Point.SmoothFlag; + raster->point_sprite = ctx->Point.PointSprite; for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { if (ctx->Point.CoordReplace[i]) { if (ctx->Point.SpriteOrigin == GL_UPPER_LEFT) - raster.sprite_coord_mode[i] = PIPE_SPRITE_COORD_UPPER_LEFT; + raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_UPPER_LEFT; else - raster.sprite_coord_mode[i] = PIPE_SPRITE_COORD_LOWER_LEFT; + raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_LOWER_LEFT; } else { - raster.sprite_coord_mode[i] = PIPE_SPRITE_COORD_NONE; + raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_NONE; } } if (vertProg) { if (vertProg->Base.Id == 0) { if (vertProg->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { /* generated program which emits point size */ - raster.point_size_per_vertex = TRUE; + raster->point_size_per_vertex = TRUE; } } else if (ctx->VertexProgram.PointSizeEnabled) { /* user-defined program and GL_VERTEX_PROGRAM_POINT_SIZE set */ - raster.point_size_per_vertex = ctx->VertexProgram.PointSizeEnabled; + raster->point_size_per_vertex = ctx->VertexProgram.PointSizeEnabled; } } /* _NEW_LINE */ - raster.line_smooth = ctx->Line.SmoothFlag; + raster->line_smooth = ctx->Line.SmoothFlag; if (ctx->Line.SmoothFlag) { - raster.line_width = CLAMP(ctx->Line.Width, + raster->line_width = CLAMP(ctx->Line.Width, ctx->Const.MinLineWidthAA, ctx->Const.MaxLineWidthAA); } else { - raster.line_width = CLAMP(ctx->Line.Width, + raster->line_width = CLAMP(ctx->Line.Width, ctx->Const.MinLineWidth, ctx->Const.MaxLineWidth); } - raster.line_stipple_enable = ctx->Line.StippleFlag; - raster.line_stipple_pattern = ctx->Line.StipplePattern; + raster->line_stipple_enable = ctx->Line.StippleFlag; + raster->line_stipple_pattern = ctx->Line.StipplePattern; /* GL stipple factor is in [1,256], remap to [0, 255] here */ - raster.line_stipple_factor = ctx->Line.StippleFactor - 1; + raster->line_stipple_factor = ctx->Line.StippleFactor - 1; /* _NEW_MULTISAMPLE */ if (ctx->Multisample.Enabled) - raster.multisample = 1; + raster->multisample = 1; /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) - raster.scissor = 1; + raster->scissor = 1; - cso = st_cached_rasterizer_state(st, &raster); - if (st->state.rasterizer != cso) { - st->state.rasterizer = cso; - st->pipe->bind_rasterizer_state(st->pipe, cso->data); - } + cso_set_rasterizer(st->cso_context, raster); } const struct st_tracked_state st_update_rasterizer = { diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 1000f98ffc0..1babba9b4fa 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -33,11 +33,11 @@ #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "st_program.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" /** @@ -124,9 +124,9 @@ update_samplers(struct st_context *st) /* loop over sampler units (aka tex image units) */ for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) { - struct pipe_sampler_state sampler; + struct pipe_sampler_state *sampler = st->state.samplers + su; - memset(&sampler, 0, sizeof(sampler)); + memset(sampler, 0, sizeof(*sampler)); if (fs->Base.Base.SamplersUsed & (1 << su)) { GLuint texUnit = fs->Base.Base.SamplerUnits[su]; @@ -135,37 +135,37 @@ update_samplers(struct st_context *st) assert(texobj); - sampler.wrap_s = gl_wrap_to_sp(texobj->WrapS); - sampler.wrap_t = gl_wrap_to_sp(texobj->WrapT); - sampler.wrap_r = gl_wrap_to_sp(texobj->WrapR); + sampler->wrap_s = gl_wrap_to_sp(texobj->WrapS); + sampler->wrap_t = gl_wrap_to_sp(texobj->WrapT); + sampler->wrap_r = gl_wrap_to_sp(texobj->WrapR); - sampler.min_img_filter = gl_filter_to_img_filter(texobj->MinFilter); - sampler.min_mip_filter = gl_filter_to_mip_filter(texobj->MinFilter); - sampler.mag_img_filter = gl_filter_to_img_filter(texobj->MagFilter); + sampler->min_img_filter = gl_filter_to_img_filter(texobj->MinFilter); + sampler->min_mip_filter = gl_filter_to_mip_filter(texobj->MinFilter); + sampler->mag_img_filter = gl_filter_to_img_filter(texobj->MagFilter); if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB) - sampler.normalized_coords = 1; + sampler->normalized_coords = 1; - sampler.lod_bias = st->ctx->Texture.Unit[su].LodBias; + sampler->lod_bias = st->ctx->Texture.Unit[su].LodBias; #if 1 - sampler.min_lod = (texobj->MinLod) < 0.0 ? 0.0 : texobj->MinLod; - sampler.max_lod = texobj->MaxLod; + sampler->min_lod = (texobj->MinLod) < 0.0 ? 0.0 : texobj->MinLod; + sampler->max_lod = texobj->MaxLod; #else /* min/max lod should really be as follows (untested). * Also, calculate_first_last_level() needs to be overhauled * since today's hardware had real support for LOD clamping. */ - sampler.min_lod = MAX2(texobj->BaseLevel, texobj->MinLod); - sampler.max_lod = MIN2(texobj->MaxLevel, texobj->MaxLod); + sampler->min_lod = MAX2(texobj->BaseLevel, texobj->MinLod); + sampler->max_lod = MIN2(texobj->MaxLevel, texobj->MaxLod); #endif - sampler.max_anisotropy = texobj->MaxAnisotropy; + sampler->max_anisotropy = texobj->MaxAnisotropy; /* only care about ARB_shadow, not SGI shadow */ if (texobj->CompareMode == GL_COMPARE_R_TO_TEXTURE) { - sampler.compare = 1; - sampler.compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE; - sampler.compare_func + sampler->compare = 1; + sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE; + sampler->compare_func = st_compare_func_to_pipe(texobj->CompareFunc); } @@ -174,11 +174,10 @@ update_samplers(struct st_context *st) /* XXX more sampler state here */ } - st->state.sampler[su] = st_cached_sampler_state(st, &sampler)->data; + cso_single_sampler(st->cso_context, su, sampler); } - st->pipe->bind_sampler_states(st->pipe, st->state.num_samplers, - st->state.sampler); + cso_single_sampler_done(st->cso_context); } diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 10c131d5549..07266884930 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -43,10 +43,9 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "st_program.h" #include "st_atom_shader.h" @@ -70,9 +69,6 @@ struct translated_vertex_program /** Maps VERT_RESULT_x to slot */ GLuint output_to_slot[VERT_RESULT_MAX]; - /** The program in TGSI format */ - struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; - /** Pointer to the translated vertex program */ struct st_vertex_program *vp; @@ -158,7 +154,7 @@ find_translated_vp(struct st_context *st, /* * Translate fragment program if needed. */ - if (!stfp->cso) { + if (!stfp->state.tokens) { GLuint inAttr, numIn = 0; for (inAttr = 0; inAttr < FRAG_ATTRIB_MAX; inAttr++) { @@ -175,11 +171,7 @@ find_translated_vp(struct st_context *st, assert(stfp->Base.Base.NumInstructions > 1); - (void) st_translate_fragment_program(st, stfp, - stfp->input_to_slot, - stfp->tokens, - ST_MAX_SHADER_TOKENS); - assert(stfp->cso); + st_translate_fragment_program(st, stfp, stfp->input_to_slot); } @@ -261,12 +253,8 @@ find_translated_vp(struct st_context *st, assert(stvp->Base.Base.NumInstructions > 1); - st_translate_vertex_program(st, stvp, - xvp->output_to_slot, - xvp->tokens, - ST_MAX_SHADER_TOKENS); + st_translate_vertex_program(st, stvp, xvp->output_to_slot); - assert(stvp->cso); xvp->vp = stvp; /* translated VP is up to date now */ @@ -296,12 +284,10 @@ update_linkage( struct st_context *st ) xvp = find_translated_vp(st, stvp, stfp); st->vp = stvp; - st->state.vs = xvp->vp; - st->pipe->bind_vs_state(st->pipe, st->state.vs->cso->data); - st->fp = stfp; - st->state.fs = stfp->cso; - st->pipe->bind_fs_state(st->pipe, st->state.fs->data); + + st->pipe->bind_vs_state(st->pipe, stvp->driver_shader); + st->pipe->bind_fs_state(st->pipe, stfp->driver_shader); st->vertex_result_to_slot = xvp->output_to_slot; } diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 663c4f205d8..f1fddc4e026 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -35,7 +35,6 @@ #include "main/macros.h" #include "st_context.h" -#include "st_cache.h" #include "st_cb_accum.h" #include "st_cb_fbo.h" #include "st_draw.h" diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index e712fd84cd5..eae40f2a4f1 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -35,7 +35,6 @@ #include "main/macros.h" #include "shader/prog_instruction.h" #include "st_atom.h" -#include "st_cache.h" #include "st_context.h" #include "st_cb_accum.h" #include "st_cb_clear.h" @@ -50,6 +49,8 @@ #include "pipe/p_defines.h" #include "pipe/p_winsys.h" +#include "cso_cache/cso_context.h" + @@ -157,8 +158,7 @@ make_frag_shader(struct st_context *st) p->OutputsWritten = (1 << FRAG_RESULT_COLR); stfp = (struct st_fragment_program *) p; - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); return stfp; } @@ -206,9 +206,10 @@ make_vertex_shader(struct st_context *st) (1 << VERT_RESULT_HPOS)); stvp = (struct st_vertex_program *) p; - st_translate_vertex_program(st, stvp, NULL, - stvp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_vertex_program(st, stvp, NULL); +#if 0 assert(stvp->cso); +#endif return stvp; } @@ -284,7 +285,6 @@ clear_with_quad(GLcontext *ctx, /* blend state: RGBA masking */ { struct pipe_blend_state blend; - const struct cso_blend *cso; memset(&blend, 0, sizeof(blend)); if (color) { if (ctx->Color.ColorMask[0]) @@ -298,14 +298,12 @@ clear_with_quad(GLcontext *ctx, if (st->ctx->Color.DitherFlag) blend.dither = 1; } - cso = st_cached_blend_state(st, &blend); - pipe->bind_blend_state(pipe, cso->data); + cso_set_blend(st->cso_context, &blend); } /* depth_stencil state: always pass/set to ref value */ { struct pipe_depth_stencil_alpha_state depth_stencil; - const struct cso_depth_stencil_alpha *cso; memset(&depth_stencil, 0, sizeof(depth_stencil)); if (depth) { depth_stencil.depth.enabled = 1; @@ -323,14 +321,13 @@ clear_with_quad(GLcontext *ctx, depth_stencil.stencil[0].value_mask = 0xff; depth_stencil.stencil[0].write_mask = ctx->Stencil.WriteMask[0] & 0xff; } - cso = st_cached_depth_stencil_alpha_state(st, &depth_stencil); - pipe->bind_depth_stencil_alpha_state(pipe, cso->data); + + cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); } /* rasterizer state: nothing */ { struct pipe_rasterizer_state raster; - const struct cso_rasterizer *cso; memset(&raster, 0, sizeof(raster)); #if 0 /* don't do per-pixel scissor; we'll just draw a PIPE_PRIM_QUAD @@ -339,8 +336,7 @@ clear_with_quad(GLcontext *ctx, if (ctx->Scissor.Enabled) raster.scissor = 1; #endif - cso = st_cached_rasterizer_state(st, &raster); - pipe->bind_rasterizer_state(pipe, cso->data); + cso_set_rasterizer(st->cso_context, &raster); } /* fragment shader state: color pass-through program */ @@ -349,7 +345,7 @@ clear_with_quad(GLcontext *ctx, if (!stfp) { stfp = make_frag_shader(st); } - pipe->bind_fs_state(pipe, stfp->cso->data); + pipe->bind_fs_state(pipe, stfp->driver_shader); } /* vertex shader state: color/position pass-through */ @@ -358,7 +354,7 @@ clear_with_quad(GLcontext *ctx, if (!stvp) { stvp = make_vertex_shader(st); } - pipe->bind_vs_state(pipe, stvp->cso->data); + pipe->bind_vs_state(pipe, stvp->driver_shader); } /* viewport state: viewport matching window dims */ @@ -380,16 +376,19 @@ clear_with_quad(GLcontext *ctx, /* draw quad matching scissor rect (XXX verify coord round-off) */ draw_quad(ctx, x0, y0, x1, y1, ctx->Depth.Clear, ctx->Color.ClearColor); +#if 0 + /* Can't depend on old state objects still existing -- may have + * been deleted to make room in the hash, etc. (Should get + * fixed...) + */ + st_invalidate_state(ctx, _NEW_COLOR | _NEW_DEPTH | _NEW_STENCIL); +#else /* Restore pipe state */ - pipe->bind_blend_state(pipe, st->state.blend->data); - pipe->bind_depth_stencil_alpha_state(pipe, st->state.depth_stencil->data); - pipe->bind_fs_state(pipe, st->state.fs->data); - pipe->bind_vs_state(pipe, st->state.vs->cso->data); - pipe->bind_rasterizer_state(pipe, st->state.rasterizer->data); + cso_set_rasterizer(st->cso_context, &st->state.rasterizer); + pipe->bind_fs_state(pipe, st->fp->driver_shader); + pipe->bind_vs_state(pipe, st->vp->driver_shader); +#endif pipe->set_viewport_state(pipe, &st->state.viewport); - /* OR: - st_invalidate_state(ctx, _NEW_COLOR | _NEW_DEPTH | _NEW_STENCIL); - */ } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index dee4c4132a4..18ec9645c44 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -41,7 +41,6 @@ #include "st_context.h" #include "st_atom.h" #include "st_atom_constbuf.h" -#include "st_cache.h" #include "st_draw.h" #include "st_program.h" #include "st_cb_drawpixels.h" @@ -58,6 +57,7 @@ #include "pipe/p_winsys.h" #include "util/p_tile.h" #include "shader/prog_instruction.h" +#include "cso_cache/cso_context.h" /** @@ -159,8 +159,7 @@ make_bitmap_fragment_program(GLcontext *ctx) stfp = (struct st_fragment_program *) p; stfp->Base.UsesKill = GL_TRUE; - st_translate_fragment_program(ctx->st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(ctx->st, stfp, NULL); return stfp; } @@ -204,8 +203,7 @@ combined_bitmap_fragment_program(GLcontext *ctx) #endif /* translate to TGSI tokens */ - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); /* save new program, update serial numbers */ st->bitmap.user_prog_sn = st->fp->serialNo; @@ -266,8 +264,7 @@ combined_drawpix_fragment_program(GLcontext *ctx) #endif /* translate to TGSI tokens */ - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); /* save new program, update serial numbers */ st->pixel_xfer.xfer_prog_sn = st->pixel_xfer.program->serialNo; @@ -343,8 +340,7 @@ make_fragment_shader_z(struct st_context *st) p->OutputsWritten = (1 << FRAG_RESULT_COLR) | (1 << FRAG_RESULT_DEPR); stfp = (struct st_fragment_program *) p; - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); return stfp; } @@ -421,8 +417,7 @@ st_make_passthrough_vertex_shader(struct st_context *st, GLboolean passColor) } stvp = (struct st_vertex_program *) p; - st_translate_vertex_program(st, stvp, NULL, - stvp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_vertex_program(st, stvp, NULL); progs[passColor] = stvp; @@ -641,7 +636,9 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, const GLfloat *color, GLboolean invertTex) { + struct st_context *st = ctx->st; struct pipe_context *pipe = ctx->st->pipe; + struct cso_context *cso = ctx->st->cso_context; GLfloat x0, y0, x1, y1; GLuint maxSize; @@ -656,24 +653,23 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, /* setup state: just scissor */ { struct pipe_rasterizer_state setup; - const struct cso_rasterizer *cso; memset(&setup, 0, sizeof(setup)); if (ctx->Scissor.Enabled) setup.scissor = 1; - cso = st_cached_rasterizer_state(ctx->st, &setup); - pipe->bind_rasterizer_state(pipe, cso->data); + + cso_set_rasterizer(cso, &setup); } /* fragment shader state: TEX lookup program */ - pipe->bind_fs_state(pipe, stfp->cso->data); + pipe->bind_fs_state(pipe, stfp->driver_shader); /* vertex shader state: position + texcoord pass-through */ - pipe->bind_vs_state(pipe, stvp->cso->data); + pipe->bind_vs_state(pipe, stvp->driver_shader); + /* texture sampling state: */ { struct pipe_sampler_state sampler; - const struct cso_sampler *cso; memset(&sampler, 0, sizeof(sampler)); sampler.wrap_s = PIPE_TEX_WRAP_CLAMP; sampler.wrap_t = PIPE_TEX_WRAP_CLAMP; @@ -682,8 +678,9 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; sampler.normalized_coords = 1; - cso = st_cached_sampler_state(ctx->st, &sampler); - pipe->bind_sampler_states(pipe, 1, (void**)&cso->data); + + cso_single_sampler(cso, 0, &sampler); + cso_single_sampler_done(cso); } /* viewport state: viewport matching window dims */ @@ -723,14 +720,25 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, draw_quad(ctx, x0, y0, z, x1, y1, invertTex); /* restore GL state */ - pipe->bind_rasterizer_state(pipe, ctx->st->state.rasterizer->data); - pipe->bind_fs_state(pipe, ctx->st->state.fs->data); - pipe->bind_vs_state(pipe, ctx->st->state.vs->cso->data); pipe->set_sampler_textures(pipe, ctx->st->state.num_textures, ctx->st->state.sampler_texture); - pipe->bind_sampler_states(pipe, ctx->st->state.num_samplers, - ctx->st->state.sampler); + pipe->set_viewport_state(pipe, &ctx->st->state.viewport); + +#if 0 + /* Can't depend on old state objects still existing -- may have + * been deleted to make room in the hash, etc. (Should get + * fixed...) + */ + st_invalidate_state(ctx, _NEW_COLOR | _NEW_TEXTURE); +#else + /* restore state */ + pipe->bind_fs_state(pipe, st->fp->driver_shader); + pipe->bind_vs_state(pipe, st->vp->driver_shader); + cso_set_rasterizer(cso, &st->state.rasterizer); + cso_set_samplers(cso, PIPE_MAX_SAMPLERS, + (const struct pipe_sampler_state **) st->state.sampler_list); +#endif } @@ -798,10 +806,10 @@ compatible_formats(GLenum format, GLenum type, enum pipe_format pipeFormat) static GLboolean any_fragment_ops(const struct st_context *st) { - if (st->state.depth_stencil->state.alpha.enabled || - st->state.blend->state.blend_enable || - st->state.blend->state.logicop_enable || - st->state.depth_stencil->state.depth.enabled) + if (st->state.depth_stencil.alpha.enabled || + st->state.depth_stencil.depth.enabled || + st->state.blend.blend_enable || + st->state.blend.logicop_enable) /* XXX more checks */ return GL_TRUE; else diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 61d4f4c41c6..4dc76f19b10 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -168,11 +168,13 @@ static void st_program_string_notify( GLcontext *ctx, stfp->serialNo++; +#if 0 if (stfp->cso) { /* free the TGSI code */ // cso_delete(stfp->vs); stfp->cso = NULL; } +#endif stfp->param_state = stfp->Base.Base.Parameters->StateFlags; @@ -184,13 +186,14 @@ static void st_program_string_notify( GLcontext *ctx, stvp->serialNo++; +#if 0 if (stvp->cso) { /* free the CSO data */ st->pipe->delete_vs_state(st->pipe, stvp->cso->data); FREE((void *) stvp->cso); stvp->cso = NULL; } - +#endif if (stvp->draw_shader) { draw_delete_vertex_shader(st->draw, stvp->draw_shader); stvp->draw_shader = NULL; diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 09e389f9dc7..5458ab420e4 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -56,6 +56,7 @@ #include "pipe/p_inlines.h" #include "draw/draw_context.h" #include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" /** @@ -78,6 +79,7 @@ void st_invalidate_state(GLcontext * ctx, GLuint new_state) static struct st_context * st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) { + uint i; struct st_context *st = CALLOC_STRUCT( st_context ); ctx->st = st; @@ -93,12 +95,15 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) st->dirty.mesa = ~0; st->dirty.st = ~0; - st->cache = cso_cache_create(); + st->cso_context = cso_create_context(pipe); st_init_atoms( st ); st_init_draw( st ); st_init_generate_mipmap(st); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + st->state.sampler_list[i] = &st->state.samplers[i]; + /* we want all vertex data to be placed in buffer objects */ vbo_use_buffer_objects(ctx); @@ -149,7 +154,7 @@ static void st_destroy_context_priv( struct st_context *st ) _vbo_DestroyContext(st->ctx); - cso_cache_delete( st->cache ); + cso_destroy_context(st->cso_context); _mesa_delete_program_cache(st->ctx, st->pixel_xfer.cache); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 897a5109b7e..e81aebba3d2 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -74,14 +74,11 @@ struct st_context * Other state is just parameter values. */ struct { - const struct cso_alpha_test *alpha_test; - const struct cso_blend *blend; - void *sampler[PIPE_MAX_SAMPLERS]; - const struct cso_depth_stencil_alpha *depth_stencil; - const struct cso_rasterizer *rasterizer; - const struct cso_fragment_shader *fs; - struct st_vertex_program *vs; - + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depth_stencil; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS]; struct pipe_blend_color blend_color; struct pipe_clip_state clip; struct pipe_constant_buffer constants[2]; @@ -151,6 +148,10 @@ struct st_context /** For gen/render mipmap feature */ struct { + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + void *blend_cso; void *depthstencil_cso; void *rasterizer_cso; @@ -158,7 +159,7 @@ struct st_context struct st_vertex_program *stvp; } gen_mipmap; - struct cso_cache *cache; + struct cso_context *cso_context; }; diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 5888bcb98a3..9c13010da85 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -53,15 +53,15 @@ st_print_current(void) int i; printf("Vertex Transform Inputs:\n"); - for (i = 0; i < st->state.vs->cso->state.num_inputs; i++) { + for (i = 0; i < st->vp->state.num_inputs; i++) { printf(" Slot %d: VERT_ATTRIB_%d\n", i, st->vp->index_to_input[i]); } - tgsi_dump( st->state.vs->cso->state.tokens, 0 ); + tgsi_dump( st->vp->state.tokens, 0 ); if (st->vp->Base.Base.Parameters) _mesa_print_parameter_list(st->vp->Base.Base.Parameters); - tgsi_dump( st->state.fs->state.tokens, 0 ); + tgsi_dump( st->fp->state.tokens, 0 ); if (st->fp->Base.Base.Parameters) _mesa_print_parameter_list(st->fp->Base.Base.Parameters); } diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 1c0fa8c6aad..98504e46c1a 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -36,7 +36,6 @@ #include "vbo/vbo.h" #include "st_atom.h" -#include "st_cache.h" #include "st_context.h" #include "st_cb_bufferobjects.h" #include "st_draw.h" @@ -219,7 +218,7 @@ st_draw_vbo(GLcontext *ctx, /* must get these after state validation! */ vp = ctx->st->vp; - vs = &ctx->st->state.vs->cso->state; + vs = &ctx->st->vp->state; /* loop over TGSI shader inputs to determine vertex buffer * and attribute info @@ -481,10 +480,10 @@ st_feedback_draw_vbo(GLcontext *ctx, /* must get these after state validation! */ vp = ctx->st->vp; - vs = &ctx->st->state.vs->cso->state; + vs = &st->vp->state; - if (!st->state.vs->draw_shader) { - st->state.vs->draw_shader = draw_create_vertex_shader(draw, vs); + if (!st->vp->draw_shader) { + st->vp->draw_shader = draw_create_vertex_shader(draw, vs); } /* @@ -496,8 +495,8 @@ st_feedback_draw_vbo(GLcontext *ctx, assert(draw); draw_set_viewport_state(draw, &st->state.viewport); draw_set_clip_state(draw, &st->state.clip); - draw_set_rasterizer_state(draw, &st->state.rasterizer->state); - draw_bind_vertex_shader(draw, st->state.vs->draw_shader); + draw_set_rasterizer_state(draw, &st->state.rasterizer); + draw_bind_vertex_shader(draw, st->vp->draw_shader); set_feedback_vertex_format(ctx); /* loop over TGSI shader inputs to determine vertex buffer diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 3723e26d45d..9c4e1032efe 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -38,6 +38,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" #include "st_context.h" #include "st_draw.h" @@ -89,8 +90,7 @@ make_tex_fragment_program(GLcontext *ctx) stfp = (struct st_fragment_program *) p; - st_translate_fragment_program(ctx->st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(ctx->st, stfp, NULL); return stfp; } @@ -118,6 +118,7 @@ st_init_generate_mipmap(struct st_context *st) blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; blend.colormask = PIPE_MASK_RGBA; + st->gen_mipmap.blend = blend; st->gen_mipmap.blend_cso = pipe->create_blend_state(pipe, &blend); memset(&depthstencil, 0, sizeof(depthstencil)); @@ -257,14 +258,14 @@ st_render_mipmap(struct st_context *st, sampler.normalized_coords = 1; - /* bind CSOs */ - pipe->bind_blend_state(pipe, st->gen_mipmap.blend_cso); - pipe->bind_depth_stencil_alpha_state(pipe, st->gen_mipmap.depthstencil_cso); - pipe->bind_rasterizer_state(pipe, st->gen_mipmap.rasterizer_cso); + /* bind state */ + cso_set_blend(st->cso_context, &st->gen_mipmap.blend); + cso_set_depth_stencil_alpha(st->cso_context, &st->gen_mipmap.depthstencil); + cso_set_rasterizer(st->cso_context, &st->gen_mipmap.rasterizer); /* bind shaders */ - pipe->bind_fs_state(pipe, st->gen_mipmap.stfp->cso->data); - pipe->bind_vs_state(pipe, st->gen_mipmap.stvp->cso->data); + pipe->bind_fs_state(pipe, st->gen_mipmap.stfp->driver_shader); + pipe->bind_vs_state(pipe, st->gen_mipmap.stvp->driver_shader); /* * XXX for small mipmap levels, it may be faster to use the software @@ -304,17 +305,18 @@ st_render_mipmap(struct st_context *st, /*pt->first_level = first_level_save;*/ /* restore pipe state */ - if (st->state.rasterizer) - pipe->bind_rasterizer_state(pipe, st->state.rasterizer->data); - if (st->state.fs) - pipe->bind_fs_state(pipe, st->state.fs->data); - if (st->state.vs) - pipe->bind_vs_state(pipe, st->state.vs->cso->data); - pipe->bind_sampler_states(pipe, st->state.num_samplers, - st->state.sampler); +#if 0 + cso_set_rasterizer(st->cso_context, &st->state.rasterizer); + cso_set_samplers(st->cso_context, st->state.samplers_list); + pipe->bind_fs_state(pipe, st->fp->shader_program); + pipe->bind_vs_state(pipe, st->vp->shader_program); pipe->set_sampler_textures(pipe, st->state.num_textures, st->state.sampler_texture); pipe->set_viewport_state(pipe, &st->state.viewport); +#else + /* XXX is this sufficient? */ + st_invalidate_state(st->ctx, _NEW_COLOR | _NEW_TEXTURE); +#endif return TRUE; } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 97206752af3..9446b012ad7 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -683,8 +683,9 @@ find_temporaries(const struct gl_program *program, * \param tokens array to store translated tokens in * \param maxTokens size of the tokens array * + * \return number of tokens placed in 'tokens' buffer, or zero if error */ -GLboolean +GLuint tgsi_translate_mesa_program( uint procType, const struct gl_program *program, @@ -918,6 +919,6 @@ tgsi_translate_mesa_program( maxTokens - ti ); } - return GL_TRUE; + return ti; } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index 3ababf13397..63fc855b535 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -39,7 +39,7 @@ extern "C" { struct tgsi_token; struct gl_program; -GLboolean +GLuint tgsi_translate_mesa_program( uint procType, const struct gl_program *program, diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index aa252c845ad..0f8784e132e 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -42,15 +42,29 @@ #include "tgsi/util/tgsi_dump.h" #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" +#include "cso_cache/cso_context.h" #define TGSI_DEBUG 0 +/** XXX we should use the version of this from p_util.h but including + * that header causes symbol collisions. + */ +static INLINE void * +mem_dup(const void *src, uint size) +{ + void *dup = MALLOC(size); + if (dup) + memcpy(dup, src, size); + return dup; +} + + + /** * Translate a Mesa vertex shader into a TGSI shader. * \param outputMapping to map vertex program output registers to TGSI @@ -61,15 +75,15 @@ void st_translate_vertex_program(struct st_context *st, struct st_vertex_program *stvp, - const GLuint outputMapping[], - struct tgsi_token *tokensOut, - GLuint maxTokens) + const GLuint outputMapping[]) { + struct pipe_context *pipe = st->pipe; + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; GLuint defaultOutputMapping[VERT_RESULT_MAX]; struct pipe_shader_state vs; - const struct cso_vertex_shader *cso; GLuint attr, i; GLuint num_generic = 0; + GLuint num_tokens; memset(&vs, 0, sizeof(vs)); @@ -240,7 +254,7 @@ st_translate_vertex_program(struct st_context *st, /* XXX: fix static allocation of tokens: */ - tgsi_translate_mesa_program( TGSI_PROCESSOR_VERTEX, + num_tokens = tgsi_translate_mesa_program( TGSI_PROCESSOR_VERTEX, &stvp->Base.Base, /* inputs */ vs.num_inputs, @@ -252,20 +266,21 @@ st_translate_vertex_program(struct st_context *st, vs.num_outputs, outputMapping, vs.output_semantic_name, - vs.output_semantic_index, + vs.output_semantic_index, /* tokenized result */ - tokensOut, maxTokens); + tokens, ST_MAX_SHADER_TOKENS); - vs.tokens = tokensOut; + vs.tokens = (struct tgsi_token *) + mem_dup(tokens, num_tokens * sizeof(tokens[0])); - cso = st_cached_vs_state(st, &vs); - stvp->cso = cso; + stvp->state = vs; /* struct copy */ + stvp->driver_shader = pipe->create_vs_state(pipe, &vs); if (0) _mesa_print_program(&stvp->Base.Base); if (TGSI_DEBUG) - tgsi_dump( tokensOut, 0 ); + tgsi_dump( vs.tokens, 0 ); } @@ -280,10 +295,10 @@ st_translate_vertex_program(struct st_context *st, const struct cso_fragment_shader * st_translate_fragment_program(struct st_context *st, struct st_fragment_program *stfp, - const GLuint inputMapping[], - struct tgsi_token *tokensOut, - GLuint maxTokens) + const GLuint inputMapping[]) { + struct pipe_context *pipe = st->pipe; + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; GLuint outputMapping[FRAG_RESULT_MAX]; GLuint defaultInputMapping[FRAG_ATTRIB_MAX]; struct pipe_shader_state fs; @@ -293,6 +308,7 @@ st_translate_fragment_program(struct st_context *st, const GLbitfield inputsRead = stfp->Base.Base.InputsRead; GLuint vslot = 0; GLuint num_generic = 0; + GLuint num_tokens; memset(&fs, 0, sizeof(fs)); @@ -401,7 +417,7 @@ st_translate_fragment_program(struct st_context *st, /* XXX: fix static allocation of tokens: */ - tgsi_translate_mesa_program( TGSI_PROCESSOR_FRAGMENT, + num_tokens = tgsi_translate_mesa_program( TGSI_PROCESSOR_FRAGMENT, &stfp->Base.Base, /* inputs */ fs.num_inputs, @@ -415,18 +431,19 @@ st_translate_fragment_program(struct st_context *st, fs.output_semantic_name, fs.output_semantic_index, /* tokenized result */ - tokensOut, maxTokens); + tokens, ST_MAX_SHADER_TOKENS); - fs.tokens = tokensOut; + fs.tokens = (struct tgsi_token *) + mem_dup(tokens, num_tokens * sizeof(tokens[0])); - cso = st_cached_fs_state(st, &fs); - stfp->cso = cso; + stfp->state = fs; /* struct copy */ + stfp->driver_shader = pipe->create_fs_state(pipe, &fs); if (0) _mesa_print_program(&stfp->Base.Base); if (TGSI_DEBUG) - tgsi_dump( tokensOut, 0/*TGSI_DUMP_VERBOSE*/ ); + tgsi_dump( fs.tokens, 0/*TGSI_DUMP_VERBOSE*/ ); return cso; } diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 31558af6ced..78786dcbb61 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -60,11 +60,8 @@ struct st_fragment_program /** map FP input back to VP output */ GLuint input_map[PIPE_MAX_SHADER_INPUTS]; - /** The program in TGSI format */ - struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; - - /** Pointer to the corresponding cached shader */ - const struct cso_fragment_shader *cso; + struct pipe_shader_state state; + struct pipe_shader_state *driver_shader; GLuint param_state; @@ -88,11 +85,8 @@ struct st_vertex_program /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */ GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; - /** The program in TGSI format */ - struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; - - /** Pointer to the corresponding cached shader */ - const struct cso_vertex_shader *cso; + struct pipe_shader_state state; + struct pipe_shader_state *driver_shader; /** For using our private draw module (glRasterPos) */ struct draw_vertex_shader *draw_shader; @@ -122,16 +116,12 @@ st_vertex_program( struct gl_vertex_program *vp ) extern const struct cso_fragment_shader * st_translate_fragment_program(struct st_context *st, struct st_fragment_program *fp, - const GLuint inputMapping[], - struct tgsi_token *tokens, - GLuint maxTokens); + const GLuint inputMapping[]); extern void st_translate_vertex_program(struct st_context *st, struct st_vertex_program *vp, - const GLuint vert_output_to_slot[], - struct tgsi_token *tokens, - GLuint maxTokens); + const GLuint vert_output_to_slot[]); #endif -- cgit v1.2.3 From 221adbd60116d6334996a6b71a8dd133e229a3e9 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 11 Mar 2008 20:03:37 -0600 Subject: cell: check for NULL shader pointer in cell_bind_vs_state() --- src/gallium/drivers/cell/ppu/cell_state_shader.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 935501441b1..269a5c15bac 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -131,7 +131,8 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs) cell->vs = (const struct cell_vertex_shader_state *) vs; - draw_bind_vertex_shader(cell->draw, cell->vs->draw_data); + draw_bind_vertex_shader(cell->draw, + (cell->vs ? cell->vs->draw_data : NULL)); cell->dirty |= CELL_NEW_VS; } -- cgit v1.2.3 From 98ae83d5cc73b61826823c915b5c59746c2e85c7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Wed, 12 Mar 2008 10:39:25 +0000 Subject: gallium: Add TEX_FILTER_ANISO img filter Hardware almost universally expects us to set a special filtering mode when anisotropic filtering is enabled, as opposed to varying a max-aniso values. Do this once in the state tracker & simplify the driver code. --- src/gallium/drivers/i915simple/i915_state.c | 19 +++++------ .../drivers/i965simple/brw_wm_sampler_state.c | 38 +++++++++++----------- src/gallium/drivers/softpipe/sp_tex_sample.c | 3 ++ src/gallium/include/pipe/p_defines.h | 2 +- src/mesa/state_tracker/st_atom_sampler.c | 4 +++ 5 files changed, 35 insertions(+), 31 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index d9ab483bfca..57b195ea8d7 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -71,6 +71,8 @@ static unsigned translate_img_filter( unsigned filter ) return FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return FILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: + return FILTER_ANISOTROPIC; default: assert(0); return FILTER_NEAREST; @@ -84,7 +86,7 @@ static unsigned translate_mip_filter( unsigned filter ) return MIPFILTER_NONE; case PIPE_TEX_MIPFILTER_NEAREST: return MIPFILTER_NEAREST; - case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_MIPFILTER_LINEAR: return MIPFILTER_LINEAR; default: assert(0); @@ -211,16 +213,11 @@ i915_create_sampler_state(struct pipe_context *pipe, cso->templ = sampler; mipFilt = translate_mip_filter(sampler->min_mip_filter); - if (sampler->max_anisotropy > 1.0) { - minFilt = FILTER_ANISOTROPIC; - magFilt = FILTER_ANISOTROPIC; - if (sampler->max_anisotropy > 2.0) { - cso->state[0] |= SS2_MAX_ANISO_4; - } - } - else { - minFilt = translate_img_filter( sampler->min_img_filter ); - magFilt = translate_img_filter( sampler->mag_img_filter ); + minFilt = translate_img_filter( sampler->min_img_filter ); + magFilt = translate_img_filter( sampler->mag_img_filter ); + + if (sampler->max_anisotropy > 2.0) { + cso->state[0] |= SS2_MAX_ANISO_4; } { diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c index ff5ba7e7c7a..b9eaee56ee8 100644 --- a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -136,6 +136,9 @@ static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_samp case PIPE_TEX_FILTER_LINEAR: sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; break; + case PIPE_TEX_FILTER_ANISO: + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + break; default: break; } @@ -155,26 +158,23 @@ static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_samp } /* Set Anisotropy: */ - if (pipe_sampler->max_anisotropy > 1.0) { - sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; - sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; - - if (pipe_sampler->max_anisotropy > 2.0) { - sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, - BRW_ANISORATIO_16); - } + switch (pipe_sampler->mag_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + case PIPE_TEX_FILTER_ANISO: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; } - else { - switch (pipe_sampler->mag_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; - break; - case PIPE_TEX_FILTER_LINEAR: - sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - default: - break; - } + + if (pipe_sampler->max_anisotropy > 2.0) { + sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, + BRW_ANISORATIO_16); } sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s); diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 0ced585c7f3..34da6356d70 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -714,6 +714,7 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler, } break; case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: for (j = 0; j < QUAD_SIZE; j++) { float tx[4][4], a, b; int x0, y0, x1, y1, c; @@ -846,6 +847,7 @@ sp_get_samples_3d(struct tgsi_sampler *sampler, } break; case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: for (j = 0; j < QUAD_SIZE; j++) { float texel0[4][4], texel1[4][4]; float xw, yw, zw; /* interpolation weights */ @@ -972,6 +974,7 @@ sp_get_samples_rect(struct tgsi_sampler *sampler, } break; case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: for (j = 0; j < QUAD_SIZE; j++) { float tx[4][4], a, b; int x0, y0, x1, y1, c; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 0c662d65179..bc938ba2538 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -152,7 +152,7 @@ enum pipe_texture_target { */ #define PIPE_TEX_FILTER_NEAREST 0 #define PIPE_TEX_FILTER_LINEAR 1 -/* #define PIPE_TEX_FILTER_ANISO 2 */ +#define PIPE_TEX_FILTER_ANISO 2 #define PIPE_TEX_COMPARE_NONE 0 diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 1babba9b4fa..d376480c915 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -160,6 +160,10 @@ update_samplers(struct st_context *st) #endif sampler->max_anisotropy = texobj->MaxAnisotropy; + if (sampler->max_anisotropy > 1.0) { + sampler->min_img_filter = PIPE_TEX_FILTER_ANISO; + sampler->mag_img_filter = PIPE_TEX_FILTER_ANISO; + } /* only care about ARB_shadow, not SGI shadow */ if (texobj->CompareMode == GL_COMPARE_R_TO_TEXTURE) { -- cgit v1.2.3 From ba75e82b6ebaf88dd2e4a8f764b2d296d715bf8a Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Wed, 12 Mar 2008 16:41:25 +0100 Subject: tgsi: Remove ExtDivide field from existence. Implement OPCODE_TXP. --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 46 +++++++--------------- src/gallium/auxiliary/tgsi/util/tgsi_build.c | 5 --- src/gallium/auxiliary/tgsi/util/tgsi_build.h | 1 - src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 9 ----- .../drivers/i915simple/i915_fpc_translate.c | 12 +++--- src/gallium/include/pipe/p_shader_tokens.h | 13 +----- 6 files changed, 21 insertions(+), 65 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index ac524414001..f2ed9e0353e 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -1220,7 +1220,8 @@ fetch_texel( struct tgsi_sampler *sampler, static void exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - boolean biasLod) + boolean biasLod, + boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; union tgsi_exec_channel r[8]; @@ -1234,17 +1235,9 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[0], 0, CHAN_X); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[1], 0, CHAN_W); micro_div( &r[0], &r[0], &r[1] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -1266,19 +1259,11 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); micro_div( &r[0], &r[0], &r[3] ); micro_div( &r[1], &r[1], &r[3] ); micro_div( &r[2], &r[2], &r[3] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -1300,19 +1285,11 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); micro_div( &r[0], &r[0], &r[3] ); micro_div( &r[1], &r[1], &r[3] ); micro_div( &r[2], &r[2], &r[3] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -2007,14 +1984,14 @@ exec_instruction( /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE); + exec_tex(mach, inst, FALSE, FALSE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); break; case TGSI_OPCODE_TXD: @@ -2030,7 +2007,14 @@ exec_instruction( /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection */ + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, TRUE); break; case TGSI_OPCODE_UP2H: diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c index a00ff1c2a5f..9c883ab7043 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c @@ -719,7 +719,6 @@ tgsi_build_full_instruction( reg->SrcRegisterExtSwz.NegateY, reg->SrcRegisterExtSwz.NegateZ, reg->SrcRegisterExtSwz.NegateW, - reg->SrcRegisterExtSwz.ExtDivide, prev_token, instruction, header ); @@ -1057,7 +1056,6 @@ tgsi_default_src_register_ext_swz( void ) src_register_ext_swz.NegateY = 0; src_register_ext_swz.NegateZ = 0; src_register_ext_swz.NegateW = 0; - src_register_ext_swz.ExtDivide = TGSI_EXTSWIZZLE_ONE; src_register_ext_swz.Padding = 0; src_register_ext_swz.Extended = 0; @@ -1084,7 +1082,6 @@ tgsi_build_src_register_ext_swz( unsigned negate_y, unsigned negate_z, unsigned negate_w, - unsigned ext_divide, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) @@ -1099,7 +1096,6 @@ tgsi_build_src_register_ext_swz( assert( negate_y <= 1 ); assert( negate_z <= 1 ); assert( negate_w <= 1 ); - assert( ext_divide <= TGSI_EXTSWIZZLE_ONE ); src_register_ext_swz = tgsi_default_src_register_ext_swz(); src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; @@ -1110,7 +1106,6 @@ tgsi_build_src_register_ext_swz( src_register_ext_swz.NegateY = negate_y; src_register_ext_swz.NegateZ = negate_z; src_register_ext_swz.NegateW = negate_w; - src_register_ext_swz.ExtDivide = ext_divide; prev_token->Extended = 1; instruction_grow( instruction, header ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h index 607860e7fc5..80bffc4ae71 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h @@ -229,7 +229,6 @@ tgsi_build_src_register_ext_swz( unsigned negate_y, unsigned negate_z, unsigned negate_w, - unsigned ext_divide, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 1913d482f1a..ceb407b884b 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -1049,11 +1049,6 @@ dump_instruction_short( CHR( ')' ); } - if (src->SrcRegisterExtSwz.ExtDivide != TGSI_EXTSWIZZLE_ONE) { - CHR( '/' ); - ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES_SHORT ); - } - first_reg = FALSE; } @@ -1368,10 +1363,6 @@ dump_instruction_verbose( TXT( "\nNegateW : " ); UID( src->SrcRegisterExtSwz.NegateW ); } - if( deflt || fs->SrcRegisterExtSwz.ExtDivide != src->SrcRegisterExtSwz.ExtDivide ) { - TXT( "\nExtDivide : " ); - ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES ); - } if( ignored ) { TXT( "\nPadding : " ); UIX( src->SrcRegisterExtSwz.Padding ); diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index c2d9a93c6c8..7b4fca5db1b 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -872,19 +872,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_TEX: - if (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide - == TGSI_EXTSWIZZLE_W) { - emit_tex(p, inst, T0_TEXLDP); - } - else { - emit_tex(p, inst, T0_TEXLD); - } + emit_tex(p, inst, T0_TEXLD); break; case TGSI_OPCODE_TXB: emit_tex(p, inst, T0_TEXLDB); break; + case TGSI_OPCODE_TXP: + emit_tex(p, inst, T0_TEXLDP); + break; + case TGSI_OPCODE_XPD: /* Cross product: * result.x = src0.y * src1.z - src0.z * src1.y; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index f0bb43bc5ba..210169f54f2 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -655,9 +655,6 @@ struct tgsi_src_register_ext * * NegateX, NegateY, NegateZ and NegateW negate individual components of the * source register. - * - * ExtDivide specifies which component is used to divide all components of the - * source register. */ struct tgsi_src_register_ext_swz @@ -671,15 +668,7 @@ struct tgsi_src_register_ext_swz unsigned NegateY : 1; /* BOOL */ unsigned NegateZ : 1; /* BOOL */ unsigned NegateW : 1; /* BOOL */ - - /* - * XXX: Do not use. This field has been depricated. - * XXX: If using in conjunction with OPCODE_TEX, please use OPCODE_TXP - * XXX: and, if needed, perform a swizzle on the texture coordinate. - */ - unsigned ExtDivide : 4; /* TGSI_EXTSWIZZLE_ */ - - unsigned Padding : 3; + unsigned Padding : 7; unsigned Extended : 1; /* BOOL */ }; -- cgit v1.2.3 From 6bd5e5ce00b1870a8d94337cc10faa8134cbefd5 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Wed, 12 Mar 2008 18:40:37 +0100 Subject: nv30: line up the miptree creation to latest changes. --- src/gallium/drivers/nv30/nv30_miptree.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 23bcef08ebc..19945e9ab87 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -54,27 +54,29 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) nv30mt->total_size = offset; } -static void -nv30_miptree_create(struct pipe_screen *screen, struct pipe_texture **pt) +static struct pipe_texture * +nv30_miptree_create(struct pipe_screen *screen, struct pipe_texture *pt) { struct pipe_winsys *ws = screen->winsys; - struct nv30_miptree *nv30mt; + struct nv30_miptree *mt; - nv30mt = realloc(*pt, sizeof(struct nv30_miptree)); - if (!nv30mt) - return; - *pt = NULL; + mt = MALLOC(sizeof(struct nv30_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = screen; - nv30_miptree_layout(nv30mt); + nv30_miptree_layout(mt); - nv30mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, - nv30mt->total_size); - if (!nv30mt->buffer) { - free(nv30mt); - return; + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->total_size); + if (!mt->buffer) { + free(mt); + return NULL; } - *pt = &nv30mt->base; + return &mt->base; } static void -- cgit v1.2.3 From 830b4709f0ac27915450b53b622a8886264d8c8c Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Wed, 12 Mar 2008 18:43:29 +0100 Subject: nouveau: update to latest reg header. --- src/gallium/drivers/nouveau/nouveau_class.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index c80461038b7..61c3d97fd4c 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -1747,7 +1747,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV10TCL_DMA_NOTIFY 0x00000180 #define NV10TCL_DMA_IN_MEMORY0 0x00000184 #define NV10TCL_DMA_IN_MEMORY1 0x00000188 -#define NV10TCL_DISPLAY_LIST 0x0000018c +#define NV10TCL_DMA_VTXBUF0 0x0000018c #define NV10TCL_DMA_IN_MEMORY2 0x00000194 #define NV10TCL_DMA_IN_MEMORY3 0x00000198 #define NV10TCL_VIEWPORT_HORIZ 0x00000200 @@ -2866,11 +2866,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV10TCL_VERTEX_BEGIN_END_QUADS 0x00000008 #define NV10TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009 #define NV10TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a -#define NV10TCL_DRAW_INDEX 0x00000e00 -#define NV10TCL_DRAW_INDEX_I0_SHIFT 0 -#define NV10TCL_DRAW_INDEX_I0_MASK 0x0000ffff -#define NV10TCL_DRAW_INDEX_I1_SHIFT 24 -#define NV10TCL_DRAW_INDEX_I1_MASK 0xff000000 +#define NV10TCL_VB_ELEMENT_U16 0x00000e00 +#define NV10TCL_VB_ELEMENT_U16_I0_SHIFT 0 +#define NV10TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff +#define NV10TCL_VB_ELEMENT_U16_I1_SHIFT 24 +#define NV10TCL_VB_ELEMENT_U16_I1_MASK 0xff000000 #define NV10TCL_VERTEX_BUFFER_BEGIN_END 0x000013fc #define NV10TCL_VERTEX_BUFFER_BEGIN_END_STOP 0x00000000 #define NV10TCL_VERTEX_BUFFER_BEGIN_END_POINTS 0x00000001 @@ -4099,9 +4099,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_VIEWPORT_SCALE_Y 0x00000a34 #define NV34TCL_VIEWPORT_SCALE_Z 0x00000a38 #define NV34TCL_VIEWPORT_SCALE_W 0x00000a3c -#define NV34TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a60 +#define NV34TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a60 #define NV34TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 -#define NV34TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a68 +#define NV34TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a68 #define NV34TCL_DEPTH_FUNC 0x00000a6c #define NV34TCL_DEPTH_FUNC_NEVER 0x00000200 #define NV34TCL_DEPTH_FUNC_LESS 0x00000201 -- cgit v1.2.3 From 3b2a9b01a0c9b80573556a21e9db11b6f64eff8e Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Wed, 12 Mar 2008 19:06:22 +0100 Subject: nv30: debug dumps vp constants --- src/gallium/drivers/nv30/nv30_vertprog.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 96bc4b5ef9a..75f7351261d 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -751,6 +751,11 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); OUT_RING (i + vp->data->start); OUT_RINGp ((uint32_t *)vpd->value, 4); +#if 0 + NOUVEAU_MSG("VP const %d: %f %f %f %f\n", + i, vpd->value[0], vpd->value[1], + vpd->value[2], vpd->value[3]); +#endif } if (map) { @@ -762,10 +767,9 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) if (upload_code) { #if 0 for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); + NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, vp->insns[i].data[0], vp->insns[i].data[1], + vp->insns[i].data[2], vp->insns[i].data[3]); } #endif BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); -- cgit v1.2.3 From 51809bc1bc719c8d988cd6e06b6c96af43d12026 Mon Sep 17 00:00:00 2001 From: Brian <brian@poulsbo.localnet.net> Date: Wed, 12 Mar 2008 13:22:58 -0600 Subject: remove reference to obsolete ExtDivide --- src/gallium/drivers/i965simple/brw_wm_glsl.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index 44f946ea748..5c905838243 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -124,10 +124,6 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, */ component = get_ext_swz(src->SrcRegisterExtSwz, component); - /* Can't handle this, don't know if we need to: - */ - assert(src->SrcRegisterExtSwz.ExtDivide == TGSI_EXTSWIZZLE_ONE); - /* Not handling indirect lookups yet: */ assert(src->SrcRegister.Indirect == 0); -- cgit v1.2.3 From 2109ba4c5d22e1f7effa33a6ff26ce587ce46fe3 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 12 Mar 2008 16:56:12 -0600 Subject: i915: handle NULL object in i915_bind_rasterizer_state() --- src/gallium/drivers/i915simple/i915_state.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 57b195ea8d7..503c0924003 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -668,22 +668,23 @@ i915_create_rasterizer_state(struct pipe_context *pipe, } static void i915_bind_rasterizer_state( struct pipe_context *pipe, - void *setup ) + void *raster ) { struct i915_context *i915 = i915_context(pipe); - i915->rasterizer = (struct i915_rasterizer_state *)setup; + i915->rasterizer = (struct i915_rasterizer_state *)raster; /* pass-through to draw module */ - draw_set_rasterizer_state(i915->draw, i915->rasterizer->templ); + draw_set_rasterizer_state(i915->draw, + (i915->rasterizer ? i915->rasterizer->templ : NULL)); i915->dirty |= I915_NEW_RASTERIZER; } static void i915_delete_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *raster) { - FREE(setup); + FREE(raster); } static void i915_set_vertex_buffer( struct pipe_context *pipe, -- cgit v1.2.3 From bd4fe0e87c1b979973d9a76aa48de5fbbb8d52b7 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Thu, 13 Mar 2008 00:42:50 +0100 Subject: nouveau: update to latest nouveau_class.h --- src/gallium/drivers/nouveau/nouveau_class.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 61c3d97fd4c..1b015507903 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -2869,8 +2869,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV10TCL_VB_ELEMENT_U16 0x00000e00 #define NV10TCL_VB_ELEMENT_U16_I0_SHIFT 0 #define NV10TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff -#define NV10TCL_VB_ELEMENT_U16_I1_SHIFT 24 -#define NV10TCL_VB_ELEMENT_U16_I1_MASK 0xff000000 +#define NV10TCL_VB_ELEMENT_U16_I1_SHIFT 16 +#define NV10TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000 +#define NV10TCL_VB_ELEMENT_U32 0x00001100 #define NV10TCL_VERTEX_BUFFER_BEGIN_END 0x000013fc #define NV10TCL_VERTEX_BUFFER_BEGIN_END_STOP 0x00000000 #define NV10TCL_VERTEX_BUFFER_BEGIN_END_POINTS 0x00000001 -- cgit v1.2.3 From 9a4938d7033101122b627786273ff37229b5558a Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 13 Mar 2008 12:36:35 +1100 Subject: nouveau: match interface changes --- src/gallium/drivers/nv30/nv30_fragprog.c | 11 ++++---- src/gallium/drivers/nv30/nv30_state.c | 30 +++++++++++--------- src/gallium/drivers/nv30/nv30_state.h | 4 +-- src/gallium/drivers/nv30/nv30_vertprog.c | 4 +-- src/gallium/drivers/nv40/nv40_context.h | 2 ++ src/gallium/drivers/nv40/nv40_fragprog.c | 11 ++++---- src/gallium/drivers/nv40/nv40_state.c | 47 +++++++++++++++++++++++--------- src/gallium/drivers/nv40/nv40_state.h | 4 +-- src/gallium/drivers/nv40/nv40_vertprog.c | 4 +-- src/gallium/drivers/nv50/nv50_state.c | 11 ++++---- 10 files changed, 76 insertions(+), 52 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 0a2ba04f958..6f61d36f4e8 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -583,15 +583,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); break; case TGSI_OPCODE_TEX: - if (finst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide == - TGSI_EXTSWIZZLE_W) { - tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); - } else - tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); break; case TGSI_OPCODE_TXB: tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); break; + case TGSI_OPCODE_TXP: + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + break; case TGSI_OPCODE_XPD: tmp = temp(fpc); arith(fpc, 0, MUL, tmp, mask, @@ -683,7 +682,7 @@ nv30_fragprog_translate(struct nv30_context *nv30, fpc->high_temp = -1; fpc->num_regs = 2; - tgsi_parse_init(&parse, fp->pipe->tokens); + tgsi_parse_init(&parse, fp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 319d53fccac..951a32bc81e 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -231,14 +231,15 @@ nv30_sampler_state_create(struct pipe_context *pipe, } static void -nv30_sampler_state_bind(struct pipe_context *pipe, unsigned unit, - void *hwcso) +nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) { struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_sampler_state *ps = hwcso; + unsigned unit; - nv30->tex_sampler[unit] = ps; - nv30->dirty_samplers |= (1 << unit); + for (unit = 0; unit < nr; unit++) { + nv30->tex_sampler[unit] = sampler[unit]; + nv30->dirty_samplers |= (1 << unit); + } } static void @@ -248,13 +249,16 @@ nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) } static void -nv30_set_sampler_texture(struct pipe_context *pipe, unsigned unit, - struct pipe_texture *miptree) +nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) { struct nv30_context *nv30 = nv30_context(pipe); + unsigned unit; - nv30->tex_miptree[unit] = (struct nv30_miptree *)miptree; - nv30->dirty_samplers |= (1 << unit); + for (unit = 0; unit < nr; unit++) { + nv30->tex_miptree[unit] = (struct nv30_miptree *)miptree[unit]; + nv30->dirty_samplers |= (1 << unit); + } } static void * @@ -440,7 +444,7 @@ nv30_vp_state_create(struct pipe_context *pipe, struct nv30_vertex_program *vp; vp = CALLOC(1, sizeof(struct nv30_vertex_program)); - vp->pipe = cso; + vp->pipe = *cso; return (void *)vp; } @@ -472,7 +476,7 @@ nv30_fp_state_create(struct pipe_context *pipe, struct nv30_fragment_program *fp; fp = CALLOC(1, sizeof(struct nv30_fragment_program)); - fp->pipe = cso; + fp->pipe = *cso; return (void *)fp; } @@ -709,9 +713,9 @@ nv30_init_state_functions(struct nv30_context *nv30) nv30->pipe.delete_blend_state = nv30_blend_state_delete; nv30->pipe.create_sampler_state = nv30_sampler_state_create; - nv30->pipe.bind_sampler_state = nv30_sampler_state_bind; + nv30->pipe.bind_sampler_states = nv30_sampler_state_bind; nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; - nv30->pipe.set_sampler_texture = nv30_set_sampler_texture; + nv30->pipe.set_sampler_textures = nv30_set_sampler_texture; nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h index 233600f69ab..117520dd13c 100644 --- a/src/gallium/drivers/nv30/nv30_state.h +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -60,7 +60,7 @@ struct nv30_vertex_program_data { }; struct nv30_vertex_program { - const struct pipe_shader_state *pipe; + struct pipe_shader_state pipe; boolean translated; struct nv30_vertex_program_exec *insns; @@ -84,7 +84,7 @@ struct nv30_fragment_program_data { }; struct nv30_fragment_program { - const struct pipe_shader_state *pipe; + struct pipe_shader_state pipe; boolean translated; boolean on_hw; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 75f7351261d..e9b62ff48b4 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -541,7 +541,7 @@ nv30_vertprog_prepare(struct nv30_vpc *vpc) struct tgsi_parse_context p; int nr_imm = 0; - tgsi_parse_init(&p, vpc->vp->pipe->tokens); + tgsi_parse_init(&p, vpc->vp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { const union tgsi_full_token *tok = &p.FullToken; @@ -582,7 +582,7 @@ nv30_vertprog_translate(struct nv30_context *nv30, return; } - tgsi_parse_init(&parse, vp->pipe->tokens); + tgsi_parse_init(&parse, vp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index e118776306b..100c678187a 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -136,6 +136,8 @@ struct nv40_context { unsigned idxbuf_format; struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned nr_textures; unsigned dirty_samplers; struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index d981a02a639..953f9cd9082 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -580,15 +580,14 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); break; case TGSI_OPCODE_TEX: - if (finst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide == - TGSI_EXTSWIZZLE_W) { - tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); - } else - tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); break; case TGSI_OPCODE_TXB: tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); break; + case TGSI_OPCODE_TXP: + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + break; case TGSI_OPCODE_XPD: tmp = temp(fpc); arith(fpc, 0, MUL, tmp, mask, @@ -680,7 +679,7 @@ nv40_fragprog_translate(struct nv40_context *nv40, fpc->high_temp = -1; fpc->num_regs = 2; - tgsi_parse_init(&parse, fp->pipe->tokens); + tgsi_parse_init(&parse, fp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index caa2f9df0c6..321d5de0415 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "nv40_context.h" #include "nv40_state.h" @@ -251,14 +252,22 @@ nv40_sampler_state_create(struct pipe_context *pipe, } static void -nv40_sampler_state_bind(struct pipe_context *pipe, unsigned unit, - void *hwcso) +nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) { struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_sampler_state *ps = hwcso; + unsigned unit; - nv40->tex_sampler[unit] = ps; - nv40->dirty_samplers |= (1 << unit); + for (unit = 0; unit < nr; unit++) { + nv40->tex_sampler[unit] = sampler[unit]; + nv40->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv40->nr_samplers; unit++) { + nv40->tex_sampler[unit] = NULL; + nv40->dirty_samplers |= (1 << unit); + } + + nv40->nr_samplers = nr; nv40->dirty |= NV40_NEW_SAMPLER; } @@ -269,13 +278,25 @@ nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) } static void -nv40_set_sampler_texture(struct pipe_context *pipe, unsigned unit, - struct pipe_texture *miptree) +nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) { struct nv40_context *nv40 = nv40_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv40->tex_miptree[unit], miptree[unit]); + nv40->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv40->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv40->tex_miptree[unit], NULL); + nv40->dirty_samplers |= (1 << unit); + } - nv40->tex_miptree[unit] = (struct nv40_miptree *)miptree; - nv40->dirty_samplers |= (1 << unit); + nv40->nr_textures = nr; nv40->dirty |= NV40_NEW_SAMPLER; } @@ -490,7 +511,7 @@ nv40_vp_state_create(struct pipe_context *pipe, struct nv40_vertex_program *vp; vp = CALLOC(1, sizeof(struct nv40_vertex_program)); - vp->pipe = cso; + vp->pipe = *cso; return (void *)vp; } @@ -521,7 +542,7 @@ nv40_fp_state_create(struct pipe_context *pipe, struct nv40_fragment_program *fp; fp = CALLOC(1, sizeof(struct nv40_fragment_program)); - fp->pipe = cso; + fp->pipe = *cso; return (void *)fp; } @@ -649,9 +670,9 @@ nv40_init_state_functions(struct nv40_context *nv40) nv40->pipe.delete_blend_state = nv40_blend_state_delete; nv40->pipe.create_sampler_state = nv40_sampler_state_create; - nv40->pipe.bind_sampler_state = nv40_sampler_state_bind; + nv40->pipe.bind_sampler_states = nv40_sampler_state_bind; nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; - nv40->pipe.set_sampler_texture = nv40_set_sampler_texture; + nv40->pipe.set_sampler_textures = nv40_set_sampler_texture; nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index e5217fe91c4..a02ea0c8781 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -23,7 +23,7 @@ struct nv40_vertex_program_data { }; struct nv40_vertex_program { - const struct pipe_shader_state *pipe; + struct pipe_shader_state pipe; boolean translated; struct nv40_vertex_program_exec *insns; @@ -48,7 +48,7 @@ struct nv40_fragment_program_data { }; struct nv40_fragment_program { - const struct pipe_shader_state *pipe; + struct pipe_shader_state pipe; boolean translated; unsigned samplers; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 5b7a343e55d..3d730c1a321 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -535,7 +535,7 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) struct tgsi_parse_context p; int nr_imm = 0; - tgsi_parse_init(&p, vpc->vp->pipe->tokens); + tgsi_parse_init(&p, vpc->vp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { const union tgsi_full_token *tok = &p.FullToken; @@ -576,7 +576,7 @@ nv40_vertprog_translate(struct nv40_context *nv40, return; } - tgsi_parse_init(&parse, vp->pipe->tokens); + tgsi_parse_init(&parse, vp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index aa65fd482e4..b096a2583d7 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -90,8 +90,7 @@ nv50_sampler_state_create(struct pipe_context *pipe, } static void -nv50_sampler_state_bind(struct pipe_context *pipe, unsigned unit, - void *hwcso) +nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) { } @@ -101,8 +100,8 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) } static void -nv50_set_sampler_texture(struct pipe_context *pipe, unsigned unit, - struct pipe_texture *pt) +nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **pt) { } @@ -442,9 +441,9 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.delete_blend_state = nv50_blend_state_delete; nv50->pipe.create_sampler_state = nv50_sampler_state_create; - nv50->pipe.bind_sampler_state = nv50_sampler_state_bind; + nv50->pipe.bind_sampler_states = nv50_sampler_state_bind; nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; - nv50->pipe.set_sampler_texture = nv50_set_sampler_texture; + nv50->pipe.set_sampler_textures = nv50_set_sampler_texture; nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; -- cgit v1.2.3 From 1cec61e441ad5b4b1ac8d1abcaa7535bc1827eb3 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 13 Mar 2008 18:08:22 +1100 Subject: nouveau: NV9X is basically a G80, fix issue with NV6X being detected as G80. --- src/gallium/drivers/nv50/nv50_screen.c | 29 +++++++++++----- src/gallium/winsys/dri/nouveau/nouveau_context.c | 42 ++++++++++++++++++++---- src/gallium/winsys/dri/nouveau/nouveau_winsys.c | 1 + 3 files changed, 57 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 62c23c790ca..721c6421d1f 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -6,8 +6,9 @@ #include "nouveau/nouveau_stateobj.h" -#define GRCLASS5097_CHIPSETS 0x00000000 -#define GRCLASS8297_CHIPSETS 0x00000010 +#define NV5X_GRCLASS5097_CHIPSETS 0x00000001 +#define NV8X_GRCLASS8297_CHIPSETS 0x00000010 +#define NV9X_GRCLASS8297_CHIPSETS 0x00000004 static boolean nv50_screen_is_format_supported(struct pipe_screen *pscreen, @@ -117,12 +118,24 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, return NULL; } - if (GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f))) { - tesla_class = 0x5097; - } else - if (GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) { - tesla_class = 0x8297; - } else { + switch (chipset & 0xf0) { + case 0x50: + if (NV5X_GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x5097; + break; + case 0x80: + if (NV8X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x8297; + break; + case 0x90: + if (NV9X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x8297; + break; + default: + break; + } + + if (tesla_class == 0) { NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); nv50_screen_destroy(&screen->pipe); return NULL; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c index 8dac08a5d21..8b20b3689c6 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.c @@ -75,6 +75,7 @@ nouveau_channel_context_create(struct nouveau_device *nvdev, unsigned chipset) switch (chipset & 0xf0) { case 0x50: case 0x80: + case 0x90: ret = nouveau_surface_channel_create_nv50(nvc); break; default: @@ -109,6 +110,7 @@ nouveau_context_create(const __GLcontextModes *glVis, st_share = ((struct nouveau_context *)sharedContextPrivate)->st; } + /* Check for supported arch */ if ((ret = nouveau_device_get_param(nv_screen->device, NOUVEAU_GETPARAM_CHIPSET_ID, &nv->chipset))) { @@ -116,6 +118,22 @@ nouveau_context_create(const __GLcontextModes *glVis, return GL_FALSE; } + switch (nv->chipset & 0xf0) { + case 0x30: + /* NV30 */ + case 0x40: + case 0x60: + /* NV40 */ + case 0x50: + case 0x80: + case 0x90: + /* G80 */ + break; + default: + NOUVEAU_ERR("Unsupported chipset: NV%02x\n", nv->chipset); + return GL_FALSE; + } + driContextPriv->driverPrivate = (void *)nv; nv->nv_screen = nv_screen; nv->dri_screen = driScrnPriv; @@ -176,10 +194,15 @@ nouveau_context_create(const __GLcontextModes *glVis, } } - /*XXX: temporary - disable multi-context/single-channel on non-NV4x */ + /*XXX: temporary - disable multi-context/single-channel on pre-NV4x */ switch (nv->chipset & 0xf0) { case 0x40: case 0x60: + /* NV40 class */ + case 0x50: + case 0x80: + case 0x90: + /* G80 class */ break; default: nvc = NULL; @@ -215,12 +238,17 @@ nouveau_context_create(const __GLcontextModes *glVis, } /* Create pipe */ - if (nv->chipset < 0x50) - ret = nouveau_surface_init_nv04(nv); - else - ret = nouveau_surface_init_nv50(nv); - if (ret) { - return GL_FALSE; + switch (nv->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + if (nouveau_surface_init_nv50(nv)) + return GL_FALSE; + break; + default: + if (nouveau_surface_init_nv04(nv)) + return GL_FALSE; + break; } if (!getenv("NOUVEAU_FORCE_SOFTPIPE")) { diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c index dc7c4c3d71d..64e84fb68e4 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c @@ -93,6 +93,7 @@ nouveau_pipe_create(struct nouveau_context *nv) break; case 0x50: case 0x80: + case 0x90: hws_create = nv50_screen_create; hw_create = nv50_create; break; -- cgit v1.2.3 From 647213804582a6b6cfd4fbfeb1c9874ef53307f3 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Thu, 13 Mar 2008 11:19:50 -0700 Subject: Replicate TXP changes in the SPU version of TGSI exec Replicate changes from commit ba75e82b6ebaf88dd2e4a8f764b2d296d715bf8a in spu_exec.c --- src/gallium/drivers/cell/spu/spu_exec.c | 45 ++++++++++----------------------- 1 file changed, 14 insertions(+), 31 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 1560c0f1574..061fbebf618 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -672,7 +672,7 @@ fetch_texel( struct spu_sampler *sampler, static void exec_tex(struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst, - boolean biasLod) + boolean biasLod, boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; union spu_exec_channel r[8]; @@ -686,17 +686,9 @@ exec_tex(struct spu_exec_machine *mach, FETCH(&r[0], 0, CHAN_X); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[1], 0, CHAN_W); r[0].q = micro_div(r[0].q, r[1].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -718,19 +710,11 @@ exec_tex(struct spu_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); r[0].q = micro_div(r[0].q, r[3].q); r[1].q = micro_div(r[1].q, r[3].q); r[2].q = micro_div(r[2].q, r[3].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -752,19 +736,11 @@ exec_tex(struct spu_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); r[0].q = micro_div(r[0].q, r[3].q); r[1].q = micro_div(r[1].q, r[3].q); r[2].q = micro_div(r[2].q, r[3].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -1450,14 +1426,14 @@ exec_instruction( /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE); + exec_tex(mach, inst, FALSE, FALSE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = load bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); break; case TGSI_OPCODE_TXD: @@ -1473,7 +1449,14 @@ exec_instruction( /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = load bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, TRUE); break; case TGSI_OPCODE_UP2H: -- cgit v1.2.3 From 3115e8c968b51d22962b1b92b13946956dddd98e Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Thu, 13 Mar 2008 13:03:22 -0700 Subject: cell: Fix to work with commit fa9e7e9a8debb68611909ac2ffab527c6c39a3e5 --- src/gallium/drivers/cell/ppu/cell_context.h | 4 +++- src/gallium/drivers/cell/ppu/cell_state_derived.c | 20 ++++++++++---------- src/gallium/drivers/cell/ppu/cell_state_shader.c | 23 ++--------------------- 3 files changed, 15 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index c568922cbd4..b221424323f 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -37,13 +37,14 @@ #include "cell_winsys.h" #include "cell/common.h" #include "rtasm/rtasm_ppc_spe.h" - +#include "tgsi/util/tgsi_scan.h" struct cell_vbuf_render; struct cell_vertex_shader_state { struct pipe_shader_state shader; + struct tgsi_shader_info info; void *draw_data; }; @@ -51,6 +52,7 @@ struct cell_vertex_shader_state struct cell_fragment_shader_state { struct pipe_shader_state shader; + struct tgsi_shader_info info; void *data; }; diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 0c468292584..5c240a55c0b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -36,14 +36,14 @@ static int -find_vs_output(const struct pipe_shader_state *vs, +find_vs_output(const struct cell_vertex_shader_state *vs, uint semantic_name, uint semantic_index) { uint i; - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == semantic_name && - vs->output_semantic_index[i] == semantic_index) + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == semantic_name && + vs->info.output_semantic_index[i] == semantic_index) return i; } return -1; @@ -58,8 +58,8 @@ find_vs_output(const struct pipe_shader_state *vs, static void calculate_vertex_layout( struct cell_context *cell ) { - const struct pipe_shader_state *vs = &cell->vs->shader; - const struct pipe_shader_state *fs = &cell->fs->shader; + const struct cell_vertex_shader_state *vs = cell->vs; + const struct cell_fragment_shader_state *fs = cell->fs; const enum interp_mode colorInterp = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; struct vertex_info *vinfo = &cell->vertex_info; @@ -91,15 +91,15 @@ calculate_vertex_layout( struct cell_context *cell ) * Loop over fragment shader inputs, searching for the matching output * from the vertex shader. */ - for (i = 0; i < fs->num_inputs; i++) { - switch (fs->input_semantic_name[i]) { + for (i = 0; i < fs->info.num_inputs; i++) { + switch (fs->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: /* already done above */ break; case TGSI_SEMANTIC_COLOR: src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, - fs->input_semantic_index[i]); + fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; @@ -117,7 +117,7 @@ calculate_vertex_layout( struct cell_context *cell ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, - fs->input_semantic_index[i]); + fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 269a5c15bac..fb2e940348a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -54,27 +54,7 @@ cell_create_fs_state(struct pipe_context *pipe, state->shader = *templ; -#if 0 - if (cell->dump_fs) { - tgsi_dump(state->shader.tokens, 0); - } - -#if defined(__i386__) || defined(__386__) - if (cell->use_sse) { - x86_init_func( &state->sse2_program ); - tgsi_emit_sse2_fs( state->shader.tokens, &state->sse2_program ); - } -#endif - -#ifdef MESA_LLVM - state->llvm_prog = 0; - if (!gallivm_global_cpu_engine()) { - gallivm_cpu_engine_create(state->llvm_prog); - } - else - gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); -#endif -#endif + tgsi_scan_shader(templ->tokens, &state->info); return state; } @@ -113,6 +93,7 @@ cell_create_vs_state(struct pipe_context *pipe, return NULL; state->shader = *templ; + tgsi_scan_shader(templ->tokens, &state->info); state->draw_data = draw_create_vertex_shader(cell->draw, &state->shader); if (state->draw_data == NULL) { -- cgit v1.2.3 From 69c39b9ae28764194a6d310d58aa36b7ac596aa9 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 13 Mar 2008 14:57:19 -0600 Subject: gallium: set surface status to CLEAR or DEFINED in clearing/drawing code. Otherwise, we were never setting these flags. This confused the state tracker. Fixes progs/demos/texenv.c, probably others. --- src/gallium/drivers/softpipe/sp_clear.c | 2 ++ src/gallium/drivers/softpipe/sp_prim_setup.c | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index 8d295a30ca6..39aed151c73 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -55,6 +55,7 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, if (ps == sp_tile_cache_get_surface(softpipe->zsbuf_cache)) { sp_tile_cache_clear(softpipe->zsbuf_cache, clearValue); + softpipe->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_CLEAR; #if TILE_CLEAR_OPTIMIZATION return; #endif @@ -63,6 +64,7 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) { sp_tile_cache_clear(softpipe->cbuf_cache[i], clearValue); + softpipe->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_CLEAR; } } diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 2feee5c485e..7aa9cf8e851 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -1164,11 +1164,22 @@ static void setup_begin( struct draw_stage *stage ) struct setup_stage *setup = setup_stage(stage); struct softpipe_context *sp = setup->softpipe; const struct sp_fragment_shader *fs = setup->softpipe->fs; + uint i; if (sp->dirty) { softpipe_update_derived(sp); } + /* Mark surfaces as defined now */ + for (i = 0; i < sp->framebuffer.num_cbufs; i++){ + if (sp->framebuffer.cbufs[i]) { + sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + } + } + if (sp->framebuffer.zsbuf) { + sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + } + setup->quad.nr_attrs = fs->info.num_inputs; sp->quad.first->begin(sp->quad.first); -- cgit v1.2.3 From 7d5e38a55ae99a4c28873377572f77f383ce0c3e Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 13 Mar 2008 16:53:44 -0600 Subject: gallium: plug in new sp_vbuf_draw_arrays() function Will be used for pass-through mode. Also, call draw_set_render() to register the vbuf stage. Should probably rename that function to something like draw_set_vbuf_stage(). --- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 57 +++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 69bea8a8f5c..184aac16f41 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -183,6 +183,60 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) } +/** + * This function is hit when the draw module is working in pass-through mode. + * It's up to us to convert the vertex array into point/line/tri prims. + */ +static void +sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + struct softpipe_context *softpipe = cvbr->softpipe; + struct draw_stage *setup = softpipe->setup; + struct prim_header prim; + const void *vertex_buffer = cvbr->vertex_buffer; + const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); + unsigned i, j; + + prim.det = 0; + prim.reset_line_stipple = 0; + prim.edgeflags = 0; + prim.pad = 0; + +#define VERTEX(I) \ + (struct vertex_header *) ((char *) vertex_buffer + (I) * vertex_size) + + switch (cvbr->prim) { + case PIPE_PRIM_TRIANGLES: + assert(nr % 3 == 0); + for (i = 0; i < nr; i += 3) { + prim.v[0] = VERTEX(i + 0); + prim.v[1] = VERTEX(i + 1); + prim.v[2] = VERTEX(i + 2); + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; + case PIPE_PRIM_POLYGON: + /* draw as tri fan */ + for (i = 2; i < nr; i++) { + prim.v[0] = VERTEX(0); + prim.v[1] = VERTEX(i - 1); + prim.v[2] = VERTEX(i); + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; + default: + /* XXX finish remaining prim types */ + assert(0); + } + +#undef VERTEX +} + + + static void sp_vbuf_destroy(struct vbuf_render *vbr) { @@ -210,6 +264,7 @@ sp_init_vbuf(struct softpipe_context *sp) sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices; sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive; sp->vbuf_render->base.draw = sp_vbuf_draw; + sp->vbuf_render->base.draw_arrays = sp_vbuf_draw_arrays; sp->vbuf_render->base.release_vertices = sp_vbuf_release_vertices; sp->vbuf_render->base.destroy = sp_vbuf_destroy; @@ -218,4 +273,6 @@ sp_init_vbuf(struct softpipe_context *sp) sp->vbuf = draw_vbuf_stage(sp->draw, &sp->vbuf_render->base); draw_set_rasterize_stage(sp->draw, sp->vbuf); + + draw_set_render(sp->draw, &sp->vbuf_render->base); } -- cgit v1.2.3 From 269fbeb5459952532f5d188dd3653fa6b7425cfe Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 13 Mar 2008 16:57:57 -0600 Subject: gallium: in softpipe_get_vertex_info() generate a vbuf vertex_info with real attribs Can't use the EMIT_ALL shortcut/optimization anymore because of passthrough mode. --- src/gallium/drivers/softpipe/sp_state_derived.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index eafbaed4b94..10483675eac 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -72,10 +72,22 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) */ struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; vinfo_vbuf->num_attribs = 0; +#if 0 + /* special-case to allow memcpy of whole vertex */ draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); /* size in dwords or floats */ vinfo_vbuf->size = 4 * draw_num_vs_outputs(softpipe->draw) + sizeof(struct vertex_header) / 4; +#else + /* for pass-through mode, we need a more explicit list of attribs */ + const uint num = draw_num_vs_outputs(softpipe->draw); + uint i; + draw_emit_vertex_attr(vinfo_vbuf, EMIT_HEADER, INTERP_NONE, 0); + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); + } + draw_compute_vertex_size(vinfo_vbuf); +#endif } /* -- cgit v1.2.3 From 78302c7ca30d27ef3d087deb4d1a22e83858ce4b Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 13 Mar 2008 18:19:47 -0600 Subject: gallium: need to all draw_flush() in softpipe_unmap_constant_buffers() Otherwise, we won't have our constants when we run the fragment shader. Fixes crash in glsl tests when SP_VBUF=1. --- src/gallium/drivers/softpipe/sp_draw_arrays.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 2049afda34f..5b5a0fe5735 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -62,6 +62,14 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) { struct pipe_winsys *ws = sp->pipe.winsys; uint i; + + /* really need to flush all prims since the vert/frag shaders const buffers + * are going away now. + */ + draw_flush(sp->draw); + + draw_set_mapped_constant_buffer(sp->draw, NULL); + for (i = 0; i < 2; i++) { if (sp->constants[i].size) ws->buffer_unmap(ws, sp->constants[i].buffer); -- cgit v1.2.3 From e4cdce43cebe6a2b38f7ea5145474ca2b12c57bb Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Fri, 14 Mar 2008 08:42:45 +0000 Subject: gallium: fix some compiler warnings --- src/gallium/auxiliary/draw/draw_prim.c | 4 ++-- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 4 +++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index ff71ba9b73c..888fa536ea3 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -121,8 +121,6 @@ static void draw_prim_queue_flush( struct draw_context *draw ) static INLINE void fetch_and_store(struct draw_context *draw) { - unsigned i; - /* run vertex shader on vertex cache entries, four per invokation */ #if 0 { @@ -131,6 +129,8 @@ static INLINE void fetch_and_store(struct draw_context *draw) count * vinfo->size); } #elif 0 + unsigned i; + draw_update_vertex_fetch(draw); for (i = 0; i < draw->vs.queue_nr; i += 4) { struct vertex_header *dests[4]; diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 184aac16f41..db0913cb2bc 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -196,7 +196,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) struct prim_header prim; const void *vertex_buffer = cvbr->vertex_buffer; const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); - unsigned i, j; + unsigned i; prim.det = 0; prim.reset_line_stipple = 0; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 10483675eac..82cb31ece73 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -71,8 +71,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * simply emit the whole post-xform vertex as-is: */ struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - vinfo_vbuf->num_attribs = 0; #if 0 + vinfo_vbuf->num_attribs = 0; /* special-case to allow memcpy of whole vertex */ draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); /* size in dwords or floats */ @@ -82,6 +82,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* for pass-through mode, we need a more explicit list of attribs */ const uint num = draw_num_vs_outputs(softpipe->draw); uint i; + + vinfo_vbuf->num_attribs = 0; draw_emit_vertex_attr(vinfo_vbuf, EMIT_HEADER, INTERP_NONE, 0); for (i = 0; i < num; i++) { draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); -- cgit v1.2.3 From 027433176cddec58821d625fb2df45cfd95f1e33 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Fri, 14 Mar 2008 10:23:11 -0600 Subject: i915: check for NULL const buffer ptr --- src/gallium/drivers/i915simple/i915_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 503c0924003..1cec36e206d 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -516,7 +516,7 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, * stays the same. In that case we should only be updating the first * N constants, leaving any extras from shader translation alone. */ - { + if (buf) { void *mapped; if (buf->size && (mapped = ws->buffer_map(ws, buf->buffer, -- cgit v1.2.3 From 08e341e5dc2e15d8a6c4ba870c9d293295df9467 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Fri, 14 Mar 2008 15:54:02 -0600 Subject: gallium: remove DOS carriage returns --- src/gallium/drivers/softpipe/sp_state.h | 390 ++++++++++++++++---------------- 1 file changed, 195 insertions(+), 195 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 45d159143f7..0bb1095aece 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -1,195 +1,195 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#ifndef SP_STATE_H -#define SP_STATE_H - -#include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" - - -#define SP_NEW_VIEWPORT 0x1 -#define SP_NEW_RASTERIZER 0x2 -#define SP_NEW_FS 0x4 -#define SP_NEW_BLEND 0x8 -#define SP_NEW_CLIP 0x10 -#define SP_NEW_SCISSOR 0x20 -#define SP_NEW_STIPPLE 0x40 -#define SP_NEW_FRAMEBUFFER 0x80 -#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 -#define SP_NEW_CONSTANTS 0x200 -#define SP_NEW_SAMPLER 0x400 -#define SP_NEW_TEXTURE 0x800 -#define SP_NEW_VERTEX 0x1000 -#define SP_NEW_VS 0x2000 -#define SP_NEW_QUERY 0x4000 - - -struct tgsi_sampler; -struct tgsi_exec_machine; - - -/** Subclass of pipe_shader_state (though it doesn't really need to be). - * - * This is starting to look an awful lot like a quad pipeline stage... - */ -struct sp_fragment_shader { - struct pipe_shader_state shader; - - struct tgsi_shader_info info; - - void (*prepare)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct tgsi_sampler *samplers); - - /* Run the shader - this interface will get cleaned up in the - * future: - */ - unsigned (*run)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct quad_header *quad ); - - - void (*delete)( struct sp_fragment_shader * ); -}; - -struct vertex_info; - -/** Subclass of pipe_shader_state */ -struct sp_vertex_shader { - struct pipe_shader_state shader; - struct draw_vertex_shader *draw_data; -}; - - - -void * -softpipe_create_blend_state(struct pipe_context *, - const struct pipe_blend_state *); -void softpipe_bind_blend_state(struct pipe_context *, - void *); -void softpipe_delete_blend_state(struct pipe_context *, - void *); - -void * -softpipe_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); -void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); -void softpipe_delete_sampler_state(struct pipe_context *, void *); - -void * -softpipe_create_depth_stencil_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); -void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); -void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); - -void * -softpipe_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void softpipe_bind_rasterizer_state(struct pipe_context *, void *); -void softpipe_delete_rasterizer_state(struct pipe_context *, void *); - -void softpipe_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - -void softpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - -void softpipe_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void softpipe_set_constant_buffer(struct pipe_context *, - uint shader, uint index, - const struct pipe_constant_buffer *buf); - -void *softpipe_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_fs_state(struct pipe_context *, void *); -void softpipe_delete_fs_state(struct pipe_context *, void *); -void *softpipe_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_vs_state(struct pipe_context *, void *); -void softpipe_delete_vs_state(struct pipe_context *, void *); - -void softpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - -void softpipe_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); - -void softpipe_set_sampler_textures( struct pipe_context *, - unsigned num, - struct pipe_texture ** ); - -void softpipe_set_viewport_state( struct pipe_context *, - const struct pipe_viewport_state * ); - -void softpipe_set_vertex_element(struct pipe_context *, - unsigned index, - const struct pipe_vertex_element *); - -void softpipe_set_vertex_buffer(struct pipe_context *, - unsigned index, - const struct pipe_vertex_buffer *); - - -void softpipe_update_derived( struct softpipe_context *softpipe ); - - -boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -boolean softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); - - -void -softpipe_map_surfaces(struct softpipe_context *sp); - -void -softpipe_unmap_surfaces(struct softpipe_context *sp); - -void -softpipe_map_texture_surfaces(struct softpipe_context *sp); - -void -softpipe_unmap_texture_surfaces(struct softpipe_context *sp); - - -struct vertex_info * -softpipe_get_vertex_info(struct softpipe_context *softpipe); - -struct vertex_info * -softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); - - -#endif +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_STATE_H +#define SP_STATE_H + +#include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" + + +#define SP_NEW_VIEWPORT 0x1 +#define SP_NEW_RASTERIZER 0x2 +#define SP_NEW_FS 0x4 +#define SP_NEW_BLEND 0x8 +#define SP_NEW_CLIP 0x10 +#define SP_NEW_SCISSOR 0x20 +#define SP_NEW_STIPPLE 0x40 +#define SP_NEW_FRAMEBUFFER 0x80 +#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 +#define SP_NEW_CONSTANTS 0x200 +#define SP_NEW_SAMPLER 0x400 +#define SP_NEW_TEXTURE 0x800 +#define SP_NEW_VERTEX 0x1000 +#define SP_NEW_VS 0x2000 +#define SP_NEW_QUERY 0x4000 + + +struct tgsi_sampler; +struct tgsi_exec_machine; + + +/** Subclass of pipe_shader_state (though it doesn't really need to be). + * + * This is starting to look an awful lot like a quad pipeline stage... + */ +struct sp_fragment_shader { + struct pipe_shader_state shader; + + struct tgsi_shader_info info; + + void (*prepare)( const struct sp_fragment_shader *shader, + struct tgsi_exec_machine *machine, + struct tgsi_sampler *samplers); + + /* Run the shader - this interface will get cleaned up in the + * future: + */ + unsigned (*run)( const struct sp_fragment_shader *shader, + struct tgsi_exec_machine *machine, + struct quad_header *quad ); + + + void (*delete)( struct sp_fragment_shader * ); +}; + +struct vertex_info; + +/** Subclass of pipe_shader_state */ +struct sp_vertex_shader { + struct pipe_shader_state shader; + struct draw_vertex_shader *draw_data; +}; + + + +void * +softpipe_create_blend_state(struct pipe_context *, + const struct pipe_blend_state *); +void softpipe_bind_blend_state(struct pipe_context *, + void *); +void softpipe_delete_blend_state(struct pipe_context *, + void *); + +void * +softpipe_create_sampler_state(struct pipe_context *, + const struct pipe_sampler_state *); +void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void softpipe_delete_sampler_state(struct pipe_context *, void *); + +void * +softpipe_create_depth_stencil_state(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); +void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); +void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); + +void * +softpipe_create_rasterizer_state(struct pipe_context *, + const struct pipe_rasterizer_state *); +void softpipe_bind_rasterizer_state(struct pipe_context *, void *); +void softpipe_delete_rasterizer_state(struct pipe_context *, void *); + +void softpipe_set_framebuffer_state( struct pipe_context *, + const struct pipe_framebuffer_state * ); + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ); + +void softpipe_set_clip_state( struct pipe_context *, + const struct pipe_clip_state * ); + +void softpipe_set_constant_buffer(struct pipe_context *, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + +void *softpipe_create_fs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_fs_state(struct pipe_context *, void *); +void softpipe_delete_fs_state(struct pipe_context *, void *); +void *softpipe_create_vs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_vs_state(struct pipe_context *, void *); +void softpipe_delete_vs_state(struct pipe_context *, void *); + +void softpipe_set_polygon_stipple( struct pipe_context *, + const struct pipe_poly_stipple * ); + +void softpipe_set_scissor_state( struct pipe_context *, + const struct pipe_scissor_state * ); + +void softpipe_set_sampler_textures( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); + +void softpipe_set_viewport_state( struct pipe_context *, + const struct pipe_viewport_state * ); + +void softpipe_set_vertex_element(struct pipe_context *, + unsigned index, + const struct pipe_vertex_element *); + +void softpipe_set_vertex_buffer(struct pipe_context *, + unsigned index, + const struct pipe_vertex_buffer *); + + +void softpipe_update_derived( struct softpipe_context *softpipe ); + + +boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +boolean softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); + + +void +softpipe_map_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_surfaces(struct softpipe_context *sp); + +void +softpipe_map_texture_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_texture_surfaces(struct softpipe_context *sp); + + +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe); + +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); + + +#endif -- cgit v1.2.3 From 344356a0edee932604027386591c82f6666e607c Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Fri, 14 Mar 2008 16:00:15 -0600 Subject: gallium: remove DOS carriage returns --- src/gallium/drivers/softpipe/sp_context.c | 486 +++++++++++++++--------------- src/gallium/drivers/softpipe/sp_quad_fs.c | 418 ++++++++++++------------- src/gallium/drivers/softpipe/sp_texture.c | 404 ++++++++++++------------- 3 files changed, 654 insertions(+), 654 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 316ae552b8e..16fb06f176f 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -1,243 +1,243 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Author: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "draw/draw_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_util.h" -#include "sp_clear.h" -#include "sp_context.h" -#include "sp_flush.h" -#include "sp_prim_setup.h" -#include "sp_prim_vbuf.h" -#include "sp_state.h" -#include "sp_surface.h" -#include "sp_tile_cache.h" -#include "sp_texture.h" -#include "sp_winsys.h" -#include "sp_query.h" - - - -/** - * Map any drawing surfaces which aren't already mapped - */ -void -softpipe_map_surfaces(struct softpipe_context *sp) -{ - unsigned i; - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { - sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); - } - - sp_tile_cache_map_surfaces(sp->zsbuf_cache); -} - - -/** - * Unmap any mapped drawing surfaces - */ -void -softpipe_unmap_surfaces(struct softpipe_context *sp) -{ - uint i; - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) - sp_flush_tile_cache(sp, sp->cbuf_cache[i]); - sp_flush_tile_cache(sp, sp->zsbuf_cache); - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { - sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); - } - sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); -} - - -static void softpipe_destroy( struct pipe_context *pipe ) -{ - struct softpipe_context *softpipe = softpipe_context( pipe ); - struct pipe_winsys *ws = pipe->winsys; - uint i; - - draw_destroy( softpipe->draw ); - - softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); - softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); - softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); - softpipe->quad.coverage->destroy( softpipe->quad.coverage ); - softpipe->quad.bufloop->destroy( softpipe->quad.bufloop ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); - softpipe->quad.colormask->destroy( softpipe->quad.colormask ); - softpipe->quad.output->destroy( softpipe->quad.output ); - - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - sp_destroy_tile_cache(softpipe->cbuf_cache[i]); - sp_destroy_tile_cache(softpipe->zsbuf_cache); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - sp_destroy_tile_cache(softpipe->tex_cache[i]); - - for (i = 0; i < Elements(softpipe->constants); i++) { - if (softpipe->constants[i].buffer) { - pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); - } - } - - FREE( softpipe ); -} - - -struct pipe_context * -softpipe_create( struct pipe_screen *screen, - struct pipe_winsys *pipe_winsys, - struct softpipe_winsys *softpipe_winsys ) -{ - struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); - uint i; - -#if defined(__i386__) || defined(__386__) - softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; -#else - softpipe->use_sse = FALSE; -#endif - - softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; - - softpipe->pipe.winsys = pipe_winsys; - softpipe->pipe.screen = screen; - softpipe->pipe.destroy = softpipe_destroy; - - /* state setters */ - softpipe->pipe.create_blend_state = softpipe_create_blend_state; - softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; - softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; - - softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; - softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states; - softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; - - softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; - softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; - softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; - - softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; - softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; - softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; - - softpipe->pipe.create_fs_state = softpipe_create_fs_state; - softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; - softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; - - softpipe->pipe.create_vs_state = softpipe_create_vs_state; - softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; - softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; - - softpipe->pipe.set_blend_color = softpipe_set_blend_color; - softpipe->pipe.set_clip_state = softpipe_set_clip_state; - softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; - softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; - softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; - softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; - softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; - softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; - - softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; - softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; - - softpipe->pipe.draw_arrays = softpipe_draw_arrays; - softpipe->pipe.draw_elements = softpipe_draw_elements; - - softpipe->pipe.clear = softpipe_clear; - softpipe->pipe.flush = softpipe_flush; - - softpipe_init_query_funcs( softpipe ); - softpipe_init_texture_funcs( softpipe ); - - /* - * Alloc caches for accessing drawing surfaces and textures. - * Must be before quad stage setup! - */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - softpipe->cbuf_cache[i] = sp_create_tile_cache(); - softpipe->zsbuf_cache = sp_create_tile_cache(); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tile_cache(); - - - /* setup quad rendering stages */ - softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); - softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); - softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); - softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe); - softpipe->quad.blend = sp_quad_blend_stage(softpipe); - softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); - softpipe->quad.output = sp_quad_output_stage(softpipe); - - softpipe->winsys = softpipe_winsys; - - /* - * Create drawing context and plug our rendering stage into it. - */ - softpipe->draw = draw_create(); - assert(softpipe->draw); - softpipe->setup = sp_draw_render_stage(softpipe); - - if (GETENV( "SP_VBUF" ) != NULL) { - sp_init_vbuf(softpipe); - } - else { - draw_set_rasterize_stage(softpipe->draw, softpipe->setup); - } - - /* plug in AA line/point stages */ - draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); - draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); - -#if USE_DRAW_STAGE_PSTIPPLE - /* Do polygon stipple w/ texture map + frag prog? */ - draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); -#endif - - sp_init_surface_functions(softpipe); - - return &softpipe->pipe; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_flush.h" +#include "sp_prim_setup.h" +#include "sp_prim_vbuf.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_query.h" + + + +/** + * Map any drawing surfaces which aren't already mapped + */ +void +softpipe_map_surfaces(struct softpipe_context *sp) +{ + unsigned i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); + } + + sp_tile_cache_map_surfaces(sp->zsbuf_cache); +} + + +/** + * Unmap any mapped drawing surfaces + */ +void +softpipe_unmap_surfaces(struct softpipe_context *sp) +{ + uint i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); + } + sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); +} + + +static void softpipe_destroy( struct pipe_context *pipe ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct pipe_winsys *ws = pipe->winsys; + uint i; + + draw_destroy( softpipe->draw ); + + softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); + softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); + softpipe->quad.shade->destroy( softpipe->quad.shade ); + softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); + softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); + softpipe->quad.coverage->destroy( softpipe->quad.coverage ); + softpipe->quad.bufloop->destroy( softpipe->quad.bufloop ); + softpipe->quad.blend->destroy( softpipe->quad.blend ); + softpipe->quad.colormask->destroy( softpipe->quad.colormask ); + softpipe->quad.output->destroy( softpipe->quad.output ); + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + sp_destroy_tile_cache(softpipe->cbuf_cache[i]); + sp_destroy_tile_cache(softpipe->zsbuf_cache); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + sp_destroy_tile_cache(softpipe->tex_cache[i]); + + for (i = 0; i < Elements(softpipe->constants); i++) { + if (softpipe->constants[i].buffer) { + pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + } + } + + FREE( softpipe ); +} + + +struct pipe_context * +softpipe_create( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + struct softpipe_winsys *softpipe_winsys ) +{ + struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); + uint i; + +#if defined(__i386__) || defined(__386__) + softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; +#else + softpipe->use_sse = FALSE; +#endif + + softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; + + softpipe->pipe.winsys = pipe_winsys; + softpipe->pipe.screen = screen; + softpipe->pipe.destroy = softpipe_destroy; + + /* state setters */ + softpipe->pipe.create_blend_state = softpipe_create_blend_state; + softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; + softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; + + softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; + softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; + + softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; + softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; + softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; + + softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; + softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; + softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; + + softpipe->pipe.create_fs_state = softpipe_create_fs_state; + softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; + softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; + + softpipe->pipe.create_vs_state = softpipe_create_vs_state; + softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; + softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + + softpipe->pipe.set_blend_color = softpipe_set_blend_color; + softpipe->pipe.set_clip_state = softpipe_set_clip_state; + softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; + softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; + softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; + softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; + softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; + + softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; + softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; + + softpipe->pipe.draw_arrays = softpipe_draw_arrays; + softpipe->pipe.draw_elements = softpipe_draw_elements; + + softpipe->pipe.clear = softpipe_clear; + softpipe->pipe.flush = softpipe_flush; + + softpipe_init_query_funcs( softpipe ); + softpipe_init_texture_funcs( softpipe ); + + /* + * Alloc caches for accessing drawing surfaces and textures. + * Must be before quad stage setup! + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + softpipe->cbuf_cache[i] = sp_create_tile_cache(); + softpipe->zsbuf_cache = sp_create_tile_cache(); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + softpipe->tex_cache[i] = sp_create_tile_cache(); + + + /* setup quad rendering stages */ + softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad.shade = sp_quad_shade_stage(softpipe); + softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe); + softpipe->quad.blend = sp_quad_blend_stage(softpipe); + softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad.output = sp_quad_output_stage(softpipe); + + softpipe->winsys = softpipe_winsys; + + /* + * Create drawing context and plug our rendering stage into it. + */ + softpipe->draw = draw_create(); + assert(softpipe->draw); + softpipe->setup = sp_draw_render_stage(softpipe); + + if (GETENV( "SP_VBUF" ) != NULL) { + sp_init_vbuf(softpipe); + } + else { + draw_set_rasterize_stage(softpipe->draw, softpipe->setup); + } + + /* plug in AA line/point stages */ + draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); + draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); + +#if USE_DRAW_STAGE_PSTIPPLE + /* Do polygon stipple w/ texture map + frag prog? */ + draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); +#endif + + sp_init_surface_functions(softpipe); + + return &softpipe->pipe; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 9198198db3d..861285101fd 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -1,209 +1,209 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" - -#include "sp_context.h" -#include "sp_state.h" -#include "sp_headers.h" -#include "sp_quad.h" -#include "sp_texture.h" -#include "sp_tex_sample.h" - - -struct quad_shade_stage -{ - struct quad_stage stage; - struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; - struct tgsi_exec_machine machine; - struct tgsi_exec_vector *inputs, *outputs; - int colorOutSlot, depthOutSlot; -}; - - -/** cast wrapper */ -static INLINE struct quad_shade_stage * -quad_shade_stage(struct quad_stage *qs) -{ - return (struct quad_shade_stage *) qs; -} - - - -/** - * Execute fragment shader for the four fragments in the quad. - */ -static void -shade_quad( - struct quad_stage *qs, - struct quad_header *quad ) -{ - struct quad_shade_stage *qss = quad_shade_stage( qs ); - struct softpipe_context *softpipe = qs->softpipe; - struct tgsi_exec_machine *machine = &qss->machine; - - /* Consts do not require 16 byte alignment. */ - machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; - - machine->InterpCoefs = quad->coef; - - /* run shader */ - quad->mask &= softpipe->fs->run( softpipe->fs, - &qss->machine, - quad ); - - /* store result color */ - if (qss->colorOutSlot >= 0) { - /* XXX need to handle multiple color outputs someday */ - assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] - == TGSI_SEMANTIC_COLOR); - memcpy( - quad->outputs.color, - &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], - sizeof( quad->outputs.color ) ); - } - - /* - * XXX the following code for updating quad->outputs.depth - * isn't really needed if we did early z testing. - */ - - /* store result Z */ - if (qss->depthOutSlot >= 0) { - /* output[slot] is new Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; - } - } - else { - /* copy input Z (which was interpolated by the executor) to output Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; - /* XXX not sure the above line is always correct. The following - * might be better: - quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; - */ - } - } - - /* shader may cull fragments */ - if( quad->mask ) { - qs->next->run( qs->next, quad ); - } -} - -/** - * Per-primitive (or per-begin?) setup - */ -static void shade_begin(struct quad_stage *qs) -{ - struct quad_shade_stage *qss = quad_shade_stage(qs); - struct softpipe_context *softpipe = qs->softpipe; - unsigned i; - unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers); - - /* set TGSI sampler state that varies */ - for (i = 0; i < num; i++) { - qss->samplers[i].state = softpipe->sampler[i]; - qss->samplers[i].texture = softpipe->texture[i]; - } - - /* find output slots for depth, color */ - qss->colorOutSlot = -1; - qss->depthOutSlot = -1; - for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { - switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - qss->depthOutSlot = i; - break; - case TGSI_SEMANTIC_COLOR: - qss->colorOutSlot = i; - break; - } - } - - softpipe->fs->prepare( softpipe->fs, - &qss->machine, - qss->samplers ); - - qs->next->begin(qs->next); -} - - -static void shade_destroy(struct quad_stage *qs) -{ - struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; - - tgsi_exec_machine_free_data(&qss->machine); - FREE( qss->inputs ); - FREE( qss->outputs ); - FREE( qs ); -} - - -struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) -{ - struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); - uint i; - - /* allocate storage for program inputs/outputs, aligned to 16 bytes */ - qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); - qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); - qss->machine.Inputs = align16(qss->inputs); - qss->machine.Outputs = align16(qss->outputs); - - qss->stage.softpipe = softpipe; - qss->stage.begin = shade_begin; - qss->stage.run = shade_quad; - qss->stage.destroy = shade_destroy; - - /* set TGSI sampler state that's constant */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - assert(softpipe->tex_cache[i]); - qss->samplers[i].get_samples = sp_get_samples; - qss->samplers[i].pipe = &softpipe->pipe; - qss->samplers[i].cache = softpipe->tex_cache[i]; - } - - tgsi_exec_machine_init( &qss->machine ); - - return &qss->stage; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_texture.h" +#include "sp_tex_sample.h" + + +struct quad_shade_stage +{ + struct quad_stage stage; + struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; + struct tgsi_exec_machine machine; + struct tgsi_exec_vector *inputs, *outputs; + int colorOutSlot, depthOutSlot; +}; + + +/** cast wrapper */ +static INLINE struct quad_shade_stage * +quad_shade_stage(struct quad_stage *qs) +{ + return (struct quad_shade_stage *) qs; +} + + + +/** + * Execute fragment shader for the four fragments in the quad. + */ +static void +shade_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + struct quad_shade_stage *qss = quad_shade_stage( qs ); + struct softpipe_context *softpipe = qs->softpipe; + struct tgsi_exec_machine *machine = &qss->machine; + + /* Consts do not require 16 byte alignment. */ + machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + + machine->InterpCoefs = quad->coef; + + /* run shader */ + quad->mask &= softpipe->fs->run( softpipe->fs, + &qss->machine, + quad ); + + /* store result color */ + if (qss->colorOutSlot >= 0) { + /* XXX need to handle multiple color outputs someday */ + assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] + == TGSI_SEMANTIC_COLOR); + memcpy( + quad->outputs.color, + &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], + sizeof( quad->outputs.color ) ); + } + + /* + * XXX the following code for updating quad->outputs.depth + * isn't really needed if we did early z testing. + */ + + /* store result Z */ + if (qss->depthOutSlot >= 0) { + /* output[slot] is new Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; + } + } + else { + /* copy input Z (which was interpolated by the executor) to output Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; + /* XXX not sure the above line is always correct. The following + * might be better: + quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; + */ + } + } + + /* shader may cull fragments */ + if( quad->mask ) { + qs->next->run( qs->next, quad ); + } +} + +/** + * Per-primitive (or per-begin?) setup + */ +static void shade_begin(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + unsigned i; + unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers); + + /* set TGSI sampler state that varies */ + for (i = 0; i < num; i++) { + qss->samplers[i].state = softpipe->sampler[i]; + qss->samplers[i].texture = softpipe->texture[i]; + } + + /* find output slots for depth, color */ + qss->colorOutSlot = -1; + qss->depthOutSlot = -1; + for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { + switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + qss->depthOutSlot = i; + break; + case TGSI_SEMANTIC_COLOR: + qss->colorOutSlot = i; + break; + } + } + + softpipe->fs->prepare( softpipe->fs, + &qss->machine, + qss->samplers ); + + qs->next->begin(qs->next); +} + + +static void shade_destroy(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; + + tgsi_exec_machine_free_data(&qss->machine); + FREE( qss->inputs ); + FREE( qss->outputs ); + FREE( qs ); +} + + +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) +{ + struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); + uint i; + + /* allocate storage for program inputs/outputs, aligned to 16 bytes */ + qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); + qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); + qss->machine.Inputs = align16(qss->inputs); + qss->machine.Outputs = align16(qss->outputs); + + qss->stage.softpipe = softpipe; + qss->stage.begin = shade_begin; + qss->stage.run = shade_quad; + qss->stage.destroy = shade_destroy; + + /* set TGSI sampler state that's constant */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + assert(softpipe->tex_cache[i]); + qss->samplers[i].get_samples = sp_get_samples; + qss->samplers[i].pipe = &softpipe->pipe; + qss->samplers[i].cache = softpipe->tex_cache[i]; + } + + tgsi_exec_machine_init( &qss->machine ); + + return &qss->stage; +} diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index bbf2d80308e..64bf353aa4a 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -1,202 +1,202 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * Michel Dänzer <michel@tungstengraphics.com> - */ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_util.h" -#include "pipe/p_winsys.h" - -#include "sp_context.h" -#include "sp_state.h" -#include "sp_texture.h" -#include "sp_tile_cache.h" - - -/* Simple, maximally packed layout. - */ - -static unsigned minify( unsigned d ) -{ - return MAX2(1, d>>1); -} - - -static void -softpipe_texture_layout(struct softpipe_texture * spt) -{ - struct pipe_texture *pt = &spt->base; - unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; - - spt->buffer_size = 0; - - for (level = 0; level <= pt->last_level; level++) { - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; - - spt->level_offset[level] = spt->buffer_size; - - spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - width * pt->cpp; - - width = minify(width); - height = minify(height); - depth = minify(depth); - } -} - - -static struct pipe_texture * -softpipe_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) -{ - struct pipe_winsys *ws = screen->winsys; - struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); - if (!spt) - return NULL; - - spt->base = *templat; - spt->base.refcount = 1; - spt->base.screen = screen; - - softpipe_texture_layout(spt); - - spt->buffer = ws->buffer_create(ws, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); - if (!spt->buffer) { - FREE(spt); - return NULL; - } - - assert(spt->base.refcount == 1); - - return &spt->base; -} - - -static void -softpipe_texture_release_screen(struct pipe_screen *screen, - struct pipe_texture **pt) -{ - if (!*pt) - return; - - /* - DBG("%s %p refcount will be %d\n", - __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); - */ - if (--(*pt)->refcount <= 0) { - struct softpipe_texture *spt = softpipe_texture(*pt); - - /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); - */ - - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); - - FREE(spt); - } - *pt = NULL; -} - - -static struct pipe_surface * -softpipe_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct pipe_winsys *ws = screen->winsys; - struct softpipe_texture *spt = softpipe_texture(pt); - struct pipe_surface *ps; - - assert(level <= pt->last_level); - - ps = ws->surface_alloc(ws); - if (ps) { - assert(ps->refcount); - assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = ps->width; - ps->offset = spt->level_offset[level]; - - if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { - ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * - (pt->compressed ? ps->height/4 : ps->height) * - ps->width * ps->cpp; - } - else { - assert(face == 0); - assert(zslice == 0); - } - } - return ps; -} - - -static void -softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); - uint unit; - for (unit = 0; unit < softpipe->num_textures; unit++) { - if (softpipe->texture[unit] == texture) { - sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); - } - } -} - - -void -softpipe_init_texture_funcs( struct softpipe_context *softpipe ) -{ - softpipe->pipe.texture_update = softpipe_texture_update; -} - - -void -softpipe_init_screen_texture_funcs(struct pipe_screen *screen) -{ - screen->texture_create = softpipe_texture_create_screen; - screen->texture_release = softpipe_texture_release_screen; - screen->get_tex_surface = softpipe_get_tex_surface_screen; -} +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" + + +/* Simple, maximally packed layout. + */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static void +softpipe_texture_layout(struct softpipe_texture * spt) +{ + struct pipe_texture *pt = &spt->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + spt->buffer_size = 0; + + for (level = 0; level <= pt->last_level; level++) { + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + + spt->level_offset[level] = spt->buffer_size; + + spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + width * pt->cpp; + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + + +static struct pipe_texture * +softpipe_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct pipe_winsys *ws = screen->winsys; + struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *templat; + spt->base.refcount = 1; + spt->base.screen = screen; + + softpipe_texture_layout(spt); + + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); + if (!spt->buffer) { + FREE(spt); + return NULL; + } + + assert(spt->base.refcount == 1); + + return &spt->base; +} + + +static void +softpipe_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct softpipe_texture *spt = softpipe_texture(*pt); + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); + */ + + pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); + + FREE(spt); + } + *pt = NULL; +} + + +static struct pipe_surface * +softpipe_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; + struct softpipe_texture *spt = softpipe_texture(pt); + struct pipe_surface *ps; + + assert(level <= pt->last_level); + + ps = ws->surface_alloc(ws); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(ws, &ps->buffer, spt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = ps->width; + ps->offset = spt->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + (pt->compressed ? ps->height/4 : ps->height) * + ps->width * ps->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} + + +static void +softpipe_texture_update(struct pipe_context *pipe, + struct pipe_texture *texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint unit; + for (unit = 0; unit < softpipe->num_textures; unit++) { + if (softpipe->texture[unit] == texture) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); + } + } +} + + +void +softpipe_init_texture_funcs( struct softpipe_context *softpipe ) +{ + softpipe->pipe.texture_update = softpipe_texture_update; +} + + +void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = softpipe_texture_create_screen; + screen->texture_release = softpipe_texture_release_screen; + screen->get_tex_surface = softpipe_get_tex_surface_screen; +} -- cgit v1.2.3 From 9a3320e0791a4a03f5f4b7a6f9c3b0d9d78655b3 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Sat, 15 Mar 2008 04:42:48 +0100 Subject: nv30: only 2 RTs. --- src/gallium/drivers/nv30/nv30_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 951a32bc81e..722626db1fa 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -546,7 +546,7 @@ nv30_set_framebuffer_state(struct pipe_context *pipe, int i, colour_format = 0, zeta_format = 0; rt_enable = 0; - for (i = 0; i < 4; i++) { + for (i = 0; i < 2; i++) { if (!fb->cbufs[i]) continue; -- cgit v1.2.3 From 509044609d5121b2a09d64bd24d7aa37e3744a77 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Sat, 15 Mar 2008 04:43:12 +0100 Subject: nouveau: latest header. --- src/gallium/drivers/nouveau/nouveau_class.h | 44 ++++++++++++++++++----------- 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 1b015507903..7392490df01 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -1750,22 +1750,34 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV10TCL_DMA_VTXBUF0 0x0000018c #define NV10TCL_DMA_IN_MEMORY2 0x00000194 #define NV10TCL_DMA_IN_MEMORY3 0x00000198 -#define NV10TCL_VIEWPORT_HORIZ 0x00000200 -#define NV10TCL_VIEWPORT_HORIZ_X_SHIFT 0 -#define NV10TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff -#define NV10TCL_VIEWPORT_HORIZ_W_SHIFT 16 -#define NV10TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 -#define NV10TCL_VIEWPORT_VERT 0x00000204 -#define NV10TCL_VIEWPORT_VERT_Y_SHIFT 0 -#define NV10TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff -#define NV10TCL_VIEWPORT_VERT_H_SHIFT 16 -#define NV10TCL_VIEWPORT_VERT_H_MASK 0xffff0000 -#define NV10TCL_BUFFER_FORMAT 0x00000208 -#define NV10TCL_BUFFER_PITCH 0x0000020c -#define NV10TCL_BUFFER_PITCH_COLOR_PITCH_SHIFT 0 -#define NV10TCL_BUFFER_PITCH_COLOR_PITCH_MASK 0x0000ffff -#define NV10TCL_BUFFER_PITCH_ZETA_PITCH_SHIFT 16 -#define NV10TCL_BUFFER_PITCH_ZETA_PITCH_MASK 0xffff0000 +#define NV10TCL_RT_HORIZ 0x00000200 +#define NV10TCL_RT_HORIZ_X_SHIFT 0 +#define NV10TCL_RT_HORIZ_X_MASK 0x0000ffff +#define NV10TCL_RT_HORIZ_W_SHIFT 16 +#define NV10TCL_RT_HORIZ_W_MASK 0xffff0000 +#define NV10TCL_RT_VERT 0x00000204 +#define NV10TCL_RT_VERT_Y_SHIFT 0 +#define NV10TCL_RT_VERT_Y_MASK 0x0000ffff +#define NV10TCL_RT_VERT_H_SHIFT 16 +#define NV10TCL_RT_VERT_H_MASK 0xffff0000 +#define NV10TCL_RT_FORMAT 0x00000208 +#define NV10TCL_RT_FORMAT_TYPE_SHIFT 8 +#define NV10TCL_RT_FORMAT_TYPE_MASK 0x00000f00 +#define NV10TCL_RT_FORMAT_TYPE_LINEAR 0x00000100 +#define NV10TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200 +#define NV10TCL_RT_FORMAT_COLOR_SHIFT 0 +#define NV10TCL_RT_FORMAT_COLOR_MASK 0x0000001f +#define NV10TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV10TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 +#define NV10TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV10TCL_RT_FORMAT_COLOR_B8 0x00000009 +#define NV10TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f +#define NV10TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 +#define NV10TCL_RT_PITCH 0x0000020c +#define NV10TCL_RT_PITCH_COLOR_PITCH_SHIFT 0 +#define NV10TCL_RT_PITCH_COLOR_PITCH_MASK 0x0000ffff +#define NV10TCL_RT_PITCH_ZETA_PITCH_SHIFT 16 +#define NV10TCL_RT_PITCH_ZETA_PITCH_MASK 0xffff0000 #define NV10TCL_COLOR_OFFSET 0x00000210 #define NV10TCL_ZETA_OFFSET 0x00000214 #define NV10TCL_TX_OFFSET(x) (0x00000218+((x)*4)) -- cgit v1.2.3 From d493203045b214770473f8afeaa610542fe42c2a Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Sat, 15 Mar 2008 05:37:57 +0100 Subject: nv10. --- configs/default | 2 +- configs/linux-dri | 2 +- src/gallium/drivers/nouveau/nouveau_winsys.h | 7 + src/gallium/drivers/nv10/Makefile | 28 + src/gallium/drivers/nv10/nv10_clear.c | 12 + src/gallium/drivers/nv10/nv10_context.c | 325 +++++++++++ src/gallium/drivers/nv10/nv10_context.h | 127 +++++ src/gallium/drivers/nv10/nv10_fragprog.c | 22 + src/gallium/drivers/nv10/nv10_fragtex.c | 145 +++++ src/gallium/drivers/nv10/nv10_miptree.c | 134 +++++ src/gallium/drivers/nv10/nv10_prim_vbuf.c | 224 ++++++++ src/gallium/drivers/nv10/nv10_screen.c | 150 +++++ src/gallium/drivers/nv10/nv10_screen.h | 19 + src/gallium/drivers/nv10/nv10_state.c | 697 +++++++++++++++++++++++ src/gallium/drivers/nv10/nv10_state.h | 135 +++++ src/gallium/drivers/nv10/nv10_state_emit.c | 69 +++ src/gallium/drivers/nv10/nv10_surface.c | 65 +++ src/gallium/drivers/nv10/nv10_vbo.c | 72 +++ src/gallium/winsys/dri/nouveau/Makefile | 1 + src/gallium/winsys/dri/nouveau/nouveau_context.c | 3 + src/gallium/winsys/dri/nouveau/nouveau_winsys.c | 5 + 21 files changed, 2242 insertions(+), 2 deletions(-) create mode 100644 src/gallium/drivers/nv10/Makefile create mode 100644 src/gallium/drivers/nv10/nv10_clear.c create mode 100644 src/gallium/drivers/nv10/nv10_context.c create mode 100644 src/gallium/drivers/nv10/nv10_context.h create mode 100644 src/gallium/drivers/nv10/nv10_fragprog.c create mode 100644 src/gallium/drivers/nv10/nv10_fragtex.c create mode 100644 src/gallium/drivers/nv10/nv10_miptree.c create mode 100644 src/gallium/drivers/nv10/nv10_prim_vbuf.c create mode 100644 src/gallium/drivers/nv10/nv10_screen.c create mode 100644 src/gallium/drivers/nv10/nv10_screen.h create mode 100644 src/gallium/drivers/nv10/nv10_state.c create mode 100644 src/gallium/drivers/nv10/nv10_state.h create mode 100644 src/gallium/drivers/nv10/nv10_state_emit.c create mode 100644 src/gallium/drivers/nv10/nv10_surface.c create mode 100644 src/gallium/drivers/nv10/nv10_vbo.c (limited to 'src/gallium/drivers') diff --git a/configs/default b/configs/default index 49d3737ec7c..d010721a103 100644 --- a/configs/default +++ b/configs/default @@ -70,7 +70,7 @@ PROGRAM_DIRS = demos redbook samples glsl xdemos # Gallium directories and GALLIUM_AUXILIARY_DIRS = draw cso_cache pipebuffer tgsi rtasm util sct GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) -GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple nv30 nv40 nv50 failover +GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple nv10 nv30 nv40 nv50 failover GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVER_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) GALLIUM_WINSYS_DIRS = xlib diff --git a/configs/linux-dri b/configs/linux-dri index 67e60cbd4c5..bf7881937e4 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -65,4 +65,4 @@ GALLIUM_WINSYS_DIRS = dri # gamma are missing because they have not been converted to use the new # interface. -DRI_DIRS = intel +DRI_DIRS = nouveau diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 44c8bb919bb..e4b20478a0c 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -49,6 +49,13 @@ struct nouveau_winsys { unsigned, unsigned, unsigned, unsigned, unsigned); }; +extern struct pipe_screen * +nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, + unsigned chipset); + +extern struct pipe_context * +nv10_create(struct pipe_screen *, unsigned pctx_id); + extern struct pipe_screen * nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, unsigned chipset); diff --git a/src/gallium/drivers/nv10/Makefile b/src/gallium/drivers/nv10/Makefile new file mode 100644 index 00000000000..4ba7ce586d6 --- /dev/null +++ b/src/gallium/drivers/nv10/Makefile @@ -0,0 +1,28 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv10 + +DRIVER_SOURCES = \ + nv10_clear.c \ + nv10_context.c \ + nv10_fragprog.c \ + nv10_fragtex.c \ + nv10_miptree.c \ + nv10_prim_vbuf.c \ + nv10_screen.c \ + nv10_state.c \ + nv10_state_emit.c \ + nv10_surface.c \ + nv10_vbo.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv10/nv10_clear.c b/src/gallium/drivers/nv10/nv10_clear.c new file mode 100644 index 00000000000..be7e09cf4b0 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv10_context.h" + +void +nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c new file mode 100644 index 00000000000..2599acf2861 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -0,0 +1,325 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static void +nv10_flush(struct pipe_context *pipe, unsigned flags) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nouveau_winsys *nvws = nv10->nvws; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(celsius, 0x1fd8, 1); + OUT_RING (2); + BEGIN_RING(celsius, 0x1fd8, 1); + OUT_RING (1); + } + + if (flags & PIPE_FLUSH_WAIT) { + nvws->notifier_reset(nv10->sync, 0); + BEGIN_RING(celsius, 0x104, 1); + OUT_RING (0); + BEGIN_RING(celsius, 0x100, 1); + OUT_RING (0); + } + + FIRE_RING(); + + if (flags & PIPE_FLUSH_WAIT) + nvws->notifier_wait(nv10->sync, 0, 0, 2000); +} + +static void +nv10_destroy(struct pipe_context *pipe) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nouveau_winsys *nvws = nv10->nvws; + + if (nv10->draw) + draw_destroy(nv10->draw); + + nvws->res_free(&nv10->vertprog.exec_heap); + nvws->res_free(&nv10->vertprog.data_heap); + + nvws->notifier_free(&nv10->sync); + + nvws->grobj_free(&nv10->celsius); + + free(nv10); +} + +static boolean +nv10_init_hwctx(struct nv10_context *nv10, int celsius_class) +{ + struct nouveau_winsys *nvws = nv10->nvws; + int ret; + int i; + + ret = nvws->grobj_alloc(nvws, celsius_class, &nv10->celsius); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); + OUT_RING (nv10->sync->handle); + BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->gart->handle); + BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->vram->handle); + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2); + OUT_RING (0); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING ((0x7ff<<16)|0x800); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING ((0x7ff<<16)|0x800); + + for (i=1;i<8;i++) { + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (0); + } + + BEGIN_RING(celsius, 0x290, 1); + OUT_RING ((0x10<<16)|1); + BEGIN_RING(celsius, 0x3f4, 1); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + if (celsius_class != NV10TCL) { + /* For nv11, nv17 */ + BEGIN_RING(celsius, 0x120, 3); + OUT_RING (0); + OUT_RING (1); + OUT_RING (2); + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + } + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + /* Set state */ + BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (0x207); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_RC_OUT_ALPHA(0), 6); + OUT_RING (0x00000c00); + OUT_RING (0); + OUT_RING (0x00000c00); + OUT_RING (0x18000000); + OUT_RING (0x300c0000); + OUT_RING (0x00001c80); + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2); + OUT_RING (1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4); + OUT_RING (1); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0x8006); + BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8); + OUT_RING (0xff); + OUT_RING (0x207); + OUT_RING (0); + OUT_RING (0xff); + OUT_RING (0x1e00); + OUT_RING (0x1e00); + OUT_RING (0x1e00); + OUT_RING (0x1d01); + BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (0x201); + BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (8); + BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1); + OUT_RING (8); + BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (0x1b02); + OUT_RING (0x1b02); + BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (0x405); + OUT_RING (0x901); + BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_CLIP_PLANE_ENABLE(0), 8); + for (i=0;i<8;i++) { + OUT_RING (0); + } + BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RING (0x3fc00000); /* -1.50 */ + OUT_RING (0xbdb8aa0a); /* -0.09 */ + OUT_RING (0); /* 0.00 */ + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2); + OUT_RING (0x802); + OUT_RING (2); + /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when + * using texturing, except when using the texture matrix + */ + BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); + OUT_RING (6); + BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (0x01010101); + + /* Set vertex component */ + BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3); + OUT_RING (0); + OUT_RING (0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1); + OUT_RINGf (0.0); + BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (1); + + + FIRE_RING (); + return TRUE; +} + +struct pipe_context * +nv10_create(struct pipe_screen *screen, unsigned pctx_id) +{ + struct pipe_winsys *pipe_winsys = screen->winsys; + struct nouveau_winsys *nvws = nv10_screen(screen)->nvws; + unsigned chipset = nv10_screen(screen)->chipset; + struct nv10_context *nv10; + int celsius_class = 0, ret; + + if (chipset>=0x20) + celsius_class=NV11TCL; + else if (chipset>=0x17) + celsius_class=NV17TCL; + else if (chipset>=0x11) + celsius_class=NV11TCL; + else + celsius_class=NV10TCL; + + nv10 = CALLOC_STRUCT(nv10_context); + if (!nv10) + return NULL; + nv10->chipset = chipset; + nv10->nvws = nvws; + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &nv10->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv10_destroy(&nv10->pipe); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&nv10->vertprog.exec_heap, 0, 512) || + nvws->res_init(&nv10->vertprog.data_heap, 0, 256)) { + nv10_destroy(&nv10->pipe); + return NULL; + } + + /* Static celsius initialisation */ + if (!nv10_init_hwctx(nv10, celsius_class)) { + nv10_destroy(&nv10->pipe); + return NULL; + } + + /* Pipe context setup */ + nv10->pipe.winsys = pipe_winsys; + + nv10->pipe.destroy = nv10_destroy; + + nv10->pipe.draw_arrays = nv10_draw_arrays; + nv10->pipe.draw_elements = nv10_draw_elements; + nv10->pipe.clear = nv10_clear; + + nv10->pipe.flush = nv10_flush; + + nv10_init_surface_functions(nv10); + nv10_init_state_functions(nv10); + + nv10->draw = draw_create(); + assert(nv10->draw); + draw_set_rasterize_stage(nv10->draw, nv10_draw_vbuf_stage(nv10)); + + return &nv10->pipe; +} + diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h new file mode 100644 index 00000000000..8269d6121f0 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -0,0 +1,127 @@ +#ifndef __NV10_CONTEXT_H__ +#define __NV10_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv10_context *ctx = nv10 +#include "nouveau/nouveau_push.h" + +#include "nv10_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +#define NV10_NEW_VERTPROG (1 << 1) +#define NV10_NEW_FRAGPROG (1 << 2) +#define NV10_NEW_ARRAYS (1 << 3) +#define NV10_NEW_VBO (1 << 4) + +struct nv10_context { + struct pipe_context pipe; + struct nouveau_winsys *nvws; + + struct draw_context *draw; + + int chipset; + struct nouveau_grobj *celsius; + struct nouveau_notifier *sync; + + uint32_t dirty; + + struct nv10_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv10_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned dirty_samplers; + unsigned fp_samplers; + unsigned vp_samplers; + + uint32_t rt_enable; + struct pipe_buffer *rt[4]; + struct pipe_buffer *zeta; + + struct { + struct pipe_buffer *buffer; + uint32_t format; + } tex[16]; + + unsigned vb_enable; + struct { + struct pipe_buffer *buffer; + unsigned delta; + } vb[16]; + + struct vertex_info vertex_info; + struct { + + struct nouveau_resource *exec_heap; + struct nouveau_resource *data_heap; + + struct nv10_vertex_program *active; + + struct nv10_vertex_program *current; + struct pipe_buffer *constant_buf; + } vertprog; + + struct { + struct nv10_fragment_program *active; + + struct nv10_fragment_program *current; + struct pipe_buffer *constant_buf; + } fragprog; + + struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; +}; + +static INLINE struct nv10_context * +nv10_context(struct pipe_context *pipe) +{ + return (struct nv10_context *)pipe; +} + +extern void nv10_init_state_functions(struct nv10_context *nv10); +extern void nv10_init_surface_functions(struct nv10_context *nv10); +extern void nv10_init_miptree_functions(struct pipe_screen *screen); + +/* nv10_clear.c */ +extern void nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv10_draw.c */ +extern struct draw_stage *nv10_draw_render_stage(struct nv10_context *nv10); + +/* nv10_fragprog.c */ +extern void nv10_fragprog_bind(struct nv10_context *, + struct nv10_fragment_program *); +extern void nv10_fragprog_destroy(struct nv10_context *, + struct nv10_fragment_program *); + +/* nv10_fragtex.c */ +extern void nv10_fragtex_bind(struct nv10_context *); + +/* nv10_prim_vbuf.c */ +struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ); + +/* nv10_state.c and friends */ +extern void nv10_emit_hw_state(struct nv10_context *nv10); +extern void nv10_state_tex_update(struct nv10_context *nv10); + +/* nv10_vbo.c */ +extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv10_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count); + + +#endif diff --git a/src/gallium/drivers/nv10/nv10_fragprog.c b/src/gallium/drivers/nv10/nv10_fragprog.c new file mode 100644 index 00000000000..2a63c8a704e --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_fragprog.c @@ -0,0 +1,22 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" + +#include "nv10_context.h" + +void +nv10_fragprog_bind(struct nv10_context *nv10, struct nv10_fragment_program *fp) +{ +} + +void +nv10_fragprog_destroy(struct nv10_context *nv10, + struct nv10_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c new file mode 100644 index 00000000000..a4bf1082845 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -0,0 +1,145 @@ +#include "nv10_context.h" + +static INLINE int log2i(int i) +{ + int r = 0; + + if (i & 0xffff0000) { + i >>= 16; + r += 16; + } + if (i & 0x0000ff00) { + i >>= 8; + r += 8; + } + if (i & 0x000000f0) { + i >>= 4; + r += 4; + } + if (i & 0x0000000c) { + i >>= 2; + r += 2; + } + if (i & 0x00000002) { + r += 1; + } + return r; +} + +#define _(m,tf) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV10TCL_TX_FORMAT_FORMAT_##tf, \ +} + +struct nv10_texture_format { + boolean defined; + uint pipe; + int format; +}; + +static struct nv10_texture_format +nv10_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8), + _(A1R5G5B5_UNORM, A1R5G5B5), + _(A4R4G4B4_UNORM, A4R4G4B4), + _(U_L8 , L8 ), + _(U_A8 , A8 ), + _(U_A8_L8 , A8L8 ), +// _(RGB_DXT1 , DXT1, ), +// _(RGBA_DXT1 , DXT1, ), +// _(RGBA_DXT3 , DXT3, ), +// _(RGBA_DXT5 , DXT5, ), + {}, +}; + +static struct nv10_texture_format * +nv10_fragtex_format(uint pipe_format) +{ + struct nv10_texture_format *tf = nv10_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + return NULL; +} + + +static void +nv10_fragtex_build(struct nv10_context *nv10, int unit) +{ + struct nv10_sampler_state *ps = nv10->tex_sampler[unit]; + struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; + struct pipe_texture *pt = &nv10mt->base; + struct nv10_texture_format *tf; + uint32_t txf, txs, txp; + + tf = nv10_fragtex_format(pt->format); + if (!tf || !tf->defined) { + NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); + return; + } + + txf = tf->format << 8; + txf |= (pt->last_level + 1) << 16; + txf |= log2i(pt->width[0]) << 20; + txf |= log2i(pt->height[0]) << 24; + txf |= log2i(pt->depth[0]) << 28; + txf |= 8; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV10TCL_TX_FORMAT_CUBE_MAP; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= (2<<4); + break; + case PIPE_TEXTURE_1D: + txf |= (1<<4); + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return; + } + + BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (ps->wrap); + OUT_RING (0x40000000); /* enable */ + OUT_RING (txs); + OUT_RING (ps->filt | 0x2000 /* magic */); + OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING (ps->bcol); +} + +void +nv10_fragtex_bind(struct nv10_context *nv10) +{ + struct nv10_fragment_program *fp = nv10->fragprog.active; + unsigned samplers, unit; + + samplers = nv10->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (0); + } + + samplers = nv10->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + nv10_fragtex_build(nv10, unit); + } + + nv10->fp_samplers = fp->samplers; +} + diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c new file mode 100644 index 00000000000..7b7f39b80c9 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -0,0 +1,134 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static void +nv10_miptree_layout(struct nv10_miptree *nv10mt) +{ + struct pipe_texture *pt = &nv10mt->base; + boolean swizzled = FALSE; + uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint offset = 0; + int nr_faces, l, f; + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else { + nr_faces = 1; + } + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->depth[l] = depth; + + if (swizzled) + nv10mt->level[l].pitch = pt->width[l] * pt->cpp; + else + nv10mt->level[l].pitch = pt->width[0] * pt->cpp; + nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63; + + nv10mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + nv10mt->level[l].image_offset[f] = offset; + offset += nv10mt->level[l].pitch * pt->height[l]; + } + } + + nv10mt->total_size = offset; +} + +static struct pipe_texture * +nv10_miptree_create(struct pipe_screen *screen, struct pipe_texture *pt) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv10_miptree *mt; + + mt = MALLOC(sizeof(struct nv10_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = screen; + + nv10_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->total_size); + if (!mt->buffer) { + free(mt); + return NULL; + } + + return &mt->base; +} + +static void +nv10_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_texture *mt = *pt; + + *pt = NULL; + if (--mt->refcount <= 0) { + struct nv10_miptree *nv10mt = (struct nv10_miptree *)mt; + int l; + + pipe_buffer_reference(ws, &nv10mt->buffer, NULL); + for (l = 0; l <= mt->last_level; l++) { + if (nv10mt->level[l].image_offset) + free(nv10mt->level[l].image_offset); + } + free(nv10mt); + } +} + +static struct pipe_surface * +nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt; + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + pipe_buffer_reference(ws, &ps->buffer, nv10mt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = nv10mt->level[level].pitch / ps->cpp; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv10mt->level[level].image_offset[face]; + } else { + ps->offset = nv10mt->level[level].image_offset[0]; + } + + return ps; +} +void +nv10_init_miptree_functions(struct pipe_screen *screen) +{ + struct nv10_screen *nv10screen = nv10_screen(screen); + + nv10screen->screen.texture_create = nv10_miptree_create; + nv10screen->screen.texture_release = nv10_miptree_release; + nv10screen->screen.get_tex_surface = nv10_miptree_surface_get; +} + diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c new file mode 100644 index 00000000000..15268912234 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -0,0 +1,224 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. + * + * XXX: work in progress + * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "draw/draw_vbuf.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" + +#include "nv10_context.h" +#include "nv10_state.h" + + +/** + * Primitive renderer for nv10. + */ +struct nv10_vbuf_render { + struct vbuf_render base; + + struct nv10_context *nv10; + + /** Vertex buffer */ + struct pipe_buffer* buffer; + + /** Vertex size in bytes */ + unsigned vertex_size; + + /** Hardware primitive */ + unsigned hwprim; +}; + + +/** + * Basically a cast wrapper. + */ +static INLINE struct nv10_vbuf_render * +nv10_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct nv10_vbuf_render *)render; +} + + +static const struct vertex_info * +nv10_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + return &nv10->vertex_info; +} + + +static void * +nv10_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + struct pipe_winsys *winsys = nv10->pipe.winsys; + size_t size = (size_t)vertex_size * (size_t)nr_vertices; + + assert(!nv10_render->buffer); + nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); + + nv10->dirty |= NV10_NEW_VBO; + + return winsys->buffer_map(winsys, + nv10_render->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); +} + + +static void +nv10_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + nv10_render->hwprim = prim + 1; +} + + +static void +nv10_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + int push, i; + + BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + + BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(nv10_render->hwprim); + + if (nr_indices & 1) { + BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (indices[0]); + indices++; nr_indices--; + } + + while (nr_indices) { + // XXX too big ? + push = MIN2(nr_indices, 2047 * 2); + + BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((indices[i+1] << 16) | indices[i]); + + nr_indices -= push; + indices += push; + } + + BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (0); +} + + +static void +nv10_vbuf_render_release_vertices( struct vbuf_render *render, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + struct pipe_winsys *winsys = nv10->pipe.winsys; + + assert(nv10_render->buffer); + winsys->buffer_unmap(winsys, nv10_render->buffer); + pipe_buffer_reference(winsys, &nv10_render->buffer, NULL); +} + + +static void +nv10_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + FREE(nv10_render); +} + + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +nv10_vbuf_render_create( struct nv10_context *nv10 ) +{ + struct nv10_vbuf_render *nv10_render = CALLOC_STRUCT(nv10_vbuf_render); + + nv10_render->nv10 = nv10; + + nv10_render->base.max_vertex_buffer_bytes = 1024*1024; + nv10_render->base.max_indices = 64*1024; + nv10_render->base.get_vertex_info = nv10_vbuf_render_get_vertex_info; + nv10_render->base.allocate_vertices = nv10_vbuf_render_allocate_vertices; + nv10_render->base.set_primitive = nv10_vbuf_render_set_primitive; + nv10_render->base.draw = nv10_vbuf_render_draw; + nv10_render->base.release_vertices = nv10_vbuf_render_release_vertices; + nv10_render->base.destroy = nv10_vbuf_render_destroy; + + return &nv10_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = nv10_vbuf_render_create(nv10); + if(!render) + return NULL; + + stage = draw_vbuf_stage( nv10->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + + return stage; +} diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c new file mode 100644 index 00000000000..4d8e0b05ccd --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -0,0 +1,150 @@ +#include "pipe/p_screen.h" +#include "pipe/p_util.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static const char * +nv10_screen_get_name(struct pipe_screen *screen) +{ + struct nv10_screen *nv10screen = nv10_screen(screen); + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", nv10screen->chipset); + return buffer; +} + +static const char * +nv10_screen_get_vendor(struct pipe_screen *screen) +{ + return "nouveau"; +} + +static int +nv10_screen_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 2; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 0; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 0; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv10_screen_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 2.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + case PIPE_CAP_BITMAP_TEXCOORD_BIAS: + return 0.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv10_screen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, uint type) +{ + switch (type) { + case PIPE_SURFACE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + break; + case PIPE_TEXTURE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_U_I8: + return TRUE; + default: + break; + } + break; + default: + assert(0); + }; + + return FALSE; +} + +static void +nv10_screen_destroy(struct pipe_screen *screen) +{ + FREE(screen); +} + +struct pipe_screen * +nv10_screen_create(struct pipe_winsys *winsys, struct nouveau_winsys *nvws, + unsigned chipset) +{ + struct nv10_screen *nv10screen = CALLOC_STRUCT(nv10_screen); + + if (!nv10screen) + return NULL; + + nv10screen->chipset = chipset; + nv10screen->nvws = nvws; + + nv10screen->screen.winsys = winsys; + + nv10screen->screen.destroy = nv10_screen_destroy; + + nv10screen->screen.get_name = nv10_screen_get_name; + nv10screen->screen.get_vendor = nv10_screen_get_vendor; + nv10screen->screen.get_param = nv10_screen_get_param; + nv10screen->screen.get_paramf = nv10_screen_get_paramf; + nv10screen->screen.is_format_supported = + nv10_screen_is_format_supported; + + nv10_init_miptree_functions(&nv10screen->screen); + return &nv10screen->screen; +} + diff --git a/src/gallium/drivers/nv10/nv10_screen.h b/src/gallium/drivers/nv10/nv10_screen.h new file mode 100644 index 00000000000..ac8c04072ab --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_screen.h @@ -0,0 +1,19 @@ +#ifndef __NV10_SCREEN_H__ +#define __NV10_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv10_screen { + struct pipe_screen screen; + + struct nouveau_winsys *nvws; + unsigned chipset; +}; + +static INLINE struct nv10_screen * +nv10_screen(struct pipe_screen *screen) +{ + return (struct nv10_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c new file mode 100644 index 00000000000..313230fe889 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -0,0 +1,697 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + + +#include "nv10_context.h" +#include "nv10_state.h" + +static void nv10_vertex_layout(struct pipe_context* pipe) +{ + struct nv10_context *nv10 = nv10_context(pipe); + const struct pipe_shader_state *fs = nv10->fragprog.current->pipe; + uint32_t src = 0; + int i; + struct vertex_info vinfo; + + memset(&vinfo, 0, sizeof(vinfo)); + + for (i = 0; i < fs->num_inputs; i++) { + switch (fs->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + case TGSI_SEMANTIC_COLOR: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + default: + case TGSI_SEMANTIC_GENERIC: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + case TGSI_SEMANTIC_FOG: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + } + } + draw_compute_vertex_size(&vinfo); +} + +static void * +nv10_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv10_blend_state *cb; + + cb = malloc(sizeof(struct nv10_blend_state)); + + cb->b_enable = cso->blend_enable ? 1 : 0; + cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | + (nvgl_blend_func(cso->rgb_src_factor))); + cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | + (nvgl_blend_func(cso->rgb_dst_factor))); + + cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); + + cb->d_enable = cso->dither ? 1 : 0; + + return (void *)cb; +} + +static void +nv10_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_blend_state *cb = hwcso; + + BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); + OUT_RING (cb->d_enable); + + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); + OUT_RING (cb->b_enable); + OUT_RING (cb->b_srcfunc); + OUT_RING (cb->b_dstfunc); + + BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (cb->c_mask); +} + +static void +nv10_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT; + break; + } + + return ret >> NV10TCL_TX_FORMAT_WRAP_S_SHIFT; +} + +static void * +nv10_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv10_sampler_state *ps; + uint32_t filter = 0; + + ps = malloc(sizeof(struct nv10_sampler_state)); + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV10TCL_TX_FORMAT_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV10TCL_TX_FORMAT_WRAP_T_SHIFT)); + + ps->en = 0; + if (cso->max_anisotropy > 1.0) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + +/* if (cso->max_anisotropy >= 16.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_4X; + } else { + ps->en |= NV10TCL_TX_ENABLE_ANISO_2X; + }*/ + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV10TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV10TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + ps->filt = filter; + +/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + }*/ + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv10_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv10_context *nv10 = nv10_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv10->tex_sampler[unit] = sampler[unit]; + nv10->dirty_samplers |= (1 << unit); + } +} + +static void +nv10_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void +nv10_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv10_context *nv10 = nv10_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv10->tex_miptree[unit] = (struct nv10_miptree *)miptree[unit]; + nv10->dirty_samplers |= (1 << unit); + } +} + +static void * +nv10_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv10_rasterizer_state *rs; + int i; + + /*XXX: ignored: + * light_twoside + * offset_cw/ccw -nohw + * scissor + * point_smooth -nohw + * multisample + * offset_units / offset_scale + */ + rs = malloc(sizeof(struct nv10_rasterizer_state)); + + rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01; + + rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; + rs->line_smooth_en = cso->line_smooth ? 1 : 0; + + rs->point_size = *(uint32_t*)&cso->point_size; + + rs->poly_smooth_en = cso->poly_smooth ? 1 : 0; + + if (cso->front_winding == PIPE_WINDING_CCW) { + rs->front_face = NV10TCL_FRONT_FACE_CCW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw); + } else { + rs->front_face = NV10TCL_FRONT_FACE_CW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw); + } + + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CCW) + rs->cull_face = NV10TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV10TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_CW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CW) + rs->cull_face = NV10TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV10TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_BOTH: + rs->cull_face_en = 1; + rs->cull_face = NV10TCL_CULL_FACE_FRONT_AND_BACK; + break; + case PIPE_WINDING_NONE: + default: + rs->cull_face_en = 0; + rs->cull_face = 0; + break; + } + + if (cso->point_sprite) { + rs->point_sprite = (1 << 0); + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + rs->point_sprite |= (1 << (8 + i)); + } + } else { + rs->point_sprite = 0; + } + + return (void *)rs; +} + +static void +nv10_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_rasterizer_state *rs = hwcso; + + BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); + OUT_RING (rs->shade_model); + OUT_RING (rs->line_width); + + + BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (rs->point_size); + + BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (rs->poly_mode_front); + OUT_RING (rs->poly_mode_back); + + + BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (rs->cull_face); + OUT_RING (rs->front_face); + + BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (rs->line_smooth_en); + OUT_RING (rs->poly_smooth_en); + + BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (rs->cull_face_en); + +/* BEGIN_RING(celsius, NV10TCL_POINT_SPRITE, 1); + OUT_RING (rs->point_sprite);*/ +} + +static void +nv10_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv10_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv10_depth_stencil_alpha_state *hw; + + hw = malloc(sizeof(struct nv10_depth_stencil_alpha_state)); + + hw->depth.func = nvgl_comparison_op(cso->depth.func); + hw->depth.write_enable = cso->depth.writemask ? 1 : 0; + hw->depth.test_enable = cso->depth.enabled ? 1 : 0; + + hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0; + hw->stencil.wmask = cso->stencil[0].write_mask; + hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func); + hw->stencil.ref = cso->stencil[0].ref_value; + hw->stencil.vmask = cso->stencil[0].value_mask; + hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op); + hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op); + hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op); + + hw->alpha.enabled = cso->alpha.enabled ? 1 : 0; + hw->alpha.func = nvgl_comparison_op(cso->alpha.func); + hw->alpha.ref = float_to_ubyte(cso->alpha.ref); + + return (void *)hw; +} + +static void +nv10_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_depth_stencil_alpha_state *hw = hwcso; + + BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 3); + OUT_RINGp ((uint32_t *)&hw->depth, 3); + BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); + OUT_RING (hw->stencil.enable); + BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); + OUT_RINGp ((uint32_t *)&(hw->stencil.wmask), 7); + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 3); + OUT_RINGp ((uint32_t *)&hw->alpha.enabled, 3); +} + +static void +nv10_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv10_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv10_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nv10_vertex_program)); + vp->pipe = cso; + + return (void *)vp; +} + +static void +nv10_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_vertex_program *vp = hwcso; + + nv10->vertprog.current = vp; + nv10->dirty |= NV10_NEW_VERTPROG; +} + +static void +nv10_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_vertex_program *vp = hwcso; + + //nv10_vertprog_destroy(nv10, vp); + free(vp); +} + +static void * +nv10_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv10_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv10_fragment_program)); + fp->pipe = cso; + + return (void *)fp; +} + +static void +nv10_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_fragment_program *fp = hwcso; + + nv10->fragprog.current = fp; + nv10->dirty |= NV10_NEW_FRAGPROG; +} + +static void +nv10_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_fragment_program *fp = hwcso; + + nv10_fragprog_destroy(nv10, fp); + free(fp); +} + +static void +nv10_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); + OUT_RING ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0)); +} + +static void +nv10_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + if (shader == PIPE_SHADER_VERTEX) { + nv10->vertprog.constant_buf = buf->buffer; + nv10->dirty |= NV10_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv10->fragprog.constant_buf = buf->buffer; + nv10->dirty |= NV10_NEW_FRAGPROG; + } +} + +static void +nv10_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct pipe_surface *rt, *zeta; + uint32_t rt_format, w, h; + int i, colour_format = 0, zeta_format = 0; + + w = fb->cbufs[0]->width; + h = fb->cbufs[0]->height; + colour_format = fb->cbufs[0]->format; + rt = fb->cbufs[0]; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV10TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); + OUT_RING ( (rt->pitch * rt->cpp) | ( (zeta->pitch * zeta->cpp) << 16) ); + nv10->rt[0] = rt->buffer; + + if (zeta_format) + { + nv10->zeta = zeta->buffer; + } + + BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); + OUT_RING ((w << 16) | 0); + OUT_RING ((h << 16) | 0); + OUT_RING (rt_format); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (((w - 1) << 16) | 0); + OUT_RING (((h - 1) << 16) | 0); +} + +static void +nv10_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv10_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + // XXX +/* BEGIN_RING(celsius, NV10TCL_SCISSOR_HORIZ, 2); + OUT_RING (((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void +nv10_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv10_context *nv10 = nv10_context(pipe); + +/* OUT_RINGf (vpt->translate[0]); + OUT_RINGf (vpt->translate[1]); + OUT_RINGf (vpt->translate[2]); + OUT_RINGf (vpt->translate[3]);*/ + BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4); + OUT_RINGf (vpt->scale[0]); + OUT_RINGf (vpt->scale[1]); + OUT_RINGf (vpt->scale[2]); + OUT_RINGf (vpt->scale[3]); +} + +static void +nv10_set_vertex_buffer(struct pipe_context *pipe, unsigned index, + const struct pipe_vertex_buffer *vb) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->vtxbuf[index] = *vb; + + nv10->dirty |= NV10_NEW_ARRAYS; +} + +static void +nv10_set_vertex_element(struct pipe_context *pipe, unsigned index, + const struct pipe_vertex_element *ve) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->vtxelt[index] = *ve; + + nv10->dirty |= NV10_NEW_ARRAYS; +} + +void +nv10_init_state_functions(struct nv10_context *nv10) +{ + nv10->pipe.create_blend_state = nv10_blend_state_create; + nv10->pipe.bind_blend_state = nv10_blend_state_bind; + nv10->pipe.delete_blend_state = nv10_blend_state_delete; + + nv10->pipe.create_sampler_state = nv10_sampler_state_create; + nv10->pipe.bind_sampler_states = nv10_sampler_state_bind; + nv10->pipe.delete_sampler_state = nv10_sampler_state_delete; + nv10->pipe.set_sampler_textures = nv10_set_sampler_texture; + + nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create; + nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind; + nv10->pipe.delete_rasterizer_state = nv10_rasterizer_state_delete; + + nv10->pipe.create_depth_stencil_alpha_state = + nv10_depth_stencil_alpha_state_create; + nv10->pipe.bind_depth_stencil_alpha_state = + nv10_depth_stencil_alpha_state_bind; + nv10->pipe.delete_depth_stencil_alpha_state = + nv10_depth_stencil_alpha_state_delete; + + nv10->pipe.create_vs_state = nv10_vp_state_create; + nv10->pipe.bind_vs_state = nv10_vp_state_bind; + nv10->pipe.delete_vs_state = nv10_vp_state_delete; + + nv10->pipe.create_fs_state = nv10_fp_state_create; + nv10->pipe.bind_fs_state = nv10_fp_state_bind; + nv10->pipe.delete_fs_state = nv10_fp_state_delete; + + nv10->pipe.set_blend_color = nv10_set_blend_color; + nv10->pipe.set_clip_state = nv10_set_clip_state; + nv10->pipe.set_constant_buffer = nv10_set_constant_buffer; + nv10->pipe.set_framebuffer_state = nv10_set_framebuffer_state; + nv10->pipe.set_polygon_stipple = nv10_set_polygon_stipple; + nv10->pipe.set_scissor_state = nv10_set_scissor_state; + nv10->pipe.set_viewport_state = nv10_set_viewport_state; + + nv10->pipe.set_vertex_buffer = nv10_set_vertex_buffer; + nv10->pipe.set_vertex_element = nv10_set_vertex_element; +} + diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h new file mode 100644 index 00000000000..d4b703c7adb --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -0,0 +1,135 @@ +#ifndef __NV10_STATE_H__ +#define __NV10_STATE_H__ + +#include "pipe/p_state.h" + +struct nv10_blend_state { + uint32_t b_enable; + uint32_t b_srcfunc; + uint32_t b_dstfunc; + + uint32_t c_mask; + + uint32_t d_enable; +}; + +struct nv10_sampler_state { + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv10_rasterizer_state { + uint32_t shade_model; + + uint32_t line_width; + uint32_t line_smooth_en; + + uint32_t point_size; + + uint32_t poly_smooth_en; + + uint32_t poly_mode_front; + uint32_t poly_mode_back; + + uint32_t front_face; + uint32_t cull_face; + uint32_t cull_face_en; + + uint32_t point_sprite; +}; + +struct nv10_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv10_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv10_vertex_program { + const struct pipe_shader_state *pipe; + + boolean translated; + struct nv10_vertex_program_exec *insns; + unsigned nr_insns; + struct nv10_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; +}; + +struct nv10_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv10_fragment_program { + const struct pipe_shader_state *pipe; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv10_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; +}; + + +struct nv10_depth_stencil_alpha_state { + struct { + uint32_t func; + uint32_t write_enable; + uint32_t test_enable; + } depth; + + struct { + uint32_t enable; + uint32_t wmask; + uint32_t func; + uint32_t ref; + uint32_t vmask; + uint32_t fail; + uint32_t zfail; + uint32_t zpass; + } stencil; + + struct { + uint32_t enabled; + uint32_t func; + uint32_t ref; + } alpha; +}; + +struct nv10_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c new file mode 100644 index 00000000000..1d104e2f913 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -0,0 +1,69 @@ +#include "nv10_context.h" +#include "nv10_state.h" + +void +nv10_emit_hw_state(struct nv10_context *nv10) +{ + int i; + + if (nv10->dirty & NV10_NEW_FRAGPROG) { + nv10_fragprog_bind(nv10, nv10->fragprog.current); + /*XXX: clear NV10_NEW_FRAGPROG if no new program uploaded */ + } + + if (nv10->dirty_samplers || (nv10->dirty & NV10_NEW_FRAGPROG)) { + nv10_fragtex_bind(nv10); + nv10->dirty &= ~NV10_NEW_FRAGPROG; + } + + if (nv10->dirty & NV10_NEW_VERTPROG) { + //nv10_vertprog_bind(nv10, nv10->vertprog.current); + nv10->dirty &= ~NV10_NEW_VERTPROG; + } + + if (nv10->dirty & NV10_NEW_VBO) { + + } + + nv10->dirty_samplers = 0; + + /* Emit relocs for every referenced buffer. + * This is to ensure the bufmgr has an accurate idea of how + * the buffer is used. This isn't very efficient, but we don't + * seem to take a significant performance hit. Will be improved + * at some point. Vertex arrays are emitted by nv10_vbo.c + */ + + /* Render target */ +// XXX figre out who's who for NV10TCL_DMA_* and fill accordingly +// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1); +// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + if (nv10->zeta) { +// XXX +// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1); +// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1); + OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + /* XXX for when we allocate LMA on nv17 */ +/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_OFFSET, 1); + OUT_RELOCl(nv10->zeta+...);*/ + } + + /* Texture images */ + for (i = 0; i < 2; i++) { + if (!(nv10->fp_samplers & (1 << i))) + continue; + BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 2); + OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD); + // XXX +/* OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | + NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, + NV10TCL_TX_FORMAT_DMA1);*/ + } +} + diff --git a/src/gallium/drivers/nv10/nv10_surface.c b/src/gallium/drivers/nv10/nv10_surface.c new file mode 100644 index 00000000000..2e230ebbecc --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_surface.c @@ -0,0 +1,65 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nv10_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/p_tile.h" + +static void +nv10_surface_copy(struct pipe_context *pipe, unsigned do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nouveau_winsys *nvws = nv10->nvws; + + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); +} + +static void +nv10_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nouveau_winsys *nvws = nv10->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +void +nv10_init_surface_functions(struct nv10_context *nv10) +{ + nv10->pipe.surface_copy = nv10_surface_copy; + nv10->pipe.surface_fill = nv10_surface_fill; +} diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c new file mode 100644 index 00000000000..025ad6de4bd --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -0,0 +1,72 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "nv10_context.h" +#include "nv10_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv10_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct nv10_context *nv10 = nv10_context( pipe ); + struct draw_context *draw = nv10->draw; + unsigned i; + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (nv10->vtxbuf[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + nv10->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + /* draw! */ + draw_arrays(nv10->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (nv10->vtxbuf[i].buffer) { + pipe->winsys->buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (indexBuffer) { + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + return TRUE; +} + +boolean nv10_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return nv10_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + diff --git a/src/gallium/winsys/dri/nouveau/Makefile b/src/gallium/winsys/dri/nouveau/Makefile index b463f218fd5..f637b2cebfc 100644 --- a/src/gallium/winsys/dri/nouveau/Makefile +++ b/src/gallium/winsys/dri/nouveau/Makefile @@ -8,6 +8,7 @@ MINIGLX_SOURCES = PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/nv10/libnv10.a \ $(TOP)/src/gallium/drivers/nv30/libnv30.a \ $(TOP)/src/gallium/drivers/nv40/libnv40.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c index 8b20b3689c6..336dd65847b 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.c @@ -119,6 +119,9 @@ nouveau_context_create(const __GLcontextModes *glVis, } switch (nv->chipset & 0xf0) { + case 0x10: + case 0x20: + /* NV10 */ case 0x30: /* NV30 */ case 0x40: diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c index 64e84fb68e4..6c85aab9f5f 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c @@ -82,6 +82,11 @@ nouveau_pipe_create(struct nouveau_context *nv) return NULL; switch (nv->chipset & 0xf0) { + case 0x10: + case 0x20: + hws_create = nv10_screen_create; + hw_create = nv10_create; + break; case 0x30: hws_create = nv30_screen_create; hw_create = nv30_create; -- cgit v1.2.3 From 5e17088ee3d0ddfa8871d92d262bb5242bdd92bd Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 15 Mar 2008 16:45:49 +0100 Subject: cso_context_destroy calls bind_state functions with NULL parameter --- src/gallium/drivers/nv30/nv30_state.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 722626db1fa..80dfd9c5c0a 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -40,6 +40,10 @@ nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) struct nv30_context *nv30 = nv30_context(pipe); struct nv30_blend_state *cb = hwcso; + if (!hwcso) { + return; + } + BEGIN_RING(rankine, NV34TCL_DITHER_ENABLE, 1); OUT_RING (cb->d_enable); @@ -236,6 +240,10 @@ nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) struct nv30_context *nv30 = nv30_context(pipe); unsigned unit; + if (!sampler) { + return; + } + for (unit = 0; unit < nr; unit++) { nv30->tex_sampler[unit] = sampler[unit]; nv30->dirty_samplers |= (1 << unit); @@ -346,6 +354,10 @@ nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) struct nv30_context *nv30 = nv30_context(pipe); struct nv30_rasterizer_state *rs = hwcso; + if (!hwcso) { + return; + } + BEGIN_RING(rankine, NV34TCL_SHADE_MODEL, 1); OUT_RING (rs->shade_model); @@ -422,6 +434,10 @@ nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) struct nv30_context *nv30 = nv30_context(pipe); struct nv30_depth_stencil_alpha_state *hw = hwcso; + if (!hwcso) { + return; + } + BEGIN_RING(rankine, NV34TCL_DEPTH_FUNC, 3); OUT_RINGp ((uint32_t *)&hw->depth, 3); BEGIN_RING(rankine, NV34TCL_STENCIL_BACK_ENABLE, 16); @@ -455,6 +471,10 @@ nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso) struct nv30_context *nv30 = nv30_context(pipe); struct nv30_vertex_program *vp = hwcso; + if (!hwcso) { + return; + } + nv30->vertprog.current = vp; nv30->dirty |= NV30_NEW_VERTPROG; } @@ -487,6 +507,10 @@ nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) struct nv30_context *nv30 = nv30_context(pipe); struct nv30_fragment_program *fp = hwcso; + if (!hwcso) { + return; + } + nv30->fragprog.current = fp; nv30->dirty |= NV30_NEW_FRAGPROG; } -- cgit v1.2.3 From 7d2c63e90983088f1e2f49543caf0468aa91111f Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 16 Mar 2008 12:55:02 +1100 Subject: nv10: fix build after merge --- src/gallium/drivers/nv10/nv10_state.c | 8 +++++--- src/gallium/drivers/nv10/nv10_state.h | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 313230fe889..d7c445f1b39 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -10,15 +10,15 @@ static void nv10_vertex_layout(struct pipe_context* pipe) { struct nv10_context *nv10 = nv10_context(pipe); - const struct pipe_shader_state *fs = nv10->fragprog.current->pipe; + struct nv10_fragment_program *fp = nv10->fragprog.current; uint32_t src = 0; int i; struct vertex_info vinfo; memset(&vinfo, 0, sizeof(vinfo)); - for (i = 0; i < fs->num_inputs; i++) { - switch (fs->input_semantic_name[i]) { + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); break; @@ -476,6 +476,8 @@ nv10_fp_state_create(struct pipe_context *pipe, fp = CALLOC(1, sizeof(struct nv10_fragment_program)); fp->pipe = cso; + + tgsi_scan_shader(cso->tokens, &fp->info); return (void *)fp; } diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h index d4b703c7adb..9bda8a7d6a3 100644 --- a/src/gallium/drivers/nv10/nv10_state.h +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -2,6 +2,7 @@ #define __NV10_STATE_H__ #include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" struct nv10_blend_state { uint32_t b_enable; @@ -77,6 +78,7 @@ struct nv10_fragment_program_data { struct nv10_fragment_program { const struct pipe_shader_state *pipe; + struct tgsi_shader_info info; boolean translated; boolean on_hw; -- cgit v1.2.3 From e1cf3f00e546f814effd25e9ccd072c941366444 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 13 Mar 2008 18:29:56 +1100 Subject: nv40: simple swtnl path (half broken, but getting there) --- src/gallium/drivers/nv40/nv40_context.c | 6 +- src/gallium/drivers/nv40/nv40_context.h | 25 +- src/gallium/drivers/nv40/nv40_draw.c | 334 +++++++++++++++++++++++-- src/gallium/drivers/nv40/nv40_fragprog.c | 3 +- src/gallium/drivers/nv40/nv40_shader.h | 2 + src/gallium/drivers/nv40/nv40_state.c | 27 +- src/gallium/drivers/nv40/nv40_state.h | 4 + src/gallium/drivers/nv40/nv40_state_clip.c | 8 +- src/gallium/drivers/nv40/nv40_state_emit.c | 130 +++++++--- src/gallium/drivers/nv40/nv40_state_viewport.c | 45 +++- src/gallium/drivers/nv40/nv40_vbo.c | 22 +- src/gallium/drivers/nv40/nv40_vertprog.c | 16 +- 12 files changed, 531 insertions(+), 91 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 203c843a013..58627443b80 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -74,8 +74,12 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) nv40_init_state_functions(nv40); nv40_init_miptree_functions(nv40); + /* Create, configure, and install fallback swtnl path */ nv40->draw = draw_create(); - assert(nv40->draw); + draw_wide_point_threshold(nv40->draw, 9999999.0); + draw_wide_line_threshold(nv40->draw, 9999999.0); + draw_enable_line_stipple(nv40->draw, FALSE); + draw_enable_point_sprites(nv40->draw, FALSE); draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); return &nv40->pipe; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 100c678187a..02ca20b8014 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -116,7 +116,20 @@ struct nv40_context { /* HW state derived from pipe states */ struct nv40_state state; - unsigned fallback; + struct { + struct nv40_vertex_program *vertprog; + + unsigned nr_attribs; + unsigned hw[PIPE_MAX_SHADER_INPUTS]; + unsigned draw[PIPE_MAX_SHADER_INPUTS]; + unsigned emit[PIPE_MAX_SHADER_INPUTS]; + } swtnl; + + enum { + HW, SWTNL, SWRAST + } render_mode; + unsigned fallback_swtnl; + unsigned fallback_swrast; /* Context state */ unsigned dirty; @@ -166,6 +179,10 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); /* nv40_draw.c */ extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); +extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, + unsigned ib_size, unsigned mode, + unsigned start, unsigned count); /* nv40_vertprog.c */ extern void nv40_vertprog_destroy(struct nv40_context *, @@ -179,8 +196,9 @@ extern void nv40_fragprog_destroy(struct nv40_context *, extern void nv40_fragtex_bind(struct nv40_context *); /* nv40_state.c and friends */ -extern void nv40_emit_hw_state(struct nv40_context *nv40); -extern void nv40_state_tex_update(struct nv40_context *nv40); +extern boolean nv40_state_validate(struct nv40_context *nv40); +extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); +extern void nv40_state_emit(struct nv40_context *nv40); extern struct nv40_state_entry nv40_state_clip; extern struct nv40_state_entry nv40_state_rasterizer; extern struct nv40_state_entry nv40_state_scissor; @@ -194,6 +212,7 @@ extern struct nv40_state_entry nv40_state_viewport; extern struct nv40_state_entry nv40_state_framebuffer; extern struct nv40_state_entry nv40_state_fragtex; extern struct nv40_state_entry nv40_state_vbo; +extern struct nv40_state_entry nv40_state_vtxfmt; /* nv40_vbo.c */ extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index a39bb85e99b..ce0e0bc6f23 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -1,62 +1,350 @@ -#include "draw/draw_private.h" #include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_private.h" #include "nv40_context.h" +#define NV40_SHADER_NO_FUCKEDNESS +#include "nv40_shader.h" + +/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very + * often at all. Uses "quadro style" vertex submission + a fixed vertex + * layout to avoid the need to generate a vertex program or vtxfmt. + */ -struct nv40_draw_stage { - struct draw_stage draw; +struct nv40_render_stage { + struct draw_stage stage; struct nv40_context *nv40; + unsigned prim; }; +static INLINE struct nv40_render_stage * +nv40_render_stage(struct draw_stage *stage) +{ + return (struct nv40_render_stage *)stage; +} + +static INLINE void +nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) +{ + unsigned i; + + for (i = 0; i < nv40->swtnl.nr_attribs; i++) { + unsigned idx = nv40->swtnl.draw[i]; + unsigned hw = nv40->swtnl.hw[i]; + + switch (nv40->swtnl.emit[i]) { + case EMIT_OMIT: + break; + case EMIT_1F: + BEGIN_RING(curie, 0x1e40 + (hw * 4), 1); + OUT_RING (fui(v->data[idx][0])); + break; + case EMIT_2F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + break; + case EMIT_3F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + OUT_RING (fui(v->data[idx][2])); + break; + case EMIT_4F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + OUT_RING (fui(v->data[idx][2])); + OUT_RING (fui(v->data[idx][3])); + break; + case EMIT_4UB: + BEGIN_RING(curie, 0x1940 + (hw * 4), 1); + OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]), + float_to_ubyte(v->data[idx][1]), + float_to_ubyte(v->data[idx][2]), + float_to_ubyte(v->data[idx][3]))); + break; + default: + assert(0); + break; + } + } +} + +static INLINE void +nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, + unsigned mode, unsigned count) +{ + struct nv40_render_stage *rs = nv40_render_stage(stage); + struct nv40_context *nv40 = rs->nv40; + struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf; + unsigned i; + + /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ + if (pb->remaining < ((count * 20) + 6)) { + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + NOUVEAU_ERR("AIII, missed flush\n"); + assert(0); + } + FIRE_RING(); + nv40_state_emit(nv40); + } + + /* Switch primitive modes if necessary */ + if (rs->prim != mode) { + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (mode); + rs->prim = mode; + } + + /* Emit vertex data */ + for (i = 0; i < count; i++) + nv40_render_vertex(nv40, prim->v[i]); + + /* If it's likely we'll need to empty the push buffer soon, finish + * off the primitive now. + */ + if (pb->remaining < ((count * 20) + 6)) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + rs->prim = NV40TCL_BEGIN_END_STOP; + } +} + static void -nv40_draw_point(struct draw_stage *draw, struct prim_header *prim) +nv40_render_point(struct draw_stage *draw, struct prim_header *prim) { - NOUVEAU_ERR("\n"); + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1); } static void -nv40_draw_line(struct draw_stage *draw, struct prim_header *prim) +nv40_render_line(struct draw_stage *draw, struct prim_header *prim) { - NOUVEAU_ERR("\n"); + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2); } static void -nv40_draw_tri(struct draw_stage *draw, struct prim_header *prim) +nv40_render_tri(struct draw_stage *draw, struct prim_header *prim) { - NOUVEAU_ERR("\n"); + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3); } static void -nv40_draw_flush(struct draw_stage *draw, unsigned flags) +nv40_render_flush(struct draw_stage *draw, unsigned flags) { + struct nv40_render_stage *rs = nv40_render_stage(draw); + struct nv40_context *nv40 = rs->nv40; + + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + rs->prim = NV40TCL_BEGIN_END_STOP; + } } static void -nv40_draw_reset_stipple_counter(struct draw_stage *draw) +nv40_render_reset_stipple_counter(struct draw_stage *draw) { - NOUVEAU_ERR("\n"); } static void -nv40_draw_destroy(struct draw_stage *draw) +nv40_render_destroy(struct draw_stage *draw) { free(draw); } +static INLINE void +emit_mov(struct nv40_vertex_program *vp, + unsigned dst, unsigned src, unsigned vor, unsigned mask) +{ + struct nv40_vertex_program_exec *inst; + + vp->insns = realloc(vp->insns, + sizeof(struct nv40_vertex_program_exec) * + ++vp->nr_insns); + inst = &vp->insns[vp->nr_insns - 1]; + + inst->data[0] = 0x401f9c6c; + inst->data[1] = 0x0040000d | (src << 8); + inst->data[2] = 0x8106c083; + inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); + inst->const_index = -1; + inst->has_branch_offset = FALSE; + + vp->ir |= (1 << src); + if (vor != ~0) + vp->or |= (1 << vor); +} + +static struct nv40_vertex_program * +create_drawvp(struct nv40_context *nv40) +{ + struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program); + unsigned i; + + emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8); + for (i = 0; i < 8; i++) + emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf); + + vp->insns[vp->nr_insns - 1].data[3] |= 1; + vp->translated = TRUE; + return vp; +} + struct draw_stage * nv40_draw_render_stage(struct nv40_context *nv40) { - struct nv40_draw_stage *nv40draw = CALLOC_STRUCT(nv40_draw_stage); + struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage); + + if (!nv40->swtnl.vertprog) + nv40->swtnl.vertprog = create_drawvp(nv40); + + render->nv40 = nv40; + render->stage.draw = nv40->draw; + render->stage.point = nv40_render_point; + render->stage.line = nv40_render_line; + render->stage.tri = nv40_render_tri; + render->stage.flush = nv40_render_flush; + render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter; + render->stage.destroy = nv40_render_destroy; + + return &render->stage; +} + +boolean +nv40_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, unsigned idxbuf_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + unsigned i; + void *map; + + if (!nv40_state_validate_swtnl(nv40)) + return FALSE; + nv40_state_emit(nv40); - nv40draw->nv40 = nv40; - nv40draw->draw.draw = nv40->draw; - nv40draw->draw.point = nv40_draw_point; - nv40draw->draw.line = nv40_draw_line; - nv40draw->draw.tri = nv40_draw_tri; - nv40draw->draw.flush = nv40_draw_flush; - nv40draw->draw.reset_stipple_counter = nv40_draw_reset_stipple_counter; - nv40draw->draw.destroy = nv40_draw_destroy; + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (!nv40->vtxbuf[i].buffer) + continue; + map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(nv40->draw, i, map); + } - return &nv40draw->draw; + if (idxbuf) { + map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map); + } else { + draw_set_mapped_element_buffer(nv40->draw, 0, NULL); + } + + if (nv40->constbuf[PIPE_SHADER_VERTEX]) { + map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(nv40->draw, map); + } + + draw_arrays(nv40->draw, mode, start, count); + + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (!nv40->vtxbuf[i].buffer) + continue; + ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer); + } + + if (idxbuf) + ws->buffer_unmap(ws, idxbuf); + + if (nv40->constbuf[PIPE_SHADER_VERTEX]) + ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); + + draw_flush(nv40->draw); + pipe->flush(pipe, 0); + + return TRUE; } +static INLINE void +emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, + unsigned semantic, unsigned index) +{ + unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); + unsigned a = nv40->swtnl.nr_attribs++; + + nv40->swtnl.hw[a] = hw; + nv40->swtnl.emit[a] = emit; + nv40->swtnl.draw[a] = draw_out; +} + +static boolean +nv40_state_vtxfmt_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog; + unsigned colour = 0, texcoords = 0, fog = 0, i; + + /* Determine needed fragprog inputs */ + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + colour |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_GENERIC: + texcoords |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_FOG: + fog = 1; + break; + default: + assert(0); + } + } + + nv40->swtnl.nr_attribs = 0; + + /* Map draw vtxprog output to hw attribute IDs */ + for (i = 0; i < 2; i++) { + if (!(colour & (1 << i))) + continue; + emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); + } + + for (i = 0; i < 8; i++) { + if (!(texcoords & (1 << i))) + continue; + emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); + } + + if (fog) { + emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); + } + + emit_attrib(nv40, 0, EMIT_4F, TGSI_SEMANTIC_POSITION, 0); + + return FALSE; +} + +struct nv40_state_entry nv40_state_vtxfmt = { + .validate = nv40_state_vtxfmt_validate, + .dirty = { + .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG, + .hw = 0 + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 953f9cd9082..82dbcd3eef2 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -797,9 +797,10 @@ nv40_fragprog_validate(struct nv40_context *nv40) if (fp->translated) goto update_constants; + nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG; nv40_fragprog_translate(nv40, fp); if (!fp->translated) { - nv40->fallback |= NV40_FALLBACK_RAST; + nv40->fallback_swrast |= NV40_NEW_FRAGPROG; return FALSE; } diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h index 5909c70713c..854dccf5486 100644 --- a/src/gallium/drivers/nv40/nv40_shader.h +++ b/src/gallium/drivers/nv40/nv40_shader.h @@ -476,6 +476,7 @@ # define NV40_FP_SWIZZLE_W 3 #define NV40_FP_REG_NEGATE (1 << 17) +#ifndef NV40_SHADER_NO_FUCKEDNESS #define NV40SR_NONE 0 #define NV40SR_OUTPUT 1 #define NV40SR_INPUT 2 @@ -550,5 +551,6 @@ nv40_sr_scale(struct nv40_sreg src, int scale) src.dst_scale = scale; return src; } +#endif #endif diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 321d5de0415..3eafbece309 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -3,6 +3,8 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "draw/draw_context.h" + #include "nv40_context.h" #include "nv40_state.h" @@ -345,7 +347,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); break; default: - so_data(so, 0); + so_data(so, NV40TCL_CULL_FACE_BACK); break; } so_data(so, NV40TCL_FRONT_FACE_CCW); @@ -363,13 +365,13 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); break; default: - so_data(so, 0); + so_data(so, NV40TCL_CULL_FACE_BACK); break; } so_data(so, NV40TCL_FRONT_FACE_CW); } so_data(so, cso->poly_smooth ? 1 : 0); - so_data(so, cso->cull_mode != PIPE_WINDING_NONE ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, cso->poly_stipple_enable ? 1 : 0); @@ -419,6 +421,9 @@ static void nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_rasterizer_state *rsso = hwcso; + + draw_set_rasterizer_state(nv40->draw, &rsso->pipe); nv40->rasterizer = hwcso; nv40->dirty |= NV40_NEW_RAST; @@ -508,10 +513,12 @@ static void * nv40_vp_state_create(struct pipe_context *pipe, const struct pipe_shader_state *cso) { + struct nv40_context *nv40 = nv40_context(pipe); struct nv40_vertex_program *vp; vp = CALLOC(1, sizeof(struct nv40_vertex_program)); vp->pipe = *cso; + vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe); return (void *)vp; } @@ -520,6 +527,9 @@ static void nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_vertex_program *vp = hwcso; + + draw_bind_vertex_shader(nv40->draw, vp ? vp->draw : NULL); nv40->vertprog = hwcso; nv40->dirty |= NV40_NEW_VERTPROG; @@ -531,6 +541,7 @@ nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv40_context *nv40 = nv40_context(pipe); struct nv40_vertex_program *vp = hwcso; + draw_delete_vertex_shader(nv40->draw, vp->draw); nv40_vertprog_destroy(nv40, vp); FREE(vp); } @@ -544,6 +555,8 @@ nv40_fp_state_create(struct pipe_context *pipe, fp = CALLOC(1, sizeof(struct nv40_fragment_program)); fp->pipe = *cso; + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + return (void *)fp; } @@ -582,6 +595,8 @@ nv40_set_clip_state(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); + draw_set_clip_state(nv40->draw, clip); + nv40->clip = *clip; nv40->dirty |= NV40_NEW_UCP; } @@ -638,6 +653,8 @@ nv40_set_viewport_state(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); + draw_set_viewport_state(nv40->draw, vpt); + nv40->viewport = *vpt; nv40->dirty |= NV40_NEW_VIEWPORT; } @@ -648,6 +665,8 @@ nv40_set_vertex_buffer(struct pipe_context *pipe, unsigned index, { struct nv40_context *nv40 = nv40_context(pipe); + draw_set_vertex_buffer(nv40->draw, index, vb); + nv40->vtxbuf[index] = *vb; nv40->dirty |= NV40_NEW_ARRAYS; } @@ -658,6 +677,8 @@ nv40_set_vertex_element(struct pipe_context *pipe, unsigned index, { struct nv40_context *nv40 = nv40_context(pipe); + draw_set_vertex_element(nv40->draw, index, ve); + nv40->vtxelt[index] = *ve; nv40->dirty |= NV40_NEW_ARRAYS; } diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index a02ea0c8781..ab2866eb7a5 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -2,6 +2,7 @@ #define __NV40_STATE_H__ #include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" struct nv40_sampler_state { uint32_t fmt; @@ -25,6 +26,8 @@ struct nv40_vertex_program_data { struct nv40_vertex_program { struct pipe_shader_state pipe; + struct draw_vertex_shader *draw; + boolean translated; struct nv40_vertex_program_exec *insns; unsigned nr_insns; @@ -49,6 +52,7 @@ struct nv40_fragment_program_data { struct nv40_fragment_program { struct pipe_shader_state pipe; + struct tgsi_shader_info info; boolean translated; unsigned samplers; diff --git a/src/gallium/drivers/nv40/nv40_state_clip.c b/src/gallium/drivers/nv40/nv40_state_clip.c index 93e690161f3..c52390f9edb 100644 --- a/src/gallium/drivers/nv40/nv40_state_clip.c +++ b/src/gallium/drivers/nv40/nv40_state_clip.c @@ -3,8 +3,12 @@ static boolean nv40_state_clip_validate(struct nv40_context *nv40) { - if (nv40->clip.nr) - nv40->fallback |= NV40_FALLBACK_TNL; + + if (nv40->render_mode == HW) { + nv40->fallback_swtnl &= ~NV40_NEW_UCP; + if (nv40->clip.nr) + nv40->fallback_swtnl |= NV40_NEW_UCP; + } return FALSE; } diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 9f268640e00..056238cc836 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -1,5 +1,6 @@ #include "nv40_context.h" #include "nv40_state.h" +#include "draw/draw_context.h" static struct nv40_state_entry *render_states[] = { &nv40_state_framebuffer, @@ -18,15 +19,27 @@ static struct nv40_state_entry *render_states[] = { NULL }; +static struct nv40_state_entry *swtnl_states[] = { + &nv40_state_framebuffer, + &nv40_state_rasterizer, + &nv40_state_clip, + &nv40_state_scissor, + &nv40_state_stipple, + &nv40_state_fragprog, + &nv40_state_fragtex, + &nv40_state_vertprog, + &nv40_state_blend, + &nv40_state_blend_colour, + &nv40_state_zsa, + &nv40_state_viewport, + &nv40_state_vtxfmt, + NULL +}; + static void -nv40_state_validate(struct nv40_context *nv40) +nv40_state_do_validate(struct nv40_context *nv40, + struct nv40_state_entry **states) { - struct nv40_state_entry **states = render_states; - unsigned last_fallback; - - last_fallback = nv40->fallback; - nv40->fallback = 0; - while (*states) { struct nv40_state_entry *e = *states; @@ -38,32 +51,15 @@ nv40_state_validate(struct nv40_context *nv40) states++; } nv40->dirty = 0; - - if (nv40->fallback & NV40_FALLBACK_TNL && - !(last_fallback & NV40_FALLBACK_TNL)) { - NOUVEAU_ERR("XXX: hwtnl->swtnl\n"); - } else - if (last_fallback & NV40_FALLBACK_TNL && - !(nv40->fallback & NV40_FALLBACK_TNL)) { - NOUVEAU_ERR("XXX: swtnl->hwtnl\n"); - } - - if (nv40->fallback & NV40_FALLBACK_RAST && - !(last_fallback & NV40_FALLBACK_RAST)) { - NOUVEAU_ERR("XXX: hwrast->swrast\n"); - } else - if (last_fallback & NV40_FALLBACK_RAST && - !(nv40->fallback & NV40_FALLBACK_RAST)) { - NOUVEAU_ERR("XXX: swrast->hwrast\n"); - } } -static void +void nv40_state_emit(struct nv40_context *nv40) { struct nv40_state *state = &nv40->state; struct nv40_screen *screen = nv40->screen; unsigned i, samplers; + uint64 states; if (nv40->pctx_id != screen->cur_pctx) { for (i = 0; i < NV40_STATE_MAX; i++) { @@ -74,14 +70,24 @@ nv40_state_emit(struct nv40_context *nv40) screen->cur_pctx = nv40->pctx_id; } - while (state->dirty) { - unsigned idx = ffsll(state->dirty) - 1; + for (i = 0, states = state->dirty; states; i++) { + if (!(states & (1ULL << i))) + continue; + so_ref (state->hw[i], &nv40->screen->state[i]); + so_emit(nv40->nvws, nv40->screen->state[i]); + states &= ~(1ULL << i); + } - so_ref (state->hw[idx], &nv40->screen->state[idx]); - so_emit(nv40->nvws, nv40->screen->state[idx]); - state->dirty &= ~(1ULL << idx); + if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | + (1ULL << NV40_STATE_FRAGTEX0))) { + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (2); + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (1); } + state->dirty = 0; + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]); for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { if (!(samplers & (1 << i))) @@ -91,18 +97,62 @@ nv40_state_emit(struct nv40_context *nv40) samplers &= ~(1ULL << i); } so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]); - so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]); + if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW) + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]); } -void -nv40_emit_hw_state(struct nv40_context *nv40) +boolean +nv40_state_validate(struct nv40_context *nv40) { - nv40_state_validate(nv40); - nv40_state_emit(nv40); + boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE; + + if (nv40->render_mode != HW) { + /* Don't even bother trying to go back to hw if none + * of the states that caused swtnl previously have changed. + */ + if ((nv40->fallback_swtnl & nv40->dirty) + != nv40->fallback_swtnl) + return FALSE; + + /* Attempt to go to hwtnl again */ + nv40->pipe.flush(&nv40->pipe, 0); + nv40->dirty |= (NV40_NEW_VIEWPORT | + NV40_NEW_VERTPROG | + NV40_NEW_ARRAYS | + NV40_NEW_UCP); + nv40->render_mode = HW; + } + + nv40_state_do_validate(nv40, render_states); + if (nv40->fallback_swtnl || nv40->fallback_swrast) + return FALSE; + + if (was_sw) + NOUVEAU_ERR("swtnl->hw\n"); + + return TRUE; +} + +boolean +nv40_state_validate_swtnl(struct nv40_context *nv40) +{ + /* Setup for swtnl */ + if (nv40->render_mode == HW) { + NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); + nv40->pipe.flush(&nv40->pipe, 0); + nv40->dirty |= (NV40_NEW_VIEWPORT | + NV40_NEW_VERTPROG | + NV40_NEW_ARRAYS | + NV40_NEW_UCP); + nv40->render_mode = SWTNL; + } + + nv40_state_do_validate(nv40, swtnl_states); + if (nv40->fallback_swrast) { + NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast); + return FALSE; + } - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (1); + return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 3a325339072..9e5c7a72a7f 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -3,18 +3,43 @@ static boolean nv40_state_viewport_validate(struct nv40_context *nv40) { - struct nouveau_stateobj *so = so_new(9, 0); + struct nouveau_stateobj *so = so_new(11, 0); struct pipe_viewport_state *vpt = &nv40->viewport; - so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); + if (nv40->render_mode == HW) { + so_method(so, nv40->screen->curie, + NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + so_method(so, nv40->screen->curie, 0x1d78, 1); + so_data (so, 1); + } else { + so_method(so, nv40->screen->curie, + NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(0.0)); + /* Not entirely certain what this is yet. The DDX uses this + * value also as it fixes rendering when you pass + * pre-transformed vertices to the GPU. My best gusss is that + * this bypasses some culling/clipping stage. Might be worth + * noting that points/lines are uneffected by whatever this + * value fixes, only filled polygons are effected. + */ + so_method(so, nv40->screen->curie, 0x1d78, 1); + so_data (so, 0x110); + } so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]); return TRUE; diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index f16afc23b84..fad423fdf85 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -8,6 +8,8 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" +#define FORCE_SWTNL 0 + static INLINE int nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) { @@ -165,7 +167,11 @@ nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned nr; nv40_vbo_set_idxbuf(nv40, NULL, 0); - nv40_emit_hw_state(nv40); + if (FORCE_SWTNL || !nv40_state_validate(nv40)) { + return nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + } + nv40_state_emit(nv40); BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); OUT_RING (nvgl_primitive(mode)); @@ -274,7 +280,7 @@ nv40_draw_elements_inline(struct pipe_context *pipe, struct pipe_winsys *ws = pipe->winsys; void *map; - nv40_emit_hw_state(nv40); + nv40_state_emit(nv40); map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { @@ -315,7 +321,7 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, struct nv40_context *nv40 = nv40_context(pipe); unsigned nr; - nv40_emit_hw_state(nv40); + nv40_state_emit(nv40); BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); OUT_RING (nvgl_primitive(mode)); @@ -352,8 +358,16 @@ nv40_draw_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); + boolean idxbuf; + + idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); + if (FORCE_SWTNL || !nv40_state_validate(nv40)) { + return nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + } + nv40_state_emit(nv40); - if (nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize)) { + if (idxbuf) { nv40_draw_elements_vbo(pipe, mode, start, count); } else { nv40_draw_elements_inline(pipe, indexBuffer, indexSize, diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 3d730c1a321..9f1ee575ce9 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -634,21 +634,29 @@ out_err: static boolean nv40_vertprog_validate(struct nv40_context *nv40) { - struct nv40_vertex_program *vp = nv40->vertprog; - struct pipe_buffer *constbuf = - nv40->constbuf[PIPE_SHADER_VERTEX]; struct nouveau_winsys *nvws = nv40->nvws; struct pipe_winsys *ws = nv40->pipe.winsys; + struct nv40_vertex_program *vp; + struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; int i; + if (nv40->render_mode == HW) { + vp = nv40->vertprog; + constbuf = nv40->constbuf[PIPE_SHADER_VERTEX]; + } else { + vp = nv40->swtnl.vertprog; + constbuf = NULL; + } + /* Translate TGSI shader into hw bytecode */ if (vp->translated) goto check_gpu_resources; + nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG; nv40_vertprog_translate(nv40, vp); if (!vp->translated) { - nv40->fallback |= NV40_FALLBACK_TNL; + nv40->fallback_swtnl |= NV40_NEW_VERTPROG; return FALSE; } -- cgit v1.2.3 From 49a687882a659bd03fd09ca7a7d592818914597a Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Sun, 16 Mar 2008 10:33:59 -0600 Subject: gallium: finish remaining prim types for sp_vbuf_draw_arrays() Not totally tested, but easily fixed if glitches are found. --- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 73 +++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index db0913cb2bc..e7d0cb2b870 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -207,6 +207,27 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) (struct vertex_header *) ((char *) vertex_buffer + (I) * vertex_size) switch (cvbr->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + prim.v[0] = VERTEX(i); + setup->point( setup, &prim ); + } + break; + case PIPE_PRIM_LINES: + assert(nr % 2 == 0); + for (i = 0; i < nr; i += 2) { + prim.v[0] = VERTEX(i); + prim.v[1] = VERTEX(i + 1); + setup->line( setup, &prim ); + } + break; + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i++) { + prim.v[0] = VERTEX(i - 1); + prim.v[1] = VERTEX(i); + setup->line( setup, &prim ); + } + break; case PIPE_PRIM_TRIANGLES: assert(nr % 3 == 0); for (i = 0; i < nr; i += 3) { @@ -217,6 +238,58 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) setup->tri( setup, &prim ); } break; + case PIPE_PRIM_TRIANGLE_STRIP: + assert(nr >= 3); + for (i = 2; i < nr; i++) { + prim.v[0] = VERTEX(i - 2); + prim.v[1] = VERTEX(i - 1); + prim.v[2] = VERTEX(i); + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; + case PIPE_PRIM_TRIANGLE_FAN: + assert(nr >= 3); + for (i = 2; i < nr; i++) { + prim.v[0] = VERTEX(0); + prim.v[1] = VERTEX(i - 1); + prim.v[2] = VERTEX(i); + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; + case PIPE_PRIM_QUADS: + assert(nr % 4 == 0); + for (i = 0; i < nr; i += 4) { + prim.v[0] = VERTEX(i + 0); + prim.v[1] = VERTEX(i + 1); + prim.v[2] = VERTEX(i + 2); + calc_det(&prim); + setup->tri( setup, &prim ); + + prim.v[0] = VERTEX(i + 0); + prim.v[1] = VERTEX(i + 2); + prim.v[2] = VERTEX(i + 3); + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; + case PIPE_PRIM_QUAD_STRIP: + assert(nr >= 4); + for (i = 2; i < nr; i += 2) { + prim.v[0] = VERTEX(i - 2); + prim.v[1] = VERTEX(i); + prim.v[2] = VERTEX(i + 1); + calc_det(&prim); + setup->tri( setup, &prim ); + + prim.v[0] = VERTEX(i - 2); + prim.v[1] = VERTEX(i + 1); + prim.v[2] = VERTEX(i - 1); + calc_det(&prim); + setup->tri( setup, &prim ); + } + break; case PIPE_PRIM_POLYGON: /* draw as tri fan */ for (i = 2; i < nr; i++) { -- cgit v1.2.3 From b2f01b0777f27a093849f299490b377ab8aab2fb Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Mon, 17 Mar 2008 03:14:11 +0100 Subject: nouveau: latest header. --- src/gallium/drivers/nouveau/nouveau_class.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 7392490df01..dc202086d2c 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -1784,6 +1784,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV10TCL_TX_OFFSET__SIZE 0x00000002 #define NV10TCL_TX_FORMAT(x) (0x00000220+((x)*4)) #define NV10TCL_TX_FORMAT__SIZE 0x00000002 +#define NV10TCL_TX_FORMAT_DMA0 (1 << 0) +#define NV10TCL_TX_FORMAT_DMA1 (1 << 1) #define NV10TCL_TX_FORMAT_CUBE_MAP (1 << 2) #define NV10TCL_TX_FORMAT_FORMAT_SHIFT 7 #define NV10TCL_TX_FORMAT_FORMAT_MASK 0x00000780 @@ -5360,12 +5362,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_2D_RECT_Y1 0x00000604 #define NV50_2D_RECT_X2 0x00000608 #define NV50_2D_RECT_Y2 0x0000060c -#define NV50_2D_BLIT_DST_X 0x000008b0 -#define NV50_2D_BLIT_DST_Y 0x000008b4 -#define NV50_2D_BLIT_DST_W 0x000008b8 -#define NV50_2D_BLIT_DST_H 0x000008bc -#define NV50_2D_BLIT_SRC_X 0x000008d4 -#define NV50_2D_BLIT_SRC_Y 0x000008dc #define NV50_2D_SIFC_UNK0800 0x00000800 #define NV50_2D_SIFC_FORMAT 0x00000804 #define NV50_2D_SIFC_FORMAT_32BPP 0x000000cf @@ -5384,6 +5380,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50_2D_SIFC_UNK0858 0x00000858 #define NV50_2D_SIFC_DST_Y 0x0000085c #define NV50_2D_SIFC_DATA 0x00000860 +#define NV50_2D_BLIT_DST_X 0x000008b0 +#define NV50_2D_BLIT_DST_Y 0x000008b4 +#define NV50_2D_BLIT_DST_W 0x000008b8 +#define NV50_2D_BLIT_DST_H 0x000008bc +#define NV50_2D_BLIT_SRC_X 0x000008d4 +#define NV50_2D_BLIT_SRC_Y 0x000008dc #define NV50_MEMORY_TO_MEMORY_FORMAT 0x00005039 -- cgit v1.2.3 From 5c15b1276ed56064382197d73d9d357201e5f71f Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Mon, 17 Mar 2008 03:32:07 +0100 Subject: nv10: fixes. --- src/gallium/drivers/nv10/nv10_context.h | 3 ++- src/gallium/drivers/nv10/nv10_state_emit.c | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 8269d6121f0..386138556e5 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -47,11 +47,12 @@ struct nv10_context { uint32_t rt_enable; struct pipe_buffer *rt[4]; struct pipe_buffer *zeta; + uint32_t lma_offset; struct { struct pipe_buffer *buffer; uint32_t format; - } tex[16]; + } tex[2]; unsigned vb_enable; struct { diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 1d104e2f913..8bf0bd2d683 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -49,21 +49,21 @@ nv10_emit_hw_state(struct nv10_context *nv10) OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ /* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_OFFSET, 1); - OUT_RELOCl(nv10->zeta+...);*/ + OUT_RELOCl(nv10->zeta+lma_offset);*/ } /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv10->fp_samplers & (1 << i))) continue; - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 2); + BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1); OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - // XXX -/* OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, + BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1); + OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, - NV10TCL_TX_FORMAT_DMA1);*/ + NV10TCL_TX_FORMAT_DMA1); } } -- cgit v1.2.3 From 767cd2ed6e97ae09526b15728495f361d5e22cb2 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 17 Mar 2008 13:49:03 +1100 Subject: nv40: workaround main swtnl breakage Not sure where the real bug is here yet, but for now this gives us correct rendering in far more cases than previously. --- src/gallium/drivers/nv40/nv40_draw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index ce0e0bc6f23..eb4f2395c48 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -335,7 +335,7 @@ nv40_state_vtxfmt_validate(struct nv40_context *nv40) emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); } - emit_attrib(nv40, 0, EMIT_4F, TGSI_SEMANTIC_POSITION, 0); + emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); return FALSE; } -- cgit v1.2.3 From 9425a702bef7d3f601e9ddc2b801f00a3d52dbb8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Mon, 17 Mar 2008 10:10:45 +0000 Subject: gallium: improvements, or extensions at least, to the passthrough path Passthrough is actually more tricky than you'd think... --- src/gallium/auxiliary/draw/draw_passthrough.c | 349 +++++++++++++++++------- src/gallium/auxiliary/draw/draw_vbuf.h | 6 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 12 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 13 +- 4 files changed, 275 insertions(+), 105 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c index d16f056191c..fdec6a591b8 100644 --- a/src/gallium/auxiliary/draw/draw_passthrough.c +++ b/src/gallium/auxiliary/draw/draw_passthrough.c @@ -85,14 +85,52 @@ fetch_store_general( struct draw_context *draw, const unsigned *pitch = draw->vertex_fetch.pitch; const ubyte **src = draw->vertex_fetch.src_ptr; - for (i = start; i < count; i++) { + for (i = start; i < start + count; i++) { for (j = 0; j < nr_attrs; j++) { + /* vinfo->src_index is the output of the vertex shader + * matching this hw-vertex component. + * + * In passthrough, we require a 1:1 mapping between vertex + * shader outputs and inputs, which in turn correspond to + * vertex elements in the state. So, this is the vertex + * element we're interested in... + */ const uint jj = vinfo->src_index[j]; const enum pipe_format srcFormat = draw->vertex_element[jj].src_format; const ubyte *from = src[jj] + i * pitch[jj]; float attrib[4]; + /* Except... When we're not. Two cases EMIT_HEADER & + * EMIT_1F_PSIZE don't consume an input. Should have some + * method for indicating this, or change the logic here + * somewhat so it doesn't matter. + * + * Just hack this up now, do something better about it later. + */ + if (vinfo->emit[j] == EMIT_HEADER) { + memset(out, 0, sizeof(struct vertex_header)); + out += sizeof(struct vertex_header) / 4; + continue; + } + else if (vinfo->emit[j] == EMIT_1F_PSIZE) { + out[0] = 1.0; /* xxx */ + out += 1; + continue; + } + + + /* The normal fetch/emit code: + */ switch (srcFormat) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + ubyte *ub = (ubyte *) from; + attrib[0] = UBYTE_TO_FLOAT(ub[0]); + attrib[1] = UBYTE_TO_FLOAT(ub[1]); + attrib[2] = UBYTE_TO_FLOAT(ub[2]); + attrib[3] = UBYTE_TO_FLOAT(ub[3]); + } + break; case PIPE_FORMAT_R32G32B32A32_FLOAT: { float *f = (float *) from; @@ -130,14 +168,21 @@ fetch_store_general( struct draw_context *draw, } break; default: - abort(); + assert(0); } - /* XXX this will probably only work for softpipe */ + debug_printf("attrib %d: %f %f %f %f\n", j, + attrib[0], attrib[1], attrib[2], attrib[3]); + switch (vinfo->emit[j]) { - case EMIT_HEADER: - memset(out, 0, sizeof(struct vertex_header)); - out += sizeof(struct vertex_header) / 4; + case EMIT_1F: + out[0] = attrib[0]; + out += 1; + break; + case EMIT_2F: + out[0] = attrib[0]; + out[1] = attrib[1]; + out += 2; break; case EMIT_4F: out[0] = attrib[0]; @@ -147,64 +192,15 @@ fetch_store_general( struct draw_context *draw, out += 4; break; default: - abort(); + assert(0); } - } + debug_printf("\n"); } } -/* Example of a fetch/emit passthrough shader which could be - * generated when bypass_clipping is enabled on a passthrough vertex - * shader. - */ -static void fetch_xyz_rgb_st( struct draw_context *draw, - float *out, - unsigned start, - unsigned count ) -{ - const unsigned *pitch = draw->vertex_fetch.pitch; - const ubyte **src = draw->vertex_fetch.src_ptr; - unsigned i; - - const ubyte *xyzw = src[0] + start * pitch[0]; - const ubyte *rgba = src[1] + start * pitch[1]; - const ubyte *st = src[2] + start * pitch[2]; - - /* loop over vertex attributes (vertex shader inputs) - */ - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyzw; xyzw += pitch[0]; - /* decode input, encode output. Assume both are float[4] */ - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - } - - { - const float *in = (const float *)rgba; rgba += pitch[1]; - /* decode input, encode output. Assume both are float[4] */ - out[4] = in[0]; - out[5] = in[1]; - out[6] = in[2]; - out[7] = in[3]; - } - - { - const float *in = (const float *)st; st += pitch[2]; - /* decode input, encode output. Assume both are float[2] */ - out[8] = in[0]; - out[9] = in[1]; - } - - out += 10; - } -} - static boolean update_shader( struct draw_context *draw ) { const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); @@ -229,70 +225,166 @@ static boolean update_shader( struct draw_context *draw ) draw->pt.hw_vertex_size = vinfo->size * 4; - /* Just trying to figure out how this would work: - */ - if (draw->rasterizer->bypass_vs || - (nr_attrs == 3 && 0 /* some other tests */)) - { -#if 0 - draw->vertex_fetch.pt_fetch = fetch_xyz_rgb_st; -#else - draw->vertex_fetch.pt_fetch = fetch_store_general; -#endif - /*assert(vinfo->size == 10);*/ + draw->vertex_fetch.pt_fetch = fetch_store_general; + return TRUE; +} + + + + +static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + *first = 1; + *incr = 1; + return TRUE; + case PIPE_PRIM_LINES: + *first = 2; + *incr = 2; return TRUE; + case PIPE_PRIM_LINE_STRIP: + *first = 2; + *incr = 1; + return TRUE; + case PIPE_PRIM_TRIANGLES: + *first = 3; + *incr = 3; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + *first = 3; + *incr = 1; + return TRUE; + case PIPE_PRIM_QUADS: + *first = 4; + *incr = 4; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + *first = 4; + *incr = 2; + return TRUE; + default: + *first = 0; + *incr = 1; /* set to one so that count % incr works */ + return FALSE; } - - return FALSE; } static boolean set_prim( struct draw_context *draw, - unsigned prim ) + unsigned prim, + unsigned count ) { assert(!draw->user.elts); - draw->pt.prim = prim; - switch (prim) { case PIPE_PRIM_LINE_LOOP: + if (count > 1024) + return FALSE; + return draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP ); + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + if (count > 1024) + return FALSE; + return draw->render->set_primitive( draw->render, prim ); + case PIPE_PRIM_QUADS: case PIPE_PRIM_QUAD_STRIP: - return FALSE; + return draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES ); + default: - draw->render->set_primitive( draw->render, prim ); - return TRUE; + return draw->render->set_primitive( draw->render, prim ); + break; } + + return TRUE; } -boolean -draw_passthrough_arrays(struct draw_context *draw, - unsigned prim, - unsigned start, - unsigned count) + +#define INDEX(i) (start + (i)) +static void pt_draw_arrays( struct draw_context *draw, + unsigned start, + unsigned length ) { - float *hw_verts; + ushort *tmp = NULL; + unsigned i, j; - if (draw_need_pipeline(draw)) - return FALSE; + switch (draw->pt.prim) { + case PIPE_PRIM_LINE_LOOP: + tmp = MALLOC( sizeof(ushort) * (length + 1) ); - if (!set_prim(draw, prim)) - return FALSE; + for (i = 0; i < length; i++) + tmp[i] = INDEX(i); + tmp[length] = 0; - if (!update_shader(draw)) - return FALSE; + draw->render->draw( draw->render, + tmp, + length+1 ); + break; - hw_verts = draw->render->allocate_vertices( draw->render, - draw->pt.hw_vertex_size, - count ); + + case PIPE_PRIM_QUAD_STRIP: + tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) ); + + for (j = i = 0; i + 3 < length; i += 2, j += 6) { + tmp[j+0] = INDEX(i+0); + tmp[j+1] = INDEX(i+1); + tmp[j+2] = INDEX(i+3); + + tmp[j+3] = INDEX(i+2); + tmp[j+4] = INDEX(i+0); + tmp[j+5] = INDEX(i+3); + } + + if (j) + draw->render->draw( draw->render, tmp, j ); + break; + + case PIPE_PRIM_QUADS: + tmp = MALLOC( sizeof(int) * (length / 4 * 6) ); + + for (j = i = 0; i + 3 < length; i += 4, j += 6) { + tmp[j+0] = INDEX(i+0); + tmp[j+1] = INDEX(i+1); + tmp[j+2] = INDEX(i+3); + + tmp[j+3] = INDEX(i+1); + tmp[j+4] = INDEX(i+2); + tmp[j+5] = INDEX(i+3); + } + + if (j) + draw->render->draw( draw->render, tmp, j ); + break; + + default: + draw->render->draw_arrays( draw->render, + start, + length ); + break; + } + + if (tmp) + FREE(tmp); +} + + + +static boolean do_draw( struct draw_context *draw, + unsigned start, unsigned count ) +{ + float *hw_verts = + draw->render->allocate_vertices( draw->render, + (ushort)draw->pt.hw_vertex_size, + (ushort)count ); if (!hw_verts) return FALSE; - /* Single routine to fetch vertices, run shader and emit HW verts. - * Clipping and viewport transformation are done on hardware. + /* Single routine to fetch vertices and emit HW verts. */ draw->vertex_fetch.pt_fetch( draw, hw_verts, @@ -301,9 +393,9 @@ draw_passthrough_arrays(struct draw_context *draw, /* Draw arrays path to avoid re-emitting index list again and * again. */ - draw->render->draw_arrays( draw->render, - start, - count ); + pt_draw_arrays( draw, + 0, + count ); draw->render->release_vertices( draw->render, @@ -314,3 +406,68 @@ draw_passthrough_arrays(struct draw_context *draw, return TRUE; } + +boolean +draw_passthrough_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count) +{ + unsigned i = 0; + unsigned first, incr; + + //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count); + + split_prim_inplace(prim, &first, &incr); + + count -= (count - first) % incr; + + debug_printf("%s %d %d %d\n", __FUNCTION__, prim, start, count); + + if (draw_need_pipeline(draw)) + return FALSE; + + debug_printf("%s AAA\n", __FUNCTION__); + + if (!set_prim(draw, prim, count)) + return FALSE; + + /* XXX: need a single value that reflects the most recent call to + * driver->set_primitive: + */ + draw->pt.prim = prim; + + debug_printf("%s BBB\n", __FUNCTION__); + + if (!update_shader(draw)) + return FALSE; + + debug_printf("%s CCC\n", __FUNCTION__); + + /* Chop this up into bite-sized pieces that a driver should be able + * to devour -- problem is we don't have a quick way to query the + * driver on the maximum size for this chunk in the current state. + */ + while (i + first <= count) { + int nr = MIN2( count - i, 1024 ); + + /* snap to prim boundary + */ + nr -= (nr - first) % incr; + + if (!do_draw( draw, start + i, nr )) { + assert(0); + return FALSE; + } + + /* increment allowing for repeated vertices + */ + i += nr - (first - incr); + } + + + debug_printf("%s DONE\n", __FUNCTION__); + return TRUE; +} + + diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 5e7de905c16..e90f37872a1 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -74,9 +74,11 @@ struct vbuf_render { /** * Notify the renderer of the current primitive when it changes. - * Prim is restricted to TRIANGLES, LINES and POINTS. + * Must succeed for TRIANGLES, LINES and POINTS. Other prims at + * the discretion of the driver, for the benefit of the passthrough + * path. */ - void (*set_primitive)( struct vbuf_render *, unsigned prim ); + boolean (*set_primitive)( struct vbuf_render *, unsigned prim ); /** * DrawElements, note indices are ushort: diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 9d5f609220a..eb64f51943b 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -116,7 +116,7 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, } -static void +static boolean i915_vbuf_render_set_primitive( struct vbuf_render *render, unsigned prim ) { @@ -125,15 +125,17 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, switch(prim) { case PIPE_PRIM_POINTS: i915_render->hwprim = PRIM3D_POINTLIST; - break; + return TRUE; case PIPE_PRIM_LINES: i915_render->hwprim = PRIM3D_LINELIST; - break; + return TRUE; case PIPE_PRIM_TRIANGLES: i915_render->hwprim = PRIM3D_TRILIST; - break; + return TRUE; default: - assert(0); + /* Actually, can handle a lot more just fine... Fixme. + */ + return FALSE; } } diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index e7d0cb2b870..d940718ed2b 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -101,11 +101,20 @@ sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, } -static void +static boolean sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - cvbr->prim = prim; + if (prim == PIPE_PRIM_TRIANGLES || + prim == PIPE_PRIM_LINES || + prim == PIPE_PRIM_POINTS) { + cvbr->prim = prim; + return TRUE; + } + else { + return FALSE; + } + } -- cgit v1.2.3 From 6b3269900101a4e4745f95028bfc0c7cfced12a8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 17 Mar 2008 23:05:46 +1100 Subject: nv40: a few more fp opcodes --- src/gallium/drivers/nv40/nv40_fragprog.c | 50 ++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 82dbcd3eef2..4fb28a01ea2 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -468,6 +468,34 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, case TGSI_OPCODE_COS: arith(fpc, sat, COS, dst, mask, src[0], none, none); break; + case TGSI_OPCODE_DDX: + if (mask & (MASK_Z | MASK_W)) { + tmp = temp(fpc); + arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, + swz(src[0], Z, W, Z, W), none, none); + arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + swz(tmp, X, Y, X, Y), none, none); + arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], + none, none); + arith(fpc, 0, MOV, dst, mask, tmp, none, none); + } else { + arith(fpc, sat, DDX, dst, mask, src[0], none, none); + } + break; + case TGSI_OPCODE_DDY: + if (mask & (MASK_Z | MASK_W)) { + tmp = temp(fpc); + arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, + swz(src[0], Z, W, Z, W), none, none); + arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + swz(tmp, X, Y, X, Y), none, none); + arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], + none, none); + arith(fpc, 0, MOV, dst, mask, tmp, none, none); + } else { + arith(fpc, sat, DDY, dst, mask, src[0], none, none); + } + break; case TGSI_OPCODE_DP3: arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); break; @@ -567,15 +595,33 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, swz(src[0], X, X, X, X), none, none); } break; - case TGSI_OPCODE_SIN: - arith(fpc, sat, SIN, dst, mask, src[0], none, none); + case TGSI_OPCODE_SEQ: + arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SFL: + arith(fpc, sat, SFL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SGE: arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); break; + case TGSI_OPCODE_SGT: + arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SIN: + arith(fpc, sat, SIN, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_SLE: + arith(fpc, sat, SLE, dst, mask, src[0], src[1], none); + break; case TGSI_OPCODE_SLT: arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); break; + case TGSI_OPCODE_SNE: + arith(fpc, sat, SNE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_STR: + arith(fpc, sat, STR, dst, mask, src[0], src[1], none); + break; case TGSI_OPCODE_SUB: arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); break; -- cgit v1.2.3 From a33da10b6c999a8ea348789ea13d2147f117a722 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Mon, 17 Mar 2008 18:13:18 +0100 Subject: nv30: only 2 render targets --- src/gallium/drivers/nv30/nv30_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index c63847a087c..21cffc6d274 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -48,7 +48,7 @@ struct nv30_context { unsigned vp_samplers; uint32_t rt_enable; - struct pipe_buffer *rt[4]; + struct pipe_buffer *rt[2]; struct pipe_buffer *zeta; struct { -- cgit v1.2.3 From 3394ba65b10a1ec01345c37b7888e18dcfdbe808 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Mon, 17 Mar 2008 19:03:38 +0100 Subject: nv30: another 2 rt, and set viewport tx origin, so we render at the proper place \o/ --- src/gallium/drivers/nv30/nv30_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 80dfd9c5c0a..b0055892aeb 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -565,7 +565,7 @@ nv30_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct nv30_context *nv30 = nv30_context(pipe); - struct pipe_surface *rt[4], *zeta = NULL; + struct pipe_surface *rt[2], *zeta = NULL; uint32_t rt_enable, rt_format, w = 0, h = 0; int i, colour_format = 0, zeta_format = 0; @@ -667,6 +667,8 @@ nv30_set_framebuffer_state(struct pipe_context *pipe, BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); OUT_RING (((w - 1) << 16) | 0); OUT_RING (((h - 1) << 16) | 0); + BEGIN_RING(rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1); + OUT_RING (0); } static void -- cgit v1.2.3 From 1936e4bdfd776f78f9fe44f77ce66066fd166360 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Mon, 17 Mar 2008 15:45:52 -0700 Subject: cell: Initial code-gen for alpha / stencil / depth testing Alpha test is currently broken because all per-fragment testing occurs before alpha is calculated. Stencil test is currently broken because the Z-clear code asserts if there is a stencil buffer. --- src/gallium/drivers/cell/common.h | 10 + src/gallium/drivers/cell/ppu/Makefile | 1 + src/gallium/drivers/cell/ppu/cell_context.h | 25 +- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 37 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 24 +- .../drivers/cell/ppu/cell_state_per_fragment.c | 1020 ++++++++++++++++++++ .../drivers/cell/ppu/cell_state_per_fragment.h | 35 + src/gallium/drivers/cell/spu/Makefile | 1 + src/gallium/drivers/cell/spu/spu_main.c | 25 +- src/gallium/drivers/cell/spu/spu_main.h | 16 +- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 191 ++++ src/gallium/drivers/cell/spu/spu_per_fragment_op.h | 32 + src/gallium/drivers/cell/spu/spu_render.c | 4 +- src/gallium/drivers/cell/spu/spu_tri.c | 23 +- src/gallium/drivers/cell/spu/spu_ztest.h | 135 --- 15 files changed, 1409 insertions(+), 170 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_state_per_fragment.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_per_fragment.h create mode 100644 src/gallium/drivers/cell/spu/spu_per_fragment_op.c create mode 100644 src/gallium/drivers/cell/spu/spu_per_fragment_op.h delete mode 100644 src/gallium/drivers/cell/spu/spu_ztest.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 9a4004535ea..fe93fd8e1a2 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -104,6 +104,16 @@ +/** + */ +struct cell_command_depth_stencil_alpha_test { + uint64_t base; /**< Effective address of code start. */ + unsigned size; /**< Size in bytes of test code. */ + unsigned read_depth; /**< Flag: should depth be read? */ + unsigned read_stencil; /**< Flag: should stencil be read? */ +}; + + /** * Tell SPUs about the framebuffer size, location */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index d38fa6ce073..0389a9554cf 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -27,6 +27,7 @@ SOURCES = \ cell_flush.c \ cell_state_derived.c \ cell_state_emit.c \ + cell_state_per_fragment.c \ cell_state_shader.c \ cell_pipe_state.c \ cell_screen.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index b221424323f..9e79db0acef 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -57,16 +57,37 @@ struct cell_fragment_shader_state }; +struct cell_blend_state { + struct pipe_blend_state base; + + /** + * Generated code to perform alpha blending + */ + struct spe_function code; +}; + + +struct cell_depth_stencil_alpha_state { + struct pipe_depth_stencil_alpha_state base; + + /** + * Generated code to perform alpha, stencil, and depth testing on the SPE + */ + struct spe_function code; + +}; + + struct cell_context { struct pipe_context pipe; struct cell_winsys *winsys; - const struct pipe_blend_state *blend; + const struct cell_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; uint num_samplers; - const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct cell_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; const struct cell_vertex_shader_state *vs; const struct cell_fragment_shader_state *fs; diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 025ed3bbbfe..66ede99d13b 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -36,6 +36,7 @@ #include "cell_context.h" #include "cell_state.h" #include "cell_texture.h" +#include "cell_state_per_fragment.h" @@ -43,7 +44,12 @@ static void * cell_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { - return mem_dup(blend, sizeof(*blend)); + struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state)); + + (void) memcpy(cb, blend, sizeof(*blend)); + cb->code.store = NULL; + + return cb; } @@ -54,7 +60,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend) draw_flush(cell->draw); - cell->blend = (const struct pipe_blend_state *)blend; + cell->blend = (const struct cell_blend_state *)blend; cell->dirty |= CELL_NEW_BLEND; } @@ -63,7 +69,10 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend) static void cell_delete_blend_state(struct pipe_context *pipe, void *blend) { - FREE(blend); + struct cell_blend_state *cb = (struct cell_blend_state *) blend; + + spe_release_func(& cb->code); + FREE(cb); } @@ -87,7 +96,13 @@ static void * cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *depth_stencil) { - return mem_dup(depth_stencil, sizeof(*depth_stencil)); + struct cell_depth_stencil_alpha_state *cdsa = + MALLOC(sizeof(struct cell_depth_stencil_alpha_state)); + + (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil)); + cdsa->code.store = NULL; + + return cdsa; } @@ -96,12 +111,16 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth_stencil) { struct cell_context *cell = cell_context(pipe); + struct cell_depth_stencil_alpha_state *cdsa = + (struct cell_depth_stencil_alpha_state *) depth_stencil; draw_flush(cell->draw); - cell->depth_stencil - = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; + if (cdsa->code.store == NULL) { + cell_generate_depth_stencil_test(cdsa); + } + cell->depth_stencil = cdsa; cell->dirty |= CELL_NEW_DEPTH_STENCIL; } @@ -109,7 +128,11 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, static void cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) { - FREE(depth); + struct cell_depth_stencil_alpha_state *cdsa = + (struct cell_depth_stencil_alpha_state *) depth; + + spe_release_func(& cdsa->code); + FREE(cdsa); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 670eb26bdd0..c8d5fdf709b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -71,9 +71,27 @@ cell_emit_state(struct cell_context *cell) } if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { - emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, - cell->depth_stencil, - sizeof(struct pipe_depth_stencil_alpha_state)); + struct cell_command_depth_stencil_alpha_test dsat; + + + dsat.base = (intptr_t) cell->depth_stencil->code.store; + dsat.size = (char *) cell->depth_stencil->code.csr + - (char *) cell->depth_stencil->code.store; + dsat.read_depth = TRUE; + dsat.read_stencil = FALSE; + + { + uint32_t *p = cell->depth_stencil->code.store; + + printf("\t.text\n"); + for (/* empty */; p < cell->depth_stencil->code.csr; p++) { + printf("\t.long\t0x%04x\n", *p); + } + fflush(stdout); + } + + emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, + sizeof(dsat)); } if (cell->dirty & CELL_NEW_SAMPLER) { diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c new file mode 100644 index 00000000000..60b64438d80 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -0,0 +1,1020 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Generate code to perform all per-fragment operations. + * + * Code generated by these functions perform both alpha, depth, and stencil + * testing as well as alpha blending. + * + * \note + * Occlusion query is not supported, but this is the right place to add that + * support. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "cell_context.h" + +#include "rtasm/rtasm_ppc_spe.h" + + +/** + * Generate code to perform alpha testing. + * + * The code generated by this function uses the register specificed by + * \c mask as both an input and an output. + * + * \param dsa Current alpha-test state + * \param f Function to which code should be appended + * \param mask Index of register containing active fragment mask + * \param alphas Index of register containing per-fragment alpha values + * + * \note Emits a maximum of 6 instructions. + */ +static void +emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask, int alphas) +{ + /* If the alpha function is either NEVER or ALWAYS, there is no need to + * load the reference value into a register. ALWAYS is a fairly common + * case, and this optimization saves 2 instructions. + */ + if (dsa->alpha.enabled + && (dsa->alpha.func != PIPE_FUNC_NEVER) + && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { + int ref = spe_allocate_available_register(f); + int tmp_a = spe_allocate_available_register(f); + int tmp_b = spe_allocate_available_register(f); + union { + float f; + unsigned u; + } ref_val; + boolean complement = FALSE; + + ref_val.f = dsa->alpha.ref; + + spe_il(f, ref, ref_val.u & 0x0000ffff); + spe_ilh(f, ref, ref_val.u >> 16); + + switch (dsa->alpha.func) { + case PIPE_FUNC_NOTEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_EQUAL: + spe_fceq(f, tmp_a, ref, alphas); + break; + + case PIPE_FUNC_LEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_GREATER: + spe_fcgt(f, tmp_a, ref, alphas); + break; + + case PIPE_FUNC_LESS: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_GEQUAL: + spe_fcgt(f, tmp_a, ref, alphas); + spe_fceq(f, tmp_b, ref, alphas); + spe_or(f, tmp_a, tmp_b, tmp_a); + break; + + case PIPE_FUNC_ALWAYS: + case PIPE_FUNC_NEVER: + default: + assert(0); + break; + } + + if (complement) { + spe_andc(f, mask, mask, tmp_a); + } else { + spe_and(f, mask, mask, tmp_a); + } + + spe_release_register(f, ref); + spe_release_register(f, tmp_a); + spe_release_register(f, tmp_b); + } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { + spe_il(f, mask, 0); + } +} + + +/** + * \param dsa Current depth-test state + * \param f Function to which code should be appended + * \param m Mask of allocated / free SPE registers + * \param mask Index of register to contain depth-pass mask + * \param stored Index of register containing values from depth buffer + * \param calculated Index of register containing per-fragment depth values + * + * \return + * If the calculated depth comparison mask is the actual mask, \c FALSE is + * returned. If the calculated depth comparison mask is the compliment of + * the actual mask, \c TRUE is returned. + * + * \note Emits a maximum of 3 instructions. + */ +static boolean +emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask, int stored, int calculated) +{ + unsigned func = (dsa->depth.enabled) + ? dsa->depth.func : PIPE_FUNC_ALWAYS; + int tmp = spe_allocate_available_register(f); + boolean compliment = FALSE; + + switch (func) { + case PIPE_FUNC_NEVER: + spe_il(f, mask, 0); + break; + + case PIPE_FUNC_NOTEQUAL: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_EQUAL: + spe_ceq(f, mask, calculated, stored); + break; + + case PIPE_FUNC_LEQUAL: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GREATER: + spe_clgt(f, mask, calculated, stored); + break; + + case PIPE_FUNC_LESS: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GEQUAL: + spe_clgt(f, mask, calculated, stored); + spe_ceq(f, tmp, calculated, stored); + spe_or(f, mask, mask, tmp); + break; + + case PIPE_FUNC_ALWAYS: + spe_il(f, mask, ~0); + break; + + default: + assert(0); + break; + } + + spe_release_register(f, tmp); + return compliment; +} + + +/** + * \note Emits a maximum of 5 instructions. + */ +static void +emit_stencil_op(struct spe_function *f, + int out, int in, int mask, unsigned op, unsigned ref) +{ + const int clamp = spe_allocate_available_register(f); + const int tmp = spe_allocate_available_register(f); + + switch(op) { + case PIPE_STENCIL_OP_KEEP: + assert(0); + case PIPE_STENCIL_OP_ZERO: + spe_il(f, out, 0); + break; + case PIPE_STENCIL_OP_REPLACE: + spe_il(f, out, ref); + break; + case PIPE_STENCIL_OP_INCR: + spe_il(f, clamp, 0x0ff); + spe_ai(f, out, in, 1); + spe_cgti(f, tmp, out, clamp); + spe_selb(f, out, out, clamp, tmp); + break; + case PIPE_STENCIL_OP_DECR: + spe_il(f, clamp, 0); + spe_ai(f, out, in, -1); + spe_cgti(f, tmp, out, clamp); + spe_selb(f, out, clamp, out, tmp); + break; + case PIPE_STENCIL_OP_INCR_WRAP: + spe_ai(f, out, in, 1); + break; + case PIPE_STENCIL_OP_DECR_WRAP: + spe_ai(f, out, in, -1); + break; + case PIPE_STENCIL_OP_INVERT: + spe_nor(f, out, in, in); + break; + default: + assert(0); + } + + spe_release_register(f, tmp); + spe_release_register(f, clamp); + + spe_selb(f, out, in, out, mask); +} + + +/** + * \param dsa Depth / stencil test state + * \param face 0 for front face, 1 for back face + * \param f Function to append instructions to + * \param reg_mask Mask of allocated registers + * \param mask Register containing mask of fragments passing the + * alpha test + * \param depth_mask Register containing mask of fragments passing the + * depth test + * \param depth_compliment Is \c depth_mask the compliment of the actual mask? + * \param stencil Register containing values from stencil buffer + * \param depth_pass Register to store mask of fragments passing stencil test + * and depth test + * + * \note + * Emits a maximum of 10 + (3 * 5) = 25 instructions. + */ +static int +emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, + unsigned face, + struct spe_function *f, + int mask, + int depth_mask, + boolean depth_complement, + int stencil, + int depth_pass) +{ + int stencil_fail = spe_allocate_available_register(f); + int depth_fail = spe_allocate_available_register(f); + int stencil_mask = spe_allocate_available_register(f); + int stencil_pass = spe_allocate_available_register(f); + int face_stencil = spe_allocate_available_register(f); + int stencil_src = stencil; + const unsigned ref = (dsa->stencil[face].ref_value + & dsa->stencil[face].value_mask); + boolean complement = FALSE; + int stored = spe_allocate_available_register(f); + int tmp = spe_allocate_available_register(f); + + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) + && (dsa->stencil[face].value_mask != 0x0ff)) { + spe_andi(f, stored, stencil, dsa->stencil[face].value_mask); + } + + + switch (dsa->stencil[face].func) { + case PIPE_FUNC_NEVER: + spe_il(f, stencil_mask, 0); + break; + + case PIPE_FUNC_NOTEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_EQUAL: + spe_ceqi(f, stencil_mask, stored, ref); + break; + + case PIPE_FUNC_LEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GREATER: + spe_clgti(f, stencil_mask, stored, ref); + break; + + case PIPE_FUNC_LESS: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GEQUAL: + spe_clgti(f, stencil_mask, stored, ref); + spe_ceqi(f, tmp, stored, ref); + spe_or(f, stencil_mask, stencil_mask, tmp); + break; + + case PIPE_FUNC_ALWAYS: + /* See comment below. */ + break; + + default: + assert(0); + break; + } + + spe_release_register(f, stored); + spe_release_register(f, tmp); + + + /* ALWAYS is a very common stencil-test, so some effort is applied to + * optimize that case. The stencil-pass mask is the same as the input + * fragment mask. This makes the stencil-test (above) a no-op, and the + * input fragment mask can be "renamed" the stencil-pass mask. + */ + if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { + spe_release_register(f, stencil_pass); + stencil_pass = mask; + } else { + if (complement) { + spe_andc(f, stencil_pass, mask, stencil_mask); + } else { + spe_and(f, stencil_pass, mask, stencil_mask); + } + } + + if (depth_complement) { + spe_andc(f, depth_pass, stencil_pass, depth_mask); + } else { + spe_and(f, depth_pass, stencil_pass, depth_mask); + } + + + /* Conditionally emit code to update the stencil value under various + * condititons. Note that there is no need to generate code under the + * following circumstances: + * + * - Stencil write mask is zero. + * - For stencil-fail if the stencil test is ALWAYS + * - For depth-fail if the stencil test is NEVER + * - For depth-pass if the stencil test is NEVER + * - Any of the 3 conditions if the operation is KEEP + */ + if (dsa->stencil[face].write_mask != 0) { + if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) + && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { + if (complement) { + spe_and(f, stencil_fail, mask, stencil_mask); + } else { + spe_andc(f, stencil_fail, mask, stencil_mask); + } + + emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, + dsa->stencil[face].fail_op, + dsa->stencil[face].ref_value); + + stencil_src = face_stencil; + } + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { + if (depth_complement) { + spe_and(f, depth_fail, stencil_pass, depth_mask); + } else { + spe_andc(f, depth_fail, stencil_pass, depth_mask); + } + + emit_stencil_op(f, face_stencil, stencil_src, depth_fail, + dsa->stencil[face].zfail_op, + dsa->stencil[face].ref_value); + stencil_src = face_stencil; + } + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { + emit_stencil_op(f, face_stencil, stencil_src, depth_pass, + dsa->stencil[face].zpass_op, + dsa->stencil[face].ref_value); + stencil_src = face_stencil; + } + } + + spe_release_register(f, stencil_fail); + spe_release_register(f, depth_fail); + spe_release_register(f, stencil_mask); + if (stencil_pass != mask) { + spe_release_register(f, stencil_pass); + } + + /* If all of the stencil operations were KEEP or the stencil write mask was + * zero, "stencil_src" will still be set to "stencil". In this case + * release the "face_stencil" register. Otherwise apply the stencil write + * mask to select bits from the calculated stencil value and the previous + * stencil value. + */ + if (stencil_src == stencil) { + spe_release_register(f, face_stencil); + } else if (dsa->stencil[face].write_mask != 0x0ff) { + int tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, dsa->stencil[face].write_mask); + spe_selb(f, stencil_src, stencil, stencil_src, tmp); + + spe_release_register(f, tmp); + } + + return stencil_src; +} + + +void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) +{ + struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; + struct spe_function *const f = &cdsa->code; + + /* This code generates a maximum of 6 (alpha test) + 3 (depth test) + * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round + * up to 64 to make it a happy power-of-two. + */ + spe_init_func(f, 4 * 64); + + + /* Allocate registers for the function's input parameters. Cleverly (and + * clever code is usually dangerous, but I couldn't resist) the generated + * function returns a structure. Returned structures start with register + * 3, and the structure fields are ordered to match up exactly with the + * input parameters. + */ + int mask = spe_allocate_register(f, 3); + int depth = spe_allocate_register(f, 4); + int stencil = spe_allocate_register(f, 5); + int zvals = spe_allocate_register(f, 6); + int frag_a = spe_allocate_register(f, 7); + int facing = spe_allocate_register(f, 8); + + int depth_mask = spe_allocate_available_register(f); + + boolean depth_complement; + + + emit_alpha_test(dsa, f, mask, frag_a); + + depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); + + if (dsa->stencil[0].enabled) { + const int front_depth_pass = spe_allocate_available_register(f); + int front_stencil = emit_stencil_test(dsa, 0, f, mask, + depth_mask, depth_complement, + stencil, front_depth_pass); + + if (dsa->stencil[1].enabled) { + const int back_depth_pass = spe_allocate_available_register(f); + int back_stencil = emit_stencil_test(dsa, 1, f, mask, + depth_mask, depth_complement, + stencil, back_depth_pass); + + /* If the front facing stencil value and the back facing stencil + * value are stored in the same register, there is no need to select + * a value based on the facing. This can happen if the stencil value + * was not modified due to the write masks being zero, the stencil + * operations being KEEP, etc. + */ + if (front_stencil != back_stencil) { + spe_selb(f, stencil, back_stencil, front_stencil, facing); + } + + if (back_stencil != stencil) { + spe_release_register(f, back_stencil); + } + + if (front_stencil != stencil) { + spe_release_register(f, front_stencil); + } + + spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); + + spe_release_register(f, back_depth_pass); + } else { + if (front_stencil != stencil) { + spe_or(f, stencil, front_stencil, front_stencil); + spe_release_register(f, front_stencil); + } + } + + spe_release_register(f, front_depth_pass); + } else if (dsa->depth.enabled) { + if (depth_complement) { + spe_andc(f, mask, mask, depth_mask); + } else { + spe_and(f, mask, mask, depth_mask); + } + } + + if (dsa->depth.writemask) { + spe_selb(f, depth, depth, zvals, mask); + } + + spe_bi(f, 0, 0, 0); +} + + +/** + * \note Emits a maximum of 3 instructions + */ +static int +emit_alpha_factor_calculation(struct spe_function *f, + unsigned factor, float const_alpha, + int src_alpha, int dst_alpha) +{ + union { + float f; + unsigned u; + } alpha; + int factor_reg; + int tmp; + + + alpha.f = const_alpha; + + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + factor_reg = spe_allocate_available_register(f); + + spe_or(f, factor_reg, src_alpha, src_alpha); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + factor_reg = dst_alpha; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + const_alpha = 1.0 - const_alpha; + /* FALLTHROUGH */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor_reg = spe_allocate_available_register(f); + + spe_il(f, factor_reg, alpha.u & 0x0ffff); + spe_ilh(f, factor_reg, alpha.u >> 16); + break; + + case PIPE_BLENDFACTOR_ZERO: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + tmp = spe_allocate_available_register(f); + factor_reg = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, src_alpha); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + tmp = spe_allocate_available_register(f); + factor_reg = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, dst_alpha); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + assert(0); + factor_reg = -1; + break; + } + + return factor_reg; +} + + +/** + * \note Emits a maximum of 5 instructions + */ +static void +emit_color_factor_calculation(struct spe_function *f, + unsigned sF, unsigned mask, + const struct pipe_blend_color *blend_color, + const int *src, + const int *dst, + int *factor) +{ + union { + float f[4]; + unsigned u[4]; + } color; + int tmp; + unsigned i; + + + color.f[0] = blend_color->color[0]; + color.f[1] = blend_color->color[1]; + color.f[2] = blend_color->color[2]; + color.f[3] = blend_color->color[3]; + + factor[0] = -1; + factor[1] = -1; + factor[2] = -1; + factor[3] = -1; + + switch (sF) { + case PIPE_BLENDFACTOR_ONE: + break; + + case PIPE_BLENDFACTOR_SRC_COLOR: + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_or(f, factor[i], src[i], src[i]); + } + } + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_or(f, factor[0], src[3], src[3]); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + factor[0] = dst[3]; + factor[1] = dst[3]; + factor[2] = dst[3]; + break; + + case PIPE_BLENDFACTOR_DST_COLOR: + factor[0] = dst[0]; + factor[1] = dst[1]; + factor[2] = dst[2]; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + /* Alpha saturate means min(As, 1-Ad). + */ + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, tmp, tmp, dst[3]); + spe_fcgt(f, factor[0], tmp, src[3]); + spe_selb(f, factor[0], src[3], tmp, factor[0]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + color.f[0] = 1.0 - color.f[0]; + color.f[1] = 1.0 - color.f[1]; + color.f[2] = 1.0 - color.f[2]; + /* FALLTHROUGH */ + case PIPE_BLENDFACTOR_CONST_COLOR: + for (i = 0; i < 3; i++) { + factor[i] = spe_allocate_available_register(f); + + spe_il(f, factor[i], color.u[i] & 0x0ffff); + spe_ilh(f, factor[i], color.u[i] >> 16); + } + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + color.f[3] = 1.0 - color.f[3]; + /* FALLTHROUGH */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, factor[0], color.u[3] & 0x0ffff); + spe_ilh(f, factor[0], color.u[3] >> 16); + break; + + case PIPE_BLENDFACTOR_ZERO: + break; + + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_fs(f, factor[i], tmp, src[i]); + } + } + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, src[3]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, dst[3]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_COLOR: + tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_fs(f, factor[i], tmp, dst[i]); + } + } + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + assert(0); + } +} + + +static void +emit_blend_calculation(struct spe_function *f, + unsigned func, unsigned sF, unsigned dF, + int src, int src_factor, int dst, int dst_factor) +{ + int tmp = spe_allocate_available_register(f); + + switch (func) { + case PIPE_BLEND_ADD: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + /* Do nothing. */ + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fa(f, src, src, dst); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_or(f, src, dst, dst); + } + } else { + spe_fm(f, tmp, dst, dst_factor); + spe_fma(f, src, src, src_factor, tmp); + } + break; + + case PIPE_BLEND_SUBTRACT: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + /* Do nothing. */ + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fs(f, src, src, dst); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_il(f, tmp, 0); + spe_fs(f, src, tmp, dst); + } + } else { + spe_fm(f, tmp, dst, dst_factor); + spe_fms(f, src, src, src_factor, tmp); + } + break; + + case PIPE_BLEND_REVERSE_SUBTRACT: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, tmp, 0); + spe_fs(f, src, tmp, src); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fs(f, src, dst, src); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_or(f, src, dst, dst); + } + } else { + spe_fm(f, tmp, src, src_factor); + spe_fms(f, src, src, dst_factor, tmp); + } + break; + + case PIPE_BLEND_MIN: + spe_cgt(f, tmp, src, dst); + spe_selb(f, src, dst, src, tmp); + break; + + case PIPE_BLEND_MAX: + spe_cgt(f, tmp, src, dst); + spe_selb(f, src, src, dst, tmp); + break; + + default: + assert(0); + } + + spe_release_register(f, tmp); +} + + +/** + * Generate code to perform alpha blending on the SPE + */ +void +cell_generate_alpha_blend(struct cell_blend_state *cb, + const struct pipe_blend_color *blend_color) +{ + struct pipe_blend_state *const b = &cb->base; + struct spe_function *const f = &cb->code; + + /* This code generates a maximum of 3 (source alpha factor) + * + 3 (destination alpha factor) + (3 * 5) (source color factor) + * + (3 * 5) (destination color factor) + (4 * 2) (blend equation) + * + 4 (fragment mask) + 1 (return) = 49 instlructions. Round up to 64 to + * make it a happy power-of-two. + */ + spe_init_func(f, 4 * 64); + + + const int frag[4] = { + spe_allocate_register(f, 3), + spe_allocate_register(f, 4), + spe_allocate_register(f, 5), + spe_allocate_register(f, 6), + }; + const int pixel[4] = { + spe_allocate_register(f, 7), + spe_allocate_register(f, 8), + spe_allocate_register(f, 9), + spe_allocate_register(f, 10), + }; + const int mask = spe_allocate_register(f, 11); + unsigned func[4]; + unsigned sF[4]; + unsigned dF[4]; + unsigned i; + int src_factor[4]; + int dst_factor[4]; + + + /* Does the selected blend mode make use of the source / destination + * color (RGB) blend factors? + */ + boolean need_color_factor = b->blend_enable + && (b->rgb_func != PIPE_BLEND_MIN) + && (b->rgb_func != PIPE_BLEND_MAX); + + /* Does the selected blend mode make use of the source / destination + * alpha blend factors? + */ + boolean need_alpha_factor = b->blend_enable + && (b->alpha_func != PIPE_BLEND_MIN) + && (b->alpha_func != PIPE_BLEND_MAX); + + + sF[0] = b->rgb_src_factor; + sF[1] = sF[0]; + sF[2] = sF[0]; + sF[3] = (b->alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) + ? PIPE_BLENDFACTOR_ONE : b->alpha_src_factor; + + dF[0] = b->rgb_dst_factor; + dF[1] = dF[0]; + dF[2] = dF[0]; + dF[3] = b->rgb_dst_factor; + + + /* If alpha writing is enabled and the alpha blend mode requires use of + * the alpha factor, calculate the alpha factor. + */ + if (((b->colormask & 8) != 0) && need_alpha_factor) { + src_factor[3] = emit_alpha_factor_calculation(f, sF[3], + blend_color->color[3], + frag[3], pixel[3]); + + /* If the alpha destination blend factor is the same as the alpha source + * blend factor, re-use the previously calculated value. + */ + dst_factor[3] = (dF[3] == sF[3]) + ? src_factor[3] + : emit_alpha_factor_calculation(f, dF[3], + blend_color->color[3], + frag[3], pixel[3]); + } + + + if (sF[0] == sF[3]) { + src_factor[0] = src_factor[3]; + src_factor[1] = src_factor[3]; + src_factor[2] = src_factor[3]; + } else if (sF[0] == dF[3]) { + src_factor[0] = dst_factor[3]; + src_factor[1] = dst_factor[3]; + src_factor[2] = dst_factor[3]; + } else if (need_color_factor) { + emit_color_factor_calculation(f, + b->rgb_src_factor, + b->colormask, + blend_color, + frag, pixel, src_factor); + } + + + if (dF[0] == sF[3]) { + dst_factor[0] = src_factor[3]; + dst_factor[1] = src_factor[3]; + dst_factor[2] = src_factor[3]; + } else if (dF[0] == dF[3]) { + dst_factor[0] = dst_factor[3]; + dst_factor[1] = dst_factor[3]; + dst_factor[2] = dst_factor[3]; + } else if (dF[0] == sF[0]) { + dst_factor[0] = src_factor[0]; + dst_factor[1] = src_factor[1]; + dst_factor[2] = src_factor[2]; + } else if (need_color_factor) { + emit_color_factor_calculation(f, + b->rgb_dst_factor, + b->colormask, + blend_color, + frag, pixel, dst_factor); + } + + + + func[0] = b->rgb_func; + func[1] = func[0]; + func[2] = func[0]; + func[3] = b->alpha_func; + + for (i = 0; i < 4; ++i) { + if ((b->colormask & (1U << i)) != 0) { + emit_blend_calculation(f, + func[i], sF[i], dF[i], + frag[i], src_factor[i], + pixel[i], dst_factor[i]); + spe_selb(f, frag[i], pixel[i], frag[i], mask); + } else { + spe_or(f, frag[i], pixel[i], pixel[i]); + } + } + + spe_bi(f, 0, 0, 0); +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h new file mode 100644 index 00000000000..541c3b3be07 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h @@ -0,0 +1,35 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef CELL_STATE_PER_FRAGMENT_H +#define CELL_STATE_PER_FRAGMENT_H + +extern void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); + +extern void +cell_generate_alpha_blend(struct cell_blend_state *cb, + const struct pipe_blend_color *blend_color); + +#endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index c071de1900b..115ca8cd901 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -19,6 +19,7 @@ SOURCES = \ spu_main.c \ spu_blend.c \ spu_dcache.c \ + spu_per_fragment_op.c \ spu_render.c \ spu_texture.c \ spu_tile.c \ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 59300028d4d..8e46f6e4719 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -58,6 +58,9 @@ struct spu_vs_context draw; static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX] ALIGN16_ATTRIB; +static unsigned char depth_stencil_code_buffer[4 * 64] + ALIGN16_ATTRIB; + /** * Tell the PPU that this SPU has finished copying a buffer to * local store and that it may be reused by the PPU. @@ -248,14 +251,26 @@ cmd_state_blend(const struct pipe_blend_state *state) static void -cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state *state) +cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state) { if (Debug) printf("SPU %u: DEPTH_STENCIL: ztest %d\n", spu.init.id, - state->depth.enabled); + state->read_depth); + + ASSERT_ALIGN16(state->base); + + mfc_get(depth_stencil_code_buffer, + (unsigned int) state->base, /* src */ + ROUNDUP16(state->size), + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); - memcpy(&spu.depth_stencil, state, sizeof(*state)); + spu.frag_test = (frag_test_func) depth_stencil_code_buffer; + spu.read_depth = state->read_depth; + spu.read_stencil = state->read_stencil; } @@ -415,9 +430,9 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); break; case CELL_CMD_STATE_DEPTH_STENCIL: - cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *) + cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8); + pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8); break; case CELL_CMD_STATE_SAMPLER: cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index a13edd17029..444e2186452 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -56,6 +56,17 @@ typedef union { #define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ +struct spu_frag_test_results { + qword mask; + qword depth; + qword stencil; +}; + +typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask, + qword pixel_depth, qword pixel_stencil, qword frag_depth, + qword frag_alpha, qword facing); + + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ @@ -79,8 +90,9 @@ struct spu_global struct cell_init_info init; struct spu_framebuffer fb; - struct pipe_blend_state blend_stencil; - struct pipe_depth_stencil_alpha_state depth_stencil; + boolean read_depth; + boolean read_stencil; + frag_test_func frag_test; struct pipe_blend_state blend; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct cell_command_texture texture; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c new file mode 100644 index 00000000000..d42b522b41d --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -0,0 +1,191 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file spu_per_fragment_op.c + * SPU implementation various per-fragment operations. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_format.h" +#include "spu_main.h" +#include "spu_per_fragment_op.h" + +#define ZERO 0x80 + +static void +read_ds_quad(tile_t *buffer, unsigned x, unsigned y, + enum pipe_format depth_format, qword *depth, + qword *stencil) +{ + const int ix = x / 2; + const int iy = y / 2; + + switch (depth_format) { + case PIPE_FORMAT_Z16_UNORM: { + qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; + + const qword shuf_vec = (qword) { + ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, + ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7 + }; + + + /* At even X values we want the first 4 shorts, and at odd X values we + * want the second 4 shorts. + */ + qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3)); + qword bias_mask = si_fsmbi(0x3333); + qword sv = si_a(shuf_vec, si_and(bias_mask, bias)); + + *depth = si_shufb(*ptr, *ptr, sv); + *stencil = si_il(0); + break; + } + + + case PIPE_FORMAT_Z32_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + + *depth = *ptr; + *stencil = si_il(0); + break; + } + + + case PIPE_FORMAT_Z24S8_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword mask = si_fsmbi(0x7777); + + *depth = si_and(*ptr, mask); + *stencil = si_rotmai(si_andc(*ptr, mask), -24); + break; + } + + + default: + assert(0); + break; + } +} + + +static void +write_ds_quad(tile_t *buffer, unsigned x, unsigned y, + enum pipe_format depth_format, + qword depth, qword stencil) +{ + const int ix = x / 2; + const int iy = y / 2; + + (void) stencil; + + switch (depth_format) { + case PIPE_FORMAT_Z16_UNORM: { + qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; + + qword sv = ((ix & 0x01) == 0) + ? (qword) { 2, 3, 6, 7, 10, 11, 14, 15, + 24, 25, 26, 27, 28, 29, 30, 31 } + : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23, + 2, 3, 6, 7, 10, 11, 14, 15 }; + *ptr = si_shufb(depth, *ptr, sv); + break; + } + + + case PIPE_FORMAT_Z32_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + *ptr = depth; + break; + } + + + case PIPE_FORMAT_Z24S8_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword mask = si_fsmbi(0x7777); + + stencil = si_rotmai(stencil, 24); + *ptr = si_selb(stencil, depth, mask); + break; + } + + + default: + assert(0); + break; + } +} + + +qword +spu_do_depth_stencil(int x, int y, + qword frag_mask, qword frag_depth, qword frag_alpha, + qword facing) +{ + struct spu_frag_test_results result; + qword pixel_depth; + qword pixel_stencil; + + /* All of this preable code (everthing before the call to frag_test) should + * be generated on the PPU and upload to the SPU. + */ + if (spu.read_depth || spu.read_stencil) { + read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, + &pixel_depth, &pixel_stencil); + } + + switch (spu.fb.depth_format) { + case PIPE_FORMAT_Z16_UNORM: + frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu))); + frag_depth = si_cfltu(frag_depth, 0); + break; + case PIPE_FORMAT_Z32_UNORM: + frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu))); + frag_depth = si_cfltu(frag_depth, 0); + break; + case PIPE_FORMAT_Z24S8_UNORM: + frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu))); + frag_depth = si_cfltu(frag_depth, 0); + break; + default: + assert(0); + break; + } + + result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil, + frag_depth, frag_alpha, facing); + + + /* This code (everthing after the call to frag_test) should + * be generated on the PPU and upload to the SPU. + */ + if (spu.read_depth || spu.read_stencil) { + write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, + result.depth, result.stencil); + } + + return result.mask; +} diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h new file mode 100644 index 00000000000..65712586992 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -0,0 +1,32 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SPU_PER_FRAGMENT_OP +#define SPU_PER_FRAGMENT_OP + +extern qword +spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth, + qword frag_alpha, qword facing); + +#endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 20e77aa2e63..6df59abd36d 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -98,7 +98,7 @@ my_tile(uint tx, uint ty) static INLINE void get_cz_tiles(uint tx, uint ty) { - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); @@ -153,7 +153,7 @@ static INLINE void wait_put_cz_tiles(void) { wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { wait_on_mask(1 << TAG_WRITE_TILE_Z); } } diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index be9624cf7d9..81823f24633 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -38,8 +38,7 @@ #include "spu_texture.h" #include "spu_tile.h" #include "spu_tri.h" - -#include "spu_ztest.h" +#include "spu_per_fragment_op.h" /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ @@ -264,16 +263,12 @@ do_depth_test(int x, int y, mask_t quadmask) zvals.v = eval_z((float) x, (float) y); - if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - int ix = (x - setup.cliprect_minx) / 4; - int iy = (y - setup.cliprect_miny) / 2; - mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask); - } - else { - int ix = (x - setup.cliprect_minx) / 2; - int iy = (y - setup.cliprect_miny) / 2; - mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask); - } + mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx, + y - setup.cliprect_miny, + (qword) quadmask, + (qword) zvals.v, + (qword) spu_splats((unsigned char) 0x0ffu), + (qword) spu_splats((unsigned int) 0x01u)); if (spu_extract(spu_orx(mask), 0)) spu.cur_ztile_status = TILE_STATUS_DIRTY; @@ -299,7 +294,7 @@ emit_quad( int x, int y, mask_t mask ) sp->quad.first->run(sp->quad.first, &setup.quad); #else - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { mask = do_depth_test(x, y, mask); } @@ -434,7 +429,7 @@ static void flush_spans( void ) } ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { if (spu.cur_ztile_status == TILE_STATUS_GETTING) { /* wait for mfc_get() to complete */ //printf("SPU: %u: waiting for ztile\n", spu.init.id); diff --git a/src/gallium/drivers/cell/spu/spu_ztest.h b/src/gallium/drivers/cell/spu/spu_ztest.h deleted file mode 100644 index ce8ad003393..00000000000 --- a/src/gallium/drivers/cell/spu/spu_ztest.h +++ /dev/null @@ -1,135 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Zbuffer/depth test code. - */ - - -#ifndef SPU_ZTEST_H -#define SPU_ZTEST_H - - -#ifdef __SPU__ -#include <spu_intrinsics.h> -#endif - - - -/** - * Perform Z testing for a 16-bit/value Z buffer. - * - * \param zvals vector of four fragment zvalues as floats - * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this - * contains the Z values for 2 quads, 8 pixels. - * \param x x coordinate of quad (only lsbit is significant) - * \param inMask indicates which fragments in the quad are alive - * \return new mask indicating which fragments are alive after ztest - */ -static INLINE vector unsigned int -spu_z16_test_less(vector float zvals, vector unsigned short *zbuf, - uint x, vector unsigned int inMask) -{ -#define ZERO 0x80 - vector unsigned int zvals_ui4, zbuf_ui4, mask; - - /* convert floats to uints in [0, 65535] */ - zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */ - zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */ - - /* XXX this conditional could be removed with a bit of work */ - if (x & 1) { - /* convert zbuffer values from ushorts to uints */ - /* gather lower four ushorts */ - zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, - (vector unsigned int) *zbuf, - ((vector unsigned char) { - ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11, - ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15})); - /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ - mask = spu_cmpgt(zbuf_ui4, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? zval : zbuf */ - zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); - /* convert zbuffer values from uints back to ushorts, preserve lower 4 */ - *zbuf = (vector unsigned short) - spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, - ((vector unsigned char) { - 16, 17, 18, 19, 20, 21, 22, 23, - 2, 3, 6, 7, 10, 11, 14, 15})); - } - else { - /* convert zbuffer values from ushorts to uints */ - /* gather upper four ushorts */ - zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, - (vector unsigned int) *zbuf, - ((vector unsigned char) { - ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, - ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7})); - /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ - mask = spu_cmpgt(zbuf_ui4, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? zval : zbuf */ - zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); - /* convert zbuffer values from uints back to ushorts, preserve upper 4 */ - *zbuf = (vector unsigned short) - spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, - ((vector unsigned char) { - 2, 3, 6, 7, 10, 11, 14, 15, - 24, 25, 26, 27, 28, 29, 30, 31})); - } - return mask; -#undef ZERO -} - - -/** - * As above, but Zbuffer values as 32-bit uints - */ -static INLINE vector unsigned int -spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr, - vector unsigned int inMask) -{ - vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr; - - /* convert floats to uints in [0, 0xffffffff] */ - zvals_ui4 = spu_convtu(zvals, 32); - /* mask = (zbuf < zvals_ui4) ? ~0 : 0 */ - mask = spu_cmpgt(zbuf, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? zval : zbuf */ - *zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask); - - return mask; -} - - -#endif /* SPU_ZTEST_H */ -- cgit v1.2.3 From 9f93e6701902312edf48821bb4c0558c3d62aaa3 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Mon, 17 Mar 2008 16:09:28 -0700 Subject: cell: Don't segfault when unbinding alpha / stencil / depth test state --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 17 ++++++++++++----- src/gallium/drivers/cell/spu/spu_main.c | 23 ++++++++++++++++------- 3 files changed, 29 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 66ede99d13b..c880760e4bd 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -116,7 +116,7 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, draw_flush(cell->draw); - if (cdsa->code.store == NULL) { + if ((cdsa != NULL) && (cdsa->code.store == NULL)) { cell_generate_depth_stencil_test(cdsa); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index c8d5fdf709b..e2cc9de48ad 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -74,11 +74,18 @@ cell_emit_state(struct cell_context *cell) struct cell_command_depth_stencil_alpha_test dsat; - dsat.base = (intptr_t) cell->depth_stencil->code.store; - dsat.size = (char *) cell->depth_stencil->code.csr - - (char *) cell->depth_stencil->code.store; - dsat.read_depth = TRUE; - dsat.read_stencil = FALSE; + if (cell->depth_stencil != NULL) { + dsat.base = (intptr_t) cell->depth_stencil->code.store; + dsat.size = (char *) cell->depth_stencil->code.csr + - (char *) cell->depth_stencil->code.store; + dsat.read_depth = TRUE; + dsat.read_stencil = FALSE; + } else { + dsat.base = 0; + dsat.size = 0; + dsat.read_depth = FALSE; + dsat.read_stencil = FALSE; + } { uint32_t *p = cell->depth_stencil->code.store; diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 8e46f6e4719..b8bb8e9449b 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -260,13 +260,22 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat ASSERT_ALIGN16(state->base); - mfc_get(depth_stencil_code_buffer, - (unsigned int) state->base, /* src */ - ROUNDUP16(state->size), - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); + if (state->size != 0) { + mfc_get(depth_stencil_code_buffer, + (unsigned int) state->base, /* src */ + ROUNDUP16(state->size), + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + } else { + /* If there is no code, emit a return instruction. + */ + depth_stencil_code_buffer[0] = 0x35; + depth_stencil_code_buffer[1] = 0x00; + depth_stencil_code_buffer[2] = 0x00; + depth_stencil_code_buffer[3] = 0x00; + } spu.frag_test = (frag_test_func) depth_stencil_code_buffer; spu.read_depth = state->read_depth; -- cgit v1.2.3 From a514aeb77899816d82c5b31f3bf2206d82d68893 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Tue, 18 Mar 2008 13:20:59 +0100 Subject: nv30: update miptree stuff for texturing --- src/gallium/drivers/nv30/nv30_context.c | 1 + src/gallium/drivers/nv30/nv30_context.h | 4 +++- src/gallium/drivers/nv30/nv30_miptree.c | 22 ++++++++++++++++------ src/gallium/drivers/nv30/nv30_screen.c | 3 ++- 4 files changed, 22 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index b3906e28e3f..cdd662a9f19 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -234,6 +234,7 @@ nv30_create(struct pipe_screen *screen, unsigned pctx_id) nv30_init_query_functions(nv30); nv30_init_surface_functions(nv30); nv30_init_state_functions(nv30); + nv30_init_miptree_functions(nv30); nv30->draw = draw_create(); assert(nv30->draw); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 21cffc6d274..0ee6cfdb339 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -91,9 +91,11 @@ nv30_context(struct pipe_context *pipe) extern void nv30_init_state_functions(struct nv30_context *nv30); extern void nv30_init_surface_functions(struct nv30_context *nv30); -extern void nv30_init_miptree_functions(struct pipe_screen *screen); +extern void nv30_init_miptree_functions(struct nv30_context *nv30); extern void nv30_init_query_functions(struct nv30_context *nv30); +extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen); + /* nv30_draw.c */ extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 19945e9ab87..f5659353ea5 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -55,7 +55,7 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) } static struct pipe_texture * -nv30_miptree_create(struct pipe_screen *screen, struct pipe_texture *pt) +nv30_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) { struct pipe_winsys *ws = screen->winsys; struct nv30_miptree *mt; @@ -99,6 +99,11 @@ nv30_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) } } +static void +nv30_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt) +{ +} + static struct pipe_surface * nv30_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) @@ -128,13 +133,18 @@ nv30_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, return ps; } + void -nv30_init_miptree_functions(struct pipe_screen *screen) +nv30_init_miptree_functions(struct nv30_context *nv30) { - struct nv30_screen *nv30screen = nv30_screen(screen); + nv30->pipe.texture_update = nv30_miptree_update; +} - nv30screen->screen.texture_create = nv30_miptree_create; - nv30screen->screen.texture_release = nv30_miptree_release; - nv30screen->screen.get_tex_surface = nv30_miptree_surface_get; +void +nv30_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv30_miptree_create; + pscreen->texture_release = nv30_miptree_release; + pscreen->get_tex_surface = nv30_miptree_surface_get; } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 39f2ac1af5c..3ca50e4fbf4 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -147,7 +147,8 @@ nv30_screen_create(struct pipe_winsys *winsys, struct nouveau_winsys *nvws, nv30screen->screen.is_format_supported = nv30_screen_is_format_supported; - nv30_init_miptree_functions(&nv30screen->screen); + nv30_screen_init_miptree_functions(&nv30screen->screen); + return &nv30screen->screen; } -- cgit v1.2.3 From e4b5008de46659f13fca7dcd1b587504e9d4484c Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Tue, 18 Mar 2008 13:34:26 +0100 Subject: nv30: hmm, no buffer for texture --- src/gallium/drivers/nv30/nv30_state_emit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 70b98836f0f..2cb4692e176 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -64,6 +64,8 @@ nv30_emit_hw_state(struct nv30_context *nv30) for (i = 0; i < 16; i++) { if (!(nv30->fp_samplers & (1 << i))) continue; + if (!nv30->tex[i].buffer) + continue; BEGIN_RING(rankine, NV34TCL_TX_OFFSET(i), 2); OUT_RELOCl(nv30->tex[i].buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); -- cgit v1.2.3 From f432ac5a1126997f10072760fe5afbf1b96f44f7 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Tue, 18 Mar 2008 10:23:39 -0700 Subject: cell: Move and (conditionally) silence debug code --- src/gallium/drivers/cell/ppu/cell_state_emit.c | 10 ------ .../drivers/cell/ppu/cell_state_per_fragment.c | 39 ++++++++++++++++++++++ 2 files changed, 39 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index e2cc9de48ad..4d589bcdbf9 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -87,16 +87,6 @@ cell_emit_state(struct cell_context *cell) dsat.read_stencil = FALSE; } - { - uint32_t *p = cell->depth_stencil->code.store; - - printf("\t.text\n"); - for (/* empty */; p < cell->depth_stencil->code.csr; p++) { - printf("\t.long\t0x%04x\n", *p); - } - fflush(stdout); - } - emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, sizeof(dsat)); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 60b64438d80..0ad9344cd40 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -525,6 +525,45 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) } spe_bi(f, 0, 0, 0); + + +#if 0 + { + const uint32_t *p = f->store; + unsigned i; + + printf("# alpha (%sabled)\n", + (dsa->alpha.enabled) ? "en" : "dis"); + printf("# func: %u\n", dsa->alpha.func); + printf("# ref: %.2f\n", dsa->alpha.ref); + + printf("# depth (%sabled)\n", + (dsa->depth.enabled) ? "en" : "dis"); + printf("# func: %u\n", dsa->depth.func); + + for (i = 0; i < 2; i++) { + printf("# %s stencil (%sabled)\n", + (i == 0) ? "front" : "back", + (dsa->stencil[i].enabled) ? "en" : "dis"); + + printf("# func: %u\n", dsa->stencil[i].func); + printf("# op (sf, zf, zp): %u %u %u\n", + dsa->stencil[i].fail_op, + dsa->stencil[i].zfail_op, + dsa->stencil[i].zpass_op); + printf("# ref value / value mask / write mask: %02x %02x %02x\n", + dsa->stencil[i].ref_value, + dsa->stencil[i].value_mask, + dsa->stencil[i].write_mask); + } + + printf("\t.text\n"); + for (/* empty */; p < f->csr; p++) { + printf("\t.long\t0x%04x\n", *p); + } + fflush(stdout); + } +#endif } -- cgit v1.2.3 From 5fdaebc51c5433ebc73f89690fd435dae2fcef60 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Tue, 18 Mar 2008 10:26:45 -0700 Subject: cell: Minor changes to make stencil not crash I'm not sure these are quite correct. The reflect demo doesn't assert anymore, but it doesn't produce correct results either. SPE-based vertex shader code needs to be disabled for relfect to run. --- src/gallium/drivers/cell/spu/spu_main.c | 12 +++++++++--- src/gallium/drivers/cell/spu/spu_tile.h | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index b8bb8e9449b..122cf337a68 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -218,12 +218,18 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; - if (spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM) + switch (spu.fb.depth_format) { + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: spu.fb.zsize = 4; - else if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) + break; + case PIPE_FORMAT_Z16_UNORM: spu.fb.zsize = 2; - else + break; + default: spu.fb.zsize = 0; + break; + } if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) spu.color_shuffle = ((vector unsigned char) { diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h index 3105b848fdc..1b5491112db 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -56,13 +56,13 @@ clear_c_tile(tile_t *ctile) static INLINE void clear_z_tile(tile_t *ztile) { - if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { + if (spu.fb.zsize == 2) { memset16((ushort*) ztile->us, spu.fb.depth_clear_value, TILE_SIZE * TILE_SIZE); } else { - ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM); + ASSERT(spu.fb.zsize != 0); memset32((uint*) ztile->ui, spu.fb.depth_clear_value, TILE_SIZE * TILE_SIZE); -- cgit v1.2.3 From f3e686d24a54f65b98c9a61a952577faaf451148 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Tue, 18 Mar 2008 10:29:14 -0700 Subject: cell: Fix added whitespace errors. --- src/gallium/drivers/cell/ppu/cell_state_per_fragment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 0ad9344cd40..11c0aa5b23c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -532,12 +532,12 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) const uint32_t *p = f->store; unsigned i; - printf("# alpha (%sabled)\n", + printf("# alpha (%sabled)\n", (dsa->alpha.enabled) ? "en" : "dis"); printf("# func: %u\n", dsa->alpha.func); printf("# ref: %.2f\n", dsa->alpha.ref); - printf("# depth (%sabled)\n", + printf("# depth (%sabled)\n", (dsa->depth.enabled) ? "en" : "dis"); printf("# func: %u\n", dsa->depth.func); -- cgit v1.2.3 From 17b234ae3319d8a36afc44d0cceb30fea6b42d67 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Tue, 18 Mar 2008 11:47:37 -0700 Subject: cell: Fix depth read / write for s8z24. Stencil is still broken. --- src/gallium/drivers/cell/spu/spu_main.c | 1 + src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 25 ++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 122cf337a68..937962285d0 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -221,6 +221,7 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) switch (spu.fb.depth_format) { case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: spu.fb.zsize = 4; break; case PIPE_FORMAT_Z16_UNORM: diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index d42b522b41d..06d68f5604b 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -76,6 +76,16 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y, case PIPE_FORMAT_Z24S8_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword mask = si_fsmbi(0xEEEE); + + *depth = si_rotmai(si_and(*ptr, mask), -8); + *stencil = si_andc(*ptr, mask); + break; + } + + + case PIPE_FORMAT_S8Z24_UNORM: { qword *ptr = (qword *) &buffer->ui4[iy][ix]; qword mask = si_fsmbi(0x7777); @@ -124,10 +134,20 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y, case PIPE_FORMAT_Z24S8_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword mask = si_fsmbi(0xEEEE); + + depth = si_shli(depth, 8); + *ptr = si_selb(stencil, depth, mask); + break; + } + + + case PIPE_FORMAT_S8Z24_UNORM: { qword *ptr = (qword *) &buffer->ui4[iy][ix]; qword mask = si_fsmbi(0x7777); - stencil = si_rotmai(stencil, 24); + stencil = si_shli(stencil, 24); *ptr = si_selb(stencil, depth, mask); break; } @@ -167,11 +187,12 @@ spu_do_depth_stencil(int x, int y, frag_depth = si_cfltu(frag_depth, 0); break; case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu))); frag_depth = si_cfltu(frag_depth, 0); break; default: - assert(0); + ASSERT(0); break; } -- cgit v1.2.3 From 75e714d476eb573a6e06585341ce693434eb237e Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Tue, 18 Mar 2008 21:50:01 +0100 Subject: nv30: disable emit texture image in state_emit, already done in fragtex --- src/gallium/drivers/nv30/nv30_context.h | 4 ++-- src/gallium/drivers/nv30/nv30_state_emit.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 0ee6cfdb339..c1cc3eca1e3 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -51,10 +51,10 @@ struct nv30_context { struct pipe_buffer *rt[2]; struct pipe_buffer *zeta; - struct { + /*struct { struct pipe_buffer *buffer; uint32_t format; - } tex[16]; + } tex[16];*/ unsigned vb_enable; struct { diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 2cb4692e176..eda2fd45f56 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -60,12 +60,11 @@ nv30_emit_hw_state(struct nv30_context *nv30) OUT_RING(0);*/ } - /* Texture images */ + /* Texture images, emitted in nv30_fragtex_build */ +#if 0 for (i = 0; i < 16; i++) { if (!(nv30->fp_samplers & (1 << i))) continue; - if (!nv30->tex[i].buffer) - continue; BEGIN_RING(rankine, NV34TCL_TX_OFFSET(i), 2); OUT_RELOCl(nv30->tex[i].buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); @@ -74,6 +73,7 @@ nv30_emit_hw_state(struct nv30_context *nv30) NOUVEAU_BO_OR, NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); } +#endif /* Fragment program */ BEGIN_RING(rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); -- cgit v1.2.3 From fa69a6e1bb7a1fe96848456255e5370f1904706d Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Tue, 18 Mar 2008 15:59:06 -0700 Subject: cell: Correctly load stencil for PIPE_FORMAT_S8Z24_UNORM --- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index 06d68f5604b..b4cffeeb32a 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -87,10 +87,9 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y, case PIPE_FORMAT_S8Z24_UNORM: { qword *ptr = (qword *) &buffer->ui4[iy][ix]; - qword mask = si_fsmbi(0x7777); - *depth = si_and(*ptr, mask); - *stencil = si_rotmai(si_andc(*ptr, mask), -24); + *depth = si_and(*ptr, si_fsmbi(0x7777)); + *stencil = si_andi(si_roti(*ptr, 8), 0x0ff); break; } -- cgit v1.2.3 From fe40dae02d3f3a83ee5bb66819c1ed4a3f66da80 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Tue, 18 Mar 2008 15:59:33 -0700 Subject: cell: Fix various stencil test code-gen bugs --- .../drivers/cell/ppu/cell_state_per_fragment.c | 70 +++++++++++++--------- 1 file changed, 43 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 11c0aa5b23c..9c479684596 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -199,52 +199,62 @@ emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, /** * \note Emits a maximum of 5 instructions. + * + * \warning + * Since \c out and \c in might be the same register, this routine cannot + * generate code that uses \c out as a temporary. */ static void emit_stencil_op(struct spe_function *f, int out, int in, int mask, unsigned op, unsigned ref) { const int clamp = spe_allocate_available_register(f); - const int tmp = spe_allocate_available_register(f); + const int clamp_mask = spe_allocate_available_register(f); + const int result = spe_allocate_available_register(f); switch(op) { case PIPE_STENCIL_OP_KEEP: assert(0); case PIPE_STENCIL_OP_ZERO: - spe_il(f, out, 0); + spe_il(f, result, 0); break; case PIPE_STENCIL_OP_REPLACE: - spe_il(f, out, ref); + spe_il(f, result, ref); break; case PIPE_STENCIL_OP_INCR: spe_il(f, clamp, 0x0ff); - spe_ai(f, out, in, 1); - spe_cgti(f, tmp, out, clamp); - spe_selb(f, out, out, clamp, tmp); + spe_ai(f, result, in, 1); + spe_clgti(f, clamp_mask, result, 0x0ff); + spe_selb(f, result, result, clamp, clamp_mask); break; case PIPE_STENCIL_OP_DECR: spe_il(f, clamp, 0); - spe_ai(f, out, in, -1); - spe_cgti(f, tmp, out, clamp); - spe_selb(f, out, clamp, out, tmp); + spe_ai(f, result, in, -1); + + /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned + * arithmetic. + */ + spe_clgti(f, clamp_mask, result, 0x0ff); + spe_selb(f, result, result, clamp, clamp_mask); break; case PIPE_STENCIL_OP_INCR_WRAP: - spe_ai(f, out, in, 1); + spe_ai(f, result, in, 1); break; case PIPE_STENCIL_OP_DECR_WRAP: - spe_ai(f, out, in, -1); + spe_ai(f, result, in, -1); break; case PIPE_STENCIL_OP_INVERT: - spe_nor(f, out, in, in); + spe_nor(f, result, in, in); break; default: assert(0); } - spe_release_register(f, tmp); - spe_release_register(f, clamp); + spe_selb(f, out, in, result, mask); - spe_selb(f, out, in, out, mask); + spe_release_register(f, result); + spe_release_register(f, clamp_mask); + spe_release_register(f, clamp); } @@ -261,13 +271,13 @@ emit_stencil_op(struct spe_function *f, * \param stencil Register containing values from stencil buffer * \param depth_pass Register to store mask of fragments passing stencil test * and depth test - * + * * \note * Emits a maximum of 10 + (3 * 5) = 25 instructions. */ static int emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, - unsigned face, + unsigned face, struct spe_function *f, int mask, int depth_mask, @@ -284,14 +294,17 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, const unsigned ref = (dsa->stencil[face].ref_value & dsa->stencil[face].value_mask); boolean complement = FALSE; - int stored = spe_allocate_available_register(f); + int stored; int tmp = spe_allocate_available_register(f); if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) && (dsa->stencil[face].value_mask != 0x0ff)) { + stored = spe_allocate_available_register(f); spe_andi(f, stored, stencil, dsa->stencil[face].value_mask); + } else { + stored = stencil; } @@ -332,7 +345,9 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, break; } - spe_release_register(f, stored); + if (stored != stencil) { + spe_release_register(f, stored); + } spe_release_register(f, tmp); @@ -362,7 +377,7 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, /* Conditionally emit code to update the stencil value under various * condititons. Note that there is no need to generate code under the * following circumstances: - * + * * - Stencil write mask is zero. * - For stencil-fail if the stencil test is ALWAYS * - For depth-fail if the stencil test is NEVER @@ -425,7 +440,7 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, spe_release_register(f, face_stencil); } else if (dsa->stencil[face].write_mask != 0x0ff) { int tmp = spe_allocate_available_register(f); - + spe_il(f, tmp, dsa->stencil[face].write_mask); spe_selb(f, stencil_src, stencil, stencil_src, tmp); @@ -492,12 +507,12 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) if (front_stencil != back_stencil) { spe_selb(f, stencil, back_stencil, front_stencil, facing); } - - if (back_stencil != stencil) { + + if (back_stencil != stencil) { spe_release_register(f, back_stencil); } - if (front_stencil != stencil) { + if (front_stencil != stencil) { spe_release_register(f, front_stencil); } @@ -505,10 +520,11 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) spe_release_register(f, back_depth_pass); } else { - if (front_stencil != stencil) { + if (front_stencil != stencil) { spe_or(f, stencil, front_stencil, front_stencil); spe_release_register(f, front_stencil); } + spe_or(f, mask, front_depth_pass, front_depth_pass); } spe_release_register(f, front_depth_pass); @@ -997,7 +1013,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, blend_color->color[3], frag[3], pixel[3]); } - + if (sF[0] == sF[3]) { src_factor[0] = src_factor[3]; @@ -1036,7 +1052,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, frag, pixel, dst_factor); } - + func[0] = b->rgb_func; func[1] = func[0]; -- cgit v1.2.3 From 9575e35807c89c0b8a745671bc2dcd54d96379ff Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 19 Mar 2008 00:06:15 +1100 Subject: nouveau: pass nvws to nvws->push_* functions, rather than nouveau_channel* --- src/gallium/drivers/nouveau/nouveau_push.h | 7 ++- src/gallium/drivers/nouveau/nouveau_stateobj.h | 10 ++-- src/gallium/drivers/nouveau/nouveau_winsys.h | 4 +- src/gallium/drivers/nv40/nv40_dma.h | 66 ------------------------- src/gallium/drivers/nv40/nv40_screen.c | 2 +- src/gallium/drivers/nv50/nv50_screen.c | 2 +- src/gallium/winsys/dri/nouveau/nouveau_winsys.c | 15 ++++-- 7 files changed, 23 insertions(+), 83 deletions(-) delete mode 100644 src/gallium/drivers/nv40/nv40_dma.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h index c5c5d988d58..225c17744ad 100644 --- a/src/gallium/drivers/nouveau/nouveau_push.h +++ b/src/gallium/drivers/nouveau/nouveau_push.h @@ -27,7 +27,7 @@ #define BEGIN_RING(obj,mthd,size) do { \ NOUVEAU_PUSH_CONTEXT(pc); \ if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \ - pc->nvws->push_flush(pc->nvws->channel, ((size) + 1)); \ + pc->nvws->push_flush(pc->nvws, ((size) + 1)); \ OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \ } while(0) @@ -38,13 +38,12 @@ #define FIRE_RING() do { \ NOUVEAU_PUSH_CONTEXT(pc); \ - pc->nvws->push_flush(pc->nvws->channel, 0); \ + pc->nvws->push_flush(pc->nvws, 0); \ } while(0) #define OUT_RELOC(bo,data,flags,vor,tor) do { \ NOUVEAU_PUSH_CONTEXT(pc); \ - pc->nvws->push_reloc(pc->nvws->channel, \ - pc->nvws->channel->pushbuf->cur++, \ + pc->nvws->push_reloc(pc->nvws, pc->nvws->channel->pushbuf->cur++, \ (bo), (data), (flags), (vor), (tor)); \ } while(0) diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index 439c7e4734a..d465223748a 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -99,14 +99,14 @@ so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) nr = so->cur - so->push; if (pb->remaining < nr) - nvws->push_flush(nvws->channel, nr); + nvws->push_flush(nvws, nr); pb->remaining -= nr; memcpy(pb->cur, so->push, nr * 4); for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - nvws->push_reloc(nvws->channel, pb->cur + r->offset, r->bo, + nvws->push_reloc(nvws, pb->cur + r->offset, r->bo, r->data, r->flags, r->vor, r->tor); } pb->cur += nr; @@ -120,17 +120,17 @@ so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) i = so->cur_reloc << 1; if (nvws->channel->pushbuf->remaining < i) - nvws->push_flush(nvws->channel, i); + nvws->push_flush(nvws, i); nvws->channel->pushbuf->remaining -= i; for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - nvws->push_reloc(nvws->channel, pb->cur++, r->bo, r->packet, + nvws->push_reloc(nvws, pb->cur++, r->bo, r->packet, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART)) | NOUVEAU_BO_DUMMY, 0, 0); - nvws->push_reloc(nvws->channel, pb->cur++, r->bo, r->data, + nvws->push_reloc(nvws, pb->cur++, r->bo, r->data, r->flags | NOUVEAU_BO_DUMMY, r->vor, r->tor); } } diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index e4b20478a0c..7fa7cc0910c 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -24,10 +24,10 @@ struct nouveau_winsys { struct nouveau_resource **); void (*res_free)(struct nouveau_resource **); - int (*push_reloc)(struct nouveau_channel *, void *ptr, + int (*push_reloc)(struct nouveau_winsys *, void *ptr, struct pipe_buffer *, uint32_t data, uint32_t flags, uint32_t vor, uint32_t tor); - int (*push_flush)(struct nouveau_channel *, unsigned size); + int (*push_flush)(struct nouveau_winsys *, unsigned size); int (*grobj_alloc)(struct nouveau_winsys *, int grclass, struct nouveau_grobj **); diff --git a/src/gallium/drivers/nv40/nv40_dma.h b/src/gallium/drivers/nv40/nv40_dma.h deleted file mode 100644 index 1fb82677689..00000000000 --- a/src/gallium/drivers/nv40/nv40_dma.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef __NV40_DMA_H__ -#define __NV40_DMA_H__ - -#include "pipe/nouveau/nouveau_winsys.h" - -#define OUT_RING(data) do { \ - (*nv40->nvws->channel->pushbuf->cur++) = (data); \ -} while(0) - -#define OUT_RINGp(src,size) do { \ - memcpy(nv40->nvws->channel->pushbuf->cur, (src), (size) * 4); \ - nv40->nvws->channel->pushbuf->cur += (size); \ -} while(0) - -#define OUT_RINGf(data) do { \ - union { float v; uint32_t u; } c; \ - c.v = (data); \ - OUT_RING(c.u); \ -} while(0) - -#define BEGIN_RING(obj,mthd,size) do { \ - if (nv40->nvws->channel->pushbuf->remaining < ((size) + 1)) \ - nv40->nvws->push_flush(nv40->nvws->channel, ((size) + 1)); \ - OUT_RING((nv40->obj->subc << 13) | ((size) << 18) | (mthd)); \ - nv40->nvws->channel->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#define BEGIN_RING_NI(obj,mthd,size) do { \ - BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ -} while(0) - -#define FIRE_RING() do { \ - nv40->nvws->push_flush(nv40->nvws->channel, 0); \ -} while(0) - -#define OUT_RELOC(bo,data,flags,vor,tor) do { \ - nv40->nvws->push_reloc(nv40->nvws->channel, \ - nv40->nvws->channel->pushbuf->cur, \ - (struct nouveau_bo *)(bo), \ - (data), (flags), (vor), (tor)); \ - OUT_RING(0); \ -} while(0) - -/* Raw data + flags depending on FB/TT buffer */ -#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ - OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ -} while(0) - -/* FB/TT object handle */ -#define OUT_RELOCo(bo,flags) do { \ - OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ - nv40->nvws->channel->vram->handle, \ - nv40->nvws->channel->gart->handle); \ -} while(0) - -/* Low 32-bits of offset */ -#define OUT_RELOCl(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ -} while(0) - -/* High 32-bits of offset */ -#define OUT_RELOCh(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ -} while(0) - -#endif diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 268ca83ce0d..bf30fbeca19 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -260,7 +260,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, so_emit(nvws, so); so_ref(NULL, &so); - nvws->push_flush(nvws->channel, 0); + nvws->push_flush(nvws, 0); screen->pipe.winsys = ws; screen->pipe.destroy = nv40_screen_destroy; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 721c6421d1f..ff4aca81a5a 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -171,7 +171,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, so_emit(nvws, so); so_ref(NULL, &so); - nvws->push_flush(nvws->channel, 0); + nvws->push_flush(nvws, 0); screen->pipe.winsys = ws; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c index 6c85aab9f5f..bf1afce5d9d 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c @@ -59,14 +59,21 @@ nouveau_pipe_surface_fill(struct nouveau_winsys *nvws, struct pipe_surface *dst, } static int -nouveau_pipe_emit_reloc(struct nouveau_channel *chan, void *ptr, +nouveau_pipe_push_reloc(struct nouveau_winsys *nvws, void *ptr, struct pipe_buffer *buf, uint32_t data, uint32_t flags, uint32_t vor, uint32_t tor) { - return nouveau_pushbuf_emit_reloc(chan, ptr, nouveau_buffer(buf)->bo, + return nouveau_pushbuf_emit_reloc(nvws->channel, ptr, + nouveau_buffer(buf)->bo, data, flags, vor, tor); } +static int +nouveau_pipe_push_flush(struct nouveau_winsys *nvws, unsigned size) +{ + return nouveau_pushbuf_flush(nvws->channel, size); +} + struct pipe_context * nouveau_pipe_create(struct nouveau_context *nv) { @@ -114,8 +121,8 @@ nouveau_pipe_create(struct nouveau_context *nv) nvws->res_alloc = nouveau_resource_alloc; nvws->res_free = nouveau_resource_free; - nvws->push_reloc = nouveau_pipe_emit_reloc; - nvws->push_flush = nouveau_pushbuf_flush; + nvws->push_reloc = nouveau_pipe_push_reloc; + nvws->push_flush = nouveau_pipe_push_flush; nvws->grobj_alloc = nouveau_pipe_grobj_alloc; nvws->grobj_free = nouveau_grobj_free; -- cgit v1.2.3 From 176df85568992a5d99aab7f0b1e382d41459aa13 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 19 Mar 2008 21:52:19 +1100 Subject: nv40: "implement" noise opcodes in fragprog For the moment, we do what NVIDIA does and return 0 unconditionally. This isn't correct, but it's an implementation at least. --- src/gallium/drivers/nv40/nv40_fragprog.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 4fb28a01ea2..33aac37d569 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -554,6 +554,12 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, case TGSI_OPCODE_MUL: arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + arith(fpc, sat, SFL, dst, mask, none, none, none); + break; case TGSI_OPCODE_POW: tmp = temp(fpc); arith(fpc, 0, LG2, tmp, MASK_X, -- cgit v1.2.3 From 4b39ba72166c6468525b19797c3d8a058814d789 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 19 Mar 2008 08:53:02 -0600 Subject: gallium: set tc->surface_map = NULL after unmapping --- src/gallium/drivers/softpipe/sp_tile_cache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 4caf2dd3fc9..19f71887e7e 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -156,6 +156,7 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, if (tc->surface_map) { /*assert(tc->surface != ps);*/ pipe_surface_unmap(tc->surface); + tc->surface_map = NULL; } pipe_surface_reference(&tc->surface, ps); -- cgit v1.2.3 From 4984487bc3338fc351a0631eaa4515e4adbb86a9 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 19 Mar 2008 17:08:16 -0600 Subject: gallium: add face, dirtyLevels params to pipe->texture_update() This provides better information about which images in texture object have changed. Also, call texture_update() from more places previously missed. --- src/gallium/auxiliary/draw/draw_aaline.c | 1 + src/gallium/auxiliary/draw/draw_pstipple.c | 2 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 3 ++- src/gallium/drivers/cell/ppu/cell_texture.c | 3 ++- src/gallium/drivers/i915simple/i915_texture.c | 3 ++- src/gallium/drivers/i965simple/brw_tex_layout.c | 3 ++- src/gallium/drivers/softpipe/sp_texture.c | 3 ++- src/gallium/include/pipe/p_context.h | 6 ++---- src/mesa/state_tracker/st_atom_texture.c | 5 ----- src/mesa/state_tracker/st_cb_drawpixels.c | 3 +++ src/mesa/state_tracker/st_cb_texture.c | 13 +++++++------ src/mesa/state_tracker/st_texture.h | 2 -- 12 files changed, 24 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index f2b983374e6..b4fa6bd9670 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -410,6 +410,7 @@ aaline_create_texture(struct aaline_stage *aaline) /* unmap */ pipe_surface_unmap(surface); pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, aaline->texture, 0, (1 << level)); } } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 09d542002f0..9d154a68381 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -374,7 +374,7 @@ pstip_update_texture(struct pstip_stage *pstip) /* unmap */ pipe_surface_unmap(surface); pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pstip->texture); + pipe->texture_update(pipe, pstip->texture, 0, 0x1); } diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 27141c4d13f..028b180a777 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -872,7 +872,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, pipe->flush(pipe, PIPE_FLUSH_WAIT); - /*pipe->texture_update(pipe, pt); not really needed */ + /* need to signal that the texture has changed _after_ rendering to it */ + pipe->texture_update(pipe, pt, face, (1 << dstLevel)); } /* restore state we changed */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index e235421107e..9c694e136d8 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -134,7 +134,8 @@ cell_texture_release_screen(struct pipe_screen *screen, static void -cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, + uint face, uint levelsMask) { /* XXX TO DO: re-tile the texture data ... */ diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index ef5adff550e..c39e747705b 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -542,7 +542,8 @@ i915_texture_release_screen(struct pipe_screen *screen, static void -i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, + uint face, uint levelsMask) { /* no-op? */ } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index b24ac87c378..b580f98204c 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -358,7 +358,8 @@ brw_texture_release_screen(struct pipe_screen *screen, static void -brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, + uint face, uint levelsMask) { /* no-op? */ } diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 64bf353aa4a..a98b3b1a4a8 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -174,7 +174,8 @@ softpipe_get_tex_surface_screen(struct pipe_screen *screen, static void softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture) + struct pipe_texture *texture, + uint face, uint levelsMask) { struct softpipe_context *softpipe = softpipe_context(pipe); uint unit; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index b64948f0f3b..a3824601be9 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -192,12 +192,10 @@ struct pipe_context { /** * Called when texture data is changed. - * Note: we could pass some hints about which mip levels or cube faces - * have changed... - * XXX this may go away - could pass a 'write' flag to get_tex_surface() */ void (*texture_update)(struct pipe_context *pipe, - struct pipe_texture *texture); + struct pipe_texture *texture, + uint face, uint dirtyLevelsMask); diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index e53a897637c..2a711e513df 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -76,11 +76,6 @@ update_textures(struct st_context *st) pt = st_get_stobj_texture(stObj); pipe_texture_reference(&st->state.sampler_texture[unit], pt); - - if (stObj && stObj->dirtyData) { - st->pipe->texture_update(st->pipe, pt); - stObj->dirtyData = GL_FALSE; - } } st->pipe->set_sampler_textures(st->pipe, st->state.num_textures, diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 33d34445ee0..99d5e3e848f 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -513,6 +513,8 @@ make_texture(struct st_context *st, /* unmap */ pipe_surface_unmap(surface); pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, pt, 0, 0x1); + assert(success); /* restore */ @@ -1100,6 +1102,7 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, /* Release surface */ pipe_surface_unmap(surface); pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, pt, 0, 0x1); pt->format = format; diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index d4731c77373..306b27c4235 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -476,6 +476,7 @@ st_TexImage(GLcontext * ctx, struct gl_texture_image *texImage, GLsizei imageSize, int compressed) { + struct pipe_context *pipe = ctx->st->pipe; struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); GLint postConvWidth, postConvHeight; @@ -690,8 +691,8 @@ st_TexImage(GLcontext * ctx, texImage->Data = NULL; } - /* flag data as dirty */ - stObj->dirtyData = GL_TRUE; + if (stObj->pt) + pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); if (level == texObj->BaseLevel && texObj->GenerateMipmap) { ctx->Driver.GenerateMipmap(ctx, target, texObj); @@ -866,6 +867,7 @@ st_TexSubimage(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { + struct pipe_context *pipe = ctx->st->pipe; struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); GLuint dstRowStride; @@ -924,8 +926,7 @@ st_TexSubimage(GLcontext * ctx, texImage->Data = NULL; } - /* flag data as dirty */ - stObj->dirtyData = GL_TRUE; + pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); } @@ -1179,8 +1180,7 @@ do_copy_texsubimage(GLcontext *ctx, pipe_surface_reference(&dest_surface, NULL); - /* flag data as dirty */ - stObj->dirtyData = GL_TRUE; + pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); if (level == texObj->BaseLevel && texObj->GenerateMipmap) { ctx->Driver.GenerateMipmap(ctx, target, texObj); @@ -1481,6 +1481,7 @@ st_finalize_texture(GLcontext *ctx, if (stImage && stObj->pt != stImage->pt) { copy_image_data_to_texture(ctx->st, stObj, level, stImage); *needFlush = GL_TRUE; + pipe->texture_update(pipe, stObj->pt, face, (1 << level)); } } } diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index 78f5f451ed6..7abccb3a69f 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -71,8 +71,6 @@ struct st_texture_object GLboolean imageOverride; GLint depthOverride; GLuint pitchOverride; - - GLboolean dirtyData; }; -- cgit v1.2.3 From 122ed506f4b808503b230bade421018614dbe696 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 19 Mar 2008 18:10:09 -0600 Subject: gallium: added fb_width/height fields to softpipe context These are convenience fields. Otherwise, we have to check cbuf[0] or zsbuf in various places. --- src/gallium/drivers/softpipe/sp_context.h | 2 ++ src/gallium/drivers/softpipe/sp_prim_setup.c | 2 +- src/gallium/drivers/softpipe/sp_quad_stipple.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 13 ++----------- src/gallium/drivers/softpipe/sp_state_surface.c | 12 ++++++++++++ 5 files changed, 18 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 19e6cfaf02e..31d7062dcc0 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -84,6 +84,8 @@ struct softpipe_context { unsigned num_samplers; unsigned num_textures; + uint fb_width, fb_height; + /* Counter for occlusion queries. Note this supports overlapping * queries. */ diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 7aa9cf8e851..f2d6043e2ed 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -485,7 +485,7 @@ setup_fragcoord_coeff(struct setup_stage *setup, uint slot) /*Y*/ if (setup->softpipe->rasterizer->origin_lower_left) { /* y=0=bottom */ - const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height; + const int winHeight = setup->softpipe->fb_height; setup->coef[slot].a0[1] = (float) (winHeight - 1); setup->coef[slot].dady[1] = -1.0; } diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index 8660432259c..9a39249576b 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -25,7 +25,7 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) int y0, y1; uint stipple0, stipple1; if (softpipe->rasterizer->origin_lower_left) { - y0 = softpipe->framebuffer.cbufs[0]->height - 1 - quad->y0; + y0 = softpipe->fb_height - 1 - quad->y0; y1 = y0 - 1; } else { diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 82cb31ece73..53b2056b8a6 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -171,17 +171,8 @@ softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe) static void compute_cliprect(struct softpipe_context *sp) { - unsigned surfWidth, surfHeight; - - if (sp->framebuffer.num_cbufs > 0) { - surfWidth = sp->framebuffer.cbufs[0]->width; - surfHeight = sp->framebuffer.cbufs[0]->height; - } - else { - /* no surface? */ - surfWidth = sp->scissor.maxx; - surfHeight = sp->scissor.maxy; - } + uint surfWidth = sp->fb_width; + uint surfHeight = sp->fb_height; if (sp->rasterizer->scissor) { /* clip to scissor rect */ diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index 124b18b7081..2f556844644 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -48,6 +48,9 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, struct softpipe_context *sp = softpipe_context(pipe); uint i; + /* updated below */ + sp->fb_width = sp->fb_height = 0; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { @@ -60,6 +63,10 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, /* update cache */ sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]); } + if (fb->cbufs[i]) { + sp->fb_width = fb->cbufs[i]->width; + sp->fb_height = fb->cbufs[i]->height; + } } sp->framebuffer.num_cbufs = fb->num_cbufs; @@ -74,6 +81,11 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, /* update cache */ sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); + + if (!sp->fb_width && fb->zsbuf) { + sp->fb_width = fb->zsbuf->width; + sp->fb_height = fb->zsbuf->height; + } } #if 0 -- cgit v1.2.3 From 1d9049c4df24d47446218f4032b891b817af0d00 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 00:18:18 +1100 Subject: nouveau: match gallium API changes --- src/gallium/drivers/nv30/nv30_miptree.c | 3 ++- src/gallium/drivers/nv40/nv40_miptree.c | 3 ++- src/gallium/drivers/nv50/nv50_miptree.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index f5659353ea5..6fdcf42a243 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -100,7 +100,8 @@ nv30_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) } static void -nv30_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt) +nv30_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, + uint face, uint levels) { } diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 94ba05b7104..0dff9b3ad6d 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -99,7 +99,8 @@ nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) } static void -nv40_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt) +nv40_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, + uint face, uint levels) { } diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 720d33fda91..7474c657650 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -35,7 +35,8 @@ nv50_screen_init_miptree_functions(struct pipe_screen *pscreen) } static void -nv50_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt) +nv50_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, + uint face, uint levels) { } -- cgit v1.2.3 From 2fb30b77ad09016efcf969456de9b0341bc53bac Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 20 Mar 2008 15:02:59 -0600 Subject: cell: use pipe_framebuffer_state.width, height --- src/gallium/drivers/cell/ppu/cell_state_derived.c | 13 ++----------- src/gallium/drivers/cell/ppu/cell_state_emit.c | 4 ++-- 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 5c240a55c0b..5480534ad90 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -141,17 +141,8 @@ calculate_vertex_layout( struct cell_context *cell ) static void compute_cliprect(struct cell_context *sp) { - unsigned surfWidth, surfHeight; - - if (sp->framebuffer.num_cbufs > 0) { - surfWidth = sp->framebuffer.cbufs[0]->width; - surfHeight = sp->framebuffer.cbufs[0]->height; - } - else { - /* no surface? */ - surfWidth = sp->scissor.maxx; - surfHeight = sp->scissor.maxy; - } + uint surfWidth = sp->framebuffer.width; + uint surfHeight = sp->framebuffer.height; if (sp->rasterizer->scissor) { /* clip to scissor rect */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 4d589bcdbf9..31cc938f074 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -60,8 +60,8 @@ cell_emit_state(struct cell_context *cell) fb->color_format = cbuf->format; fb->depth_start = cell->zsbuf_map; fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE; - fb->width = cell->framebuffer.cbufs[0]->width; - fb->height = cell->framebuffer.cbufs[0]->height; + fb->width = cell->framebuffer.width; + fb->height = cell->framebuffer.height; } if (cell->dirty & CELL_NEW_BLEND) { -- cgit v1.2.3 From 09f67990abd4bb9b79349be2fca9a6ae850b6f5f Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 20 Mar 2008 15:03:43 -0600 Subject: gallium: use new framebuffer width, height fields --- src/gallium/drivers/softpipe/sp_context.h | 2 -- src/gallium/drivers/softpipe/sp_prim_setup.c | 2 +- src/gallium/drivers/softpipe/sp_quad_stipple.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 4 ++-- src/gallium/drivers/softpipe/sp_state_surface.c | 19 +++---------------- 5 files changed, 7 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 31d7062dcc0..19e6cfaf02e 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -84,8 +84,6 @@ struct softpipe_context { unsigned num_samplers; unsigned num_textures; - uint fb_width, fb_height; - /* Counter for occlusion queries. Note this supports overlapping * queries. */ diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index f2d6043e2ed..6a81e4d8cc2 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -485,7 +485,7 @@ setup_fragcoord_coeff(struct setup_stage *setup, uint slot) /*Y*/ if (setup->softpipe->rasterizer->origin_lower_left) { /* y=0=bottom */ - const int winHeight = setup->softpipe->fb_height; + const int winHeight = setup->softpipe->framebuffer.height; setup->coef[slot].a0[1] = (float) (winHeight - 1); setup->coef[slot].dady[1] = -1.0; } diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index 9a39249576b..f1e9b80e09a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -25,7 +25,7 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) int y0, y1; uint stipple0, stipple1; if (softpipe->rasterizer->origin_lower_left) { - y0 = softpipe->fb_height - 1 - quad->y0; + y0 = softpipe->framebuffer.height - 1 - quad->y0; y1 = y0 - 1; } else { diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 53b2056b8a6..14abb20eeb2 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -171,8 +171,8 @@ softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe) static void compute_cliprect(struct softpipe_context *sp) { - uint surfWidth = sp->fb_width; - uint surfHeight = sp->fb_height; + uint surfWidth = sp->framebuffer.width; + uint surfHeight = sp->framebuffer.height; if (sp->rasterizer->scissor) { /* clip to scissor rect */ diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index 2f556844644..ba8c9eece72 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -48,9 +48,6 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, struct softpipe_context *sp = softpipe_context(pipe); uint i; - /* updated below */ - sp->fb_width = sp->fb_height = 0; - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { @@ -63,10 +60,6 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, /* update cache */ sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]); } - if (fb->cbufs[i]) { - sp->fb_width = fb->cbufs[i]->width; - sp->fb_height = fb->cbufs[i]->height; - } } sp->framebuffer.num_cbufs = fb->num_cbufs; @@ -81,11 +74,6 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, /* update cache */ sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); - - if (!sp->fb_width && fb->zsbuf) { - sp->fb_width = fb->zsbuf->width; - sp->fb_height = fb->zsbuf->height; - } } #if 0 @@ -113,9 +101,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, } #endif + sp->framebuffer.width = fb->width; + sp->framebuffer.height = fb->height; + sp->dirty |= SP_NEW_FRAMEBUFFER; } - - - - -- cgit v1.2.3 From 2b21bde3b1fa6fe357a3a5adc6249e89d6915524 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Wed, 19 Mar 2008 17:29:39 -0700 Subject: cell: Use code-gen for alpha blend So far this is only tested when GL_BLEND is disabled. --- src/gallium/drivers/cell/common.h | 10 +++++ src/gallium/drivers/cell/ppu/cell_pipe_state.c | 11 +++-- src/gallium/drivers/cell/ppu/cell_state_emit.c | 17 +++++-- src/gallium/drivers/cell/spu/Makefile | 1 - src/gallium/drivers/cell/spu/spu_blend.c | 62 -------------------------- src/gallium/drivers/cell/spu/spu_blend.h | 37 --------------- src/gallium/drivers/cell/spu/spu_main.c | 50 ++++++++++++++++++--- src/gallium/drivers/cell/spu/spu_main.h | 17 ++++++- src/gallium/drivers/cell/spu/spu_tri.c | 56 +++++++++++++++-------- 9 files changed, 129 insertions(+), 132 deletions(-) delete mode 100644 src/gallium/drivers/cell/spu/spu_blend.c delete mode 100644 src/gallium/drivers/cell/spu/spu_blend.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index fe93fd8e1a2..d59e4f7036e 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -114,6 +114,16 @@ struct cell_command_depth_stencil_alpha_test { }; +/** + * Upload code to perform framebuffer blend operation + */ +struct cell_command_blend { + uint64_t base; /**< Effective address of code start. */ + unsigned size; /**< Size in bytes of test code. */ + unsigned read_fb; /**< Flag: should framebuffer be read? */ +}; + + /** * Tell SPUs about the framebuffer size, location */ diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index c880760e4bd..4ca8c153b0b 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -54,14 +54,19 @@ cell_create_blend_state(struct pipe_context *pipe, static void -cell_bind_blend_state(struct pipe_context *pipe, void *blend) +cell_bind_blend_state(struct pipe_context *pipe, void *state) { struct cell_context *cell = cell_context(pipe); + struct cell_blend_state *blend = (struct cell_blend_state *) state; + draw_flush(cell->draw); - cell->blend = (const struct cell_blend_state *)blend; + if ((blend != NULL) && (blend->code.store == NULL)) { + cell_generate_depth_stencil_test(blend); + } + cell->blend = blend; cell->dirty |= CELL_NEW_BLEND; } @@ -70,7 +75,7 @@ static void cell_delete_blend_state(struct pipe_context *pipe, void *blend) { struct cell_blend_state *cb = (struct cell_blend_state *) blend; - + spe_release_func(& cb->code); FREE(cb); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 31cc938f074..5709b48f129 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -65,9 +65,20 @@ cell_emit_state(struct cell_context *cell) } if (cell->dirty & CELL_NEW_BLEND) { - emit_state_cmd(cell, CELL_CMD_STATE_BLEND, - cell->blend, - sizeof(struct pipe_blend_state)); + struct cell_command_blend blend; + + if (cell->blend != NULL) { + blend.base = (intptr_t) cell->blend->code.store; + blend.size = (char *) cell->blend->code.csr + - (char *) cell->blend->code.store; + blend.read_fb = TRUE; + } else { + blend.base = 0; + blend.size = 0; + blend.read_fb = FALSE; + } + + emit_state_cmd(cell, CELL_CMD_STATE_BLEND, &blend, sizeof(blend)); } if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index 115ca8cd901..8e83610790e 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -17,7 +17,6 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o SOURCES = \ spu_main.c \ - spu_blend.c \ spu_dcache.c \ spu_per_fragment_op.c \ spu_render.c \ diff --git a/src/gallium/drivers/cell/spu/spu_blend.c b/src/gallium/drivers/cell/spu/spu_blend.c deleted file mode 100644 index 23ec0eeb451..00000000000 --- a/src/gallium/drivers/cell/spu/spu_blend.c +++ /dev/null @@ -1,62 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "spu_main.h" -#include "spu_blend.h" -#include "spu_colorpack.h" - - -void -blend_quad(uint itx, uint ity, vector float colors[4]) -{ - /* simple SRC_ALPHA, ONE_MINUS_SRC_ALPHA blending */ - vector float fbc00 = spu_unpack_color(spu.ctile.ui[ity][itx]); - vector float fbc01 = spu_unpack_color(spu.ctile.ui[ity][itx+1]); - vector float fbc10 = spu_unpack_color(spu.ctile.ui[ity+1][itx]); - vector float fbc11 = spu_unpack_color(spu.ctile.ui[ity+1][itx+1]); - - vector float alpha00 = spu_splats(spu_extract(colors[0], 3)); - vector float alpha01 = spu_splats(spu_extract(colors[1], 3)); - vector float alpha10 = spu_splats(spu_extract(colors[2], 3)); - vector float alpha11 = spu_splats(spu_extract(colors[3], 3)); - - vector float one_minus_alpha00 = spu_sub(spu_splats(1.0f), alpha00); - vector float one_minus_alpha01 = spu_sub(spu_splats(1.0f), alpha01); - vector float one_minus_alpha10 = spu_sub(spu_splats(1.0f), alpha10); - vector float one_minus_alpha11 = spu_sub(spu_splats(1.0f), alpha11); - - colors[0] = spu_add(spu_mul(colors[0], alpha00), - spu_mul(fbc00, one_minus_alpha00)); - colors[1] = spu_add(spu_mul(colors[1], alpha01), - spu_mul(fbc01, one_minus_alpha01)); - colors[2] = spu_add(spu_mul(colors[2], alpha10), - spu_mul(fbc10, one_minus_alpha10)); - colors[3] = spu_add(spu_mul(colors[3], alpha11), - spu_mul(fbc11, one_minus_alpha11)); -} - diff --git a/src/gallium/drivers/cell/spu/spu_blend.h b/src/gallium/drivers/cell/spu/spu_blend.h deleted file mode 100644 index 2b594b578b4..00000000000 --- a/src/gallium/drivers/cell/spu/spu_blend.h +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef SPU_BLEND_H -#define SPU_BLEND_H - - -extern void -blend_quad(uint itx, uint ity, vector float colors[4]); - - -#endif /* SPU_BLEND_H */ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 937962285d0..41bebf5362b 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -61,6 +61,25 @@ static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX] static unsigned char depth_stencil_code_buffer[4 * 64] ALIGN16_ATTRIB; +static unsigned char fb_blend_code_buffer[4 * 64] + ALIGN16_ATTRIB; + +static struct spu_blend_results +default_blend(qword frag_r, qword frag_g, qword frag_b, qword frag_a, + qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, + qword frag_mask) +{ + struct spu_blend_results result; + + result.r = si_selb(pixel_r, frag_r, frag_mask); + result.g = si_selb(pixel_g, frag_g, frag_mask); + result.b = si_selb(pixel_b, frag_b, frag_mask); + result.a = si_selb(pixel_a, frag_a, frag_mask); + + return result; +} + + /** * Tell the PPU that this SPU has finished copying a buffer to * local store and that it may be reused by the PPU. @@ -246,14 +265,31 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) static void -cmd_state_blend(const struct pipe_blend_state *state) +cmd_state_blend(const struct cell_command_blend *state) { if (Debug) printf("SPU %u: BLEND: enabled %d\n", spu.init.id, - state->blend_enable); + (state->size != 0)); + + ASSERT_ALIGN16(state->base); - memcpy(&spu.blend, state, sizeof(*state)); + if (state->size != 0) { + mfc_get(fb_blend_code_buffer, + (unsigned int) state->base, /* src */ + ROUNDUP16(state->size), + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + spu.blend = (blend_func) fb_blend_code_buffer; + spu.read_fb = state->read_fb; + } else { + /* If there is no code, use the default; + */ + spu.blend = default_blend; + spu.read_fb = FALSE; + } } @@ -441,9 +477,8 @@ cmd_batch(uint opcode) pos += 1; break; case CELL_CMD_STATE_BLEND: - cmd_state_blend((struct pipe_blend_state *) - &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); + cmd_state_blend((struct cell_command_blend *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_command_blend)) / 8); break; case CELL_CMD_STATE_DEPTH_STENCIL: cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *) @@ -587,6 +622,9 @@ one_time_init(void) memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); invalidate_tex_cache(); + + spu.blend = default_blend; + spu.read_fb = FALSE; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 444e2186452..56d0968676b 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -67,6 +67,18 @@ typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask, qword frag_alpha, qword facing); +struct spu_blend_results { + qword r; + qword g; + qword b; + qword a; +}; + +typedef struct spu_blend_results (*blend_func)( + qword frag_r, qword frag_g, qword frag_b, qword frag_a, + qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, + qword frag_mask); + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ @@ -93,7 +105,10 @@ struct spu_global boolean read_depth; boolean read_stencil; frag_test_func frag_test; - struct pipe_blend_state blend; + + boolean read_fb; + blend_func blend; + struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct cell_command_texture texture; diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 81823f24633..c9f8cadcda3 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -29,10 +29,10 @@ * Triangle rendering within a tile. */ +#include <transpose_matrix4x4.h> #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_util.h" -#include "spu_blend.h" #include "spu_colorpack.h" #include "spu_main.h" #include "spu_texture.h" @@ -326,27 +326,45 @@ emit_quad( int x, int y, mask_t mask ) eval_coeff(1, (float) x, (float) y, colors); } -#if 1 - if (spu.blend.blend_enable) - blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors); -#endif - if (spu_extract(mask, 0)) - spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle); - if (spu_extract(mask, 1)) - spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle); - if (spu_extract(mask, 2)) - spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle); - if (spu_extract(mask, 3)) - spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle); + /* Read the current framebuffer values. + * + * Ignore read_fb for now. In the future we can use this to avoid + * reading the framebuffer if read_fb is false and the fragment mask is + * all 0xffffffff. This is the common case, so it is probably worth + * the effort. We'll have to profile to determine whether or not the + * extra conditional branches hurt overall performance. + */ + vec_float4 aos_pix[4] = { + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]), + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]), + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]), + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]), + }; -#if 0 - /* SIMD_Z with swizzled color buffer (someday) */ - vector unsigned int uicolors = *((vector unsigned int *) &colors); - spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask); -#endif - } + qword soa_pix[4]; + qword soa_frag[4]; + /* Convert pixel and fragment data from AoS to SoA format. + */ + _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix); + _transpose_matrix4x4((vec_float4 *) soa_frag, colors); + + const struct spu_blend_results result = + (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3], + soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3], + (qword) mask); + + + /* Convert final pixel data from SoA to AoS format. + */ + _transpose_matrix4x4(aos_pix, (const vec_float4 *) &result); + + spu.ctile.ui[iy+0][ix+0] = spu_pack_color_shuffle(aos_pix[0], shuffle); + spu.ctile.ui[iy+0][ix+1] = spu_pack_color_shuffle(aos_pix[1], shuffle); + spu.ctile.ui[iy+1][ix+0] = spu_pack_color_shuffle(aos_pix[2], shuffle); + spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(aos_pix[3], shuffle); + } #endif } -- cgit v1.2.3 From df1d6e2410dbc6af66ca416124587918b9764ee8 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Thu, 20 Mar 2008 17:36:05 -0700 Subject: cell: Fix bus error when there is no depth buffer --- src/gallium/drivers/cell/spu/spu_tri.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index c9f8cadcda3..c4272d6e93c 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -261,6 +261,9 @@ do_depth_test(int x, int y, mask_t quadmask) float4 zvals; mask_t mask; + if (spu.fb.depth_format == PIPE_FORMAT_NONE) + return quadmask; + zvals.v = eval_z((float) x, (float) y); mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx, -- cgit v1.2.3 From 4b9520fc05f169b74835c096c933d67c67c6d8cd Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Thu, 20 Mar 2008 17:36:31 -0700 Subject: cell: Call the correct function to generate blending code Cut-and-paste for the lose. :( --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 4ca8c153b0b..86fcdcff1f6 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -63,7 +63,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *state) draw_flush(cell->draw); if ((blend != NULL) && (blend->code.store == NULL)) { - cell_generate_depth_stencil_test(blend); + cell_generate_alpha_blend(blend, &cell->blend_color); } cell->blend = blend; -- cgit v1.2.3 From cab68957c72d6f198546a250b6fe0a74732cb3ec Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Thu, 20 Mar 2008 18:17:48 -0700 Subject: cell: Fix several bugs in blend code-gen - Alpha factor set to a _COLOR mode was mishandled - Cases when either dst factor or src factor was ZERO were mishandled - MIN and MAX cases were backwards - Case when blend was disabled was mishandled - Incorrect comments about number of instructions generated The tests blendminmax and blendsquare run correctly. --- .../drivers/cell/ppu/cell_state_per_fragment.c | 112 +++++++++++++++++---- 1 file changed, 92 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 9c479684596..988c251e205 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -669,7 +669,7 @@ emit_alpha_factor_calculation(struct spe_function *f, /** - * \note Emits a maximum of 5 instructions + * \note Emits a maximum of 6 instructions */ static void emit_color_factor_calculation(struct spe_function *f, @@ -864,7 +864,11 @@ emit_blend_calculation(struct spe_function *f, spe_il(f, src, 0); } else if (dF == PIPE_BLENDFACTOR_ONE) { spe_or(f, src, dst, dst); + } else { + spe_fm(f, src, dst, dst_factor); } + } else if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_fm(f, src, src, src_factor); } else { spe_fm(f, tmp, dst, dst_factor); spe_fma(f, src, src, src_factor, tmp); @@ -884,7 +888,11 @@ emit_blend_calculation(struct spe_function *f, } else if (dF == PIPE_BLENDFACTOR_ONE) { spe_il(f, tmp, 0); spe_fs(f, src, tmp, dst); + } else { + spe_fm(f, src, dst, dst_factor); } + } else if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_fm(f, src, src, src_factor); } else { spe_fm(f, tmp, dst, dst_factor); spe_fms(f, src, src, src_factor, tmp); @@ -904,7 +912,11 @@ emit_blend_calculation(struct spe_function *f, spe_il(f, src, 0); } else if (dF == PIPE_BLENDFACTOR_ONE) { spe_or(f, src, dst, dst); + } else { + spe_fm(f, src, dst, dst_factor); } + } else if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_fm(f, src, src, src_factor); } else { spe_fm(f, tmp, src, src_factor); spe_fms(f, src, src, dst_factor, tmp); @@ -913,12 +925,12 @@ emit_blend_calculation(struct spe_function *f, case PIPE_BLEND_MIN: spe_cgt(f, tmp, src, dst); - spe_selb(f, src, dst, src, tmp); + spe_selb(f, src, src, dst, tmp); break; case PIPE_BLEND_MAX: spe_cgt(f, tmp, src, dst); - spe_selb(f, src, src, dst, tmp); + spe_selb(f, src, dst, src, tmp); break; default: @@ -940,9 +952,9 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, struct spe_function *const f = &cb->code; /* This code generates a maximum of 3 (source alpha factor) - * + 3 (destination alpha factor) + (3 * 5) (source color factor) - * + (3 * 5) (destination color factor) + (4 * 2) (blend equation) - * + 4 (fragment mask) + 1 (return) = 49 instlructions. Round up to 64 to + * + 3 (destination alpha factor) + (3 * 6) (source color factor) + * + (3 * 6) (destination color factor) + (4 * 2) (blend equation) + * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to * make it a happy power-of-two. */ spe_init_func(f, 4 * 64); @@ -984,16 +996,57 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, && (b->alpha_func != PIPE_BLEND_MAX); - sF[0] = b->rgb_src_factor; - sF[1] = sF[0]; - sF[2] = sF[0]; - sF[3] = (b->alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) - ? PIPE_BLENDFACTOR_ONE : b->alpha_src_factor; + if (b->blend_enable) { + sF[0] = b->rgb_src_factor; + sF[1] = sF[0]; + sF[2] = sF[0]; + switch (b->alpha_src_factor & 0x0f) { + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + sF[3] = PIPE_BLENDFACTOR_ONE; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + sF[3] = b->alpha_src_factor + 1; + break; + default: + sF[3] = b->alpha_src_factor; + } - dF[0] = b->rgb_dst_factor; - dF[1] = dF[0]; - dF[2] = dF[0]; - dF[3] = b->rgb_dst_factor; + dF[0] = b->rgb_dst_factor; + dF[1] = dF[0]; + dF[2] = dF[0]; + switch (b->alpha_dst_factor & 0x0f) { + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + dF[3] = b->alpha_dst_factor + 1; + break; + default: + dF[3] = b->alpha_dst_factor; + } + + func[0] = b->rgb_func; + func[1] = func[0]; + func[2] = func[0]; + func[3] = b->alpha_func; + } else { + sF[0] = PIPE_BLENDFACTOR_ONE; + sF[1] = PIPE_BLENDFACTOR_ONE; + sF[2] = PIPE_BLENDFACTOR_ONE; + sF[3] = PIPE_BLENDFACTOR_ONE; + dF[0] = PIPE_BLENDFACTOR_ZERO; + dF[1] = PIPE_BLENDFACTOR_ZERO; + dF[2] = PIPE_BLENDFACTOR_ZERO; + dF[3] = PIPE_BLENDFACTOR_ZERO; + + func[0] = PIPE_BLEND_ADD; + func[1] = PIPE_BLEND_ADD; + func[2] = PIPE_BLEND_ADD; + func[3] = PIPE_BLEND_ADD; + } /* If alpha writing is enabled and the alpha blend mode requires use of @@ -1054,11 +1107,6 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, - func[0] = b->rgb_func; - func[1] = func[0]; - func[2] = func[0]; - func[3] = b->alpha_func; - for (i = 0; i < 4; ++i) { if ((b->colormask & (1U << i)) != 0) { emit_blend_calculation(f, @@ -1072,4 +1120,28 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, } spe_bi(f, 0, 0, 0); + +#if 0 + { + const uint32_t *p = f->store; + + printf("# %u instructions\n", f->csr - f->store); + printf("# blend (%sabled)\n", + (cb->base.blend_enable) ? "en" : "dis"); + printf("# RGB func / sf / df: %u %u %u\n", + cb->base.rgb_func, + cb->base.rgb_src_factor, + cb->base.rgb_dst_factor); + printf("# ALP func / sf / df: %u %u %u\n", + cb->base.alpha_func, + cb->base.alpha_src_factor, + cb->base.alpha_dst_factor); + + printf("\t.text\n"); + for (/* empty */; p < f->csr; p++) { + printf("\t.long\t0x%04x\n", *p); + } + fflush(stdout); + } +#endif } -- cgit v1.2.3 From 95e8cad9a38181052790b34837daa6717e0c5171 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 12:57:05 +1100 Subject: nv40: use new pipe_framebuffer width/height fields --- src/gallium/drivers/nv40/nv40_state_fb.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 71795ab1824..107b4400283 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -5,10 +5,12 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) { struct pipe_framebuffer_state *fb = &nv40->framebuffer; struct pipe_surface *rt[4], *zeta; - uint32_t rt_enable, rt_format, w, h; + uint32_t rt_enable, rt_format; int i, colour_format = 0, zeta_format = 0; struct nouveau_stateobj *so = so_new(64, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; rt_enable = 0; for (i = 0; i < 4; i++) { @@ -16,12 +18,8 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) continue; if (colour_format) { - assert(w == fb->cbufs[i]->width); - assert(h == fb->cbufs[i]->height); assert(colour_format == fb->cbufs[i]->format); } else { - w = fb->cbufs[i]->width; - h = fb->cbufs[i]->height; colour_format = fb->cbufs[i]->format; rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); rt[i] = fb->cbufs[i]; @@ -33,14 +31,6 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) rt_enable |= NV40TCL_RT_ENABLE_MRT; if (fb->zsbuf) { - if (colour_format) { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } else { - w = fb->zsbuf->width; - h = fb->zsbuf->height; - } - zeta_format = fb->zsbuf->format; zeta = fb->zsbuf; } -- cgit v1.2.3 From ebde8d3a1276f5c72d39936efabe72b5325f8e98 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 21:17:57 +1100 Subject: nv40: use num_cbufs --- src/gallium/drivers/nv40/nv40_state_fb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 107b4400283..93b79377c92 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -13,10 +13,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) unsigned h = fb->height; rt_enable = 0; - for (i = 0; i < 4; i++) { - if (!fb->cbufs[i]) - continue; - + for (i = 0; i < fb->num_cbufs; i++) { if (colour_format) { assert(colour_format == fb->cbufs[i]->format); } else { -- cgit v1.2.3 From 32162871396f65e8afdd90c602b1ccd01233c2e2 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 21:58:17 +1100 Subject: nv40: align each level to 64 pixels --- src/gallium/drivers/nv40/nv40_miptree.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 0dff9b3ad6d..01b3952fe66 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -12,7 +12,7 @@ nv40_miptree_layout(struct nv40_miptree *nv40mt) boolean swizzled = FALSE; uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; uint offset = 0; - int nr_faces, l, f; + int nr_faces, l, f, pitch; if (pt->target == PIPE_TEXTURE_CUBE) { nr_faces = 6; @@ -22,18 +22,18 @@ nv40_miptree_layout(struct nv40_miptree *nv40mt) } else { nr_faces = 1; } - + + pitch = pt->width[0]; for (l = 0; l <= pt->last_level; l++) { pt->width[l] = width; pt->height[l] = height; pt->depth[l] = depth; if (swizzled) - nv40mt->level[l].pitch = pt->width[l] * pt->cpp; - else - nv40mt->level[l].pitch = pt->width[0] * pt->cpp; - nv40mt->level[l].pitch = (nv40mt->level[l].pitch + 63) & ~63; + pitch = pt->width[l]; + pitch = (pitch + 63) & ~63; + nv40mt->level[l].pitch = pitch * pt->cpp; nv40mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); @@ -49,6 +49,7 @@ nv40_miptree_layout(struct nv40_miptree *nv40mt) offset += nv40mt->level[l].pitch * pt->height[l]; } } + NOUVEAU_ERR("\n"); nv40mt->total_size = offset; } -- cgit v1.2.3 From 46b8dd9c16755e97ae547c0a1823e338f7a7c791 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 22:01:22 +1100 Subject: nv40: s/free/FREE/ --- src/gallium/drivers/nv40/nv40_context.c | 2 +- src/gallium/drivers/nv40/nv40_draw.c | 2 +- src/gallium/drivers/nv40/nv40_fragprog.c | 4 ++-- src/gallium/drivers/nv40/nv40_miptree.c | 6 +++--- src/gallium/drivers/nv40/nv40_query.c | 2 +- src/gallium/drivers/nv40/nv40_vertprog.c | 8 ++++---- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 58627443b80..7fcf8b86197 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -40,7 +40,7 @@ nv40_destroy(struct pipe_context *pipe) if (nv40->draw) draw_destroy(nv40->draw); - free(nv40); + FREE(nv40); } struct pipe_context * diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index eb4f2395c48..7c5e0df5b85 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -158,7 +158,7 @@ nv40_render_reset_stipple_counter(struct draw_stage *draw) static void nv40_render_destroy(struct draw_stage *draw) { - free(draw); + FREE(draw); } static INLINE void diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 33aac37d569..2d82f86ef5c 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -801,7 +801,7 @@ nv40_fragprog_translate(struct nv40_context *nv40, fp->translated = TRUE; out_err: tgsi_parse_free(&parse); - free(fpc); + FREE(fpc); } static void @@ -903,7 +903,7 @@ nv40_fragprog_destroy(struct nv40_context *nv40, struct nv40_fragment_program *fp) { if (fp->insn_len) - free(fp->insn); + FREE(fp->insn); } struct nv40_state_entry nv40_state_fragprog = { diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 01b3952fe66..215b91fcd08 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -72,7 +72,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, mt->total_size); if (!mt->buffer) { - free(mt); + FREE(mt); return NULL; } @@ -93,9 +93,9 @@ nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) pipe_buffer_reference(ws, &nv40mt->buffer, NULL); for (l = 0; l <= mt->last_level; l++) { if (nv40mt->level[l].image_offset) - free(nv40mt->level[l].image_offset); + FREE(nv40mt->level[l].image_offset); } - free(nv40mt); + FREE(nv40mt); } } diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index 03178456246..a0a30724066 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -34,7 +34,7 @@ nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) if (q->object) nv40->nvws->res_free(&q->object); - free(q); + FREE(q); } static void diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 9f1ee575ce9..385c8aa0780 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -572,7 +572,7 @@ nv40_vertprog_translate(struct nv40_context *nv40, vpc->high_temp = -1; if (!nv40_vertprog_prepare(vpc)) { - free(vpc); + FREE(vpc); return; } @@ -628,7 +628,7 @@ nv40_vertprog_translate(struct nv40_context *nv40, vp->translated = TRUE; out_err: tgsi_parse_free(&parse); - free(vpc); + FREE(vpc); } static boolean @@ -805,9 +805,9 @@ void nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) { if (vp->nr_consts) - free(vp->consts); + FREE(vp->consts); if (vp->nr_insns) - free(vp->insns); + FREE(vp->insns); } struct nv40_state_entry nv40_state_vertprog = { -- cgit v1.2.3 From 0c91f5991dee7827ea915214a2a6973c2d6a7257 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 22:02:49 +1100 Subject: nv40: oops --- src/gallium/drivers/nv40/nv40_miptree.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 215b91fcd08..502edc16298 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -49,7 +49,6 @@ nv40_miptree_layout(struct nv40_miptree *nv40mt) offset += nv40mt->level[l].pitch * pt->height[l]; } } - NOUVEAU_ERR("\n"); nv40mt->total_size = offset; } -- cgit v1.2.3 From 308d7b171179f40b767b6590f71f969473ade25c Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 22:09:29 +1100 Subject: nv40: swtnl fallback on unsupported array format --- src/gallium/drivers/nv40/nv40_vbo.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index fad423fdf85..4a5b8119390 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -423,8 +423,12 @@ nv40_vbo_validate(struct nv40_context *nv40) continue; } - if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) - assert(0); + if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + nv40->fallback_swtnl |= NV40_NEW_ARRAYS; + so_ref(NULL, &vtxbuf); + so_ref(NULL, &vtxfmt); + return FALSE; + } so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, -- cgit v1.2.3 From 19b57690ad251e2b0714abe3a20893722bd99d54 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 22:23:19 +1100 Subject: nv40: call semi-magic 0x1d88 method --- src/gallium/drivers/nv40/nv40_state_fb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 93b79377c92..88baf61ffb3 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -128,6 +128,8 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); so_data (so, ((w - 1) << 16) | 0); so_data (so, ((h - 1) << 16) | 0); + so_method(so, nv40->screen->curie, 0x1d88, 1); + so_data (so, (1 << 12) | h); so_ref(so, &nv40->state.hw[NV40_STATE_FB]); return TRUE; -- cgit v1.2.3 From 3a0dd2e6d162fad6f98f337ee4f6b5dada1e37f3 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 23:03:25 +1100 Subject: nv40: ensure vb relocs don't end up on list for swtnl Avoids bo code bailing out because of mapped buffers being validated. --- src/gallium/drivers/nv40/nv40_draw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 7c5e0df5b85..d05e5ad1936 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -236,6 +236,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, if (!nv40_state_validate_swtnl(nv40)) return FALSE; + nv40->dirty &= ~(1ULL << NV40_STATE_VTXBUF); nv40_state_emit(nv40); for (i = 0; i < PIPE_ATTRIB_MAX; i++) { -- cgit v1.2.3 From 3be8785e08128bc2821c0cdff97f7adbb46c745b Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 23:09:01 +1100 Subject: nv40: add 16-bit SSCALED vb formats --- src/gallium/drivers/nv40/nv40_vbo.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 4a5b8119390..4fae10f74b4 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -28,6 +28,12 @@ nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) case PIPE_FORMAT_R8G8B8A8_UNORM: *fmt = NV40TCL_VTXFMT_TYPE_UBYTE; break; + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *fmt = 5; + break; default: pf_sprint_name(fs, pipe); NOUVEAU_ERR("Unknown format %s\n", fs); @@ -37,18 +43,22 @@ nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) switch (pipe) { case PIPE_FORMAT_R8_UNORM: case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_SSCALED: *ncomp = 1; break; case PIPE_FORMAT_R8G8_UNORM: case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_SSCALED: *ncomp = 2; break; case PIPE_FORMAT_R8G8B8_UNORM: case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R16G16B16_SSCALED: *ncomp = 3; break; case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SSCALED: *ncomp = 4; break; default: -- cgit v1.2.3 From 75b85fd33abe143d9cca6f8405f0a4243b6a5ddb Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 21 Mar 2008 23:24:16 +1100 Subject: nv40: fix bug in query code --- src/gallium/drivers/nv40/nv40_query.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index a0a30724066..15961591b96 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -45,6 +45,15 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + /* Happens when end_query() is called, then another begin_query() + * without querying the result in-between. For now we'll wait for + * the existing query to notify completion, but it could be better. + */ + if (q->object) { + uint64 tmp; + pipe->get_query_result(pipe, pq, 1, &tmp); + } + if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object)) assert(0); nv40->nvws->notifier_reset(nv40->screen->query, q->object->start); -- cgit v1.2.3 From b70a6babfbc035d64dbe35ac4bf9218e8232b435 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Fri, 21 Mar 2008 10:03:23 -0600 Subject: i915: added to-do note about setting the max_lod register to get proper min/mag filter selection --- src/gallium/drivers/i915simple/i915_state_sampler.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 9dbb1b1b238..84f6529a3ad 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -206,6 +206,12 @@ i915_update_texture(struct i915_context *i915, uint unit, | format | MS3_USE_FENCE_REGS); + /* + * XXX sampler->max_lod should be used to program the MAX_LOD field below. + * Also, when min_filter != mag_filter and there's just one mipmap level, + * set max_lod = 1 to make sure i915 chooses between min/mag filtering. + */ + /* MS4 state */ state[1] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) -- cgit v1.2.3 From 2902c164a22b6bcb6a42d7cd7fa82b608875093b Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Fri, 21 Mar 2008 10:23:52 -0700 Subject: cell: Remove unnecessary default_blend work-around I suspect that there was some other bug in the blend code-gen that made this work-around necessary. --- src/gallium/drivers/cell/spu/spu_main.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 41bebf5362b..0a490ab277f 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -64,21 +64,6 @@ static unsigned char depth_stencil_code_buffer[4 * 64] static unsigned char fb_blend_code_buffer[4 * 64] ALIGN16_ATTRIB; -static struct spu_blend_results -default_blend(qword frag_r, qword frag_g, qword frag_b, qword frag_a, - qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, - qword frag_mask) -{ - struct spu_blend_results result; - - result.r = si_selb(pixel_r, frag_r, frag_mask); - result.g = si_selb(pixel_g, frag_g, frag_mask); - result.b = si_selb(pixel_b, frag_b, frag_mask); - result.a = si_selb(pixel_a, frag_a, frag_mask); - - return result; -} - /** * Tell the PPU that this SPU has finished copying a buffer to @@ -285,9 +270,6 @@ cmd_state_blend(const struct cell_command_blend *state) spu.blend = (blend_func) fb_blend_code_buffer; spu.read_fb = state->read_fb; } else { - /* If there is no code, use the default; - */ - spu.blend = default_blend; spu.read_fb = FALSE; } } @@ -622,9 +604,6 @@ one_time_init(void) memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); invalidate_tex_cache(); - - spu.blend = default_blend; - spu.read_fb = FALSE; } -- cgit v1.2.3 From f140062b72ee2df05020d86abdc47336262494f9 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Fri, 21 Mar 2008 10:25:58 -0700 Subject: Tabs to spaces --- .../drivers/cell/ppu/cell_state_per_fragment.c | 54 +++++++++++----------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 988c251e205..f353aeab0a8 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -72,8 +72,8 @@ emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, int tmp_a = spe_allocate_available_register(f); int tmp_b = spe_allocate_available_register(f); union { - float f; - unsigned u; + float f; + unsigned u; } ref_val; boolean complement = FALSE; @@ -84,42 +84,42 @@ emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, switch (dsa->alpha.func) { case PIPE_FUNC_NOTEQUAL: - complement = TRUE; - /* FALLTHROUGH */ + complement = TRUE; + /* FALLTHROUGH */ case PIPE_FUNC_EQUAL: - spe_fceq(f, tmp_a, ref, alphas); - break; + spe_fceq(f, tmp_a, ref, alphas); + break; case PIPE_FUNC_LEQUAL: - complement = TRUE; - /* FALLTHROUGH */ + complement = TRUE; + /* FALLTHROUGH */ case PIPE_FUNC_GREATER: - spe_fcgt(f, tmp_a, ref, alphas); - break; + spe_fcgt(f, tmp_a, ref, alphas); + break; case PIPE_FUNC_LESS: - complement = TRUE; - /* FALLTHROUGH */ + complement = TRUE; + /* FALLTHROUGH */ case PIPE_FUNC_GEQUAL: - spe_fcgt(f, tmp_a, ref, alphas); - spe_fceq(f, tmp_b, ref, alphas); - spe_or(f, tmp_a, tmp_b, tmp_a); - break; + spe_fcgt(f, tmp_a, ref, alphas); + spe_fceq(f, tmp_b, ref, alphas); + spe_or(f, tmp_a, tmp_b, tmp_a); + break; case PIPE_FUNC_ALWAYS: case PIPE_FUNC_NEVER: default: - assert(0); - break; + assert(0); + break; } if (complement) { - spe_andc(f, mask, mask, tmp_a); + spe_andc(f, mask, mask, tmp_a); } else { - spe_and(f, mask, mask, tmp_a); + spe_and(f, mask, mask, tmp_a); } spe_release_register(f, ref); @@ -674,7 +674,7 @@ emit_alpha_factor_calculation(struct spe_function *f, static void emit_color_factor_calculation(struct spe_function *f, unsigned sF, unsigned mask, - const struct pipe_blend_color *blend_color, + const struct pipe_blend_color *blend_color, const int *src, const int *dst, int *factor) @@ -754,10 +754,10 @@ emit_color_factor_calculation(struct spe_function *f, /* FALLTHROUGH */ case PIPE_BLENDFACTOR_CONST_COLOR: for (i = 0; i < 3; i++) { - factor[i] = spe_allocate_available_register(f); + factor[i] = spe_allocate_available_register(f); - spe_il(f, factor[i], color.u[i] & 0x0ffff); - spe_ilh(f, factor[i], color.u[i] >> 16); + spe_il(f, factor[i], color.u[i] & 0x0ffff); + spe_ilh(f, factor[i], color.u[i] >> 16); } break; @@ -946,7 +946,7 @@ emit_blend_calculation(struct spe_function *f, */ void cell_generate_alpha_blend(struct cell_blend_state *cb, - const struct pipe_blend_color *blend_color) + const struct pipe_blend_color *blend_color) { struct pipe_blend_state *const b = &cb->base; struct spe_function *const f = &cb->code; @@ -1054,7 +1054,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, */ if (((b->colormask & 8) != 0) && need_alpha_factor) { src_factor[3] = emit_alpha_factor_calculation(f, sF[3], - blend_color->color[3], + blend_color->color[3], frag[3], pixel[3]); /* If the alpha destination blend factor is the same as the alpha source @@ -1063,7 +1063,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, dst_factor[3] = (dF[3] == sF[3]) ? src_factor[3] : emit_alpha_factor_calculation(f, dF[3], - blend_color->color[3], + blend_color->color[3], frag[3], pixel[3]); } -- cgit v1.2.3 From 600499cf888fee9a91ff3106beca939ea0c7b2bd Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Fri, 21 Mar 2008 11:15:49 -0700 Subject: cell: Change code-gen for CONST_COLOR blend factor Previously the constant color blend factor was compiled into the generated code. This meant that the code had to be regenerated each time the constant color was changed. This doesn't fit with the model used in Gallium. As-is, the code could be better. The constant color is loaded for every quad processed, even if it is not used. Also, if a lot of (1-x) blend factors are used, 1.0 will be loaded and reloaded into registers many times. --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 +- .../drivers/cell/ppu/cell_state_per_fragment.c | 93 +++++++++++----------- .../drivers/cell/ppu/cell_state_per_fragment.h | 3 +- src/gallium/drivers/cell/spu/spu_main.h | 2 + src/gallium/drivers/cell/spu/spu_tri.c | 2 + 5 files changed, 53 insertions(+), 49 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 86fcdcff1f6..d956d6fad4d 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -63,7 +63,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *state) draw_flush(cell->draw); if ((blend != NULL) && (blend->code.store == NULL)) { - cell_generate_alpha_blend(blend, &cell->blend_color); + cell_generate_alpha_blend(blend); } cell->blend = blend; diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index f353aeab0a8..c750b1d89dc 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -588,19 +588,13 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) */ static int emit_alpha_factor_calculation(struct spe_function *f, - unsigned factor, float const_alpha, - int src_alpha, int dst_alpha) + unsigned factor, + int src_alpha, int dst_alpha, int const_alpha) { - union { - float f; - unsigned u; - } alpha; int factor_reg; int tmp; - alpha.f = const_alpha; - switch (factor) { case PIPE_BLENDFACTOR_ONE: factor_reg = -1; @@ -621,13 +615,17 @@ emit_alpha_factor_calculation(struct spe_function *f, break; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - const_alpha = 1.0 - const_alpha; - /* FALLTHROUGH */ - case PIPE_BLENDFACTOR_CONST_ALPHA: factor_reg = spe_allocate_available_register(f); - spe_il(f, factor_reg, alpha.u & 0x0ffff); - spe_ilh(f, factor_reg, alpha.u >> 16); + tmp = spe_allocate_available_register(f); + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, const_alpha); + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor_reg = const_alpha; break; case PIPE_BLENDFACTOR_ZERO: @@ -674,24 +672,15 @@ emit_alpha_factor_calculation(struct spe_function *f, static void emit_color_factor_calculation(struct spe_function *f, unsigned sF, unsigned mask, - const struct pipe_blend_color *blend_color, const int *src, const int *dst, + const int *const_color, int *factor) { - union { - float f[4]; - unsigned u[4]; - } color; int tmp; unsigned i; - color.f[0] = blend_color->color[0]; - color.f[1] = blend_color->color[1]; - color.f[2] = blend_color->color[2]; - color.f[3] = blend_color->color[3]; - factor[0] = -1; factor[1] = -1; factor[2] = -1; @@ -748,29 +737,40 @@ emit_color_factor_calculation(struct spe_function *f, break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: - color.f[0] = 1.0 - color.f[0]; - color.f[1] = 1.0 - color.f[1]; - color.f[2] = 1.0 - color.f[2]; - /* FALLTHROUGH */ - case PIPE_BLENDFACTOR_CONST_COLOR: + tmp = spe_allocate_available_register(f); + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + for (i = 0; i < 3; i++) { factor[i] = spe_allocate_available_register(f); - spe_il(f, factor[i], color.u[i] & 0x0ffff); - spe_ilh(f, factor[i], color.u[i] >> 16); + spe_fs(f, factor[i], tmp, const_color[i]); + } + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_CONST_COLOR: + for (i = 0; i < 3; i++) { + factor[i] = const_color[i]; } break; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - color.f[3] = 1.0 - color.f[3]; - /* FALLTHROUGH */ - case PIPE_BLENDFACTOR_CONST_ALPHA: factor[0] = spe_allocate_available_register(f); factor[1] = factor[0]; factor[2] = factor[0]; - spe_il(f, factor[0], color.u[3] & 0x0ffff); - spe_ilh(f, factor[0], color.u[3] >> 16); + tmp = spe_allocate_available_register(f); + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, const_color[3]); + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor[0] = const_color[3]; + factor[1] = factor[0]; + factor[2] = factor[0]; break; case PIPE_BLENDFACTOR_ZERO: @@ -945,8 +945,7 @@ emit_blend_calculation(struct spe_function *f, * Generate code to perform alpha blending on the SPE */ void -cell_generate_alpha_blend(struct cell_blend_state *cb, - const struct pipe_blend_color *blend_color) +cell_generate_alpha_blend(struct cell_blend_state *cb) { struct pipe_blend_state *const b = &cb->base; struct spe_function *const f = &cb->code; @@ -972,7 +971,13 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, spe_allocate_register(f, 9), spe_allocate_register(f, 10), }; - const int mask = spe_allocate_register(f, 11); + const int const_color[4] = { + spe_allocate_register(f, 11), + spe_allocate_register(f, 12), + spe_allocate_register(f, 13), + spe_allocate_register(f, 14), + }; + const int mask = spe_allocate_register(f, 15); unsigned func[4]; unsigned sF[4]; unsigned dF[4]; @@ -1053,8 +1058,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, * the alpha factor, calculate the alpha factor. */ if (((b->colormask & 8) != 0) && need_alpha_factor) { - src_factor[3] = emit_alpha_factor_calculation(f, sF[3], - blend_color->color[3], + src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3], frag[3], pixel[3]); /* If the alpha destination blend factor is the same as the alpha source @@ -1062,8 +1066,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, */ dst_factor[3] = (dF[3] == sF[3]) ? src_factor[3] - : emit_alpha_factor_calculation(f, dF[3], - blend_color->color[3], + : emit_alpha_factor_calculation(f, dF[3], const_color[3], frag[3], pixel[3]); } @@ -1080,8 +1083,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, emit_color_factor_calculation(f, b->rgb_src_factor, b->colormask, - blend_color, - frag, pixel, src_factor); + frag, pixel, const_color, src_factor); } @@ -1101,8 +1103,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb, emit_color_factor_calculation(f, b->rgb_dst_factor, b->colormask, - blend_color, - frag, pixel, dst_factor); + frag, pixel, const_color, dst_factor); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h index 541c3b3be07..f699247f9e4 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h @@ -29,7 +29,6 @@ extern void cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); extern void -cell_generate_alpha_blend(struct cell_blend_state *cb, - const struct pipe_blend_color *blend_color); +cell_generate_alpha_blend(struct cell_blend_state *cb); #endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 56d0968676b..49f5d99674a 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -77,6 +77,7 @@ struct spu_blend_results { typedef struct spu_blend_results (*blend_func)( qword frag_r, qword frag_g, qword frag_b, qword frag_a, qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, + qword const_r, qword const_g, qword const_b, qword const_a, qword frag_mask); struct spu_framebuffer { @@ -108,6 +109,7 @@ struct spu_global boolean read_fb; blend_func blend; + qword const_blend_color[4] ALIGN16_ATTRIB; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct cell_command_texture texture; diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index c4272d6e93c..e6a1ce01dfd 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -356,6 +356,8 @@ emit_quad( int x, int y, mask_t mask ) const struct spu_blend_results result = (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3], soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3], + spu.const_blend_color[0], spu.const_blend_color[1], + spu.const_blend_color[2], spu.const_blend_color[3], (qword) mask); -- cgit v1.2.3 From 47531442e9c89c3ca764e9be225cfaec388609a1 Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Fri, 21 Mar 2008 11:20:49 -0700 Subject: cell: Generate blend / depth test code when state atom is created Code generation should be performed when the device-specific state atom is created, not when it is bound. --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 30 ++++++++------------------ 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index d956d6fad4d..00f4be7401e 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -1,5 +1,5 @@ /************************************************************************** - * + * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /* Authors: @@ -47,7 +47,7 @@ cell_create_blend_state(struct pipe_context *pipe, struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state)); (void) memcpy(cb, blend, sizeof(*blend)); - cb->code.store = NULL; + cell_generate_alpha_blend(cb); return cb; } @@ -57,16 +57,10 @@ static void cell_bind_blend_state(struct pipe_context *pipe, void *state) { struct cell_context *cell = cell_context(pipe); - struct cell_blend_state *blend = (struct cell_blend_state *) state; - draw_flush(cell->draw); - if ((blend != NULL) && (blend->code.store == NULL)) { - cell_generate_alpha_blend(blend); - } - - cell->blend = blend; + cell->blend = (struct cell_blend_state *) state; cell->dirty |= CELL_NEW_BLEND; } @@ -105,7 +99,7 @@ cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, MALLOC(sizeof(struct cell_depth_stencil_alpha_state)); (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil)); - cdsa->code.store = NULL; + cell_generate_depth_stencil_test(cdsa); return cdsa; } @@ -116,16 +110,11 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth_stencil) { struct cell_context *cell = cell_context(pipe); - struct cell_depth_stencil_alpha_state *cdsa = - (struct cell_depth_stencil_alpha_state *) depth_stencil; draw_flush(cell->draw); - if ((cdsa != NULL) && (cdsa->code.store == NULL)) { - cell_generate_depth_stencil_test(cdsa); - } - - cell->depth_stencil = cdsa; + cell->depth_stencil = + (struct cell_depth_stencil_alpha_state *) depth_stencil; cell->dirty |= CELL_NEW_DEPTH_STENCIL; } @@ -362,4 +351,3 @@ cell_init_state_functions(struct cell_context *cell) cell->pipe.set_scissor_state = cell_set_scissor_state; cell->pipe.set_viewport_state = cell_set_viewport_state; } - -- cgit v1.2.3 From 2f8f6c2918e721f6525b2124cde053b84beecafe Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Sat, 22 Mar 2008 09:15:23 -0600 Subject: gallium: remove temporary _screen suffix from function names --- src/gallium/drivers/softpipe/sp_texture.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index a98b3b1a4a8..256586ec886 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -81,8 +81,8 @@ softpipe_texture_layout(struct softpipe_texture * spt) static struct pipe_texture * -softpipe_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) +softpipe_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) { struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); @@ -110,8 +110,8 @@ softpipe_texture_create_screen(struct pipe_screen *screen, static void -softpipe_texture_release_screen(struct pipe_screen *screen, - struct pipe_texture **pt) +softpipe_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -136,9 +136,9 @@ softpipe_texture_release_screen(struct pipe_screen *screen, static struct pipe_surface * -softpipe_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) +softpipe_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) { struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = softpipe_texture(pt); @@ -197,7 +197,7 @@ softpipe_init_texture_funcs( struct softpipe_context *softpipe ) void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { - screen->texture_create = softpipe_texture_create_screen; - screen->texture_release = softpipe_texture_release_screen; - screen->get_tex_surface = softpipe_get_tex_surface_screen; + screen->texture_create = softpipe_texture_create; + screen->texture_release = softpipe_texture_release; + screen->get_tex_surface = softpipe_get_tex_surface; } -- cgit v1.2.3 From 62a8e7685f0567052f50a2b9aaa64054e5dfa0c6 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 23 Mar 2008 14:29:35 +1100 Subject: nv40: add dxtn formats (disabled) --- src/gallium/drivers/nv40/nv40_fragtex.c | 8 ++++---- src/gallium/drivers/nv40/nv40_screen.c | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 436f954ceca..980705f94ad 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -30,10 +30,10 @@ nv40_texture_formats[] = { _(U_A8_L8 , A8L8 , S1, S1, S1, S1, X, X, X, Y), _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), -// _(RGB_DXT1 , 0x86, S1, S1, S1, ONE, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT1 , 0x86, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT3 , 0x87, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT5 , 0x88, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W), {}, }; diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index bf30fbeca19..75b965bb9d1 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -110,6 +110,12 @@ nv40_screen_surface_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_U_A8_L8: case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_Z24S8_UNORM: +#if 0 /* state tracker not up to the task just yet. */ + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: +#endif return TRUE; default: break; -- cgit v1.2.3 From 01cb2cd93efe7ad94d7fd36aa5a776c2e3ab4c7d Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 23 Mar 2008 16:11:03 +1100 Subject: nv40: workaround fp result.data[0] clobberage Temporary, the fp reg handling will get reworked at some point in the near future. But before that, there's a few bugs to find. --- src/gallium/drivers/nv40/nv40_fragprog.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 2d82f86ef5c..4e425366985 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -58,7 +58,7 @@ temp(struct nv40_fpc *fpc) int idx; idx = fpc->temp_temp_count++; - idx += fpc->high_temp + 1; + idx += fpc->high_temp + 2; return nv40_sr(NV40SR_TEMP, idx); } @@ -817,7 +817,9 @@ nv40_fragprog_upload(struct nv40_context *nv40, #if 0 for (i = 0; i < fp->insn_len; i++) { + fflush(stdout); fflush(stderr); NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + fflush(stdout); fflush(stderr); } #endif -- cgit v1.2.3 From ba223e91df8f372a983e99c453947e4340d7d884 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 23 Mar 2008 16:26:42 +1100 Subject: nv40: fp: fix multiple refs to a single const withing an instruction --- src/gallium/drivers/nv40/nv40_fragprog.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 4e425366985..4cdbf962ab4 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -41,6 +41,7 @@ struct nv40_fpc { uint colour_id; unsigned inst_offset; + unsigned have_const; struct { int pipe; @@ -113,7 +114,11 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) sr |= (src.index << NV40_FP_REG_SRC_SHIFT); break; case NV40SR_CONST: - grow_insns(fpc, 4); + if (!fpc->have_const) { + grow_insns(fpc, 4); + fpc->have_const = 1; + } + hw = &fp->insn[fpc->inst_offset]; if (fpc->consts[src.index].pipe >= 0) { struct nv40_fragment_program_data *fpd; @@ -190,6 +195,7 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, uint32_t *hw; fpc->inst_offset = fp->insn_len; + fpc->have_const = 0; grow_insns(fpc, 4); hw = &fp->insn[fpc->inst_offset]; memset(hw, 0, sizeof(uint32_t) * 4); -- cgit v1.2.3 From 3158035154915ae11bebca045e3f0ce3b0e264ee Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 23 Mar 2008 16:31:31 +1100 Subject: nv40: catch fp extra-const cases where both const and immd have same idx --- src/gallium/drivers/nv40/nv40_fragprog.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 4cdbf962ab4..c854ae89f74 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -384,7 +384,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); struct nv40_sreg src[3], dst, tmp; int mask, sat, unit; - int ai = -1, ci = -1; + int ai = -1, ci = -1, ii = -1; int i; if (finst->Instruction.Opcode == TGSI_OPCODE_END) @@ -428,8 +428,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, } break; case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { + if ((ci == -1 && ii == -1) || + ci == fsrc->SrcRegister.Index) { ci = fsrc->SrcRegister.Index; src[i] = tgsi_src(fpc, fsrc); } else { @@ -438,6 +438,17 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, tgsi_src(fpc, fsrc), none, none); } break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->SrcRegister.Index) { + ii = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; case TGSI_FILE_TEMPORARY: /* handled above */ break; -- cgit v1.2.3 From 4dfcf912964f7c14c877abde04255a3896fb903d Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 23 Mar 2008 18:11:49 +1100 Subject: nv40: remove some badness from fp temp/result allocation Should hopefully be OK now (on the fragprog size) for MRT. The hack from a commit 01cb2cd93efe7ad94d7fd36aa5a776c2e3ab4c7d is no longer needed. --- src/gallium/drivers/nv40/nv40_fragprog.c | 158 +++++++++++++++++++++---------- 1 file changed, 107 insertions(+), 51 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index c854ae89f74..6d8a29ed9c0 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -33,12 +33,12 @@ struct nv40_fpc { uint attrib_map[PIPE_MAX_SHADER_INPUTS]; - int high_temp; - int temp_temp_count; - int num_regs; + unsigned r_temps; + unsigned r_temps_discard; + struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nv40_sreg *r_temp; - uint depth_id; - uint colour_id; + int num_regs; unsigned inst_offset; unsigned have_const; @@ -56,13 +56,26 @@ struct nv40_fpc { static INLINE struct nv40_sreg temp(struct nv40_fpc *fpc) { - int idx; + int idx = ffs(~fpc->r_temps) - 1; - idx = fpc->temp_temp_count++; - idx += fpc->high_temp + 2; + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nv40_sr(NV40SR_TEMP, 0); + } + + fpc->r_temps |= (1 << idx); + fpc->r_temps_discard |= (1 << idx); return nv40_sr(NV40SR_TEMP, idx); } +static INLINE void +release_temps(struct nv40_fpc *fpc) +{ + fpc->r_temps &= ~fpc->r_temps_discard; + fpc->r_temps_discard = 0; +} + static INLINE struct nv40_sreg constant(struct nv40_fpc *fpc, int pipe, float vals[4]) { @@ -254,18 +267,11 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) src = fpc->imm[fsrc->SrcRegister.Index]; break; case TGSI_FILE_TEMPORARY: - src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index + 1); - if (fpc->high_temp < src.index) - fpc->high_temp = src.index; + src = fpc->r_temp[fsrc->SrcRegister.Index]; break; - /* This is clearly insane, but gallium hands us shaders like this. - * Luckily fragprog results are just temp regs.. - */ + /* NV40 fragprog result regs are just temps, so this is simple */ case TGSI_FILE_OUTPUT: - if (fsrc->SrcRegister.Index == fpc->colour_id) - return nv40_sr(NV40SR_OUTPUT, 0); - else - return nv40_sr(NV40SR_OUTPUT, 1); + src = fpc->r_result[fsrc->SrcRegister.Index]; break; default: NOUVEAU_ERR("bad src file\n"); @@ -283,20 +289,11 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) static INLINE struct nv40_sreg tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { - int idx; - switch (fdst->DstRegister.File) { case TGSI_FILE_OUTPUT: - if (fdst->DstRegister.Index == fpc->colour_id) - return nv40_sr(NV40SR_OUTPUT, 0); - else - return nv40_sr(NV40SR_OUTPUT, 1); - break; + return fpc->r_result[fdst->DstRegister.Index]; case TGSI_FILE_TEMPORARY: - idx = fdst->DstRegister.Index + 1; - if (fpc->high_temp < idx) - fpc->high_temp = idx; - return nv40_sr(NV40SR_TEMP, idx); + return fpc->r_temp[fdst->DstRegister.Index]; case TGSI_FILE_NULL: return nv40_sr(NV40SR_NONE, 0); default: @@ -390,7 +387,6 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, if (finst->Instruction.Opcode == TGSI_OPCODE_END) return TRUE; - fpc->temp_temp_count = 0; for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; @@ -670,6 +666,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, return FALSE; } + release_temps(fpc); return TRUE; } @@ -719,45 +716,50 @@ static boolean nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, const struct tgsi_full_declaration *fdec) { + unsigned idx = fdec->u.DeclarationRange.First; + unsigned hw; + switch (fdec->Semantic.SemanticName) { case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->u.DeclarationRange.First; + hw = 1; break; case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->u.DeclarationRange.First; + switch (idx) { + case 0: hw = 0; break; + case 1: hw = 2; break; + case 2: hw = 3; break; + case 3: hw = 4; break; + default: + NOUVEAU_ERR("bad rcol index\n"); + return FALSE; + } break; default: NOUVEAU_ERR("bad output semantic\n"); return FALSE; } + fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + fpc->r_temps |= (1 << hw); return TRUE; } -static void -nv40_fragprog_translate(struct nv40_context *nv40, - struct nv40_fragment_program *fp) +static boolean +nv40_fragprog_prepare(struct nv40_fpc *fpc) { - struct tgsi_parse_context parse; - struct nv40_fpc *fpc = NULL; + struct tgsi_parse_context p; + int high_temp = -1, i; - fpc = CALLOC(1, sizeof(struct nv40_fpc)); - if (!fpc) - return; - fpc->fp = fp; - fpc->high_temp = -1; - fpc->num_regs = 2; + tgsi_parse_init(&p, fpc->fp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; - tgsi_parse_init(&parse, fp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { + tgsi_parse_token(&p); + switch(tok->Token.Type) { case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; + fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { case TGSI_FILE_INPUT: if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) @@ -767,6 +769,12 @@ nv40_fragprog_translate(struct nv40_context *nv40, if (!nv40_fragprog_parse_decl_output(fpc, fdec)) goto out_err; break; + case TGSI_FILE_TEMPORARY: + if (fdec->u.DeclarationRange.Last > high_temp) { + high_temp = + fdec->u.DeclarationRange.Last; + } + break; default: break; } @@ -777,7 +785,7 @@ nv40_fragprog_translate(struct nv40_context *nv40, struct tgsi_full_immediate *imm; float vals[4]; - imm = &parse.FullToken.FullImmediate; + imm = &p.FullToken.FullImmediate; assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); @@ -788,6 +796,52 @@ nv40_fragprog_translate(struct nv40_context *nv40, fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); } break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (++high_temp) { + fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + for (i = 0; i < high_temp; i++) + fpc->r_temp[i] = temp(fpc); + fpc->r_temps_discard = 0; + } + + return TRUE; + +out_err: + if (fpc->r_temp) + FREE(fpc->r_temp); + tgsi_parse_free(&p); + return FALSE; +} + +static void +nv40_fragprog_translate(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nv40_fpc *fpc = NULL; + + fpc = CALLOC(1, sizeof(struct nv40_fpc)); + if (!fpc) + return; + fpc->fp = fp; + fpc->num_regs = 2; + + if (!nv40_fragprog_prepare(fpc)) { + FREE(fpc); + return; + } + + tgsi_parse_init(&parse, fp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: { const struct tgsi_full_instruction *finst; @@ -818,6 +872,8 @@ nv40_fragprog_translate(struct nv40_context *nv40, fp->translated = TRUE; out_err: tgsi_parse_free(&parse); + if (fpc->r_temp) + FREE(fpc->r_temp); FREE(fpc); } -- cgit v1.2.3 From e1ad8c232ba31985a6f9e5b76279f2f131312d1d Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 23 Mar 2008 18:20:04 +1100 Subject: nv40: fix fp depth write --- src/gallium/drivers/nv40/nv40_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 6d8a29ed9c0..87bca41cf64 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -724,7 +724,7 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, hw = 1; break; case TGSI_SEMANTIC_COLOR: - switch (idx) { + switch (fdec->Semantic.SemanticIndex) { case 0: hw = 0; break; case 1: hw = 2; break; case 2: hw = 3; break; -- cgit v1.2.3 From 17491ea27ffa6b48e31c0ad6ad6f795dd000c476 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 23 Mar 2008 19:08:59 +1100 Subject: nv40: vp reg changes similar to recent fp changes --- src/gallium/drivers/nv40/nv40_vertprog.c | 106 +++++++++++++++++++++++++------ 1 file changed, 86 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 385c8aa0780..e5ce8943752 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -42,10 +42,11 @@ struct nv40_vpc { struct nv40_vertex_program_exec *vpi; - unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; - - int high_temp; - int temp_temp_count; + unsigned r_temps; + unsigned r_temps_discard; + struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nv40_sreg *r_address; + struct nv40_sreg *r_temp; struct nv40_sreg *imm; unsigned nr_imm; @@ -54,13 +55,26 @@ struct nv40_vpc { static struct nv40_sreg temp(struct nv40_vpc *vpc) { - int idx; + int idx = ffs(~vpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nv40_sr(NV40SR_TEMP, 0); + } - idx = vpc->temp_temp_count++; - idx += vpc->high_temp + 1; + vpc->r_temps |= (1 << idx); + vpc->r_temps_discard |= (1 << idx); return nv40_sr(NV40SR_TEMP, idx); } +static INLINE void +release_temps(struct nv40_vpc *vpc) +{ + vpc->r_temps &= ~vpc->r_temps_discard; + vpc->r_temps_discard = 0; +} + static struct nv40_sreg constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) { @@ -257,9 +271,7 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { src = vpc->imm[fsrc->SrcRegister.Index]; break; case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->SrcRegister.Index) - vpc->high_temp = fsrc->SrcRegister.Index; - src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index); + src = vpc->r_temp[fsrc->SrcRegister.Index]; break; default: NOUVEAU_ERR("bad src file\n"); @@ -281,14 +293,13 @@ tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { switch (fdst->DstRegister.File) { case TGSI_FILE_OUTPUT: - dst = nv40_sr(NV40SR_OUTPUT, - vpc->output_map[fdst->DstRegister.Index]); - + dst = vpc->r_result[fdst->DstRegister.Index]; break; case TGSI_FILE_TEMPORARY: - dst = nv40_sr(NV40SR_TEMP, fdst->DstRegister.Index); - if (vpc->high_temp < dst.index) - vpc->high_temp = dst.index; + dst = vpc->r_temp[fdst->DstRegister.Index]; + break; + case TGSI_FILE_ADDRESS: + dst = vpc->r_address[fdst->DstRegister.Index]; break; default: NOUVEAU_ERR("bad dst file\n"); @@ -323,7 +334,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, if (finst->Instruction.Opcode == TGSI_OPCODE_END) return TRUE; - vpc->temp_temp_count = 0; for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; @@ -471,6 +481,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, return FALSE; } + release_temps(vpc); return TRUE; } @@ -478,6 +489,7 @@ static boolean nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, const struct tgsi_full_declaration *fdec) { + unsigned idx = fdec->u.DeclarationRange.First; int hw; switch (fdec->Semantic.SemanticName) { @@ -525,7 +537,7 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, return FALSE; } - vpc->output_map[fdec->u.DeclarationRange.First] = hw; + vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); return TRUE; } @@ -533,7 +545,7 @@ static boolean nv40_vertprog_prepare(struct nv40_vpc *vpc) { struct tgsi_parse_context p; - int nr_imm = 0; + int high_temp = -1, high_addr = -1, nr_imm = 0, i; tgsi_parse_init(&p, vpc->vp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { @@ -544,6 +556,48 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) case TGSI_TOKEN_TYPE_IMMEDIATE: nr_imm++; break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (fdec->u.DeclarationRange.Last > high_temp) { + high_temp = + fdec->u.DeclarationRange.Last; + } + break; +#if 0 /* this would be nice.. except gallium doesn't track it */ + case TGSI_FILE_ADDRESS: + if (fdec->u.DeclarationRange.Last > high_addr) { + high_addr = + fdec->u.DeclarationRange.Last; + } + break; +#endif + default: + break; + } + } + break; +#if 1 /* yay, parse instructions looking for address regs instead */ + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + const struct tgsi_full_dst_register *fdst; + + finst = &p.FullToken.FullInstruction; + fdst = &finst->FullDstRegisters[0]; + + if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) { + if (fdst->DstRegister.Index > high_addr) + high_addr = fdst->DstRegister.Index; + } + + } + break; +#endif default: break; } @@ -555,6 +609,19 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) assert(vpc->imm); } + if (++high_temp) { + vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + for (i = 0; i < high_temp; i++) + vpc->r_temp[i] = temp(vpc); + } + + if (++high_addr) { + vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg)); + for (i = 0; i < high_addr; i++) + vpc->r_address[i] = temp(vpc); + } + + vpc->r_temps_discard = 0; return TRUE; } @@ -569,7 +636,6 @@ nv40_vertprog_translate(struct nv40_context *nv40, if (!vpc) return; vpc->vp = vp; - vpc->high_temp = -1; if (!nv40_vertprog_prepare(vpc)) { FREE(vpc); -- cgit v1.2.3 From 312cbc5a5c7416745976c2281a9bbce6e3332965 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sun, 23 Mar 2008 18:30:53 +0000 Subject: gallium: wrap decls in extern "C" --- src/gallium/drivers/softpipe/sp_winsys.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h index fc372dba27b..291825dfe22 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -37,6 +37,12 @@ #include "pipe/p_compiler.h" /* for boolean */ + +#ifdef __cplusplus +extern "C" { +#endif + + enum pipe_format; struct softpipe_winsys { @@ -60,4 +66,8 @@ struct pipe_screen * softpipe_create_screen(struct pipe_winsys *); +#ifdef __cplusplus +} +#endif + #endif /* SP_WINSYS_H */ -- cgit v1.2.3 From 799d3bce06e998e51ad8df6bcadeb41bd061801e Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 24 Mar 2008 13:41:40 +1100 Subject: nv40: respect rasterizer cso bypass_clipping flag --- src/gallium/drivers/nv40/nv40_context.h | 1 + src/gallium/drivers/nv40/nv40_state_viewport.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 02ca20b8014..b50f6f8fefc 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -97,6 +97,7 @@ struct nv40_blend_state { struct nv40_state { unsigned scissor_enabled; unsigned stipple_enabled; + unsigned viewport_bypass; unsigned fp_samplers; uint64_t dirty; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 9e5c7a72a7f..1b6248e5b8e 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -5,8 +5,20 @@ nv40_state_viewport_validate(struct nv40_context *nv40) { struct nouveau_stateobj *so = so_new(11, 0); struct pipe_viewport_state *vpt = &nv40->viewport; + unsigned bypass; - if (nv40->render_mode == HW) { + if (nv40->render_mode == HW && !nv40->rasterizer->pipe.bypass_clipping) + bypass = 0; + else + bypass = 1; + + if (nv40->state.hw[NV40_STATE_VIEWPORT] && + (bypass || !(nv40->dirty & NV40_NEW_VIEWPORT)) && + nv40->state.viewport_bypass == bypass) + return FALSE; + nv40->state.viewport_bypass = bypass; + + if (!bypass) { so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); so_data (so, fui(vpt->translate[0])); @@ -48,7 +60,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40) struct nv40_state_entry nv40_state_viewport = { .validate = nv40_state_viewport_validate, .dirty = { - .pipe = NV40_NEW_VIEWPORT, + .pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST, .hw = NV40_STATE_VIEWPORT } }; -- cgit v1.2.3 From 5ce37d42b3268102caf8225be4ca18418bfab7c4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 24 Mar 2008 13:45:38 +1100 Subject: nv40: maintain pipe_surface status field --- src/gallium/drivers/nv40/nv40_clear.c | 1 + src/gallium/drivers/nv40/nv40_state_emit.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c index 2c4e8f01fda..59efd620e32 100644 --- a/src/gallium/drivers/nv40/nv40_clear.c +++ b/src/gallium/drivers/nv40/nv40_clear.c @@ -9,4 +9,5 @@ nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_CLEAR; } diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 056238cc836..a9a9abc9220 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -40,6 +40,14 @@ static void nv40_state_do_validate(struct nv40_context *nv40, struct nv40_state_entry **states) { + const struct pipe_framebuffer_state *fb = &nv40->framebuffer; + unsigned i; + + for (i = 0; i < fb->num_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + while (*states) { struct nv40_state_entry *e = *states; -- cgit v1.2.3 From ce64778ed1f436d81178862dc0032dfd16b4b7de Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 25 Mar 2008 12:57:36 +1100 Subject: nv40: respect do_flip in surface_copy() --- src/gallium/drivers/nv40/nv40_surface.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c index e8a60116964..c0d135eb36d 100644 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -42,8 +42,20 @@ nv40_surface_copy(struct pipe_context *pipe, unsigned do_flip, struct nv40_context *nv40 = nv40_context(pipe); struct nouveau_winsys *nvws = nv40->nvws; - nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, - width, height); + if (do_flip) { + /*XXX: This dodgyness will do for now for correctness. But, + * need to investigate whether the 2D engine is able to + * manage a flip (perhaps SIFM?), if not, use the 3D engine + */ + desty += height; + while (height--) { + nvws->surface_copy(nvws, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + } else { + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); + } } static void -- cgit v1.2.3 From 9f7cd571e01569abd0898fdee62d0e0f946bf3a8 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 25 Mar 2008 19:18:56 -0600 Subject: gallium: added fragment emit/write debug counters --- src/gallium/drivers/softpipe/sp_prim_setup.c | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 6a81e4d8cc2..c7eb12b3bb7 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -44,6 +44,7 @@ #include "pipe/p_shader_tokens.h" #define DEBUG_VERTS 0 +#define DEBUG_FRAGS 0 /** * Triangle edge info @@ -92,6 +93,11 @@ struct setup_stage { unsigned y_flags; unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ } span; + +#if DEBUG_FRAGS + uint numFragsEmitted; /**< per primitive */ + uint numFragsWritten; /**< per primitive */ +#endif }; @@ -160,7 +166,20 @@ emit_quad( struct setup_stage *setup, int x, int y, unsigned mask ) setup->quad.x0 = x; setup->quad.y0 = y; setup->quad.mask = mask; +#if DEBUG_FRAGS + if (mask & 1) setup->numFragsEmitted++; + if (mask & 2) setup->numFragsEmitted++; + if (mask & 4) setup->numFragsEmitted++; + if (mask & 8) setup->numFragsEmitted++; +#endif sp->quad.first->run(sp->quad.first, &setup->quad); +#if DEBUG_FRAGS + mask = setup->quad.mask; + if (mask & 1) setup->numFragsWritten++; + if (mask & 2) setup->numFragsWritten++; + if (mask & 4) setup->numFragsWritten++; + if (mask & 8) setup->numFragsWritten++; +#endif } @@ -674,6 +693,11 @@ static void setup_tri( struct draw_stage *stage, debug_printf("%s\n", __FUNCTION__ ); */ +#if DEBUG_FRAGS + setup->numFragsEmitted = 0; + setup->numFragsWritten = 0; +#endif + setup_sort_vertices( setup, prim ); setup_tri_coefficients( setup ); setup_tri_edges( setup ); @@ -702,6 +726,12 @@ static void setup_tri( struct draw_stage *stage, } flush_spans( setup ); + +#if DEBUG_FRAGS + printf("Tri: %u frags emitted, %u written\n", + setup->numFragsEmitted, + setup->numFragsWritten); +#endif } -- cgit v1.2.3 From 4abe1eb980ed76d2b2d3383eaab520d0aa2ae6f4 Mon Sep 17 00:00:00 2001 From: Michel Dänzer <michel@tungstengraphics.com> Date: Wed, 26 Mar 2008 09:36:40 +0000 Subject: gallium: Change pipe->flush() interface to optionally return a fence. The cell driver still uses an internal CELL_FLUSH_WAIT flag, in the long run proper fencing should be implemented for it. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- src/gallium/drivers/cell/ppu/cell_flush.c | 15 +++++++--- src/gallium/drivers/cell/ppu/cell_flush.h | 5 +++- src/gallium/drivers/cell/ppu/cell_vbuf.c | 2 +- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 2 +- src/gallium/drivers/failover/fo_context.c | 4 +-- src/gallium/drivers/i915simple/i915_batch.h | 4 +-- src/gallium/drivers/i915simple/i915_blit.c | 4 +-- src/gallium/drivers/i915simple/i915_flush.c | 14 +++------- src/gallium/drivers/i915simple/i915_prim_emit.c | 2 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- src/gallium/drivers/i915simple/i915_state_emit.c | 2 +- src/gallium/drivers/i915simple/i915_winsys.h | 5 ++-- src/gallium/drivers/i965simple/brw_flush.c | 13 ++------- src/gallium/drivers/softpipe/sp_flush.c | 10 +++---- src/gallium/drivers/softpipe/sp_flush.h | 4 ++- src/gallium/include/pipe/p_context.h | 5 ++-- src/gallium/include/pipe/p_defines.h | 3 +- src/gallium/winsys/dri/intel/intel_context.c | 4 +-- src/gallium/winsys/dri/intel/intel_winsys_i915.c | 25 ++++++++++------- src/gallium/winsys/xlib/xm_winsys.c | 31 +++++++++++++++++++++ src/mesa/state_tracker/st_cb_accum.c | 2 +- src/mesa/state_tracker/st_cb_drawpixels.c | 4 +-- src/mesa/state_tracker/st_cb_fbo.c | 2 +- src/mesa/state_tracker/st_cb_flush.c | 34 +++++++++++++---------- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/state_tracker/st_framebuffer.c | 3 +- src/mesa/state_tracker/st_public.h | 5 +++- 28 files changed, 127 insertions(+), 83 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index e129c062be6..ed12768e7f8 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -935,7 +935,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, 4, /* verts */ 2); /* attribs/vert */ - pipe->flush(pipe, PIPE_FLUSH_WAIT); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); /* need to signal that the texture has changed _after_ rendering to it */ pipe->texture_update(pipe, pt, face, (1 << dstLevel)); diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index 66a5627d844..3aaf3de6684 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -35,12 +35,19 @@ void -cell_flush(struct pipe_context *pipe, unsigned flags) +cell_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) { struct cell_context *cell = cell_context(pipe); + if (fence) { + *fence = NULL; + /* XXX: Implement real fencing */ + flags |= CELL_FLUSH_WAIT; + } + if (flags & PIPE_FLUSH_SWAPBUFFERS) - flags |= PIPE_FLUSH_WAIT; + flags |= CELL_FLUSH_WAIT; draw_flush( cell->draw ); cell_flush_int(pipe, flags); @@ -58,7 +65,7 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags) ASSERT(!flushing); flushing = TRUE; - if (flags & PIPE_FLUSH_WAIT) { + if (flags & CELL_FLUSH_WAIT) { uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t)); *cmd = CELL_CMD_FINISH; } @@ -72,7 +79,7 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags) } #endif - if (flags & PIPE_FLUSH_WAIT) { + if (flags & CELL_FLUSH_WAIT) { /* Wait for ack */ for (i = 0; i < cell->num_spus; i++) { uint k = wait_mbox_message(cell_global.spe_contexts[i]); diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h index 7f940ae76b6..8f0645c4293 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -29,8 +29,11 @@ #ifndef CELL_FLUSH #define CELL_FLUSH +#define CELL_FLUSH_WAIT 0x80000000 + extern void -cell_flush(struct pipe_context *pipe, unsigned flags); +cell_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); extern void cell_flush_int(struct pipe_context *pipe, unsigned flags); diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index cc727ff4edb..3a181b585c5 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -243,7 +243,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, #if 0 /* helpful for debug */ - cell_flush_int(&cell->pipe, PIPE_FLUSH_WAIT); + cell_flush_int(&cell->pipe, CELL_FLUSH_WAIT); #endif } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index f5c27852c14..b418857ccd8 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -133,7 +133,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) vs->num_elts = n; send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); - cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT); + cell_flush_int(& cell->pipe, CELL_FLUSH_WAIT); } draw->vs.post_nr = draw->vs.queue_nr; diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index afc0d7eb1ec..cb95ba516f9 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -69,7 +69,7 @@ static boolean failover_draw_elements( struct pipe_context *pipe, start, count )) { - failover->hw->flush( failover->hw, ~0 ); + failover->hw->flush( failover->hw, ~0, NULL ); failover->mode = FO_SW; } } @@ -92,7 +92,7 @@ static boolean failover_draw_elements( struct pipe_context *pipe, * intervening flush. Unlikely to be much performance impact to * this: */ - failover->sw->flush( failover->sw, ~0 ); + failover->sw->flush( failover->sw, ~0, NULL ); } return TRUE; diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h index fb88cd6db0d..4ea06ce02bf 100644 --- a/src/gallium/drivers/i915simple/i915_batch.h +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -44,9 +44,9 @@ #define ADVANCE_BATCH() -#define FLUSH_BATCH() do { \ +#define FLUSH_BATCH(fence) do { \ if (0) i915_dump_batchbuffer( i915 ); \ - i915->winsys->batch_flush( i915->winsys ); \ + i915->winsys->batch_flush( i915->winsys, fence ); \ i915->batch_start = NULL; \ i915->hardware_dirty = ~0; \ } while (0) diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c index db4671ff553..24449e3fb33 100644 --- a/src/gallium/drivers/i915simple/i915_blit.c +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -70,7 +70,7 @@ i915_fill_blit(struct i915_context *i915, if (!BEGIN_BATCH(6, 1)) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); assert(BEGIN_BATCH(6, 1)); } OUT_BATCH(CMD); @@ -145,7 +145,7 @@ i915_copy_blit( struct i915_context *i915, if (!BEGIN_BATCH(8, 2)) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); assert(BEGIN_BATCH(8, 2)); } OUT_BATCH(CMD); diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c index 96a54281f11..7d23e6b6b90 100644 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -37,11 +37,9 @@ #include "i915_batch.h" -/** - * In future we may want a fence-like interface instead of finish. - */ static void i915_flush( struct pipe_context *pipe, - unsigned flags ) + unsigned flags, + struct pipe_fence_handle **fence ) { struct i915_context *i915 = i915_context(pipe); @@ -60,7 +58,7 @@ static void i915_flush( struct pipe_context *pipe, flush |= FLUSH_MAP_CACHE; if (!BEGIN_BATCH(1, 0)) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); assert(BEGIN_BATCH(1, 0)); } OUT_BATCH( flush ); @@ -69,11 +67,7 @@ static void i915_flush( struct pipe_context *pipe, /* If there are no flags, just flush pending commands to hardware: */ - FLUSH_BATCH(); - - if (flags & PIPE_FLUSH_WAIT) { - i915->winsys->batch_finish(i915->winsys); - } + FLUSH_BATCH(fence); } diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index d8de5178f60..b6fb0a6d88f 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -140,7 +140,7 @@ emit_prim( struct draw_stage *stage, assert(vertex_size >= 12); /* never smaller than 12 bytes */ if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index eb64f51943b..7fb2adbb53b 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -161,7 +161,7 @@ i915_vbuf_render_draw( struct vbuf_render *render, i915_emit_hardware_state( i915 ); if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index a7498d22b7e..6f947d43468 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -115,7 +115,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) #endif if(!BEGIN_BATCH(dwords, relocs)) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); assert(BEGIN_BATCH(dwords, relocs)); } diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h index aea30032818..5e16543f4ec 100644 --- a/src/gallium/drivers/i915simple/i915_winsys.h +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -56,6 +56,7 @@ extern "C" { */ struct pipe_buffer; +struct pipe_fence_handle; struct pipe_winsys; struct pipe_screen; @@ -103,8 +104,8 @@ struct i915_winsys { unsigned access_flags, unsigned delta ); - void (*batch_flush)( struct i915_winsys *sws ); - void (*batch_finish)( struct i915_winsys *sws ); + void (*batch_flush)( struct i915_winsys *sws, + struct pipe_fence_handle **fence ); }; #define I915_BUFFER_ACCESS_WRITE 0x1 diff --git a/src/gallium/drivers/i965simple/brw_flush.c b/src/gallium/drivers/i965simple/brw_flush.c index 5216c680cf6..e6001c30d94 100644 --- a/src/gallium/drivers/i965simple/brw_flush.c +++ b/src/gallium/drivers/i965simple/brw_flush.c @@ -36,14 +36,11 @@ #include "brw_batch.h" -/** - * In future we may want a fence-like interface instead of finish. - */ static void brw_flush( struct pipe_context *pipe, - unsigned flags ) + unsigned flags, + struct pipe_fence_handle **fence ) { struct brw_context *brw = brw_context(pipe); - struct pipe_fence_handle *fence; /* Do we need to emit an MI_FLUSH command to flush the hardware * caches? @@ -65,11 +62,7 @@ static void brw_flush( struct pipe_context *pipe, /* If there are no flags, just flush pending commands to hardware: */ - FLUSH_BATCH( &fence ); - - if (flags & PIPE_FLUSH_WAIT) { -// brw->winsys->wait_fence(brw->winsys, fence); - } + FLUSH_BATCH( fence ); } diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 2cbd0d7cabe..0625b69099b 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -40,13 +40,10 @@ #include "sp_winsys.h" -/* There will be actual work to do here. In future we may want a - * fence-like interface instead of finish, and perhaps flush will take - * flags to indicate what type of flush is required. - */ void softpipe_flush( struct pipe_context *pipe, - unsigned flags ) + unsigned flags, + struct pipe_fence_handle **fence ) { struct softpipe_context *softpipe = softpipe_context(pipe); uint i; @@ -72,5 +69,8 @@ softpipe_flush( struct pipe_context *pipe, * to unmap surfaces when flushing. */ softpipe_unmap_surfaces(softpipe); + + if (fence) + *fence = NULL; } diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h index 34ec617866b..68d9b5fa835 100644 --- a/src/gallium/drivers/softpipe/sp_flush.h +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -29,7 +29,9 @@ #define SP_FLUSH_H struct pipe_context; +struct pipe_fence_handle; -void softpipe_flush(struct pipe_context *pipe, unsigned flags ); +void softpipe_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); #endif diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index a3824601be9..b2e49bef4cf 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -37,7 +37,7 @@ extern "C" { struct pipe_screen; - +struct pipe_fence_handle; struct pipe_state_cache; /* Opaque driver handles: @@ -202,7 +202,8 @@ struct pipe_context { /* Flush rendering: */ void (*flush)( struct pipe_context *pipe, - unsigned flags ); + unsigned flags, + struct pipe_fence_handle **fence ); }; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index bc938ba2538..586951d9566 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -201,8 +201,7 @@ enum pipe_texture_target { */ #define PIPE_FLUSH_RENDER_CACHE 0x1 #define PIPE_FLUSH_TEXTURE_CACHE 0x2 -#define PIPE_FLUSH_WAIT 0x4 -#define PIPE_FLUSH_SWAPBUFFERS 0x8 +#define PIPE_FLUSH_SWAPBUFFERS 0x4 /** diff --git a/src/gallium/winsys/dri/intel/intel_context.c b/src/gallium/winsys/dri/intel/intel_context.c index 79b320c6bf4..8eba33c3139 100644 --- a/src/gallium/winsys/dri/intel/intel_context.c +++ b/src/gallium/winsys/dri/intel/intel_context.c @@ -228,7 +228,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) assert(intel); /* should never be null */ if (intel) { - st_flush(intel->st, PIPE_FLUSH_WAIT); + st_finish(intel->st); intel_batchbuffer_free(intel->batch); @@ -256,7 +256,7 @@ GLboolean intelUnbindContext(__DRIcontextPrivate * driContextPriv) { struct intel_context *intel = intel_context(driContextPriv); - st_flush(intel->st, 0x0); + st_flush(intel->st, PIPE_FLUSH_RENDER_CACHE, NULL); /* XXX make_current(NULL)? */ return GL_TRUE; } diff --git a/src/gallium/winsys/dri/intel/intel_winsys_i915.c b/src/gallium/winsys/dri/intel/intel_winsys_i915.c index 2def1afc31e..4d183db7c37 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_i915.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_i915.c @@ -39,12 +39,14 @@ #include "intel_winsys.h" #include "pipe/p_util.h" +#include "pipe/p_winsys.h" #include "i915simple/i915_winsys.h" #include "i915simple/i915_screen.h" struct intel_i915_winsys { struct i915_winsys winsys; /**< batch buffer funcs */ + struct pipe_winsys *pws; struct intel_context *intel; }; @@ -112,19 +114,22 @@ static void intel_i915_batch_reloc( struct i915_winsys *sws, -static void intel_i915_batch_flush( struct i915_winsys *sws ) +static void intel_i915_batch_flush( struct i915_winsys *sws, + struct pipe_fence_handle **fence ) { - struct intel_context *intel = intel_i915_winsys(sws)->intel; + struct intel_i915_winsys *iws = intel_i915_winsys(sws); + struct intel_context *intel = iws->intel; + union { + struct _DriFenceObject *dri; + struct pipe_fence_handle *pipe; + } fu; - intel_batchbuffer_flush( intel->batch ); -// if (0) intel_i915_batch_wait_idle( sws ); -} + fu.dri = intel_batchbuffer_flush( intel->batch ); + if (fu.dri) + iws->pws->fence_reference(iws->pws, fence, fu.pipe); -static void intel_i915_batch_finish( struct i915_winsys *sws ) -{ - struct intel_context *intel = intel_i915_winsys(sws)->intel; - intel_batchbuffer_finish( intel->batch ); +// if (0) intel_i915_batch_wait_idle( sws ); } @@ -145,7 +150,7 @@ intel_create_i915simple( struct intel_context *intel, iws->winsys.batch_dword = intel_i915_batch_dword; iws->winsys.batch_reloc = intel_i915_batch_reloc; iws->winsys.batch_flush = intel_i915_batch_flush; - iws->winsys.batch_finish = intel_i915_batch_finish; + iws->pws = winsys; iws->intel = intel; screen = i915_create_screen(winsys, intel->intelScreen->deviceID); diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 7baaae295c5..c930a1d1963 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -570,6 +570,33 @@ xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) } +/* + * Fence functions - basically nothing to do, as we don't create any actual + * fence objects. + */ + +static void +xm_fence_reference(struct pipe_winsys *sws, struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + + +static int +xm_fence_signalled(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static int +xm_fence_finish(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + /** * Return pointer to a pipe_winsys object. @@ -603,6 +630,10 @@ xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis) ws->base.surface_alloc_storage = xm_surface_alloc_storage; ws->base.surface_release = xm_surface_release; + ws->fence_reference = xm_fence_reference; + ws->fence_signalled = xm_fence_signalled; + ws->fence_finish = xm_fence_finish; + ws->base.flush_frontbuffer = xm_flush_frontbuffer; ws->base.printf = xm_printf; ws->base.get_name = xm_get_name; diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index f1fddc4e026..a623d0bcc06 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -214,7 +214,7 @@ st_Accum(GLcontext *ctx, GLenum op, GLfloat value) const GLint height = ctx->DrawBuffer->_Ymax - ypos; /* make sure color bufs aren't cached */ - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); switch (op) { case GL_ADD: diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 026f015fd85..43cc21d1fb4 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -737,7 +737,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, GLint skipPixels; ubyte *stmap; - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); /* map the stencil buffer */ stmap = pipe_surface_map(ps); @@ -952,7 +952,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, enum pipe_format srcFormat, texFormat; /* make sure rendering has completed */ - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); st_validate_state(st); diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 5384252a8e5..ec7788923a3 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -366,7 +366,7 @@ st_finish_render_texture(GLcontext *ctx, assert(strb); - ctx->st->pipe->flush(ctx->st->pipe, PIPE_FLUSH_RENDER_CACHE); + ctx->st->pipe->flush(ctx->st->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); /* printf("FINISH RENDER TO TEXTURE surf=%p\n", strb->surface); diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index dbec993f1ba..a536a059bd5 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -43,19 +43,21 @@ #include "pipe/p_winsys.h" -void st_flush( struct st_context *st, uint pipeFlushFlags ) +void st_flush( struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence ) { FLUSH_VERTICES(st->ctx, 0); - st->pipe->flush( st->pipe, pipeFlushFlags ); + st->pipe->flush( st->pipe, pipeFlushFlags, fence ); } -static void st_gl_flush( struct st_context *st, uint pipeFlushFlags ) +static void st_gl_flush( struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence ) { GLframebuffer *fb = st->ctx->DrawBuffer; - FLUSH_VERTICES(st->ctx, 0); + st_flush( st, pipeFlushFlags, fence ); if (!fb) return; @@ -80,15 +82,6 @@ static void st_gl_flush( struct st_context *st, uint pipeFlushFlags ) = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); struct pipe_surface *front_surf = strb->surface; - /* If we aren't rendering to the frontbuffer, this is a noop. - * This should be uncontroversial for glFlush, though people may - * feel more strongly about glFinish. - * - * Additionally, need to make sure that the frontbuffer_dirty - * flag really gets set on frontbuffer rendering. - */ - st->pipe->flush( st->pipe, pipeFlushFlags ); - /* Hook for copying "fake" frontbuffer if necessary: */ st->pipe->winsys->flush_frontbuffer( st->pipe->winsys, front_surf, @@ -103,7 +96,18 @@ static void st_gl_flush( struct st_context *st, uint pipeFlushFlags ) */ static void st_glFlush(GLcontext *ctx) { - st_gl_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE); + st_gl_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); +} + + +void st_finish( struct st_context *st ) +{ + struct pipe_fence_handle *fence; + + st_gl_flush(st, PIPE_FLUSH_RENDER_CACHE, &fence); + + st->pipe->winsys->fence_finish(st->pipe->winsys, fence, 0); + st->pipe->winsys->fence_reference(st->pipe->winsys, &fence, NULL); } @@ -112,7 +116,7 @@ static void st_glFlush(GLcontext *ctx) */ static void st_glFinish(GLcontext *ctx) { - st_gl_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_WAIT); + st_finish( ctx->st ); } diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 4cf9adcd28a..e9fcdf69a14 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -160,7 +160,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, return; /* make sure rendering has completed */ - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); if (format == GL_STENCIL_INDEX) { st_read_stencil_pixels(ctx, x, y, width, height, type, pack, dest); diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c index d46a9178b19..075e9d1bd68 100644 --- a/src/mesa/state_tracker/st_framebuffer.c +++ b/src/mesa/state_tracker/st_framebuffer.c @@ -186,7 +186,8 @@ st_notify_swapbuffers(struct st_framebuffer *stfb) if (ctx && ctx->DrawBuffer == &stfb->Base) { st_flush( ctx->st, - PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_SWAPBUFFERS); + PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_SWAPBUFFERS, + NULL ); } } diff --git a/src/mesa/state_tracker/st_public.h b/src/mesa/state_tracker/st_public.h index 3c397b126a9..9d88ce9764d 100644 --- a/src/mesa/state_tracker/st_public.h +++ b/src/mesa/state_tracker/st_public.h @@ -45,6 +45,7 @@ struct st_context; struct st_framebuffer; struct pipe_context; +struct pipe_fence_handle; struct pipe_surface; @@ -78,7 +79,9 @@ void st_make_current(struct st_context *st, struct st_framebuffer *draw, struct st_framebuffer *read); -void st_flush( struct st_context *st, uint pipeFlushFlags ); +void st_flush( struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence ); +void st_finish( struct st_context *st ); void st_notify_swapbuffers(struct st_framebuffer *stfb); void st_notify_swapbuffers_complete(struct st_framebuffer *stfb); -- cgit v1.2.3 From 92126cea846959bb2152905a7712753d1114bd6b Mon Sep 17 00:00:00 2001 From: Ian Romanick <idr@us.ibm.com> Date: Wed, 26 Mar 2008 10:45:32 -0700 Subject: cell: Implement code-gen for logic op This also implements code-gen for the float-to-packed color conversion. It's currently hardcoded for A8R8G8B8, but that can easily be fixed as soon as other color depths are supported by the Cell driver. --- src/gallium/drivers/cell/common.h | 11 +- src/gallium/drivers/cell/ppu/cell_context.h | 2 + src/gallium/drivers/cell/ppu/cell_state_emit.c | 17 ++ .../drivers/cell/ppu/cell_state_per_fragment.c | 261 ++++++++++++++++++++- .../drivers/cell/ppu/cell_state_per_fragment.h | 4 + src/gallium/drivers/cell/spu/spu_main.c | 19 ++ src/gallium/drivers/cell/spu/spu_main.h | 9 +- src/gallium/drivers/cell/spu/spu_tri.c | 59 +++-- 8 files changed, 349 insertions(+), 33 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d59e4f7036e..b0928fefd2e 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -92,8 +92,9 @@ #define CELL_CMD_STATE_BIND_VS 18 #define CELL_CMD_STATE_BLEND 19 #define CELL_CMD_STATE_ATTRIB_FETCH 20 -#define CELL_CMD_VS_EXECUTE 21 -#define CELL_CMD_FLUSH_BUFFER_RANGE 22 +#define CELL_CMD_STATE_LOGICOP 21 +#define CELL_CMD_VS_EXECUTE 22 +#define CELL_CMD_FLUSH_BUFFER_RANGE 23 #define CELL_NUM_BUFFERS 4 @@ -124,6 +125,12 @@ struct cell_command_blend { }; +struct cell_command_logicop { + uint64_t base; /**< Effective address of code start. */ + unsigned size; /**< Size in bytes of test code. */ +}; + + /** * Tell SPUs about the framebuffer size, location */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 9e79db0acef..0442abddc18 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -92,6 +92,8 @@ struct cell_context const struct cell_vertex_shader_state *vs; const struct cell_fragment_shader_state *fs; + struct spe_function logic_op; + struct pipe_blend_color blend_color; struct pipe_clip_state clip; struct pipe_constant_buffer constants[2]; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 5709b48f129..5c1310d0b0c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -50,6 +50,23 @@ emit_state_cmd(struct cell_context *cell, uint cmd, void cell_emit_state(struct cell_context *cell) { + if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_BLEND)) { + struct cell_command_logicop logicop; + + if (cell->logic_op.store != NULL) { + spe_release_func(& cell->logic_op); + } + + cell_generate_logic_op(& cell->logic_op, + & cell->blend->base, + cell->framebuffer.cbufs[0]); + + logicop.base = (intptr_t) cell->logic_op.store; + logicop.size = 64 * 4; + emit_state_cmd(cell, CELL_CMD_STATE_LOGICOP, &logicop, + sizeof(logicop)); + } + if (cell->dirty & CELL_NEW_FRAMEBUFFER) { struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; struct pipe_surface *zbuf = cell->framebuffer.zsbuf; diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index c750b1d89dc..f10025bd7c7 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -977,7 +977,6 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) spe_allocate_register(f, 13), spe_allocate_register(f, 14), }; - const int mask = spe_allocate_register(f, 15); unsigned func[4]; unsigned sF[4]; unsigned dF[4]; @@ -1114,9 +1113,6 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) func[i], sF[i], dF[i], frag[i], src_factor[i], pixel[i], dst_factor[i]); - spe_selb(f, frag[i], pixel[i], frag[i], mask); - } else { - spe_or(f, frag[i], pixel[i], pixel[i]); } } @@ -1146,3 +1142,260 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) } #endif } + + +int PC_OFFSET(const struct spe_function *f, const void *d) +{ + const intptr_t pc = (intptr_t) f->csr; + const intptr_t ea = ~0x0f & (intptr_t) d; + + return (ea - pc) >> 2; +} + + +/** + * Generate code to perform color conversion and logic op + * + * \bug + * The code generated by this function should also perform dithering. + * + * \bug + * The code generated by this function should also perform color-write + * masking. + * + * \bug + * This routine is hard-coded to only work with ARGB8 data. + */ +void +cell_generate_logic_op(struct spe_function *f, struct pipe_blend_state *blend, + struct pipe_surface *surf) +{ + const unsigned logic_op = (blend->logicop_enable) + ? blend->logicop_func : PIPE_LOGICOP_COPY; + + /* This code generates a maximum of 37 instructions. An additional 32 + * bytes (equiv. to 8 instructions) are needed for data storage. Round up + * to 64 to make it a happy power-of-two. + */ + spe_init_func(f, 4 * 64); + + + /* Pixel colors in framebuffer format in AoS layout. + */ + const int pixel[4] = { + spe_allocate_register(f, 3), + spe_allocate_register(f, 4), + spe_allocate_register(f, 5), + spe_allocate_register(f, 6), + }; + + /* Fragment colors stored as floats in SoA layout. + */ + const int frag[4] = { + spe_allocate_register(f, 7), + spe_allocate_register(f, 8), + spe_allocate_register(f, 9), + spe_allocate_register(f, 10), + }; + + const int mask = spe_allocate_register(f, 11); + + + /* Short-circuit the noop and invert cases. + */ + if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) { + spe_bi(f, 0, 0, 0); + return; + } else if (logic_op == PIPE_LOGICOP_INVERT) { + spe_nor(f, pixel[0], pixel[0], pixel[0]); + spe_nor(f, pixel[1], pixel[1], pixel[1]); + spe_nor(f, pixel[2], pixel[2], pixel[2]); + spe_nor(f, pixel[3], pixel[3], pixel[3]); + spe_bi(f, 0, 0, 0); + return; + } + + + const int tmp[4] = { + spe_allocate_available_register(f), + spe_allocate_available_register(f), + spe_allocate_available_register(f), + spe_allocate_available_register(f), + }; + + const int shuf_xpose_hi = spe_allocate_available_register(f); + const int shuf_xpose_lo = spe_allocate_available_register(f); + const int shuf_color = spe_allocate_available_register(f); + + + /* Pointer to the begining of the function's private data area. + */ + uint32_t *const data = ((uint32_t *) f->store) + (64 - 8); + + + /* Convert fragment colors to framebuffer format in AoS layout. + */ + data[0] = 0x00010203; + data[1] = 0x10111213; + data[2] = 0x04050607; + data[3] = 0x14151617; + + data[4] = 0x0c000408; + data[5] = 0x80808080; + data[6] = 0x80808080; + data[7] = 0x80808080; + + spe_ilh(f, tmp[0], 0x0808); + spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0)); + spe_lqr(f, shuf_color, PC_OFFSET(f, data+4)); + spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]); + + spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi); + spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo); + spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi); + spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo); + + spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi); + spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo); + spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi); + spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo); + + spe_cfltu(f, frag[0], frag[0], 32); + spe_cfltu(f, frag[1], frag[1], 32); + spe_cfltu(f, frag[2], frag[2], 32); + spe_cfltu(f, frag[3], frag[3], 32); + + spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color); + spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color); + spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color); + spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color); + + + /* If logic op is enabled, perform the requested logical operation on the + * converted fragment colors and the pixel colors. + */ + switch (logic_op) { + case PIPE_LOGICOP_CLEAR: + spe_il(f, frag[0], 0); + spe_il(f, frag[1], 0); + spe_il(f, frag[2], 0); + spe_il(f, frag[3], 0); + break; + case PIPE_LOGICOP_NOR: + spe_nor(f, frag[0], frag[0], pixel[0]); + spe_nor(f, frag[1], frag[1], pixel[1]); + spe_nor(f, frag[2], frag[2], pixel[2]); + spe_nor(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_AND_INVERTED: + spe_andc(f, frag[0], pixel[0], frag[0]); + spe_andc(f, frag[1], pixel[1], frag[1]); + spe_andc(f, frag[2], pixel[2], frag[2]); + spe_andc(f, frag[3], pixel[3], frag[3]); + break; + case PIPE_LOGICOP_COPY_INVERTED: + spe_nor(f, frag[0], frag[0], frag[0]); + spe_nor(f, frag[1], frag[1], frag[1]); + spe_nor(f, frag[2], frag[2], frag[2]); + spe_nor(f, frag[3], frag[3], frag[3]); + break; + case PIPE_LOGICOP_AND_REVERSE: + spe_andc(f, frag[0], frag[0], pixel[0]); + spe_andc(f, frag[1], frag[1], pixel[1]); + spe_andc(f, frag[2], frag[2], pixel[2]); + spe_andc(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_XOR: + spe_xor(f, frag[0], frag[0], pixel[0]); + spe_xor(f, frag[1], frag[1], pixel[1]); + spe_xor(f, frag[2], frag[2], pixel[2]); + spe_xor(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_NAND: + spe_nand(f, frag[0], frag[0], pixel[0]); + spe_nand(f, frag[1], frag[1], pixel[1]); + spe_nand(f, frag[2], frag[2], pixel[2]); + spe_nand(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_AND: + spe_and(f, frag[0], frag[0], pixel[0]); + spe_and(f, frag[1], frag[1], pixel[1]); + spe_and(f, frag[2], frag[2], pixel[2]); + spe_and(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_EQUIV: + spe_eqv(f, frag[0], frag[0], pixel[0]); + spe_eqv(f, frag[1], frag[1], pixel[1]); + spe_eqv(f, frag[2], frag[2], pixel[2]); + spe_eqv(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_OR_INVERTED: + spe_orc(f, frag[0], pixel[0], frag[0]); + spe_orc(f, frag[1], pixel[1], frag[1]); + spe_orc(f, frag[2], pixel[2], frag[2]); + spe_orc(f, frag[3], pixel[3], frag[3]); + break; + case PIPE_LOGICOP_COPY: + break; + case PIPE_LOGICOP_OR_REVERSE: + spe_orc(f, frag[0], frag[0], pixel[0]); + spe_orc(f, frag[1], frag[1], pixel[1]); + spe_orc(f, frag[2], frag[2], pixel[2]); + spe_orc(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_OR: + spe_or(f, frag[0], frag[0], pixel[0]); + spe_or(f, frag[1], frag[1], pixel[1]); + spe_or(f, frag[2], frag[2], pixel[2]); + spe_or(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_SET: + spe_il(f, frag[0], ~0); + spe_il(f, frag[1], ~0); + spe_il(f, frag[2], ~0); + spe_il(f, frag[3], ~0); + break; + + /* These two cases are short-circuited above. + */ + case PIPE_LOGICOP_INVERT: + case PIPE_LOGICOP_NOOP: + default: + assert(0); + } + + + /* Apply fragment mask. + */ + spe_ilh(f, tmp[0], 0x0000); + spe_ilh(f, tmp[1], 0x0404); + spe_ilh(f, tmp[2], 0x0808); + spe_ilh(f, tmp[3], 0x0c0c); + + spe_shufb(f, tmp[0], mask, mask, tmp[0]); + spe_shufb(f, tmp[1], mask, mask, tmp[1]); + spe_shufb(f, tmp[2], mask, mask, tmp[2]); + spe_shufb(f, tmp[3], mask, mask, tmp[3]); + + spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]); + spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]); + spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]); + spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]); + + spe_bi(f, 0, 0, 0); + +#if 0 + { + const uint32_t *p = f->store; + unsigned i; + + printf("# %u instructions\n", f->csr - f->store); + + printf("\t.text\n"); + for (i = 0; i < 64; i++) { + printf("\t.long\t0x%04x\n", p[i]); + } + fflush(stdout); + } +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h index f699247f9e4..ab4de96c69d 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h @@ -31,4 +31,8 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); extern void cell_generate_alpha_blend(struct cell_blend_state *cb); +extern void +cell_generate_logic_op(struct spe_function *f, struct pipe_blend_state *blend, + struct pipe_surface *surf); + #endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 0a490ab277f..fccff01e108 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -64,6 +64,9 @@ static unsigned char depth_stencil_code_buffer[4 * 64] static unsigned char fb_blend_code_buffer[4 * 64] ALIGN16_ATTRIB; +static unsigned char logicop_code_buffer[4 * 64] + ALIGN16_ATTRIB; + /** * Tell the PPU that this SPU has finished copying a buffer to @@ -513,6 +516,22 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); break; } + case CELL_CMD_STATE_LOGICOP: { + struct cell_command_logicop *code = + (struct cell_command_logicop *) &buffer[pos+1]; + + mfc_get(logicop_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + spu.logicop = (logicop_func) logicop_code_buffer; + pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8); + break; + } case CELL_CMD_FLUSH_BUFFER_RANGE: { struct cell_buffer_range *br = (struct cell_buffer_range *) &buffer[pos+1]; diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 49f5d99674a..c20452931a9 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -77,9 +77,14 @@ struct spu_blend_results { typedef struct spu_blend_results (*blend_func)( qword frag_r, qword frag_g, qword frag_b, qword frag_a, qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, - qword const_r, qword const_g, qword const_b, qword const_a, + qword const_r, qword const_g, qword const_b, qword const_a); + +typedef struct spu_blend_results (*logicop_func)( + qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, + qword frag_r, qword frag_g, qword frag_b, qword frag_a, qword frag_mask); + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ @@ -111,6 +116,8 @@ struct spu_global blend_func blend; qword const_blend_color[4] ALIGN16_ATTRIB; + logicop_func logicop; + struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct cell_command_texture texture; diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index e6a1ce01dfd..95c629a8aae 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -305,7 +305,6 @@ emit_quad( int x, int y, mask_t mask ) if (spu_extract(spu_orx(mask), 0)) { const int ix = x - setup.cliprect_minx; const int iy = y - setup.cliprect_miny; - const vector unsigned char shuffle = spu.color_shuffle; vector float colors[4]; spu.cur_ctile_status = TILE_STATUS_DIRTY; @@ -330,45 +329,53 @@ emit_quad( int x, int y, mask_t mask ) } + /* Convert fragment data from AoS to SoA format. + */ + qword soa_frag[4]; + _transpose_matrix4x4((vec_float4 *) soa_frag, colors); + /* Read the current framebuffer values. - * - * Ignore read_fb for now. In the future we can use this to avoid - * reading the framebuffer if read_fb is false and the fragment mask is - * all 0xffffffff. This is the common case, so it is probably worth - * the effort. We'll have to profile to determine whether or not the - * extra conditional branches hurt overall performance. */ - vec_float4 aos_pix[4] = { - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]), + const qword pix[4] = { + (qword) spu_splats(spu.ctile.ui[iy+0][ix+0]), + (qword) spu_splats(spu.ctile.ui[iy+0][ix+1]), + (qword) spu_splats(spu.ctile.ui[iy+1][ix+0]), + (qword) spu_splats(spu.ctile.ui[iy+1][ix+1]), }; qword soa_pix[4]; - qword soa_frag[4]; - /* Convert pixel and fragment data from AoS to SoA format. - */ - _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix); - _transpose_matrix4x4((vec_float4 *) soa_frag, colors); + if (spu.read_fb) { + /* Convert pixel data from AoS to SoA format. + */ + vec_float4 aos_pix[4] = { + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]), + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]), + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]), + spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]), + }; + + _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix); + } - const struct spu_blend_results result = + + struct spu_blend_results result = (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3], soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3], spu.const_blend_color[0], spu.const_blend_color[1], - spu.const_blend_color[2], spu.const_blend_color[3], - (qword) mask); + spu.const_blend_color[2], spu.const_blend_color[3]); /* Convert final pixel data from SoA to AoS format. */ - _transpose_matrix4x4(aos_pix, (const vec_float4 *) &result); - - spu.ctile.ui[iy+0][ix+0] = spu_pack_color_shuffle(aos_pix[0], shuffle); - spu.ctile.ui[iy+0][ix+1] = spu_pack_color_shuffle(aos_pix[1], shuffle); - spu.ctile.ui[iy+1][ix+0] = spu_pack_color_shuffle(aos_pix[2], shuffle); - spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(aos_pix[3], shuffle); + result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3], + result.r, result.g, result.b, result.a, + (qword) mask); + + spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0); + spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0); + spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0); + spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0); } #endif } -- cgit v1.2.3 From 979358c47115d8ea50001832372f8043a60a5b80 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 27 Mar 2008 15:26:22 -0600 Subject: cell: fix unclosed comment --- src/gallium/drivers/cell/spu/spu_exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 061fbebf618..48edc62f49b 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1453,7 +1453,7 @@ exec_instruction( break; case TGSI_OPCODE_TXP: - /* Texture lookup with projection + /* Texture lookup with projection */ /* src[0] = texcoord (src[0].w = projection) */ /* src[1] = sampler unit */ exec_tex(mach, inst, TRUE, TRUE); -- cgit v1.2.3 From 8b8a947111ad911a986e48a66c9fe31f120de9a2 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 27 Mar 2008 15:26:54 -0600 Subject: cell: added (uint64_t) cast to silence warning --- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index b418857ccd8..f753960a0fb 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -69,7 +69,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf)); batch[0] = CELL_CMD_STATE_ATTRIB_FETCH; cf = (struct cell_attribute_fetch_code *) (&batch[1]); - cf->base = cell->attrib_fetch.store; + cf->base = (uint64_t) cell->attrib_fetch.store; cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr - (void *) cell->attrib_fetch.store)); -- cgit v1.2.3 From 132df5ebce8529f7b0999cdd574d70ec0f12a4fd Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 27 Mar 2008 15:27:14 -0600 Subject: cell: include cell_state_per_fragment.h to silence warning --- src/gallium/drivers/cell/ppu/cell_state_emit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 5c1310d0b0c..4c75caa0251 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -29,6 +29,7 @@ #include "cell_context.h" #include "cell_state.h" #include "cell_state_emit.h" +#include "cell_state_per_fragment.h" #include "cell_batch.h" #include "cell_texture.h" #include "draw/draw_context.h" -- cgit v1.2.3 From 39038c11699bbc9baab744542e96d54e91cb452a Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Thu, 27 Mar 2008 17:41:55 -0600 Subject: gallium: replace PIPE_ATTRIB_MAX with PIPE_MAX_ATTRIBS The later follows the naming scheme of other limits. Keep the old definition until all possible usage is updated. --- src/gallium/auxiliary/draw/draw_context.c | 4 ++-- src/gallium/auxiliary/draw/draw_private.h | 12 ++++++------ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 +- src/gallium/auxiliary/draw/draw_vf.c | 4 ++-- src/gallium/auxiliary/draw/draw_vf.h | 2 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 4 ++-- src/gallium/auxiliary/draw/draw_vs_llvm.c | 4 ++-- src/gallium/auxiliary/draw/draw_vs_sse.c | 4 ++-- src/gallium/drivers/cell/ppu/cell_context.h | 6 +++--- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 4 ++-- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 4 ++-- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 2 +- src/gallium/drivers/cell/spu/spu_main.c | 4 ++-- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 4 ++-- src/gallium/drivers/cell/spu/spu_vertex_shader.h | 8 ++++---- src/gallium/drivers/failover/fo_context.h | 4 ++-- src/gallium/drivers/failover/fo_state_emit.c | 4 ++-- src/gallium/drivers/i915simple/i915_context.c | 4 ++-- src/gallium/drivers/i915simple/i915_context.h | 2 +- src/gallium/drivers/i965simple/brw_clip.h | 2 +- src/gallium/drivers/i965simple/brw_context.h | 12 ++++++------ src/gallium/drivers/i965simple/brw_state.c | 2 +- src/gallium/drivers/i965simple/brw_wm.h | 4 ++-- src/gallium/drivers/softpipe/sp_context.h | 6 +++--- src/gallium/drivers/softpipe/sp_draw_arrays.c | 4 ++-- src/gallium/drivers/softpipe/sp_quad_fs.c | 4 ++-- src/gallium/drivers/softpipe/sp_state_vertex.c | 4 ++-- src/gallium/include/pipe/p_state.h | 3 ++- src/mesa/state_tracker/st_draw.c | 2 +- 29 files changed, 63 insertions(+), 62 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 903cc267662..81858e01cac 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -238,7 +238,7 @@ draw_set_vertex_buffer(struct draw_context *draw, const struct pipe_vertex_buffer *buffer) { draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - assert(attr < PIPE_ATTRIB_MAX); + assert(attr < PIPE_MAX_ATTRIBS); draw->vertex_buffer[attr] = *buffer; } @@ -249,7 +249,7 @@ draw_set_vertex_element(struct draw_context *draw, const struct pipe_vertex_element *element) { draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - assert(attr < PIPE_ATTRIB_MAX); + assert(attr < PIPE_MAX_ATTRIBS); draw->vertex_element[attr] = *element; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 7007ee22c4d..8eb2f515cbc 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -228,8 +228,8 @@ struct draw_context /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; struct draw_vertex_shader *vertex_shader; uint num_vs_outputs; /**< convenience, from vertex_shader */ @@ -242,7 +242,7 @@ struct draw_context unsigned eltSize; /** vertex arrays */ - const void *vbuffer[PIPE_ATTRIB_MAX]; + const void *vbuffer[PIPE_MAX_ATTRIBS]; /** constant buffer (for vertex shader) */ const void *constants; @@ -275,9 +275,9 @@ struct draw_context /* Vertex fetch internal state */ struct { - const ubyte *src_ptr[PIPE_ATTRIB_MAX]; - unsigned pitch[PIPE_ATTRIB_MAX]; - fetch_func fetch[PIPE_ATTRIB_MAX]; + const ubyte *src_ptr[PIPE_MAX_ATTRIBS]; + unsigned pitch[PIPE_MAX_ATTRIBS]; + fetch_func fetch[PIPE_MAX_ATTRIBS]; unsigned nr_attrs; full_fetch_func fetch_func; pt_fetch_func pt_fetch; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 64ef83d8001..9b098bc173f 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -78,7 +78,7 @@ struct fetch_emit_middle_end { unsigned pitch; void (*fetch)( const void *from, float *attrib); void (*emit)( const float *attrib, float **out ); - } fetch[PIPE_ATTRIB_MAX]; + } fetch[PIPE_MAX_ATTRIBS]; unsigned nr_fetch; unsigned hw_vertex_size; diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index f4e29a62932..7bb34ace7aa 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -158,7 +158,7 @@ draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, unsigned offset = 0; unsigned i, j; - assert(nr < PIPE_ATTRIB_MAX); + assert(nr < PIPE_MAX_ATTRIBS); for (j = 0, i = 0; i < nr; i++) { const unsigned format = map[i].format; @@ -390,7 +390,7 @@ struct draw_vertex_fetch *draw_vf_create( void ) struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch); unsigned i; - for (i = 0; i < PIPE_ATTRIB_MAX; i++) + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) vf->attr[i].vf = vf; vf->identity[0] = 0.0; diff --git a/src/gallium/auxiliary/draw/draw_vf.h b/src/gallium/auxiliary/draw/draw_vf.h index 011c8f0ff1c..7555d1bd588 100644 --- a/src/gallium/auxiliary/draw/draw_vf.h +++ b/src/gallium/auxiliary/draw/draw_vf.h @@ -169,7 +169,7 @@ struct draw_vf_attr struct draw_vertex_fetch { - struct draw_vf_attr attr[PIPE_ATTRIB_MAX]; + struct draw_vf_attr attr[PIPE_MAX_ATTRIBS]; unsigned attr_count; unsigned vertex_stride; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 4e2fa727070..487d0ea7f4f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -99,8 +99,8 @@ vs_exec_run( struct draw_vertex_shader *shader, struct tgsi_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index bd983f2ddfa..d29cb18efe4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -107,8 +107,8 @@ vs_llvm_run( struct draw_vertex_shader *base, struct tgsi_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index a4503c143ed..bc910dc2d04 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -114,8 +114,8 @@ vs_sse_run( struct draw_vertex_shader *base, struct tgsi_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 0442abddc18..7f656a97447 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -103,8 +103,8 @@ struct cell_context struct cell_texture *texture[PIPE_MAX_SAMPLERS]; uint num_textures; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; ubyte *zsbuf_map; @@ -141,7 +141,7 @@ struct cell_context struct spe_function attrib_fetch; - unsigned attrib_fetch_offsets[PIPE_ATTRIB_MAX]; + unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index c839fb4d12d..b896252f817 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -123,7 +123,7 @@ cell_draw_elements(struct pipe_context *pipe, /* * Map vertex buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (sp->vertex_buffer[i].buffer) { void *buf = pipe->winsys->buffer_map(pipe->winsys, sp->vertex_buffer[i].buffer, @@ -151,7 +151,7 @@ cell_draw_elements(struct pipe_context *pipe, /* * unmap vertex/index buffers - will cause draw module to flush */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (sp->vertex_buffer[i].buffer) { draw_set_mapped_vertex_buffer(draw, i, NULL); pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 563831b62db..37d25fb3578 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -41,7 +41,7 @@ cell_set_vertex_element(struct pipe_context *pipe, const struct pipe_vertex_element *attrib) { struct cell_context *cell = cell_context(pipe); - assert(index < PIPE_ATTRIB_MAX); + assert(index < PIPE_MAX_ATTRIBS); cell->vertex_element[index] = *attrib; /* struct copy */ cell->dirty |= CELL_NEW_VERTEX; @@ -55,7 +55,7 @@ cell_set_vertex_buffer(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffer) { struct cell_context *cell = cell_context(pipe); - assert(index < PIPE_ATTRIB_MAX); + assert(index < PIPE_MAX_ATTRIBS); cell->vertex_buffer[index] = *buffer; /* struct copy */ cell->dirty |= CELL_NEW_VERTEX; diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 4828a8023bd..49d5443cde7 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -263,7 +263,7 @@ void cell_update_vertex_fetch(struct draw_context *draw) struct cell_context *const cell = (struct cell_context *) draw->driver_private; struct spe_function *p = &cell->attrib_fetch; - unsigned function_index[PIPE_ATTRIB_MAX]; + unsigned function_index[PIPE_MAX_ATTRIBS]; unsigned unique_attr_formats; int out_ptr; int in_ptr; diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index fccff01e108..d7f46f80246 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -55,7 +55,7 @@ struct spu_global spu; struct spu_vs_context draw; -static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX] +static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] ALIGN16_ATTRIB; static unsigned char depth_stencil_code_buffer[4 * 64] @@ -361,7 +361,7 @@ cmd_state_vs_array_info(const struct cell_array_info *vs_info) { const unsigned attr = vs_info->attr; - ASSERT(attr < PIPE_ATTRIB_MAX); + ASSERT(attr < PIPE_MAX_ATTRIBS); draw.vertex_fetch.src_ptr[attr] = vs_info->base; draw.vertex_fetch.pitch[attr] = vs_info->pitch; draw.vertex_fetch.size[attr] = vs_info->size; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index 8363efeeb6e..3119a78c060 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -86,8 +86,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h index 54a4b8d9b9f..4c74f5e74d5 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -16,10 +16,10 @@ struct spu_vs_context { struct pipe_viewport_state viewport; struct { - uint64_t src_ptr[PIPE_ATTRIB_MAX]; - unsigned pitch[PIPE_ATTRIB_MAX]; - unsigned size[PIPE_ATTRIB_MAX]; - unsigned code_offset[PIPE_ATTRIB_MAX]; + uint64_t src_ptr[PIPE_MAX_ATTRIBS]; + unsigned pitch[PIPE_MAX_ATTRIBS]; + unsigned size[PIPE_MAX_ATTRIBS]; + unsigned code_offset[PIPE_MAX_ATTRIBS]; unsigned nr_attrs; boolean dirty; diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 8f3ad3ee798..4afe10c4b86 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -84,8 +84,8 @@ struct failover_context { struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; void *sw_sampler_state[PIPE_MAX_SAMPLERS]; void *hw_sampler_state[PIPE_MAX_SAMPLERS]; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index 3de931e04eb..bb89f925e93 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -104,7 +104,7 @@ failover_state_emit( struct failover_context *failover ) } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (failover->dirty_vertex_buffer & (1<<i)) { failover->sw->set_vertex_buffer( failover->sw, i, &failover->vertex_buffer[i] ); @@ -113,7 +113,7 @@ failover_state_emit( struct failover_context *failover ) } if (failover->dirty & FO_NEW_VERTEX_ELEMENT) { - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (failover->dirty_vertex_element & (1<<i)) { failover->sw->set_vertex_element( failover->sw, i, &failover->vertex_element[i] ); diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 15ff2360b74..fee33d82de4 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -65,7 +65,7 @@ i915_draw_elements( struct pipe_context *pipe, /* * Map vertex buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (i915->vertex_buffer[i].buffer) { void *buf = pipe->winsys->buffer_map(pipe->winsys, @@ -96,7 +96,7 @@ i915_draw_elements( struct pipe_context *pipe, /* * unmap vertex/index buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (i915->vertex_buffer[i].buffer) { pipe->winsys->buffer_unmap(pipe->winsys, i915->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 746f18ba38d..8e707ea574a 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -232,7 +232,7 @@ struct i915_context struct pipe_scissor_state scissor; struct i915_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; unsigned dirty; diff --git a/src/gallium/drivers/i965simple/brw_clip.h b/src/gallium/drivers/i965simple/brw_clip.h index a89d08b7910..d70fc094ff5 100644 --- a/src/gallium/drivers/i965simple/brw_clip.h +++ b/src/gallium/drivers/i965simple/brw_clip.h @@ -116,7 +116,7 @@ struct brw_clip_compile { unsigned last_mrf; unsigned header_position_offset; - unsigned offset[PIPE_ATTRIB_MAX]; + unsigned offset[PIPE_MAX_ATTRIBS]; }; #define ATTR_SIZE (4*4) diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index b83a13c3b6b..0c96ba17327 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -433,17 +433,17 @@ struct brw_cached_batch_item { -/* Protect against a future where PIPE_ATTRIB_MAX > 32. Wouldn't life +/* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life * be easier if C allowed arrays of packed elements? */ -#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32) +#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) struct brw_vertex_info { - unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */ - unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */ + unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ + unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ }; @@ -496,9 +496,9 @@ struct brw_context /* Arrays with buffer objects to copy non-bufferobj arrays into * for upload: */ - const struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; + const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; - struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; + struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index f5efe9fc06d..0d04a8a5947 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -292,7 +292,7 @@ static void brw_set_vertex_element(struct pipe_context *pipe, /* flush ? */ struct brw_context *brw = brw_context(pipe); - assert(index < PIPE_ATTRIB_MAX); + assert(index < PIPE_MAX_ATTRIBS); struct brw_vertex_element_state el; memset(&el, 0, sizeof(el)); diff --git a/src/gallium/drivers/i965simple/brw_wm.h b/src/gallium/drivers/i965simple/brw_wm.h index a1ac0f504a6..b29c4393f01 100644 --- a/src/gallium/drivers/i965simple/brw_wm.h +++ b/src/gallium/drivers/i965simple/brw_wm.h @@ -76,7 +76,7 @@ struct brw_wm_prog_key { #define PROGRAM_INTERNAL_PARAM #define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ -#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_ATTRIB_MAX + 3) +#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_MAX_ATTRIBS + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) #define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) @@ -84,7 +84,7 @@ struct brw_wm_prog_key { #define BRW_WM_MAX_CONST 256 #define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS -#define PAYLOAD_DEPTH (PIPE_ATTRIB_MAX) +#define PAYLOAD_DEPTH (PIPE_MAX_ATTRIBS) #define MAX_IFSN 32 #define MAX_LOOP_DEPTH 32 diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 19e6cfaf02e..dc9d0e6d5d6 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -77,8 +77,8 @@ struct softpipe_context { struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned dirty; unsigned num_samplers; @@ -92,7 +92,7 @@ struct softpipe_context { /* * Mapped vertex buffers */ - ubyte *mapped_vbuffer[PIPE_ATTRIB_MAX]; + ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 5b5a0fe5735..ab54050d3fe 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -125,7 +125,7 @@ softpipe_draw_elements(struct pipe_context *pipe, /* * Map vertex buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (sp->vertex_buffer[i].buffer) { void *buf = pipe->winsys->buffer_map(pipe->winsys, @@ -153,7 +153,7 @@ softpipe_draw_elements(struct pipe_context *pipe, /* * unmap vertex/index buffers - will cause draw module to flush */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (sp->vertex_buffer[i].buffer) { draw_set_mapped_vertex_buffer(draw, i, NULL); pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 861285101fd..c10ad80e015 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -185,8 +185,8 @@ struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) uint i; /* allocate storage for program inputs/outputs, aligned to 16 bytes */ - qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); - qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); + qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16); + qss->outputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->outputs) + 16); qss->machine.Inputs = align16(qss->inputs); qss->machine.Outputs = align16(qss->outputs); diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index f01a10de3b4..c054e76d9be 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -42,7 +42,7 @@ softpipe_set_vertex_element(struct pipe_context *pipe, const struct pipe_vertex_element *attrib) { struct softpipe_context *softpipe = softpipe_context(pipe); - assert(index < PIPE_ATTRIB_MAX); + assert(index < PIPE_MAX_ATTRIBS); softpipe->vertex_element[index] = *attrib; /* struct copy */ softpipe->dirty |= SP_NEW_VERTEX; @@ -56,7 +56,7 @@ softpipe_set_vertex_buffer(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffer) { struct softpipe_context *softpipe = softpipe_context(pipe); - assert(index < PIPE_ATTRIB_MAX); + assert(index < PIPE_MAX_ATTRIBS); softpipe->vertex_buffer[index] = *buffer; /* struct copy */ softpipe->dirty |= SP_NEW_VERTEX; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index a2bd8c6aaab..2490412126f 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -54,7 +54,8 @@ extern "C" { #define PIPE_MAX_SAMPLERS 8 #define PIPE_MAX_CLIP_PLANES 6 #define PIPE_MAX_CONSTANT 32 -#define PIPE_ATTRIB_MAX 32 +#define PIPE_MAX_ATTRIBS 32 +#define PIPE_ATTRIB_MAX 32 /* XXX obsolete - remove */ #define PIPE_MAX_COLOR_BUFS 8 #define PIPE_MAX_TEXTURE_LEVELS 16 #define PIPE_MAX_FEEDBACK_ATTRIBS 16 diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 20af90df7d8..4aca3311b7b 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -606,7 +606,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* * unmap vertex/index buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (draw->vertex_buffer[i].buffer) { pipe->winsys->buffer_unmap(pipe->winsys, draw->vertex_buffer[i].buffer); -- cgit v1.2.3 From 5615ab78b03cf1cb5fb19fc04fef52818f91b0be Mon Sep 17 00:00:00 2001 From: Roland Scheidegger <sroland@tungstengraphics.com> Date: Fri, 28 Mar 2008 15:43:00 +0100 Subject: gallium: remove redundant compare bit in sampler state --- src/gallium/drivers/i965simple/brw_wm.c | 3 +-- src/gallium/include/pipe/p_state.h | 1 - src/mesa/state_tracker/st_atom_sampler.c | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c index 1c4b5b5ede2..7fc5f59a98c 100644 --- a/src/gallium/drivers/i965simple/brw_wm.c +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -161,8 +161,7 @@ static void brw_wm_populate_key( struct brw_context *brw, if (unit) { - if (unit->compare && - unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + if (unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { key->shadowtex_mask |= 1<<i; } if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index b47595deca4..0eeee47a9a5 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -243,7 +243,6 @@ struct pipe_sampler_state unsigned min_img_filter:2; /**< PIPE_TEX_FILTER_x */ unsigned min_mip_filter:2; /**< PIPE_TEX_MIPFILTER_x */ unsigned mag_img_filter:2; /**< PIPE_TEX_FILTER_x */ - unsigned compare:1; /**< shadow/depth compare enabled? */ unsigned compare_mode:1; /**< PIPE_TEX_COMPARE_x */ unsigned compare_func:3; /**< PIPE_FUNC_x */ unsigned normalized_coords:1; /**< Are coords normalized to [0,1]? */ diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 5787a7492c6..5dd242ac662 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -167,7 +167,6 @@ update_samplers(struct st_context *st) /* only care about ARB_shadow, not SGI shadow */ if (texobj->CompareMode == GL_COMPARE_R_TO_TEXTURE) { - sampler->compare = 1; sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE; sampler->compare_func = st_compare_func_to_pipe(texobj->CompareFunc); -- cgit v1.2.3 From cbfe6ee5d58e7342012392a8ead7ae373625c00a Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Fri, 28 Mar 2008 15:22:34 -0600 Subject: gallium: Fix computation of Z values when not using early Z. This fixes the missing bitmaps in the engine and fogcoord demos. --- src/gallium/drivers/softpipe/sp_quad_fs.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index c10ad80e015..a73df31383f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -113,15 +113,18 @@ shade_quad( } } else { - /* copy input Z (which was interpolated by the executor) to output Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; - /* XXX not sure the above line is always correct. The following - * might be better: - quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; - */ - } + /* compute Z values now, as in the quad earlyz stage */ + /* XXX we should really only do this if the earlyz stage is not used */ + const float fx = (float) quad->x0; + const float fy = (float) quad->y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->outputs.depth[0] = z0; + quad->outputs.depth[1] = z0 + dzdx; + quad->outputs.depth[2] = z0 + dzdy; + quad->outputs.depth[3] = z0 + dzdx + dzdy; } /* shader may cull fragments */ -- cgit v1.2.3 From a52c0416d1f2105960b4646e2e268aed26814689 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Sat, 29 Mar 2008 14:41:03 +0100 Subject: gallium: Set vertex state/buffers en-mass. --- src/gallium/auxiliary/draw/draw_context.c | 24 ++++--- src/gallium/auxiliary/draw/draw_context.h | 12 ++-- src/gallium/auxiliary/util/u_draw_quad.c | 16 +++-- src/gallium/drivers/cell/ppu/cell_context.c | 4 +- src/gallium/drivers/cell/ppu/cell_state.h | 12 ++-- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 30 +++++---- src/gallium/drivers/failover/fo_context.h | 9 +-- src/gallium/drivers/failover/fo_state.c | 35 +++++----- src/gallium/drivers/failover/fo_state_emit.c | 22 ++----- src/gallium/drivers/i915simple/i915_state.c | 23 +++---- src/gallium/drivers/i965simple/brw_state.c | 60 +++++++++-------- src/gallium/drivers/softpipe/sp_context.c | 4 +- src/gallium/drivers/softpipe/sp_state.h | 12 ++-- src/gallium/drivers/softpipe/sp_state_vertex.c | 31 +++++---- src/gallium/include/pipe/p_context.h | 12 ++-- src/mesa/state_tracker/st_draw.c | 82 +++++++++++++----------- 16 files changed, 205 insertions(+), 183 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 81858e01cac..10bf9f54c10 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -233,24 +233,28 @@ void draw_set_viewport_state( struct draw_context *draw, void -draw_set_vertex_buffer(struct draw_context *draw, - unsigned attr, - const struct pipe_vertex_buffer *buffer) +draw_set_vertex_buffers(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_buffer *buffers) { + assert(count <= PIPE_MAX_ATTRIBS); + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - assert(attr < PIPE_MAX_ATTRIBS); - draw->vertex_buffer[attr] = *buffer; + + memcpy(draw->vertex_buffer, buffers, count * sizeof(buffers[0])); } void -draw_set_vertex_element(struct draw_context *draw, - unsigned attr, - const struct pipe_vertex_element *element) +draw_set_vertex_elements(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_element *elements) { + assert(count <= PIPE_MAX_ATTRIBS); + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - assert(attr < PIPE_MAX_ATTRIBS); - draw->vertex_element[attr] = *element; + + memcpy(draw->vertex_element, elements, count * sizeof(elements[0])); } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index dae687e5906..84bae3bd78d 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -138,13 +138,13 @@ void draw_delete_vertex_shader(struct draw_context *draw, * Vertex data functions */ -void draw_set_vertex_buffer(struct draw_context *draw, - unsigned attr, - const struct pipe_vertex_buffer *buffer); +void draw_set_vertex_buffers(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_buffer *buffers); -void draw_set_vertex_element(struct draw_context *draw, - unsigned attr, - const struct pipe_vertex_element *element); +void draw_set_vertex_elements(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_element *elements); void draw_set_mapped_element_buffer( struct draw_context *draw, unsigned eltSize, void *elements ); diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 37e85336091..e659edb0881 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -45,23 +45,25 @@ util_draw_vertex_buffer(struct pipe_context *pipe, uint num_attribs) { struct pipe_vertex_buffer vbuffer; - struct pipe_vertex_element velement; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; uint i; + assert(num_attribs <= PIPE_MAX_ATTRIBS); + /* tell pipe about the vertex buffer */ vbuffer.buffer = vbuf; vbuffer.pitch = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = 0; - pipe->set_vertex_buffer(pipe, 0, &vbuffer); + pipe->set_vertex_buffers(pipe, 1, &vbuffer); /* tell pipe about the vertex attributes */ for (i = 0; i < num_attribs; i++) { - velement.src_offset = i * 4 * sizeof(float); - velement.vertex_buffer_index = 0; - velement.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - velement.nr_components = 4; - pipe->set_vertex_element(pipe, i, &velement); + velements[i].src_offset = i * 4 * sizeof(float); + velements[i].vertex_buffer_index = 0; + velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + velements[i].nr_components = 4; } + pipe->set_vertex_elements(pipe, num_attribs, velements); /* draw */ pipe->draw_arrays(pipe, prim_type, 0, num_verts); diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index ccbbd1d331c..12eb5aa2547 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -103,8 +103,8 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.destroy = cell_destroy_context; /* state setters */ - cell->pipe.set_vertex_buffer = cell_set_vertex_buffer; - cell->pipe.set_vertex_element = cell_set_vertex_element; + cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; + cell->pipe.set_vertex_elements = cell_set_vertex_elements; cell->pipe.draw_arrays = cell_draw_arrays; cell->pipe.draw_elements = cell_draw_elements; diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index 31ce505e211..82580ea35ab 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -48,13 +48,13 @@ #define CELL_NEW_VERTEX_INFO 0x8000 -void cell_set_vertex_element(struct pipe_context *, - unsigned index, - const struct pipe_vertex_element *); +void cell_set_vertex_elements(struct pipe_context *, + unsigned count, + const struct pipe_vertex_element *); -void cell_set_vertex_buffer(struct pipe_context *, - unsigned index, - const struct pipe_vertex_buffer *); +void cell_set_vertex_buffers(struct pipe_context *, + unsigned count, + const struct pipe_vertex_buffer *); void cell_update_derived( struct cell_context *softpipe ); diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 37d25fb3578..6c83b8dc722 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -36,28 +36,34 @@ void -cell_set_vertex_element(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *attrib) +cell_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) { struct cell_context *cell = cell_context(pipe); - assert(index < PIPE_MAX_ATTRIBS); - cell->vertex_element[index] = *attrib; /* struct copy */ + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(cell->vertex_element, elements, count * sizeof(elements[0])); + cell->dirty |= CELL_NEW_VERTEX; - draw_set_vertex_element(cell->draw, index, attrib); + draw_set_vertex_elements(cell->draw, count, elements); } void -cell_set_vertex_buffer(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer) +cell_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) { struct cell_context *cell = cell_context(pipe); - assert(index < PIPE_MAX_ATTRIBS); - cell->vertex_buffer[index] = *buffer; /* struct copy */ + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0])); + cell->dirty |= CELL_NEW_VERTEX; - draw_set_vertex_buffer(cell->draw, index, buffer); + draw_set_vertex_buffers(cell->draw, count, buffers); } diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 4afe10c4b86..c6409fe1e1c 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -84,15 +84,16 @@ struct failover_context { struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; + + uint num_vertex_buffers; + uint num_vertex_elements; void *sw_sampler_state[PIPE_MAX_SAMPLERS]; void *hw_sampler_state[PIPE_MAX_SAMPLERS]; unsigned dirty; - unsigned dirty_vertex_buffer; - unsigned dirty_vertex_element; unsigned num_samplers; unsigned num_textures; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 11eec2714eb..6a797066322 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -402,32 +402,35 @@ failover_set_viewport_state( struct pipe_context *pipe, static void -failover_set_vertex_buffer(struct pipe_context *pipe, - unsigned unit, - const struct pipe_vertex_buffer *vertex_buffer) +failover_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *vertex_buffers) { struct failover_context *failover = failover_context(pipe); - failover->vertex_buffer[unit] = *vertex_buffer; + memcpy(failover->vertex_buffers, vertex_buffers, + count * sizeof(vertex_buffers[0])); failover->dirty |= FO_NEW_VERTEX_BUFFER; - failover->dirty_vertex_buffer |= (1<<unit); - failover->sw->set_vertex_buffer( failover->sw, unit, vertex_buffer ); - failover->hw->set_vertex_buffer( failover->hw, unit, vertex_buffer ); + failover->num_vertex_buffers = count; + failover->sw->set_vertex_buffers( failover->sw, count, vertex_buffers ); + failover->hw->set_vertex_buffers( failover->hw, count, vertex_buffers ); } static void -failover_set_vertex_element(struct pipe_context *pipe, - unsigned unit, - const struct pipe_vertex_element *vertex_element) +failover_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *vertex_elements) { struct failover_context *failover = failover_context(pipe); - failover->vertex_element[unit] = *vertex_element; + memcpy(failover->vertex_elements, vertex_elements, + count * sizeof(vertex_elements[0])); + failover->dirty |= FO_NEW_VERTEX_ELEMENT; - failover->dirty_vertex_element |= (1<<unit); - failover->sw->set_vertex_element( failover->sw, unit, vertex_element ); - failover->hw->set_vertex_element( failover->hw, unit, vertex_element ); + failover->num_vertex_elements = count; + failover->sw->set_vertex_elements( failover->sw, count, vertex_elements ); + failover->hw->set_vertex_elements( failover->hw, count, vertex_elements ); } void @@ -474,7 +477,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_scissor_state = failover_set_scissor_state; failover->pipe.set_sampler_textures = failover_set_sampler_textures; failover->pipe.set_viewport_state = failover_set_viewport_state; - failover->pipe.set_vertex_buffer = failover_set_vertex_buffer; - failover->pipe.set_vertex_element = failover_set_vertex_element; + failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; + failover->pipe.set_vertex_elements = failover_set_vertex_elements; failover->pipe.set_constant_buffer = failover_set_constant_buffer; } diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index bb89f925e93..bd4fce9d209 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -53,8 +53,6 @@ void failover_state_emit( struct failover_context *failover ) { - unsigned i; - if (failover->dirty & FO_NEW_BLEND) failover->sw->bind_blend_state( failover->sw, failover->blend->sw_state ); @@ -104,24 +102,16 @@ failover_state_emit( struct failover_context *failover ) } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (failover->dirty_vertex_buffer & (1<<i)) { - failover->sw->set_vertex_buffer( failover->sw, i, - &failover->vertex_buffer[i] ); - } - } + failover->sw->set_vertex_buffers( failover->sw, + failover->num_vertex_buffers, + failover->vertex_buffers ); } if (failover->dirty & FO_NEW_VERTEX_ELEMENT) { - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (failover->dirty_vertex_element & (1<<i)) { - failover->sw->set_vertex_element( failover->sw, i, - &failover->vertex_element[i] ); - } - } + failover->sw->set_vertex_elements( failover->sw, + failover->num_vertex_elements, + failover->vertex_elements ); } failover->dirty = 0; - failover->dirty_vertex_element = 0; - failover->dirty_vertex_buffer = 0; } diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 1cec36e206d..4404bc45901 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -687,23 +687,24 @@ static void i915_delete_rasterizer_state(struct pipe_context *pipe, FREE(raster); } -static void i915_set_vertex_buffer( struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer ) +static void i915_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) { struct i915_context *i915 = i915_context(pipe); - i915->vertex_buffer[index] = *buffer; + + memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); /* pass-through to draw module */ - draw_set_vertex_buffer(i915->draw, index, buffer); + draw_set_vertex_buffers(i915->draw, count, buffers); } -static void i915_set_vertex_element( struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *element) +static void i915_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) { struct i915_context *i915 = i915_context(pipe); /* pass-through to draw module */ - draw_set_vertex_element(i915->draw, index, element); + draw_set_vertex_elements(i915->draw, count, elements); } @@ -742,6 +743,6 @@ i915_init_state_functions( struct i915_context *i915 ) i915->pipe.set_scissor_state = i915_set_scissor_state; i915->pipe.set_sampler_textures = i915_set_sampler_textures; i915->pipe.set_viewport_state = i915_set_viewport_state; - i915->pipe.set_vertex_buffer = i915_set_vertex_buffer; - i915->pipe.set_vertex_element = i915_set_vertex_element; + i915->pipe.set_vertex_buffers = i915_set_vertex_buffers; + i915->pipe.set_vertex_elements = i915_set_vertex_elements; } diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 0d04a8a5947..376f1487b29 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -277,45 +277,49 @@ static void brw_set_viewport_state( struct pipe_context *pipe, } -static void brw_set_vertex_buffer( struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer ) +static void brw_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) { struct brw_context *brw = brw_context(pipe); - brw->vb.vbo_array[index] = buffer; + memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); } -static void brw_set_vertex_element(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *element) +static void brw_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) { /* flush ? */ struct brw_context *brw = brw_context(pipe); + uint i; - assert(index < PIPE_MAX_ATTRIBS); - struct brw_vertex_element_state el; - memset(&el, 0, sizeof(el)); + assert(count <= PIPE_MAX_ATTRIBS); - el.ve0.src_offset = element->src_offset; - el.ve0.src_format = brw_translate_surface_format(element->src_format); - el.ve0.valid = 1; - el.ve0.vertex_buffer_index = element->vertex_buffer_index; + for (i = 0; i < count; i++) { + struct brw_vertex_element_state el; + memset(&el, 0, sizeof(el)); - el.ve1.dst_offset = index * 4; + el.ve0.src_offset = elements[i].src_offset; + el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); + el.ve0.valid = 1; + el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; - el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.dst_offset = i * 4; - switch (element->nr_components) { - case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } + el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; - brw->vb.inputs[index] = el; + switch (elements[i].nr_components) { + case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; + case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; + case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + brw->vb.inputs[i] = el; + } } @@ -457,6 +461,6 @@ brw_init_state_functions( struct brw_context *brw ) brw->pipe.set_scissor_state = brw_set_scissor_state; brw->pipe.set_sampler_textures = brw_set_sampler_textures; brw->pipe.set_viewport_state = brw_set_viewport_state; - brw->pipe.set_vertex_buffer = brw_set_vertex_buffer; - brw->pipe.set_vertex_element = brw_set_vertex_element; + brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; + brw->pipe.set_vertex_elements = brw_set_vertex_elements; } diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 16fb06f176f..e298ed37c36 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -174,8 +174,8 @@ softpipe_create( struct pipe_screen *screen, softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; - softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; - softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; + softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; + softpipe->pipe.set_vertex_elements = softpipe_set_vertex_elements; softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 0bb1095aece..6e6501f5bc4 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -151,13 +151,13 @@ void softpipe_set_sampler_textures( struct pipe_context *, void softpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); -void softpipe_set_vertex_element(struct pipe_context *, - unsigned index, - const struct pipe_vertex_element *); +void softpipe_set_vertex_elements(struct pipe_context *, + unsigned count, + const struct pipe_vertex_element *); -void softpipe_set_vertex_buffer(struct pipe_context *, - unsigned index, - const struct pipe_vertex_buffer *); +void softpipe_set_vertex_buffers(struct pipe_context *, + unsigned count, + const struct pipe_vertex_buffer *); void softpipe_update_derived( struct softpipe_context *softpipe ); diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index c054e76d9be..e0230e16a4e 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -37,28 +37,35 @@ void -softpipe_set_vertex_element(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *attrib) +softpipe_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *attribs) { struct softpipe_context *softpipe = softpipe_context(pipe); - assert(index < PIPE_MAX_ATTRIBS); - softpipe->vertex_element[index] = *attrib; /* struct copy */ + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(softpipe->vertex_element, attribs, + count * sizeof(struct pipe_vertex_element)); + softpipe->dirty |= SP_NEW_VERTEX; - draw_set_vertex_element(softpipe->draw, index, attrib); + draw_set_vertex_elements(softpipe->draw, count, attribs); } void -softpipe_set_vertex_buffer(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer) +softpipe_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) { struct softpipe_context *softpipe = softpipe_context(pipe); - assert(index < PIPE_MAX_ATTRIBS); - softpipe->vertex_buffer[index] = *buffer; /* struct copy */ + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0])); + softpipe->dirty |= SP_NEW_VERTEX; - draw_set_vertex_buffer(softpipe->draw, index, buffer); + draw_set_vertex_buffers(softpipe->draw, count, buffers); } diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 2d063e3f9e4..324f70185af 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -155,13 +155,13 @@ struct pipe_context { unsigned num_textures, struct pipe_texture ** ); - void (*set_vertex_buffer)( struct pipe_context *, - unsigned index, - const struct pipe_vertex_buffer * ); + void (*set_vertex_buffers)( struct pipe_context *, + unsigned num_buffers, + const struct pipe_vertex_buffer * ); - void (*set_vertex_element)( struct pipe_context *, - unsigned index, - const struct pipe_vertex_element * ); + void (*set_vertex_elements)( struct pipe_context *, + unsigned num_elements, + const struct pipe_vertex_element * ); /*@}*/ diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 4aca3311b7b..f0f62246ddc 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -210,6 +210,7 @@ st_draw_vbo(GLcontext *ctx, const struct pipe_shader_state *vs; struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; GLuint attr; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; /* sanity check for pointer arithmetic below */ assert(sizeof(arrays[0]->Ptr[0]) == 1); @@ -226,7 +227,6 @@ st_draw_vbo(GLcontext *ctx, for (attr = 0; attr < vp->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; - struct pipe_vertex_element velement; if (bufobj && bufobj->Name) { /* Attribute data is in a VBO. @@ -239,8 +239,8 @@ st_draw_vbo(GLcontext *ctx, vbuffer[attr].buffer = NULL; pipe_buffer_reference(winsys, &vbuffer[attr].buffer, stobj->buffer); vbuffer[attr].buffer_offset = (unsigned) arrays[0]->Ptr;/* in bytes */ - velement.src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; - assert(velement.src_offset <= 2048); /* 11-bit field */ + velements[attr].src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; + assert(velements[attr].src_offset <= 2048); /* 11-bit field */ } else { /* attribute data is in user-space memory, not a VBO */ @@ -259,24 +259,24 @@ st_draw_vbo(GLcontext *ctx, (void *) arrays[mesaAttr]->Ptr, bytes); vbuffer[attr].buffer_offset = 0; - velement.src_offset = 0; + velements[attr].src_offset = 0; } /* common-case setup */ vbuffer[attr].pitch = arrays[mesaAttr]->StrideB; /* in bytes */ vbuffer[attr].max_index = max_index; - velement.vertex_buffer_index = attr; - velement.nr_components = arrays[mesaAttr]->Size; - velement.src_format = pipe_vertex_format(arrays[mesaAttr]->Type, - arrays[mesaAttr]->Size, - arrays[mesaAttr]->Normalized); - assert(velement.src_format); - - /* tell pipe about this attribute */ - pipe->set_vertex_buffer(pipe, attr, &vbuffer[attr]); - pipe->set_vertex_element(pipe, attr, &velement); + velements[attr].vertex_buffer_index = attr; + velements[attr].nr_components = arrays[mesaAttr]->Size; + velements[attr].src_format + = pipe_vertex_format(arrays[mesaAttr]->Type, + arrays[mesaAttr]->Size, + arrays[mesaAttr]->Normalized); + assert(velements[attr].src_format); } + pipe->set_vertex_buffers(pipe, vp->num_inputs, vbuffer); + pipe->set_vertex_elements(pipe, vp->num_inputs, velements); + /* do actual drawing */ if (ib) { @@ -336,8 +336,8 @@ st_draw_vbo(GLcontext *ctx, for (attr = 0; attr < vp->num_inputs; attr++) { pipe_buffer_reference(winsys, &vbuffer[attr].buffer, NULL); assert(!vbuffer[attr].buffer); - pipe->set_vertex_buffer(pipe, attr, &vbuffer[attr]); } + pipe->set_vertex_buffers(pipe, vp->num_inputs, vbuffer); } @@ -362,7 +362,7 @@ st_draw_vertices(GLcontext *ctx, unsigned prim, struct pipe_context *pipe = ctx->st->pipe; struct pipe_buffer *vbuf; struct pipe_vertex_buffer vbuffer; - struct pipe_vertex_element velement; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; unsigned i; assert(numAttribs > 0); @@ -393,16 +393,16 @@ st_draw_vertices(GLcontext *ctx, unsigned prim, vbuffer.buffer = vbuf; vbuffer.pitch = numAttribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = 0; - pipe->set_vertex_buffer(pipe, 0, &vbuffer); + pipe->set_vertex_buffers(pipe, 1, &vbuffer); /* tell pipe about the vertex attributes */ for (i = 0; i < numAttribs; i++) { - velement.src_offset = i * 4 * sizeof(GLfloat); - velement.vertex_buffer_index = 0; - velement.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - velement.nr_components = 4; - pipe->set_vertex_element(pipe, i, &velement); + velements[i].src_offset = i * 4 * sizeof(GLfloat); + velements[i].vertex_buffer_index = 0; + velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + velements[i].nr_components = 4; } + pipe->set_vertex_elements(pipe, numAttribs, velements); /* draw */ pipe->draw_arrays(pipe, prim, 0, numVertex); @@ -470,7 +470,8 @@ st_feedback_draw_vbo(GLcontext *ctx, const struct st_vertex_program *vp; const struct pipe_shader_state *vs; struct pipe_buffer *index_buffer_handle = 0; - struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_buffer vbuffers[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; GLuint attr, i; ubyte *mapped_constants; @@ -505,7 +506,6 @@ st_feedback_draw_vbo(GLcontext *ctx, for (attr = 0; attr < vp->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; - struct pipe_vertex_element velement; void *map; if (bufobj && bufobj->Name) { @@ -516,10 +516,10 @@ st_feedback_draw_vbo(GLcontext *ctx, struct st_buffer_object *stobj = st_buffer_object(bufobj); assert(stobj->buffer); - vbuffer[attr].buffer = NULL; - pipe_buffer_reference(winsys, &vbuffer[attr].buffer, stobj->buffer); - vbuffer[attr].buffer_offset = (unsigned) arrays[0]->Ptr;/* in bytes */ - velement.src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; + vbuffers[attr].buffer = NULL; + pipe_buffer_reference(winsys, &vbuffers[attr].buffer, stobj->buffer); + vbuffers[attr].buffer_offset = (unsigned) arrays[0]->Ptr;/* in bytes */ + velements[attr].src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; } else { /* attribute data is in user-space memory, not a VBO */ @@ -528,35 +528,39 @@ st_feedback_draw_vbo(GLcontext *ctx, * (max_index + 1)); /* wrap user data */ - vbuffer[attr].buffer + vbuffers[attr].buffer = winsys->user_buffer_create(winsys, (void *) arrays[mesaAttr]->Ptr, bytes); - vbuffer[attr].buffer_offset = 0; - velement.src_offset = 0; + vbuffers[attr].buffer_offset = 0; + velements[attr].src_offset = 0; } /* common-case setup */ - vbuffer[attr].pitch = arrays[mesaAttr]->StrideB; /* in bytes */ - vbuffer[attr].max_index = max_index; - velement.vertex_buffer_index = attr; - velement.nr_components = arrays[mesaAttr]->Size; - velement.src_format = pipe_vertex_format(arrays[mesaAttr]->Type, + vbuffers[attr].pitch = arrays[mesaAttr]->StrideB; /* in bytes */ + vbuffers[attr].max_index = max_index; + velements[attr].vertex_buffer_index = attr; + velements[attr].nr_components = arrays[mesaAttr]->Size; + velements[attr].src_format = pipe_vertex_format(arrays[mesaAttr]->Type, arrays[mesaAttr]->Size, arrays[mesaAttr]->Normalized); - assert(velement.src_format); + assert(velements[attr].src_format); /* tell draw about this attribute */ +#if 0 draw_set_vertex_buffer(draw, attr, &vbuffer[attr]); - draw_set_vertex_element(draw, attr, &velement); +#endif /* map the attrib buffer */ map = pipe->winsys->buffer_map(pipe->winsys, - vbuffer[attr].buffer, + vbuffers[attr].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, attr, map); } + draw_set_vertex_buffers(draw, vp->num_inputs, vbuffers); + draw_set_vertex_elements(draw, vp->num_inputs, velements); + if (ib) { unsigned indexSize; struct gl_buffer_object *bufobj = ib->obj; -- cgit v1.2.3 From 3017999d9bee8f9d2ef170c1bb6926aab8e08393 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 30 Mar 2008 19:00:07 +1000 Subject: nv40: vp const/immd fix --- src/gallium/drivers/nv40/nv40_vertprog.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index e5ce8943752..41885b9d4a5 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -328,7 +328,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, struct nv40_sreg src[3], dst, tmp; struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); int mask; - int ai = -1, ci = -1; + int ai = -1, ci = -1, ii = -1; int i; if (finst->Instruction.Opcode == TGSI_OPCODE_END) @@ -358,12 +358,9 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, tgsi_src(vpc, fsrc), none, none); } break; - /*XXX: index comparison is broken now that consts come from - * two different register files. - */ case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { + if ((ci == -1 && ii == -1) || + ci == fsrc->SrcRegister.Index) { ci = fsrc->SrcRegister.Index; src[i] = tgsi_src(vpc, fsrc); } else { @@ -372,6 +369,17 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, tgsi_src(vpc, fsrc), none, none); } break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->SrcRegister.Index) { + ii = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; case TGSI_FILE_TEMPORARY: /* handled above */ break; -- cgit v1.2.3 From 68395f6726183a0776e324b900e429449ede2b22 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 30 Mar 2008 19:08:59 +1000 Subject: nv40: vp 1/0/- swz --- src/gallium/drivers/nv40/nv40_vertprog.c | 74 +++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 41885b9d4a5..40ef7174a41 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -4,13 +4,13 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "nv40_context.h" #include "nv40_state.h" /* TODO (at least...): * 1. Indexed consts + ARL - * 2. Arb. swz/negation * 3. NV_vp11, NV_vp2, NV_vp3 features * - extra arith opcodes * - branching @@ -321,6 +321,66 @@ tgsi_mask(uint tgsi) return mask; } +static boolean +src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, + struct nv40_sreg *src) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, + fsrc->SrcRegisterExtSwz.NegateZ, + fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= tgsi_mask(1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= tgsi_mask(1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= tgsi_mask(1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= tgsi_mask(1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(vpc); + + if (mask) + arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv40_sreg one = temp(vpc); + arith(vpc, 0, OP_STR, one, neg_mask, one, none, none); + arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + static boolean nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, const struct tgsi_full_instruction *finst) @@ -347,6 +407,18 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc; fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(vpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + switch (fsrc->SrcRegister.File) { case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->SrcRegister.Index) { -- cgit v1.2.3 From bbefb541ad94382debb0f7a8daa636729799a31a Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 30 Mar 2008 20:32:22 +1000 Subject: nouveau: adapt to recent gallium changes --- src/gallium/drivers/nouveau/nouveau_push.h | 8 ++--- src/gallium/drivers/nouveau/nouveau_stateobj.h | 4 +-- src/gallium/drivers/nouveau/nouveau_winsys.h | 3 +- src/gallium/drivers/nv10/nv10_context.c | 19 +++------- src/gallium/drivers/nv10/nv10_state.c | 18 +++++----- src/gallium/drivers/nv30/nv30_context.c | 19 +++------- src/gallium/drivers/nv30/nv30_query.c | 2 +- src/gallium/drivers/nv30/nv30_state.c | 18 +++++----- src/gallium/drivers/nv30/nv30_vbo.c | 4 +-- src/gallium/drivers/nv40/nv40_context.c | 17 ++------- src/gallium/drivers/nv40/nv40_draw.c | 4 +-- src/gallium/drivers/nv40/nv40_query.c | 2 +- src/gallium/drivers/nv40/nv40_screen.c | 2 +- src/gallium/drivers/nv40/nv40_state.c | 20 +++++------ src/gallium/drivers/nv40/nv40_state_emit.c | 4 +-- src/gallium/drivers/nv40/nv40_vbo.c | 4 +-- src/gallium/drivers/nv50/nv50_context.c | 18 ++-------- src/gallium/drivers/nv50/nv50_screen.c | 2 +- src/gallium/drivers/nv50/nv50_state.c | 12 +++---- src/gallium/winsys/dri/nouveau/nouveau_context.c | 4 +-- src/gallium/winsys/dri/nouveau/nouveau_winsys.c | 12 ++++++- .../winsys/dri/nouveau/nouveau_winsys_pipe.c | 42 ++++++++++++++++++++++ 22 files changed, 121 insertions(+), 117 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h index 225c17744ad..54ef1c1291e 100644 --- a/src/gallium/drivers/nouveau/nouveau_push.h +++ b/src/gallium/drivers/nouveau/nouveau_push.h @@ -27,7 +27,7 @@ #define BEGIN_RING(obj,mthd,size) do { \ NOUVEAU_PUSH_CONTEXT(pc); \ if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \ - pc->nvws->push_flush(pc->nvws, ((size) + 1)); \ + pc->nvws->push_flush(pc->nvws, ((size) + 1), NULL); \ OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \ } while(0) @@ -36,9 +36,9 @@ BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ } while(0) -#define FIRE_RING() do { \ +#define FIRE_RING(fence) do { \ NOUVEAU_PUSH_CONTEXT(pc); \ - pc->nvws->push_flush(pc->nvws, 0); \ + pc->nvws->push_flush(pc->nvws, 0, fence); \ } while(0) #define OUT_RELOC(bo,data,flags,vor,tor) do { \ @@ -73,7 +73,7 @@ #define OUT_RELOCm(bo, flags, obj, mthd, size) do { \ NOUVEAU_PUSH_CONTEXT(pc); \ if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \ - pc->nvws->push_flush(pc->nvws->channel, ((size) + 1)); \ + pc->nvws->push_flush(pc->nvws->channel, ((size) + 1), NULL); \ OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \ (flags), 0, 0); \ pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \ diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index d465223748a..d501b76b51e 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -99,7 +99,7 @@ so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) nr = so->cur - so->push; if (pb->remaining < nr) - nvws->push_flush(nvws, nr); + nvws->push_flush(nvws, nr, NULL); pb->remaining -= nr; memcpy(pb->cur, so->push, nr * 4); @@ -120,7 +120,7 @@ so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) i = so->cur_reloc << 1; if (nvws->channel->pushbuf->remaining < i) - nvws->push_flush(nvws, i); + nvws->push_flush(nvws, i, NULL); nvws->channel->pushbuf->remaining -= i; for (i = 0; i < so->cur_reloc; i++) { diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 7fa7cc0910c..2a5305f7ce4 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -27,7 +27,8 @@ struct nouveau_winsys { int (*push_reloc)(struct nouveau_winsys *, void *ptr, struct pipe_buffer *, uint32_t data, uint32_t flags, uint32_t vor, uint32_t tor); - int (*push_flush)(struct nouveau_winsys *, unsigned size); + int (*push_flush)(struct nouveau_winsys *, unsigned size, + struct pipe_fence_handle **fence); int (*grobj_alloc)(struct nouveau_winsys *, int grclass, struct nouveau_grobj **); diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 2599acf2861..14042fb2fbb 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -7,10 +7,10 @@ #include "nv10_screen.h" static void -nv10_flush(struct pipe_context *pipe, unsigned flags) +nv10_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) { struct nv10_context *nv10 = nv10_context(pipe); - struct nouveau_winsys *nvws = nv10->nvws; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { BEGIN_RING(celsius, 0x1fd8, 1); @@ -19,18 +19,7 @@ nv10_flush(struct pipe_context *pipe, unsigned flags) OUT_RING (1); } - if (flags & PIPE_FLUSH_WAIT) { - nvws->notifier_reset(nv10->sync, 0); - BEGIN_RING(celsius, 0x104, 1); - OUT_RING (0); - BEGIN_RING(celsius, 0x100, 1); - OUT_RING (0); - } - - FIRE_RING(); - - if (flags & PIPE_FLUSH_WAIT) - nvws->notifier_wait(nv10->sync, 0, 0, 2000); + FIRE_RING(fence); } static void @@ -253,7 +242,7 @@ nv10_init_hwctx(struct nv10_context *nv10, int celsius_class) OUT_RING (1); - FIRE_RING (); + FIRE_RING (NULL); return TRUE; } diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index d7c445f1b39..1ff01de1060 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -633,24 +633,22 @@ nv10_set_viewport_state(struct pipe_context *pipe, } static void -nv10_set_vertex_buffer(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_buffer *vb) +nv10_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) { struct nv10_context *nv10 = nv10_context(pipe); - nv10->vtxbuf[index] = *vb; - + memcpy(nv10->vtxbuf, vb, sizeof(*vb) * count); nv10->dirty |= NV10_NEW_ARRAYS; } static void -nv10_set_vertex_element(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_element *ve) +nv10_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) { struct nv10_context *nv10 = nv10_context(pipe); - nv10->vtxelt[index] = *ve; - + memcpy(nv10->vtxelt, ve, sizeof(*ve) * count); nv10->dirty |= NV10_NEW_ARRAYS; } @@ -693,7 +691,7 @@ nv10_init_state_functions(struct nv10_context *nv10) nv10->pipe.set_scissor_state = nv10_set_scissor_state; nv10->pipe.set_viewport_state = nv10_set_viewport_state; - nv10->pipe.set_vertex_buffer = nv10_set_vertex_buffer; - nv10->pipe.set_vertex_element = nv10_set_vertex_element; + nv10->pipe.set_vertex_buffers = nv10_set_vertex_buffers; + nv10->pipe.set_vertex_elements = nv10_set_vertex_elements; } diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index cdd662a9f19..1e729d789bd 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -7,10 +7,10 @@ #include "nv30_screen.h" static void -nv30_flush(struct pipe_context *pipe, unsigned flags) +nv30_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) { struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_winsys *nvws = nv30->nvws; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { BEGIN_RING(rankine, 0x1fd8, 1); @@ -19,18 +19,7 @@ nv30_flush(struct pipe_context *pipe, unsigned flags) OUT_RING (1); } - if (flags & PIPE_FLUSH_WAIT) { - nvws->notifier_reset(nv30->sync, 0); - BEGIN_RING(rankine, 0x104, 1); - OUT_RING (0); - BEGIN_RING(rankine, 0x100, 1); - OUT_RING (0); - } - - FIRE_RING(); - - if (flags & PIPE_FLUSH_WAIT) - nvws->notifier_wait(nv30->sync, 0, 0, 2000); + FIRE_RING(fence); } static void @@ -144,7 +133,7 @@ nv30_init_hwctx(struct nv30_context *nv30, int rankine_class) BEGIN_RING(rankine, 0x1e94, 1); OUT_RING (0x13); - FIRE_RING (); + FIRE_RING (NULL); return TRUE; } diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c index 71fdcfa24df..e19cb455dce 100644 --- a/src/gallium/drivers/nv30/nv30_query.c +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -67,7 +67,7 @@ nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(); + FIRE_RING(NULL); } static boolean diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index b0055892aeb..983638adccb 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -710,24 +710,22 @@ nv30_set_viewport_state(struct pipe_context *pipe, } static void -nv30_set_vertex_buffer(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_buffer *vb) +nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) { struct nv30_context *nv30 = nv30_context(pipe); - nv30->vtxbuf[index] = *vb; - + memcpy(nv30->vtxbuf, vb, sizeof(*vb) * count); nv30->dirty |= NV30_NEW_ARRAYS; } static void -nv30_set_vertex_element(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_element *ve) +nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) { struct nv30_context *nv30 = nv30_context(pipe); - nv30->vtxelt[index] = *ve; - + memcpy(nv30->vtxelt, ve, sizeof(*ve) * count); nv30->dirty |= NV30_NEW_ARRAYS; } @@ -770,7 +768,7 @@ nv30_init_state_functions(struct nv30_context *nv30) nv30->pipe.set_scissor_state = nv30_set_scissor_state; nv30->pipe.set_viewport_state = nv30_set_viewport_state; - nv30->pipe.set_vertex_buffer = nv30_set_vertex_buffer; - nv30->pipe.set_vertex_element = nv30_set_vertex_element; + nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers; + nv30->pipe.set_vertex_elements = nv30_set_vertex_elements; } diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index a62462f7bc5..b18a407ec5a 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -229,7 +229,7 @@ nv30_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (0); - pipe->flush(pipe, 0); + pipe->flush(pipe, 0, NULL); return TRUE; } @@ -418,7 +418,7 @@ nv30_draw_elements(struct pipe_context *pipe, mode, start, count); } - pipe->flush(pipe, 0); + pipe->flush(pipe, 0, NULL); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 7fcf8b86197..f9c93f7a2d7 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -7,10 +7,10 @@ #include "nv40_screen.h" static void -nv40_flush(struct pipe_context *pipe, unsigned flags) +nv40_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_winsys *nvws = nv40->nvws; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { BEGIN_RING(curie, 0x1fd8, 1); @@ -19,18 +19,7 @@ nv40_flush(struct pipe_context *pipe, unsigned flags) OUT_RING (1); } - if (flags & PIPE_FLUSH_WAIT) { - nvws->notifier_reset(nv40->screen->sync, 0); - BEGIN_RING(curie, 0x104, 1); - OUT_RING (0); - BEGIN_RING(curie, 0x100, 1); - OUT_RING (0); - } - - FIRE_RING(); - - if (flags & PIPE_FLUSH_WAIT) - nvws->notifier_wait(nv40->screen->sync, 0, 0, 2000); + FIRE_RING(fence); } static void diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index d05e5ad1936..9cd8fa6a497 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -89,7 +89,7 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, NOUVEAU_ERR("AIII, missed flush\n"); assert(0); } - FIRE_RING(); + FIRE_RING(NULL); nv40_state_emit(nv40); } @@ -275,7 +275,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); draw_flush(nv40->draw); - pipe->flush(pipe, 0); + pipe->flush(pipe, 0, NULL); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index 15961591b96..57f39cfab0c 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -75,7 +75,7 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(); + FIRE_RING(NULL); } static boolean diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 75b965bb9d1..e98005f749f 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -266,7 +266,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, so_emit(nvws, so); so_ref(NULL, &so); - nvws->push_flush(nvws, 0); + nvws->push_flush(nvws, 0, NULL); screen->pipe.winsys = ws; screen->pipe.destroy = nv40_screen_destroy; diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 3eafbece309..1417c95e758 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -660,26 +660,26 @@ nv40_set_viewport_state(struct pipe_context *pipe, } static void -nv40_set_vertex_buffer(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_buffer *vb) +nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) { struct nv40_context *nv40 = nv40_context(pipe); - draw_set_vertex_buffer(nv40->draw, index, vb); + draw_set_vertex_buffers(nv40->draw, count, vb); - nv40->vtxbuf[index] = *vb; + memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count); nv40->dirty |= NV40_NEW_ARRAYS; } static void -nv40_set_vertex_element(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_element *ve) +nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) { struct nv40_context *nv40 = nv40_context(pipe); - draw_set_vertex_element(nv40->draw, index, ve); + draw_set_vertex_elements(nv40->draw, count, ve); - nv40->vtxelt[index] = *ve; + memcpy(nv40->vtxelt, ve, sizeof(*ve) * count); nv40->dirty |= NV40_NEW_ARRAYS; } @@ -722,7 +722,7 @@ nv40_init_state_functions(struct nv40_context *nv40) nv40->pipe.set_scissor_state = nv40_set_scissor_state; nv40->pipe.set_viewport_state = nv40_set_viewport_state; - nv40->pipe.set_vertex_buffer = nv40_set_vertex_buffer; - nv40->pipe.set_vertex_element = nv40_set_vertex_element; + nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers; + nv40->pipe.set_vertex_elements = nv40_set_vertex_elements; } diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index a9a9abc9220..74feb6d4bfc 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -123,7 +123,7 @@ nv40_state_validate(struct nv40_context *nv40) return FALSE; /* Attempt to go to hwtnl again */ - nv40->pipe.flush(&nv40->pipe, 0); + nv40->pipe.flush(&nv40->pipe, 0, NULL); nv40->dirty |= (NV40_NEW_VIEWPORT | NV40_NEW_VERTPROG | NV40_NEW_ARRAYS | @@ -147,7 +147,7 @@ nv40_state_validate_swtnl(struct nv40_context *nv40) /* Setup for swtnl */ if (nv40->render_mode == HW) { NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); - nv40->pipe.flush(&nv40->pipe, 0); + nv40->pipe.flush(&nv40->pipe, 0, NULL); nv40->dirty |= (NV40_NEW_VIEWPORT | NV40_NEW_VERTPROG | NV40_NEW_ARRAYS | diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 4fae10f74b4..b66bf26afb3 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -209,7 +209,7 @@ nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); OUT_RING (0); - pipe->flush(pipe, 0); + pipe->flush(pipe, 0, NULL); return TRUE; } @@ -384,7 +384,7 @@ nv40_draw_elements(struct pipe_context *pipe, mode, start, count); } - pipe->flush(pipe, 0); + pipe->flush(pipe, 0, NULL); return TRUE; } diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 980d066c844..e822d863945 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -7,24 +7,12 @@ #include "nv50_screen.h" static void -nv50_flush(struct pipe_context *pipe, unsigned flags) +nv50_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) { struct nv50_context *nv50 = (struct nv50_context *)pipe; - struct nv50_screen *screen = nv50->screen; - struct nouveau_winsys *nvws = screen->nvws; - if (flags & PIPE_FLUSH_WAIT) { - nvws->notifier_reset(screen->sync, 0); - BEGIN_RING(tesla, 0x104, 1); - OUT_RING (0); - BEGIN_RING(tesla, 0x100, 1); - OUT_RING (0); - } - - FIRE_RING(); - - if (flags & PIPE_FLUSH_WAIT) - nvws->notifier_wait(screen->sync, 0, 0, 2000); + FIRE_RING(fence); } static void diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ff4aca81a5a..586373a5c4b 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -171,7 +171,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, so_emit(nvws, so); so_ref(NULL, &so); - nvws->push_flush(nvws, 0); + nvws->push_flush(nvws, 0, NULL); screen->pipe.winsys = ws; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index b096a2583d7..a614ea03358 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -422,14 +422,14 @@ nv50_set_viewport_state(struct pipe_context *pipe, } static void -nv50_set_vertex_buffer(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_buffer *vb) +nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) { } static void -nv50_set_vertex_element(struct pipe_context *pipe, unsigned index, - const struct pipe_vertex_element *ve) +nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) { } @@ -472,7 +472,7 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.set_scissor_state = nv50_set_scissor_state; nv50->pipe.set_viewport_state = nv50_set_viewport_state; - nv50->pipe.set_vertex_buffer = nv50_set_vertex_buffer; - nv50->pipe.set_vertex_element = nv50_set_vertex_element; + nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; + nv50->pipe.set_vertex_elements = nv50_set_vertex_elements; } diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c index 336dd65847b..cf1d83b18f4 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.c @@ -282,7 +282,7 @@ nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) assert(nv); - st_flush(nv->st, PIPE_FLUSH_WAIT); + st_finish(nv->st); st_destroy_context(nv->st); if (nv->pctx_id >= 0) { @@ -337,7 +337,7 @@ nouveau_context_unbind(__DRIcontextPrivate *driContextPriv) struct nouveau_context *nv = driContextPriv->driverPrivate; (void)nv; - st_flush(nv->st, 0); + st_flush(nv->st, 0, NULL); return GL_TRUE; } diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c index bf1afce5d9d..60fdbb8dfdf 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c @@ -69,8 +69,18 @@ nouveau_pipe_push_reloc(struct nouveau_winsys *nvws, void *ptr, } static int -nouveau_pipe_push_flush(struct nouveau_winsys *nvws, unsigned size) +nouveau_pipe_push_flush(struct nouveau_winsys *nvws, unsigned size, + struct pipe_fence_handle **fence) { + if (fence) { + struct nouveau_pushbuf *pb = nvws->channel->pushbuf; + struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(pb); + struct nouveau_fence *ref = NULL; + + nouveau_fence_ref(nvpb->fence, &ref); + *fence = (struct pipe_fence_handle *)ref; + } + return nouveau_pushbuf_flush(nvws->channel, size); } diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c index e1a9271395b..453b3623f4a 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c @@ -164,6 +164,44 @@ nouveau_pipe_bo_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) nouveau_bo_unmap(nvbuf->bo); } +static INLINE struct nouveau_fence * +nouveau_pipe_fence(struct pipe_fence_handle *pfence) +{ + return (struct nouveau_fence *)pfence; +} + +static void +nouveau_pipe_fence_reference(struct pipe_winsys *ws, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *pfence) +{ + nouveau_fence_ref((void *)pfence, (void *)ptr); +} + +static int +nouveau_pipe_fence_signalled(struct pipe_winsys *ws, + struct pipe_fence_handle *pfence, unsigned flag) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)ws; + struct nouveau_fence *fence = nouveau_pipe_fence(pfence); + + if (nouveau_fence(fence)->signalled == 0) + nouveau_fence_flush(nvpws->nv->nvc->channel); + + return !nouveau_fence(fence)->signalled; +} + +static int +nouveau_pipe_fence_finish(struct pipe_winsys *ws, + struct pipe_fence_handle *pfence, unsigned flag) +{ + struct nouveau_fence *fence = nouveau_pipe_fence(pfence); + struct nouveau_fence *ref = NULL; + + nouveau_fence_ref(fence, &ref); + return nouveau_fence_wait(&ref); +} + struct pipe_winsys * nouveau_create_pipe_winsys(struct nouveau_context *nv) { @@ -189,6 +227,10 @@ nouveau_create_pipe_winsys(struct nouveau_context *nv) pws->buffer_map = nouveau_pipe_bo_map; pws->buffer_unmap = nouveau_pipe_bo_unmap; + pws->fence_reference = nouveau_pipe_fence_reference; + pws->fence_signalled = nouveau_pipe_fence_signalled; + pws->fence_finish = nouveau_pipe_fence_finish; + pws->get_name = nouveau_get_name; return &nvpws->pws; -- cgit v1.2.3 From 833b1fb152851ba0d4fa2a5ba4702ee98d9bc217 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 31 Mar 2008 05:13:06 +1000 Subject: nv40: mark fp dirty even when only consts updated Fixes arbfplight "sticking". --- src/gallium/drivers/nv40/nv40_fragprog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 87bca41cf64..4b7667e0381 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -919,6 +919,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) nv40->constbuf[PIPE_SHADER_FRAGMENT]; struct pipe_winsys *ws = nv40->pipe.winsys; struct nouveau_stateobj *so; + boolean new_consts = FALSE; int i; if (fp->translated) @@ -945,7 +946,6 @@ nv40_fragprog_validate(struct nv40_context *nv40) update_constants: if (fp->nr_consts) { - boolean new_consts = FALSE; float *map; map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); @@ -965,7 +965,7 @@ update_constants: nv40_fragprog_upload(nv40, fp); } - if (fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { + if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]); return TRUE; } -- cgit v1.2.3 From 7b389f8d2f307fa0714494f2a43e9141cc04ed3e Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sun, 30 Mar 2008 21:52:36 +0200 Subject: nv30: use FREE macro --- src/gallium/drivers/nv30/nv30_context.c | 2 +- src/gallium/drivers/nv30/nv30_draw.c | 2 +- src/gallium/drivers/nv30/nv30_fragprog.c | 4 ++-- src/gallium/drivers/nv30/nv30_miptree.c | 6 +++--- src/gallium/drivers/nv30/nv30_query.c | 2 +- src/gallium/drivers/nv30/nv30_state.c | 4 ++-- src/gallium/drivers/nv30/nv30_vertprog.c | 8 ++++---- 7 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 1e729d789bd..b8e8b86d99a 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -41,7 +41,7 @@ nv30_destroy(struct pipe_context *pipe) nvws->grobj_free(&nv30->rankine); - free(nv30); + FREE(nv30); } static boolean diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c index 59a72657996..8ec0835225b 100644 --- a/src/gallium/drivers/nv30/nv30_draw.c +++ b/src/gallium/drivers/nv30/nv30_draw.c @@ -40,7 +40,7 @@ nv30_draw_reset_stipple_counter(struct draw_stage *draw) static void nv30_draw_destroy(struct draw_stage *draw) { - free(draw); + FREE(draw); } struct draw_stage * diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 6f61d36f4e8..51000bd6fcd 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -753,7 +753,7 @@ nv30_fragprog_translate(struct nv30_context *nv30, fp->on_hw = FALSE; out_err: tgsi_parse_free(&parse); - free(fpc); + FREE(fpc); } void @@ -829,6 +829,6 @@ nv30_fragprog_destroy(struct nv30_context *nv30, struct nv30_fragment_program *fp) { if (fp->insn_len) - free(fp->insn); + FREE(fp->insn); } diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 6fdcf42a243..afb05fdd2bf 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -72,7 +72,7 @@ nv30_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, mt->total_size); if (!mt->buffer) { - free(mt); + FREE(mt); return NULL; } @@ -93,9 +93,9 @@ nv30_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) pipe_buffer_reference(ws, &nv30mt->buffer, NULL); for (l = 0; l <= mt->last_level; l++) { if (nv30mt->level[l].image_offset) - free(nv30mt->level[l].image_offset); + FREE(nv30mt->level[l].image_offset); } - free(nv30mt); + FREE(nv30mt); } } diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c index e19cb455dce..0c2d941562c 100644 --- a/src/gallium/drivers/nv30/nv30_query.c +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -35,7 +35,7 @@ nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) if (q->object) nv30->nvws->res_free(&q->object); - free(q); + FREE(q); } static void diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 983638adccb..620038fa647 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -486,7 +486,7 @@ nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv30_vertex_program *vp = hwcso; nv30_vertprog_destroy(nv30, vp); - free(vp); + FREE(vp); } static void * @@ -522,7 +522,7 @@ nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv30_fragment_program *fp = hwcso; nv30_fragprog_destroy(nv30, fp); - free(fp); + FREE(fp); } static void diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index e9b62ff48b4..fe1a467565d 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -578,7 +578,7 @@ nv30_vertprog_translate(struct nv30_context *nv30, vpc->high_temp = -1; if (!nv30_vertprog_prepare(vpc)) { - free(vpc); + FREE(vpc); return; } @@ -634,7 +634,7 @@ nv30_vertprog_translate(struct nv30_context *nv30, vp->translated = TRUE; out_err: tgsi_parse_free(&parse); - free(vpc); + FREE(vpc); } void @@ -790,8 +790,8 @@ void nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp) { if (vp->nr_consts) - free(vp->consts); + FREE(vp->consts); if (vp->nr_insns) - free(vp->insns); + FREE(vp->insns); } -- cgit v1.2.3 From 6806519a0b2a8af0c950f71705b02e13876d460f Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 31 Mar 2008 06:35:43 +1000 Subject: nv40: track current scissor enable/disable state Not sure how this was forgotten :) Anyhow, fixes gearbox/bzflag/xmoto, probably other things that use scissored clears / hit the nasty clear-with-quad path. --- src/gallium/drivers/nv40/nv40_state_scissor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index 9e9eadc5116..285239ef419 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -10,10 +10,11 @@ nv40_state_scissor_validate(struct nv40_context *nv40) if (nv40->state.hw[NV40_STATE_SCISSOR] && (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) return FALSE; + nv40->state.scissor_enabled = rast->scissor; so = so_new(3, 0); so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); - if (rast->scissor) { + if (nv40->state.scissor_enabled) { so_data (so, ((s->maxx - s->minx) << 16) | s->minx); so_data (so, ((s->maxy - s->miny) << 16) | s->miny); } else { -- cgit v1.2.3 From 169faae6db9a289c5a2d9430d85c36ac36abd218 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 31 Mar 2008 09:00:25 +1000 Subject: nv40: support vp clip distance regs, unused currently. --- src/gallium/drivers/nv40/nv40_state.h | 1 + src/gallium/drivers/nv40/nv40_vertprog.c | 106 ++++++++++++++++++++++++++++++- 2 files changed, 104 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index ab2866eb7a5..e018464c9f8 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -42,6 +42,7 @@ struct nv40_vertex_program { uint32_t ir; uint32_t or; + uint32_t clip_ctrl; struct nouveau_stateobj *so; }; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 40ef7174a41..5906280e5e1 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -37,6 +37,8 @@ #define neg(s) nv40_sr_neg((s)) #define abs(s) nv40_sr_abs((s)) +#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) + struct nv40_vpc { struct nv40_vertex_program *vp; @@ -200,6 +202,36 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + case NV40_VP_INST_DEST_CLIP(0): + vp->or |= (1 << 6); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(1): + vp->or |= (1 << 7); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(2): + vp->or |= (1 << 8); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(3): + vp->or |= (1 << 9); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + case NV40_VP_INST_DEST_CLIP(4): + vp->or |= (1 << 10); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + case NV40_VP_INST_DEST_CLIP(5): + vp->or |= (1 << 11); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5; + dst.index = NV40_VP_INST_DEST_PSZ; + break; default: break; } @@ -391,6 +423,11 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, int ai = -1, ci = -1, ii = -1; int i; + struct { + struct nv40_sreg dst; + unsigned c, m; + } clip; + if (finst->Instruction.Opcode == TGSI_OPCODE_END) return TRUE; @@ -464,6 +501,51 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + /* If writing to clip distance regs, need to modify instruction to + * change which component is written to. On NV40 the clip regs + * are the unused components (yzw) of FOGC/PSZ + */ + clip.dst = none; + if (dst.type == NV40SR_OUTPUT && + dst.index >= NV40_VP_INST_DEST_CLIP(0) && + dst.index <= NV40_VP_INST_DEST_CLIP(5)) { + unsigned n = dst.index - NV40_VP_INST_DEST_CLIP(0); + unsigned c[] = { SWZ_Y, SWZ_Z, SWZ_W, SWZ_Y, SWZ_Z, SWZ_W }; + unsigned m[] = + { MASK_Y, MASK_Z, MASK_W, MASK_Y, MASK_Z, MASK_W }; + + /* Some instructions we can get away with swizzling and/or + * changing the writemask. Others, we'll use a temp reg. + */ + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_DST: + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LOG: + case TGSI_OPCODE_XPD: + clip.dst = dst; + clip.c = c[n]; + clip.m = m[n]; + dst = temp(vpc); + break; + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + mask = m[n]; + break; + default: + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + src[i] = nv40_sr_swz(src[i], + c[n], c[n], c[n], c[n]); + } + mask = m[n]; + break; + } + } + switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); @@ -561,6 +643,12 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, return FALSE; } + if (clip.dst.type != NV40SR_NONE) { + arith(vpc, 0, OP_MOV, clip.dst, clip.m, + nv40_sr_swz(dst, clip.c, clip.c, clip.c, clip.c), + none, none); + } + release_temps(vpc); return TRUE; } @@ -612,6 +700,15 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, return FALSE; } break; +#if 0 + case TGSI_SEMANTIC_CLIP: + if (fdec->Semantic.SemanticIndex >= 6) { + NOUVEAU_ERR("bad clip distance index\n"); + return FALSE; + } + hw = NV40_VP_INST_DEST_CLIP(fdec->Semantic.SemanticIndex); + break; +#endif default: NOUVEAU_ERR("bad output semantic\n"); return FALSE; @@ -782,6 +879,7 @@ nv40_vertprog_validate(struct nv40_context *nv40) { struct nouveau_winsys *nvws = nv40->nvws; struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_grobj *curie = nv40->screen->curie; struct nv40_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -825,12 +923,14 @@ check_gpu_resources: assert(0); } - so = so_new(5, 0); - so_method(so, nv40->screen->curie, NV40TCL_VP_START_FROM_ID, 1); + so = so_new(7, 0); + so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); - so_method(so, nv40->screen->curie, NV40TCL_VP_ATTRIB_EN, 2); + so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); so_data (so, vp->ir); so_data (so, vp->or); + so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1); + so_data (so, vp->clip_ctrl); so_ref(so, &vp->so); upload_code = TRUE; -- cgit v1.2.3 From 4ad9dd6179787a46ecb223ab0e59e6b25b9368af Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 31 Mar 2008 09:29:22 +1000 Subject: nouveau: update object header --- src/gallium/drivers/nouveau/nouveau_class.h | 14 ++++++++------ src/gallium/drivers/nv40/nv40_vbo.c | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index dc202086d2c..bf1e622d2c4 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -4249,6 +4249,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_MASK 0x0000000f #define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT 0x00000002 #define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_UBYTE 0x00000004 +#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_USHORT 0x00000005 #define NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_SHIFT 4 #define NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_MASK 0x000000f0 #define NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT 8 @@ -4963,12 +4964,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4)) #define NV40TCL_VP_UPLOAD_INST__SIZE 0x00000004 #define NV40TCL_CLIP_PLANE_ENABLE 0x00001478 -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE0 (1 << 2) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE1 (1 << 6) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE2 (1 << 10) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE3 (1 << 14) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE4 (1 << 18) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE5 (1 << 22) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE0 (1 << 1) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE1 (1 << 5) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE2 (1 << 9) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE3 (1 << 13) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE4 (1 << 17) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE5 (1 << 21) #define NV40TCL_POLYGON_STIPPLE_ENABLE 0x0000147c #define NV40TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) #define NV40TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 @@ -4990,6 +4991,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40TCL_VTXFMT_TYPE_MASK 0x0000000f #define NV40TCL_VTXFMT_TYPE_FLOAT 0x00000002 #define NV40TCL_VTXFMT_TYPE_UBYTE 0x00000004 +#define NV40TCL_VTXFMT_TYPE_USHORT 0x00000005 #define NV40TCL_VTXFMT_SIZE_SHIFT 4 #define NV40TCL_VTXFMT_SIZE_MASK 0x000000f0 #define NV40TCL_VTXFMT_STRIDE_SHIFT 8 diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index b66bf26afb3..bc53924a676 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -32,7 +32,7 @@ nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) case PIPE_FORMAT_R16G16_SSCALED: case PIPE_FORMAT_R16G16B16_SSCALED: case PIPE_FORMAT_R16G16B16A16_SSCALED: - *fmt = 5; + *fmt = NV40TCL_VTXFMT_TYPE_USHORT; break; default: pf_sprint_name(fs, pipe); -- cgit v1.2.3 From e616d3f3e2178e34e4e7d769b38b0dff4ad615fe Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 31 Mar 2008 09:37:57 +1000 Subject: nv40: fix slight thinko --- src/gallium/drivers/nv40/nv40_vertprog.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 5906280e5e1..08d3f387e09 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -425,7 +425,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, struct { struct nv40_sreg dst; - unsigned c, m; + unsigned m; } clip; if (finst->Instruction.Opcode == TGSI_OPCODE_END) @@ -503,14 +503,13 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, /* If writing to clip distance regs, need to modify instruction to * change which component is written to. On NV40 the clip regs - * are the unused components (yzw) of FOGC/PSZ + * are the unused components (yzw) of FOGC/PSZ. */ clip.dst = none; if (dst.type == NV40SR_OUTPUT && dst.index >= NV40_VP_INST_DEST_CLIP(0) && dst.index <= NV40_VP_INST_DEST_CLIP(5)) { unsigned n = dst.index - NV40_VP_INST_DEST_CLIP(0); - unsigned c[] = { SWZ_Y, SWZ_Z, SWZ_W, SWZ_Y, SWZ_Z, SWZ_W }; unsigned m[] = { MASK_Y, MASK_Z, MASK_W, MASK_Y, MASK_Z, MASK_W }; @@ -524,7 +523,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, case TGSI_OPCODE_LOG: case TGSI_OPCODE_XPD: clip.dst = dst; - clip.c = c[n]; clip.m = m[n]; dst = temp(vpc); break; @@ -537,10 +535,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, mask = m[n]; break; default: - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - src[i] = nv40_sr_swz(src[i], - c[n], c[n], c[n], c[n]); - } + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) + src[i] = swz(src[i], X, X, X, X); mask = m[n]; break; } @@ -645,8 +641,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, if (clip.dst.type != NV40SR_NONE) { arith(vpc, 0, OP_MOV, clip.dst, clip.m, - nv40_sr_swz(dst, clip.c, clip.c, clip.c, clip.c), - none, none); + swz(dst, X, X, X, X), none, none); } release_temps(vpc); -- cgit v1.2.3 From baab98a637d526871fb77ec6f313012f49c0e998 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 31 Mar 2008 09:02:08 +0900 Subject: gallium: Eliminate p_winsys::printf Not convenient and almost not used at all. Better replacements in p_debug.h --- src/gallium/drivers/i915simple/i915_debug.c | 19 +++++++------------ src/gallium/drivers/i915simple/i915_debug.h | 4 +--- src/gallium/drivers/i915simple/i915_debug_fp.c | 4 +--- src/gallium/drivers/i915simple/i915_screen.c | 5 ++--- src/gallium/drivers/i965simple/brw_context.c | 5 ++--- src/gallium/drivers/i965simple/brw_context.h | 6 +++--- src/gallium/include/pipe/p_winsys.h | 4 ---- src/gallium/winsys/dri/intel/intel_winsys_pipe.c | 10 ---------- src/gallium/winsys/xlib/xm_winsys.c | 11 ----------- src/gallium/winsys/xlib/xm_winsys_aub.c | 10 ---------- src/mesa/drivers/x11/xm_winsys.c | 10 ---------- 11 files changed, 16 insertions(+), 72 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c index 78102dbac26..9b9111167f8 100644 --- a/src/gallium/drivers/i915simple/i915_debug.c +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -30,6 +30,7 @@ #include "i915_winsys.h" #include "i915_debug.h" #include "pipe/p_winsys.h" +#include "pipe/p_debug.h" static void @@ -39,11 +40,9 @@ PRINTF( ... ) { va_list args; - char buffer[256]; va_start( args, fmt ); - vsprintf( buffer, fmt, args ); - stream->winsys->printf( stream->winsys, buffer ); + debug_vprintf( fmt, args ); va_end( args ); } @@ -200,14 +199,12 @@ BITS( ... ) { va_list args; - char buffer[256]; unsigned himask = ~0UL >> (31 - (hi)); PRINTF(stream, "\t\t "); va_start( args, fmt ); - vsprintf( buffer, fmt, args ); - stream->winsys->printf( stream->winsys, buffer ); + debug_vprintf( fmt, args ); va_end( args ); PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo)); @@ -231,13 +228,11 @@ FLAG( { if (((dw) >> (bit)) & 1) { va_list args; - char buffer[256]; PRINTF(stream, "\t\t "); va_start( args, fmt ); - vsprintf( buffer, fmt, args ); - stream->winsys->printf( stream->winsys, buffer ); + debug_vprintf( fmt, args ); va_end( args ); PRINTF(stream, "\n"); @@ -877,11 +872,11 @@ i915_dump_batchbuffer( struct i915_context *i915 ) stream.winsys = i915->pipe.winsys; if (!start || !end) { - stream.winsys->printf( stream.winsys, "\n\nBATCH: ???\n"); + debug_printf( "\n\nBATCH: ???\n"); return; } - stream.winsys->printf( stream.winsys, "\n\nBATCH: (%d)\n", bytes / 4); + debug_printf( "\n\nBATCH: (%d)\n", bytes / 4); while (!done && stream.offset < bytes) @@ -893,7 +888,7 @@ i915_dump_batchbuffer( struct i915_context *i915 ) stream.offset >= 0); } - stream.winsys->printf( stream.winsys, "END-BATCH\n\n\n"); + debug_printf( "END-BATCH\n\n\n"); } diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h index 0bcd0942334..afb63edabf7 100644 --- a/src/gallium/drivers/i915simple/i915_debug.h +++ b/src/gallium/drivers/i915simple/i915_debug.h @@ -83,11 +83,9 @@ I915_DBG( { if ((i915)->debug & FILE_DEBUG_FLAG) { va_list args; - char buffer[256]; va_start( args, fmt ); - vsprintf( buffer, fmt, args ); - i915->pipe.winsys->printf( i915->pipe.winsys, buffer ); + debug_vprintf( fmt, args ); va_end( args ); } } diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c index ebfdb3d93c5..37a3508fe14 100644 --- a/src/gallium/drivers/i915simple/i915_debug_fp.c +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -39,11 +39,9 @@ PRINTF( ... ) { va_list args; - char buffer[256]; va_start( args, fmt ); - vsprintf( buffer, fmt, args ); - stream->winsys->printf( stream->winsys, buffer ); + debug_vprintf( fmt, args ); va_end( args ); } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 8d7bf0b33e0..839b98c0cec 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -226,9 +226,8 @@ i915_create_screen(struct pipe_winsys *winsys, uint pci_id) break; default: - winsys->printf(winsys, - "%s: unknown pci id 0x%x, cannot create screen\n", - __FUNCTION__, pci_id); + debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", + __FUNCTION__, pci_id); return NULL; } diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index 7c908da6726..a276cc0535d 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -77,9 +77,8 @@ struct pipe_context *brw_create(struct pipe_screen *screen, { struct brw_context *brw; - screen->winsys->printf(screen->winsys, - "%s: creating brw_context with pci id 0x%x\n", - __FUNCTION__, pci_id); + debug_printf("%s: creating brw_context with pci id 0x%x\n", + __FUNCTION__, pci_id); brw = CALLOC_STRUCT(brw_context); if (brw == NULL) diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 0c96ba17327..eeccf367856 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -183,12 +183,12 @@ extern int BRW_DEBUG; #define DEBUG_MIPTREE 0x800000 #define DBG(...) do { \ - if (BRW_DEBUG & FILE_DEBUG_FLAG) \ - brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ + if (BRW_DEBUG & FILE_DEBUG_FLAG) \ + debug_printf(__VA_ARGS__); \ } while(0) #define PRINT(...) do { \ - brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ + debug_printf(brw->pipe.winsys, __VA_ARGS__); \ } while(0) struct brw_state_flags { diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h index 1383bd0544a..8569cdcf123 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/p_winsys.h @@ -73,10 +73,6 @@ struct pipe_winsys struct pipe_surface *surf, void *context_private ); - /** Debug output */ - void (*printf)( struct pipe_winsys *sws, - const char *, ... ); - /** allocate a new surface (no context dependency) */ struct pipe_surface *(*surface_alloc)(struct pipe_winsys *ws); diff --git a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c index 789a386500a..77dec9488df 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c @@ -243,15 +243,6 @@ intel_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -static void -intel_printf( struct pipe_winsys *winsys, const char *fmtString, ... ) -{ - va_list args; - va_start( args, fmtString ); - vfprintf(stderr, fmtString, args); - va_end( args ); -} - static const char * intel_get_name( struct pipe_winsys *winsys ) { @@ -277,7 +268,6 @@ intel_create_pipe_winsys( int fd ) iws->winsys.buffer_unmap = intel_buffer_unmap; iws->winsys.buffer_destroy = intel_buffer_destroy; iws->winsys.flush_frontbuffer = intel_flush_frontbuffer; - iws->winsys.printf = intel_printf; iws->winsys.get_name = intel_get_name; iws->winsys.surface_alloc = intel_i915_surface_alloc; iws->winsys.surface_alloc_storage = intel_i915_surface_alloc_storage; diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 74597562798..9a20bdfb699 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -303,16 +303,6 @@ xm_flush_frontbuffer(struct pipe_winsys *pws, -static void -xm_printf(struct pipe_winsys *pws, const char *fmtString, ...) -{ - va_list args; - va_start( args, fmtString ); - vfprintf(stderr, fmtString, args); - va_end( args ); -} - - static const char * xm_get_name(struct pipe_winsys *pws) { @@ -635,7 +625,6 @@ xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis) ws->base.fence_finish = xm_fence_finish; ws->base.flush_frontbuffer = xm_flush_frontbuffer; - ws->base.printf = xm_printf; ws->base.get_name = xm_get_name; } diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index d55d8c39eb5..f42f7fcc5f1 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -311,15 +311,6 @@ aub_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -static void -aub_printf( struct pipe_winsys *winsys, const char *fmtString, ... ) -{ - va_list args; - va_start( args, fmtString ); - vfprintf(stderr, fmtString, args); - va_end( args ); -} - static const char * aub_get_name( struct pipe_winsys *winsys ) { @@ -344,7 +335,6 @@ xmesa_create_pipe_winsys_aub( void ) iws->winsys.buffer_unmap = aub_buffer_unmap; iws->winsys.buffer_destroy = aub_buffer_destroy; iws->winsys.flush_frontbuffer = aub_flush_frontbuffer; - iws->winsys.printf = aub_printf; iws->winsys.get_name = aub_get_name; iws->winsys.surface_alloc = aub_i915_surface_alloc; diff --git a/src/mesa/drivers/x11/xm_winsys.c b/src/mesa/drivers/x11/xm_winsys.c index 2edc6976933..eab9fd38521 100644 --- a/src/mesa/drivers/x11/xm_winsys.c +++ b/src/mesa/drivers/x11/xm_winsys.c @@ -193,15 +193,6 @@ xm_wait_idle(struct pipe_winsys *pws) /* no-op */ } -static void -xm_printf(struct pipe_winsys *pws, const char *fmtString, ...) -{ - va_list args; - va_start( args, fmtString ); - vfprintf(stderr, fmtString, args); - va_end( args ); -} - static const char * xm_get_name(struct pipe_winsys *pws) { @@ -353,7 +344,6 @@ xmesa_create_pipe_winsys( XMesaContext xmesa ) xws->winsys.flush_frontbuffer = xm_flush_frontbuffer; xws->winsys.wait_idle = xm_wait_idle; - xws->winsys.printf = xm_printf; xws->winsys.get_name = xm_get_name; xws->xmesa = xmesa; -- cgit v1.2.3 From ab8bcc4ec626be2d09bcdbaba2d1030b8dac7e25 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 31 Mar 2008 16:35:13 -0600 Subject: cell: implement logicop/output for PIPE_FORMAT_B8G8R8A8_UNORM Remote display to my usual terminal shows the right colors again. Not 100% sure about the shuffle control words, but they seem to work. --- .../drivers/cell/ppu/cell_state_per_fragment.c | 36 ++++++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index f10025bd7c7..0a79cccc83b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -1164,7 +1164,7 @@ int PC_OFFSET(const struct spe_function *f, const void *d) * masking. * * \bug - * This routine is hard-coded to only work with ARGB8 data. + * Only two framebuffer formats are supported at this time. */ void cell_generate_logic_op(struct spe_function *f, struct pipe_blend_state *blend, @@ -1235,15 +1235,31 @@ cell_generate_logic_op(struct spe_function *f, struct pipe_blend_state *blend, /* Convert fragment colors to framebuffer format in AoS layout. */ - data[0] = 0x00010203; - data[1] = 0x10111213; - data[2] = 0x04050607; - data[3] = 0x14151617; - - data[4] = 0x0c000408; - data[5] = 0x80808080; - data[6] = 0x80808080; - data[7] = 0x80808080; + switch (surf->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + data[0] = 0x00010203; + data[1] = 0x10111213; + data[2] = 0x04050607; + data[3] = 0x14151617; + data[4] = 0x0c000408; + data[5] = 0x80808080; + data[6] = 0x80808080; + data[7] = 0x80808080; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + data[0] = 0x03020100; + data[1] = 0x13121110; + data[2] = 0x07060504; + data[3] = 0x17161514; + data[4] = 0x0804000c; + data[5] = 0x80808080; + data[6] = 0x80808080; + data[7] = 0x80808080; + break; + default: + fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()"); + ASSERT(0); + } spe_ilh(f, tmp[0], 0x0808); spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0)); -- cgit v1.2.3 From 58b6690cf84147f88ea2ba95d2a929089e93b57f Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 31 Mar 2008 16:44:56 -0600 Subject: cell: updated comments: s/test/SPE/ --- src/gallium/drivers/cell/common.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index b0928fefd2e..c9e873b35c3 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -109,7 +109,7 @@ */ struct cell_command_depth_stencil_alpha_test { uint64_t base; /**< Effective address of code start. */ - unsigned size; /**< Size in bytes of test code. */ + unsigned size; /**< Size in bytes of SPE code. */ unsigned read_depth; /**< Flag: should depth be read? */ unsigned read_stencil; /**< Flag: should stencil be read? */ }; @@ -120,14 +120,14 @@ struct cell_command_depth_stencil_alpha_test { */ struct cell_command_blend { uint64_t base; /**< Effective address of code start. */ - unsigned size; /**< Size in bytes of test code. */ + unsigned size; /**< Size in bytes of SPE code. */ unsigned read_fb; /**< Flag: should framebuffer be read? */ }; struct cell_command_logicop { uint64_t base; /**< Effective address of code start. */ - unsigned size; /**< Size in bytes of test code. */ + unsigned size; /**< Size in bytes of SPE code. */ }; -- cgit v1.2.3 From 84c2821d2a3b0252d6ccdfc88c6acd8f72134ebf Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 31 Mar 2008 16:54:31 -0600 Subject: cell: added const qualifier --- src/gallium/drivers/cell/ppu/cell_state_per_fragment.c | 3 ++- src/gallium/drivers/cell/ppu/cell_state_per_fragment.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 0a79cccc83b..53ae3aa50e7 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -1167,7 +1167,8 @@ int PC_OFFSET(const struct spe_function *f, const void *d) * Only two framebuffer formats are supported at this time. */ void -cell_generate_logic_op(struct spe_function *f, struct pipe_blend_state *blend, +cell_generate_logic_op(struct spe_function *f, + const struct pipe_blend_state *blend, struct pipe_surface *surf) { const unsigned logic_op = (blend->logicop_enable) diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h index ab4de96c69d..a8267a51331 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h @@ -32,7 +32,8 @@ extern void cell_generate_alpha_blend(struct cell_blend_state *cb); extern void -cell_generate_logic_op(struct spe_function *f, struct pipe_blend_state *blend, - struct pipe_surface *surf); +cell_generate_logic_op(struct spe_function *f, + const struct pipe_blend_state *blend, + struct pipe_surface *surf); #endif /* CELL_STATE_PER_FRAGMENT_H */ -- cgit v1.2.3 From 14452aee73e16f2ede075cf894e69d62cc539f5e Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 31 Mar 2008 17:38:21 -0600 Subject: cell: initial work to support multi-texture --- src/gallium/drivers/cell/common.h | 8 +++++-- src/gallium/drivers/cell/ppu/cell_state_emit.c | 18 +++++++--------- src/gallium/drivers/cell/spu/spu_main.c | 27 ++++++++++++++++-------- src/gallium/drivers/cell/spu/spu_main.h | 8 +++---- src/gallium/drivers/cell/spu/spu_texture.c | 29 ++++++++++++++++---------- src/gallium/drivers/cell/spu/spu_tri.c | 2 +- 6 files changed, 55 insertions(+), 37 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index c9e873b35c3..298812fc201 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -66,6 +66,8 @@ #define CELL_MAX_SPUS 6 +#define CELL_MAX_SAMPLERS 4 + #define TILE_SIZE 32 @@ -228,8 +230,10 @@ struct cell_command_release_verts struct cell_command_texture { - void *start; /**< Address in main memory */ - uint width, height; + struct { + void *start; /**< Address in main memory */ + ushort width, height; + } texture[CELL_MAX_SAMPLERS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 4c75caa0251..4fbe1a21b8d 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -129,17 +129,15 @@ cell_emit_state(struct cell_context *cell) if (cell->dirty & CELL_NEW_TEXTURE) { struct cell_command_texture texture; - if (cell->texture[0]) { - texture.start = cell->texture[0]->tiled_data; - texture.width = cell->texture[0]->base.width[0]; - texture.height = cell->texture[0]->base.height[0]; + uint i; + memset(&texture, 0, sizeof(texture)); + for (i = 0;i < CELL_MAX_SAMPLERS; i++) { + if (cell->texture[i]) { + texture.texture[i].start = cell->texture[i]->tiled_data; + texture.texture[i].width = cell->texture[i]->base.width[0]; + texture.texture[i].height = cell->texture[i]->base.height[0]; + } } - else { - texture.start = NULL; - texture.width = 0; - texture.height = 0; - } - emit_state_cmd(cell, CELL_CMD_STATE_TEXTURE, &texture, sizeof(struct cell_command_texture)); } diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index d7f46f80246..80fa5f78596 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -329,17 +329,26 @@ cmd_state_sampler(const struct pipe_sampler_state *state) static void cmd_state_texture(const struct cell_command_texture *texture) { - if (Debug) - printf("SPU %u: TEXTURE at %p size %u x %u\n", - spu.init.id, texture->start, texture->width, texture->height); + uint i; + + if (1||Debug) { + printf("SPU %u: TEXTURE\n", spu.init.id); + for (i = 0; i < CELL_MAX_SAMPLERS; i++) { + printf(" %d: at %p size %u x %u\n", i, texture->texture[i].start, + texture->texture[i].width, texture->texture[i].height); + } + } memcpy(&spu.texture, texture, sizeof(*texture)); - spu.tex_size = (vector float) - { spu.texture.width, spu.texture.height, 0.0, 0.0}; - spu.tex_size_mask = (vector unsigned int) - { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; - spu.tex_size_x_mask = spu_splats(spu.texture.width - 1); - spu.tex_size_y_mask = spu_splats(spu.texture.height - 1); + for (i = 0; i < CELL_MAX_SAMPLERS; i++) { + const uint width = texture->texture[i].width; + const uint height = texture->texture[i].height; + spu.tex_size[i] = (vector float) { width, height, 0.0, 0.0}; + spu.tex_size_mask[i] = (vector unsigned int) + { width - 1, height - 1, 0, 0 }; + spu.tex_size_x_mask[i] = spu_splats(width - 1); + spu.tex_size_y_mask[i] = spu_splats(height - 1); + } } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index c20452931a9..8a877875377 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -141,10 +141,10 @@ struct spu_global /** for converting RGBA to PIPE_FORMAT_x colors */ vector unsigned char color_shuffle; - vector float tex_size; - vector unsigned int tex_size_mask; /**< == int(size - 1) */ - vector unsigned int tex_size_x_mask; /**< == int(size - 1) */ - vector unsigned int tex_size_y_mask; /**< == int(size - 1) */ + vector float tex_size[CELL_MAX_SAMPLERS]; + vector unsigned int tex_size_mask[CELL_MAX_SAMPLERS]; /**< == int(size - 1) */ + vector unsigned int tex_size_x_mask[CELL_MAX_SAMPLERS]; /**< == int(size - 1) */ + vector unsigned int tex_size_y_mask[CELL_MAX_SAMPLERS]; /**< == int(size - 1) */ vector float (*sample_texture)(vector float texcoord); diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 67eb08196a4..91a6aec5ec2 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -40,25 +40,29 @@ void invalidate_tex_cache(void) { - spu_dcache_mark_dirty((unsigned) spu.texture.start, - 4 * spu.texture.width * spu.texture.height); + uint unit = 0; + uint bytes = 4 * spu.texture.texture[unit].width + * spu.texture.texture[unit].height; + + spu_dcache_mark_dirty((unsigned) spu.texture.texture[unit].start, bytes); } static uint get_texel(vec_uint4 coordinate) { + const uint unit = 0; vec_uint4 tmp; unsigned x = spu_extract(coordinate, 0); unsigned y = spu_extract(coordinate, 1); - const unsigned tiles_per_row = spu.texture.width / TILE_SIZE; + const unsigned tiles_per_row = spu.texture.texture[unit].width / TILE_SIZE; unsigned tile_offset = sizeof(tile_t) * ((y / TILE_SIZE * tiles_per_row) + (x / TILE_SIZE)); unsigned texel_offset = 4 * (((y % TILE_SIZE) * TILE_SIZE) + (x % TILE_SIZE)); spu_dcache_fetch_unaligned((qword *) & tmp, - spu.texture.start + tile_offset + texel_offset, + spu.texture.texture[unit].start + tile_offset + texel_offset, 4); return spu_extract(tmp, 0); } @@ -67,13 +71,14 @@ get_texel(vec_uint4 coordinate) static void get_four_texels(vec_uint4 x, vec_uint4 y, vec_uint4 *texels) { - const unsigned texture_ea = (uintptr_t) spu.texture.start; + const uint unit = 0; + const unsigned texture_ea = (uintptr_t) spu.texture.texture[unit].start; vec_uint4 tile_x = spu_rlmask(x, -5); vec_uint4 tile_y = spu_rlmask(y, -5); const qword offset_x = si_andi((qword) x, 0x1f); const qword offset_y = si_andi((qword) y, 0x1f); - const qword tiles_per_row = (qword) spu_splats(spu.texture.width / TILE_SIZE); + const qword tiles_per_row = (qword) spu_splats(spu.texture.texture[unit].width / TILE_SIZE); const qword tile_size = (qword) spu_splats(sizeof(tile_t)); qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); @@ -101,9 +106,10 @@ get_four_texels(vec_uint4 x, vec_uint4 y, vec_uint4 *texels) vector float sample_texture_nearest(vector float texcoord) { - vector float tc = spu_mul(texcoord, spu.tex_size); + const uint unit = 0; + vector float tc = spu_mul(texcoord, spu.tex_size[unit]); vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ - itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */ + itc = spu_and(itc, spu.tex_size_mask[unit]); /* mask (GL_REPEAT) */ uint texel = get_texel(itc); return spu_unpack_A8R8G8B8(texel); } @@ -112,10 +118,11 @@ sample_texture_nearest(vector float texcoord) vector float sample_texture_bilinear(vector float texcoord) { + const uint unit = 0; static const vec_uint4 offset_x = {0, 0, 1, 1}; static const vec_uint4 offset_y = {0, 1, 0, 1}; - vector float tc = spu_mul(texcoord, spu.tex_size); + vector float tc = spu_mul(texcoord, spu.tex_size[unit]); tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ /* integer texcoords S,T: */ @@ -129,8 +136,8 @@ sample_texture_bilinear(vector float texcoord) x = spu_add(x, offset_x); y = spu_add(y, offset_y); - x = spu_and(x, spu.tex_size_x_mask); - y = spu_and(y, spu.tex_size_y_mask); + x = spu_and(x, spu.tex_size_x_mask[unit]); + y = spu_and(y, spu.tex_size_y_mask[unit]); get_four_texels(x, y, texels); diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 95c629a8aae..9f63317b1f1 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -309,7 +309,7 @@ emit_quad( int x, int y, mask_t mask ) spu.cur_ctile_status = TILE_STATUS_DIRTY; - if (spu.texture.start) { + if (spu.texture.texture[0].start) { /* texture mapping */ vector float texcoords[4]; eval_coeff(2, (float) x, (float) y, texcoords); -- cgit v1.2.3 From 5553a3b6757f0baaabbd67dd2f86d834d2f291ca Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 31 Mar 2008 20:36:53 -0600 Subject: cell: set cell->num_textures in cell_set_sampler_textures() --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 00f4be7401e..52c31260505 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -273,6 +273,7 @@ cell_set_sampler_textures(struct pipe_context *pipe, pipe_texture_reference((struct pipe_texture **) &cell->texture[i], tex); } + cell->num_textures = num; cell_update_texture_mapping(cell); -- cgit v1.2.3 From e6c981f22c0b6469ef44e9d7a34113db34647fef Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 31 Mar 2008 21:09:02 -0600 Subject: cell: more work for multi-texture support --- src/gallium/drivers/cell/common.h | 17 ++++++-- src/gallium/drivers/cell/ppu/cell_state_emit.c | 31 ++++++++++----- src/gallium/drivers/cell/spu/spu_main.c | 55 +++++++++++++++----------- src/gallium/drivers/cell/spu/spu_main.h | 18 ++++++--- src/gallium/drivers/cell/spu/spu_texture.c | 24 +++++------ src/gallium/drivers/cell/spu/spu_tri.c | 2 +- 6 files changed, 90 insertions(+), 57 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 298812fc201..f430e88b9c1 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -36,6 +36,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "pipe/p_format.h" +#include "pipe/p_state.h" /** The standard assert macro doesn't seem to work reliably */ @@ -228,12 +229,20 @@ struct cell_command_release_verts }; +struct cell_command_sampler +{ + uint64_t opcode; /**< CELL_CMD_STATE_SAMPLER */ + uint unit; + struct pipe_sampler_state state; +}; + + struct cell_command_texture { - struct { - void *start; /**< Address in main memory */ - ushort width, height; - } texture[CELL_MAX_SAMPLERS]; + uint64_t opcode; /**< CELL_CMD_STATE_TEXTURE */ + uint unit; + void *start; /**< Address in main memory */ + ushort width, height; }; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 4fbe1a21b8d..9cae67f0912 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -121,25 +121,36 @@ cell_emit_state(struct cell_context *cell) } if (cell->dirty & CELL_NEW_SAMPLER) { - if (cell->sampler[0]) { - emit_state_cmd(cell, CELL_CMD_STATE_SAMPLER, - cell->sampler[0], sizeof(struct pipe_sampler_state)); + uint i; + for (i = 0; i < CELL_MAX_SAMPLERS; i++) { + if (cell->sampler[i]) { + struct cell_command_sampler *sampler + = cell_batch_alloc(cell, sizeof(*sampler)); + sampler->opcode = CELL_CMD_STATE_SAMPLER; + sampler->unit = i; + sampler->state = *cell->sampler[i]; + } } } if (cell->dirty & CELL_NEW_TEXTURE) { - struct cell_command_texture texture; uint i; - memset(&texture, 0, sizeof(texture)); for (i = 0;i < CELL_MAX_SAMPLERS; i++) { + struct cell_command_texture *texture + = cell_batch_alloc(cell, sizeof(*texture)); + texture->opcode = CELL_CMD_STATE_TEXTURE; + texture->unit = i; if (cell->texture[i]) { - texture.texture[i].start = cell->texture[i]->tiled_data; - texture.texture[i].width = cell->texture[i]->base.width[0]; - texture.texture[i].height = cell->texture[i]->base.height[0]; + texture->start = cell->texture[i]->tiled_data; + texture->width = cell->texture[i]->base.width[0]; + texture->height = cell->texture[i]->base.height[0]; + } + else { + texture->start = NULL; + texture->width = 1; + texture->height = 1; } } - emit_state_cmd(cell, CELL_CMD_STATE_TEXTURE, - &texture, sizeof(struct cell_command_texture)); } if (cell->dirty & CELL_NEW_VERTEX_INFO) { diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 80fa5f78596..7f0473d198c 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -312,13 +312,13 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat static void -cmd_state_sampler(const struct pipe_sampler_state *state) +cmd_state_sampler(const struct cell_command_sampler *sampler) { if (Debug) - printf("SPU %u: SAMPLER\n", - spu.init.id); + printf("SPU %u: SAMPLER [%u]\n", + spu.init.id, sampler->unit); - memcpy(&spu.sampler[0], state, sizeof(*state)); + spu.sampler[sampler->unit] = sampler->state; if (spu.sampler[0].min_img_filter == PIPE_TEX_FILTER_LINEAR) spu.sample_texture = sample_texture_bilinear; else @@ -329,26 +329,25 @@ cmd_state_sampler(const struct pipe_sampler_state *state) static void cmd_state_texture(const struct cell_command_texture *texture) { - uint i; + const uint unit = texture->unit; + const uint width = texture->width; + const uint height = texture->height; - if (1||Debug) { - printf("SPU %u: TEXTURE\n", spu.init.id); - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - printf(" %d: at %p size %u x %u\n", i, texture->texture[i].start, - texture->texture[i].width, texture->texture[i].height); - } + if (Debug) { + printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu.init.id, + texture->unit, texture->start, + texture->width, texture->height); } - memcpy(&spu.texture, texture, sizeof(*texture)); - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - const uint width = texture->texture[i].width; - const uint height = texture->texture[i].height; - spu.tex_size[i] = (vector float) { width, height, 0.0, 0.0}; - spu.tex_size_mask[i] = (vector unsigned int) + spu.texture[unit].start = texture->start; + spu.texture[unit].width = width; + spu.texture[unit].height = height; + + spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; + spu.texture[unit].tex_size_mask = (vector unsigned int) { width - 1, height - 1, 0, 0 }; - spu.tex_size_x_mask[i] = spu_splats(width - 1); - spu.tex_size_y_mask[i] = spu_splats(height - 1); - } + spu.texture[unit].tex_size_x_mask = spu_splats(width - 1); + spu.texture[unit].tex_size_y_mask = spu_splats(height - 1); } @@ -480,12 +479,20 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8); break; case CELL_CMD_STATE_SAMPLER: - cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct pipe_sampler_state)) / 8); + { + struct cell_command_sampler *sampler + = (struct cell_command_sampler *) &buffer[pos]; + cmd_state_sampler(sampler); + pos += sizeof(*sampler) / 8; + } break; case CELL_CMD_STATE_TEXTURE: - cmd_state_texture((struct cell_command_texture *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_command_texture)) / 8); + { + struct cell_command_texture *texture + = (struct cell_command_texture *) &buffer[pos]; + cmd_state_texture(texture); + pos += sizeof(*texture) / 8; + } break; case CELL_CMD_STATE_VERTEX_INFO: cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 8a877875377..2bfad3535a1 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -100,6 +100,17 @@ struct spu_framebuffer { } ALIGN16_ATTRIB; +struct spu_texture +{ + void *start; + uint width, height; + vector float tex_size; + vector unsigned int tex_size_mask; /**< == int(size - 1) */ + vector unsigned int tex_size_x_mask; /**< == int(size - 1) */ + vector unsigned int tex_size_y_mask; /**< == int(size - 1) */ +} ALIGN16_ATTRIB; + + /** * All SPU global/context state will be in singleton object of this type: */ @@ -119,7 +130,7 @@ struct spu_global logicop_func logicop; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; - struct cell_command_texture texture; + struct spu_texture texture[PIPE_MAX_SAMPLERS]; struct vertex_info vertex_info; @@ -141,11 +152,6 @@ struct spu_global /** for converting RGBA to PIPE_FORMAT_x colors */ vector unsigned char color_shuffle; - vector float tex_size[CELL_MAX_SAMPLERS]; - vector unsigned int tex_size_mask[CELL_MAX_SAMPLERS]; /**< == int(size - 1) */ - vector unsigned int tex_size_x_mask[CELL_MAX_SAMPLERS]; /**< == int(size - 1) */ - vector unsigned int tex_size_y_mask[CELL_MAX_SAMPLERS]; /**< == int(size - 1) */ - vector float (*sample_texture)(vector float texcoord); } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 91a6aec5ec2..4612501eb3b 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -41,10 +41,10 @@ void invalidate_tex_cache(void) { uint unit = 0; - uint bytes = 4 * spu.texture.texture[unit].width - * spu.texture.texture[unit].height; + uint bytes = 4 * spu.texture[unit].width + * spu.texture[unit].height; - spu_dcache_mark_dirty((unsigned) spu.texture.texture[unit].start, bytes); + spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes); } @@ -55,14 +55,14 @@ get_texel(vec_uint4 coordinate) vec_uint4 tmp; unsigned x = spu_extract(coordinate, 0); unsigned y = spu_extract(coordinate, 1); - const unsigned tiles_per_row = spu.texture.texture[unit].width / TILE_SIZE; + const unsigned tiles_per_row = spu.texture[unit].width / TILE_SIZE; unsigned tile_offset = sizeof(tile_t) * ((y / TILE_SIZE * tiles_per_row) + (x / TILE_SIZE)); unsigned texel_offset = 4 * (((y % TILE_SIZE) * TILE_SIZE) + (x % TILE_SIZE)); spu_dcache_fetch_unaligned((qword *) & tmp, - spu.texture.texture[unit].start + tile_offset + texel_offset, + spu.texture[unit].start + tile_offset + texel_offset, 4); return spu_extract(tmp, 0); } @@ -72,13 +72,13 @@ static void get_four_texels(vec_uint4 x, vec_uint4 y, vec_uint4 *texels) { const uint unit = 0; - const unsigned texture_ea = (uintptr_t) spu.texture.texture[unit].start; + const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; vec_uint4 tile_x = spu_rlmask(x, -5); vec_uint4 tile_y = spu_rlmask(y, -5); const qword offset_x = si_andi((qword) x, 0x1f); const qword offset_y = si_andi((qword) y, 0x1f); - const qword tiles_per_row = (qword) spu_splats(spu.texture.texture[unit].width / TILE_SIZE); + const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].width / TILE_SIZE); const qword tile_size = (qword) spu_splats(sizeof(tile_t)); qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); @@ -107,9 +107,9 @@ vector float sample_texture_nearest(vector float texcoord) { const uint unit = 0; - vector float tc = spu_mul(texcoord, spu.tex_size[unit]); + vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ - itc = spu_and(itc, spu.tex_size_mask[unit]); /* mask (GL_REPEAT) */ + itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */ uint texel = get_texel(itc); return spu_unpack_A8R8G8B8(texel); } @@ -122,7 +122,7 @@ sample_texture_bilinear(vector float texcoord) static const vec_uint4 offset_x = {0, 0, 1, 1}; static const vec_uint4 offset_y = {0, 1, 0, 1}; - vector float tc = spu_mul(texcoord, spu.tex_size[unit]); + vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ /* integer texcoords S,T: */ @@ -136,8 +136,8 @@ sample_texture_bilinear(vector float texcoord) x = spu_add(x, offset_x); y = spu_add(y, offset_y); - x = spu_and(x, spu.tex_size_x_mask[unit]); - y = spu_and(y, spu.tex_size_y_mask[unit]); + x = spu_and(x, spu.texture[unit].tex_size_x_mask); + y = spu_and(y, spu.texture[unit].tex_size_y_mask); get_four_texels(x, y, texels); diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 9f63317b1f1..17e337bbdf6 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -309,7 +309,7 @@ emit_quad( int x, int y, mask_t mask ) spu.cur_ctile_status = TILE_STATUS_DIRTY; - if (spu.texture.texture[0].start) { + if (spu.texture[0].start) { /* texture mapping */ vector float texcoords[4]; eval_coeff(2, (float) x, (float) y, texcoords); -- cgit v1.2.3 From d83e0c45bec8d08c249088f9e8575505355fe595 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 31 Mar 2008 21:15:57 -0600 Subject: cell: update some of the CAP, texformat queries --- src/gallium/drivers/cell/ppu/cell_screen.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 124670df255..84b48bf4f1b 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -55,11 +55,11 @@ cell_get_param(struct pipe_screen *screen, int param) { switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; + return PIPE_MAX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: - return 1; + return 0; case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; + return 0; case PIPE_CAP_GLSL: return 1; case PIPE_CAP_S3TC: @@ -67,13 +67,13 @@ cell_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: - return 1; + return 0; case PIPE_CAP_MAX_RENDER_TARGETS: return 1; case PIPE_CAP_OCCLUSION_QUERY: - return 1; + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; + return 0; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 12; /* max 2Kx2K */ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: @@ -118,8 +118,12 @@ cell_is_format_supported( struct pipe_screen *screen, { switch (type) { case PIPE_TEXTURE: - /* cell supports all texture formats, XXX for now anyway */ - return TRUE; + /* cell supports most texture formats, XXX for now anyway */ + if (format == PIPE_FORMAT_DXT5_RGBA || + format == PIPE_FORMAT_R8G8B8A8_SRGB) + return FALSE; + else + return TRUE; case PIPE_SURFACE: /* cell supports all (off-screen) surface formats, XXX for now */ return TRUE; -- cgit v1.2.3 From f8c09464f801e97b24ccdb1ba70444c60d4235bd Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 1 Apr 2008 11:05:32 -0600 Subject: cell: enable #define CACHE_STATS to print a cache report upon exit --- src/gallium/drivers/cell/spu/spu_dcache.c | 18 ++++++++++++++++++ src/gallium/drivers/cell/spu/spu_dcache.h | 3 +++ src/gallium/drivers/cell/spu/spu_main.c | 2 ++ 3 files changed, 23 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c index a1701d80d18..167404cdc54 100644 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -36,6 +36,7 @@ #define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) #define CACHE_LOG2NNWAY 2 #define CACHE_LOG2NSETS 6 +/*#define CACHE_STATS 1*/ #include <cache-api.h> /* Yes folks, this is ugly. @@ -123,3 +124,20 @@ spu_dcache_mark_dirty(unsigned ea, unsigned size) ? (entry & ~CACHELINE_VALID) : entry; } } + + +/** + * Print cache utilization report + */ +void +spu_dcache_report(void) +{ +#ifdef CACHE_STATS + if (spu.init.id == 0) { + printf("SPU 0: Texture cache report:\n"); + cache_pr_stats(data); + } +#endif +} + + diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h index 7a06b8c25af..39a19eb31b5 100644 --- a/src/gallium/drivers/cell/spu/spu_dcache.h +++ b/src/gallium/drivers/cell/spu/spu_dcache.h @@ -31,4 +31,7 @@ spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size); extern void spu_dcache_mark_dirty(unsigned ea, unsigned size); +extern void +spu_dcache_report(void); + #endif /* SPU_DCACHE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 7f0473d198c..5b5a570a3cd 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -629,6 +629,8 @@ main_loop(void) if (Debug) printf("SPU %u: Exit main loop\n", spu.init.id); + + spu_dcache_report(); } -- cgit v1.2.3 From 6ddd2df1aec329be494d342dbd88a9f5af5e7b2c Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 1 Apr 2008 11:28:27 -0600 Subject: cell: return CELL_MAX_SAMPLERS to indicate number of texture units --- src/gallium/drivers/cell/ppu/cell_screen.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 84b48bf4f1b..5198b51441a 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -31,6 +31,7 @@ #include "pipe/p_defines.h" #include "pipe/p_screen.h" +#include "cell/common.h" #include "cell_screen.h" #include "cell_texture.h" #include "cell_winsys.h" @@ -55,7 +56,7 @@ cell_get_param(struct pipe_screen *screen, int param) { switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return PIPE_MAX_SAMPLERS; + return CELL_MAX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: return 0; case PIPE_CAP_TWO_SIDED_STENCIL: -- cgit v1.2.3 From c14da8f52407529f20f819e31a01356535de0117 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 1 Apr 2008 11:30:17 -0600 Subject: cell: assert num samplers/textures <= CELL_MAX_SAMPLERS --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 52c31260505..67b87f16d7f 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -232,12 +232,12 @@ cell_bind_sampler_states(struct pipe_context *pipe, { struct cell_context *cell = cell_context(pipe); - draw_flush(cell->draw); + assert(num <= CELL_MAX_SAMPLERS); - assert(unit < PIPE_MAX_SAMPLERS); + draw_flush(cell->draw); memcpy(cell->sampler, samplers, num * sizeof(void *)); - memset(&cell->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * + memset(&cell->sampler[num], 0, (CELL_MAX_SAMPLERS - num) * sizeof(void *)); cell->num_samplers = num; @@ -261,6 +261,8 @@ cell_set_sampler_textures(struct pipe_context *pipe, struct cell_context *cell = cell_context(pipe); uint i; + assert(num <= CELL_MAX_SAMPLERS); + /* Check for no-op */ if (num == cell->num_textures && !memcmp(cell->texture, texture, num * sizeof(struct pipe_texture *))) @@ -268,7 +270,7 @@ cell_set_sampler_textures(struct pipe_context *pipe, draw_flush(cell->draw); - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + for (i = 0; i < CELL_MAX_SAMPLERS; i++) { struct pipe_texture *tex = i < num ? texture[i] : NULL; pipe_texture_reference((struct pipe_texture **) &cell->texture[i], tex); -- cgit v1.2.3 From e7b23d36df1ab3ac5b54ef8e4e56c4fd46db8257 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 1 Apr 2008 11:35:53 -0600 Subject: cell: checkpoint: more multi-texture work --- src/gallium/drivers/cell/ppu/cell_texture.c | 8 +++++-- src/gallium/drivers/cell/spu/spu_main.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 2 +- src/gallium/drivers/cell/spu/spu_texture.c | 6 ++--- src/gallium/drivers/cell/spu/spu_texture.h | 4 ++-- src/gallium/drivers/cell/spu/spu_tri.c | 34 +++++++++++++++++++++++++---- 6 files changed, 42 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 9c694e136d8..c59d1f7f231 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -245,9 +245,13 @@ void cell_update_texture_mapping(struct cell_context *cell) { uint face = 0, level = 0, zslice = 0; + uint i; + + for (i = 0; i < CELL_MAX_SAMPLERS; i++) { + if (cell->texture[i]) + cell_tile_texture(cell, cell->texture[i]); + } - if (cell->texture[0]) - cell_tile_texture(cell, cell->texture[0]); #if 0 if (cell->tex_surf && cell->tex_map) { pipe_surface_unmap(cell->tex_surf); diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 5b5a570a3cd..a840d01596a 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -333,7 +333,7 @@ cmd_state_texture(const struct cell_command_texture *texture) const uint width = texture->width; const uint height = texture->height; - if (Debug) { + if (1||Debug) { printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu.init.id, texture->unit, texture->start, texture->width, texture->height); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 2bfad3535a1..26e050cfc3e 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -152,7 +152,7 @@ struct spu_global /** for converting RGBA to PIPE_FORMAT_x colors */ vector unsigned char color_shuffle; - vector float (*sample_texture)(vector float texcoord); + vector float (*sample_texture)(uint unit, vector float texcoord); } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 4612501eb3b..58a426cc409 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -104,9 +104,8 @@ get_four_texels(vec_uint4 x, vec_uint4 y, vec_uint4 *texels) * XXX this is extremely primitive for now. */ vector float -sample_texture_nearest(vector float texcoord) +sample_texture_nearest(uint unit, vector float texcoord) { - const uint unit = 0; vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */ @@ -116,9 +115,8 @@ sample_texture_nearest(vector float texcoord) vector float -sample_texture_bilinear(vector float texcoord) +sample_texture_bilinear(uint unit, vector float texcoord) { - const uint unit = 0; static const vec_uint4 offset_x = {0, 0, 1, 1}; static const vec_uint4 offset_y = {0, 1, 0, 1}; diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index 95eb87080f1..f7c9738be88 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -37,11 +37,11 @@ invalidate_tex_cache(void); extern vector float -sample_texture_nearest(vector float texcoord); +sample_texture_nearest(uint unit, vector float texcoord); extern vector float -sample_texture_bilinear(vector float texcoord); +sample_texture_bilinear(uint unit, vector float texcoord); #endif /* SPU_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 17e337bbdf6..51abcb17577 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -311,17 +311,43 @@ emit_quad( int x, int y, mask_t mask ) if (spu.texture[0].start) { /* texture mapping */ + const uint unit = 0; vector float texcoords[4]; eval_coeff(2, (float) x, (float) y, texcoords); if (spu_extract(mask, 0)) - colors[0] = spu.sample_texture(texcoords[0]); + colors[0] = spu.sample_texture(unit, texcoords[0]); if (spu_extract(mask, 1)) - colors[1] = spu.sample_texture(texcoords[1]); + colors[1] = spu.sample_texture(unit, texcoords[1]); if (spu_extract(mask, 2)) - colors[2] = spu.sample_texture(texcoords[2]); + colors[2] = spu.sample_texture(unit, texcoords[2]); if (spu_extract(mask, 3)) - colors[3] = spu.sample_texture(texcoords[3]); + colors[3] = spu.sample_texture(unit, texcoords[3]); + + + if (spu.texture[1].start) { + /* multi-texture mapping */ + const uint unit = 1; + vector float colors1[4]; + + eval_coeff(3, (float) x, (float) y, texcoords); + + if (spu_extract(mask, 0)) + colors1[0] = spu.sample_texture(unit, texcoords[0]); + if (spu_extract(mask, 1)) + colors1[1] = spu.sample_texture(unit, texcoords[1]); + if (spu_extract(mask, 2)) + colors1[2] = spu.sample_texture(unit, texcoords[2]); + if (spu_extract(mask, 3)) + colors1[3] = spu.sample_texture(unit, texcoords[3]); + + /* hack: modulate first texture by second */ + colors[0] = spu_mul(colors[0], colors1[0]); + colors[1] = spu_mul(colors[1], colors1[1]); + colors[2] = spu_mul(colors[2], colors1[2]); + colors[3] = spu_mul(colors[3], colors1[3]); + } + } else { /* simple shading */ -- cgit v1.2.3 From 2d02ee85093d8068f11ecb286c5f02f52786cd95 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 1 Apr 2008 14:52:04 -0600 Subject: cell: fix bug in texture tiling function (non-square textures work now) --- src/gallium/drivers/cell/ppu/cell_texture.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index c59d1f7f231..07717da8f6a 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -186,15 +186,17 @@ tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) uint it, jt; /* tile counters */ uint i, j; /* intra-tile counters */ + /* loop over dest tiles */ for (it = 0; it < h_t; it++) { for (jt = 0; jt < w_t; jt++) { - /* fill in tile (i, j) */ + /* start of dest tile: */ uint *tdst = dst + (it * w_t + jt) * tile_size2; + /* loop over texels in the tile */ for (i = 0; i < tile_size; i++) { for (j = 0; j < tile_size; j++) { const uint srci = it * tile_size + i; const uint srcj = jt * tile_size + j; - *tdst++ = src[srci * h + srcj]; + *tdst++ = src[srci * w + srcj]; } } } -- cgit v1.2.3 From 9e7b730eb25f08065d718dee4a67c67d1d133b6e Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 1 Apr 2008 14:52:25 -0600 Subject: cell: pass tex unit to get_texel() --- src/gallium/drivers/cell/spu/spu_texture.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 58a426cc409..ba0d57b9595 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -49,9 +49,8 @@ invalidate_tex_cache(void) static uint -get_texel(vec_uint4 coordinate) +get_texel(uint unit, vec_uint4 coordinate) { - const uint unit = 0; vec_uint4 tmp; unsigned x = spu_extract(coordinate, 0); unsigned y = spu_extract(coordinate, 1); @@ -109,7 +108,7 @@ sample_texture_nearest(uint unit, vector float texcoord) vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */ - uint texel = get_texel(itc); + uint texel = get_texel(unit, itc); return spu_unpack_A8R8G8B8(texel); } -- cgit v1.2.3 From 9d1444092fbfd9f975cfb996695f0533a78410f7 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 1 Apr 2008 14:55:31 -0600 Subject: cell: turn off some debug output --- src/gallium/drivers/cell/spu/spu_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index a840d01596a..5b5a570a3cd 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -333,7 +333,7 @@ cmd_state_texture(const struct cell_command_texture *texture) const uint width = texture->width; const uint height = texture->height; - if (1||Debug) { + if (Debug) { printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu.init.id, texture->unit, texture->start, texture->width, texture->height); -- cgit v1.2.3 From bccd3f138ccc717fad9073110d15e3321b590476 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 1 Apr 2008 15:42:42 -0600 Subject: cell: more multi-texture fixes (mostly working now) --- src/gallium/drivers/cell/spu/spu_main.c | 6 +++--- src/gallium/drivers/cell/spu/spu_main.h | 4 +++- src/gallium/drivers/cell/spu/spu_texture.c | 5 ++--- src/gallium/drivers/cell/spu/spu_tri.c | 18 +++++++++--------- 4 files changed, 17 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 5b5a570a3cd..1ab1c40379a 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -319,10 +319,10 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) spu.init.id, sampler->unit); spu.sampler[sampler->unit] = sampler->state; - if (spu.sampler[0].min_img_filter == PIPE_TEX_FILTER_LINEAR) - spu.sample_texture = sample_texture_bilinear; + if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) + spu.sample_texture[sampler->unit] = sample_texture_bilinear; else - spu.sample_texture = sample_texture_nearest; + spu.sample_texture[sampler->unit] = sample_texture_nearest; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 26e050cfc3e..e9e39cbeab3 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -85,6 +85,8 @@ typedef struct spu_blend_results (*logicop_func)( qword frag_mask); +typedef vector float (*sample_texture_func)(uint unit, vector float texcoord); + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ @@ -152,7 +154,7 @@ struct spu_global /** for converting RGBA to PIPE_FORMAT_x colors */ vector unsigned char color_shuffle; - vector float (*sample_texture)(uint unit, vector float texcoord); + sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index ba0d57b9595..ceab2469803 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -68,9 +68,8 @@ get_texel(uint unit, vec_uint4 coordinate) static void -get_four_texels(vec_uint4 x, vec_uint4 y, vec_uint4 *texels) +get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) { - const uint unit = 0; const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; vec_uint4 tile_x = spu_rlmask(x, -5); vec_uint4 tile_y = spu_rlmask(y, -5); @@ -136,7 +135,7 @@ sample_texture_bilinear(uint unit, vector float texcoord) x = spu_and(x, spu.texture[unit].tex_size_x_mask); y = spu_and(y, spu.texture[unit].tex_size_y_mask); - get_four_texels(x, y, texels); + get_four_texels(unit, x, y, texels); vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0)); vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0)); diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 51abcb17577..ab4ff8160a9 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -316,13 +316,13 @@ emit_quad( int x, int y, mask_t mask ) eval_coeff(2, (float) x, (float) y, texcoords); if (spu_extract(mask, 0)) - colors[0] = spu.sample_texture(unit, texcoords[0]); + colors[0] = spu.sample_texture[unit](unit, texcoords[0]); if (spu_extract(mask, 1)) - colors[1] = spu.sample_texture(unit, texcoords[1]); + colors[1] = spu.sample_texture[unit](unit, texcoords[1]); if (spu_extract(mask, 2)) - colors[2] = spu.sample_texture(unit, texcoords[2]); + colors[2] = spu.sample_texture[unit](unit, texcoords[2]); if (spu_extract(mask, 3)) - colors[3] = spu.sample_texture(unit, texcoords[3]); + colors[3] = spu.sample_texture[unit](unit, texcoords[3]); if (spu.texture[1].start) { @@ -330,16 +330,16 @@ emit_quad( int x, int y, mask_t mask ) const uint unit = 1; vector float colors1[4]; - eval_coeff(3, (float) x, (float) y, texcoords); + eval_coeff(2, (float) x, (float) y, texcoords); if (spu_extract(mask, 0)) - colors1[0] = spu.sample_texture(unit, texcoords[0]); + colors1[0] = spu.sample_texture[unit](unit, texcoords[0]); if (spu_extract(mask, 1)) - colors1[1] = spu.sample_texture(unit, texcoords[1]); + colors1[1] = spu.sample_texture[unit](unit, texcoords[1]); if (spu_extract(mask, 2)) - colors1[2] = spu.sample_texture(unit, texcoords[2]); + colors1[2] = spu.sample_texture[unit](unit, texcoords[2]); if (spu_extract(mask, 3)) - colors1[3] = spu.sample_texture(unit, texcoords[3]); + colors1[3] = spu.sample_texture[unit](unit, texcoords[3]); /* hack: modulate first texture by second */ colors[0] = spu_mul(colors[0], colors1[0]); -- cgit v1.2.3 From bdf5b23bfd222ade9b3599ebd0f8932a5179431e Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 2 Apr 2008 12:54:37 +1000 Subject: nv40: shorten zsa state lines --- src/gallium/drivers/nv40/nv40_state.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 1417c95e758..89f4078b720 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -445,19 +445,20 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, struct nv40_context *nv40 = nv40_context(pipe); struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *curie = nv40->screen->curie; - so_method(so, nv40->screen->curie, NV40TCL_DEPTH_FUNC, 3); + so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); so_data (so, nvgl_comparison_op(cso->depth.func)); so_data (so, cso->depth.writemask ? 1 : 0); so_data (so, cso->depth.enabled ? 1 : 0); - so_method(so, nv40->screen->curie, NV40TCL_ALPHA_TEST_ENABLE, 3); + so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3); so_data (so, cso->alpha.enabled ? 1 : 0); so_data (so, nvgl_comparison_op(cso->alpha.func)); so_data (so, float_to_ubyte(cso->alpha.ref)); if (cso->stencil[0].enabled) { - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); + so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, cso->stencil[0].enabled ? 1 : 0); so_data (so, cso->stencil[0].write_mask); so_data (so, nvgl_comparison_op(cso->stencil[0].func)); @@ -467,12 +468,12 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); } else { - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); + so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_ENABLE, 8); + so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 8); so_data (so, cso->stencil[1].enabled ? 1 : 0); so_data (so, cso->stencil[1].write_mask); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); @@ -482,7 +483,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); } else { - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_ENABLE, 1); + so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } -- cgit v1.2.3 From ae87909d0d261d0f4e888f6a167e6329eb129a87 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 2 Apr 2008 13:04:06 +1000 Subject: nv40: only update draw module state when using swtnl --- src/gallium/drivers/nv40/nv40_context.h | 6 ++++-- src/gallium/drivers/nv40/nv40_state.c | 22 ++++++++++------------ src/gallium/drivers/nv40/nv40_state_emit.c | 20 ++++++++++++++++++++ 3 files changed, 34 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index b50f6f8fefc..525eef8d63d 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -133,7 +133,7 @@ struct nv40_context { unsigned fallback_swrast; /* Context state */ - unsigned dirty; + unsigned dirty, draw_dirty; struct pipe_scissor_state scissor; unsigned stipple[32]; struct pipe_clip_state clip; @@ -153,8 +153,10 @@ struct nv40_context { unsigned nr_samplers; unsigned nr_textures; unsigned dirty_samplers; - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; + unsigned vtxbuf_nr; struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; + unsigned vtxelt_nr; }; static INLINE struct nv40_context * diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 89f4078b720..5dc2991212d 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -423,10 +423,9 @@ nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) struct nv40_context *nv40 = nv40_context(pipe); struct nv40_rasterizer_state *rsso = hwcso; - draw_set_rasterizer_state(nv40->draw, &rsso->pipe); - nv40->rasterizer = hwcso; nv40->dirty |= NV40_NEW_RAST; + nv40->draw_dirty |= NV40_NEW_RAST; } static void @@ -530,10 +529,9 @@ nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) struct nv40_context *nv40 = nv40_context(pipe); struct nv40_vertex_program *vp = hwcso; - draw_bind_vertex_shader(nv40->draw, vp ? vp->draw : NULL); - nv40->vertprog = hwcso; nv40->dirty |= NV40_NEW_VERTPROG; + nv40->draw_dirty |= NV40_NEW_VERTPROG; } static void @@ -596,10 +594,9 @@ nv40_set_clip_state(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - draw_set_clip_state(nv40->draw, clip); - nv40->clip = *clip; nv40->dirty |= NV40_NEW_UCP; + nv40->draw_dirty |= NV40_NEW_UCP; } static void @@ -654,10 +651,9 @@ nv40_set_viewport_state(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - draw_set_viewport_state(nv40->draw, vpt); - nv40->viewport = *vpt; nv40->dirty |= NV40_NEW_VIEWPORT; + nv40->draw_dirty |= NV40_NEW_VIEWPORT; } static void @@ -666,10 +662,11 @@ nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, { struct nv40_context *nv40 = nv40_context(pipe); - draw_set_vertex_buffers(nv40->draw, count, vb); - memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count); + nv40->vtxbuf_nr = count; + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; } static void @@ -678,10 +675,11 @@ nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, { struct nv40_context *nv40 = nv40_context(pipe); - draw_set_vertex_elements(nv40->draw, count, ve); - memcpy(nv40->vtxelt, ve, sizeof(*ve) * count); + nv40->vtxelt_nr = count; + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; } void diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 74feb6d4bfc..722b9f31e6d 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -144,6 +144,8 @@ nv40_state_validate(struct nv40_context *nv40) boolean nv40_state_validate_swtnl(struct nv40_context *nv40) { + struct draw_context *draw = nv40->draw; + /* Setup for swtnl */ if (nv40->render_mode == HW) { NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); @@ -155,12 +157,30 @@ nv40_state_validate_swtnl(struct nv40_context *nv40) nv40->render_mode = SWTNL; } + if (nv40->draw_dirty & NV40_NEW_VERTPROG) + draw_bind_vertex_shader(draw, nv40->vertprog->draw); + + if (nv40->draw_dirty & NV40_NEW_RAST) + draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe); + + if (nv40->draw_dirty & NV40_NEW_UCP) + draw_set_clip_state(draw, &nv40->clip); + + if (nv40->draw_dirty & NV40_NEW_VIEWPORT) + draw_set_viewport_state(draw, &nv40->viewport); + + if (nv40->draw_dirty & NV40_NEW_ARRAYS) { + draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); + draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt); + } + nv40_state_do_validate(nv40, swtnl_states); if (nv40->fallback_swrast) { NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast); return FALSE; } + nv40->draw_dirty = 0; return TRUE; } -- cgit v1.2.3 From b1a361ba7a565063200c033e4939e6b28c006b13 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Wed, 2 Apr 2008 05:10:18 +0200 Subject: nv10: fix stuff and things. --- src/gallium/drivers/nouveau/nouveau_class.h | 24 +++- src/gallium/drivers/nv10/nv10_context.c | 92 +++--------- src/gallium/drivers/nv10/nv10_context.h | 31 +++- src/gallium/drivers/nv10/nv10_miptree.c | 28 ++-- src/gallium/drivers/nv10/nv10_prim_vbuf.c | 17 ++- src/gallium/drivers/nv10/nv10_screen.c | 70 ++++++--- src/gallium/drivers/nv10/nv10_screen.h | 6 +- src/gallium/drivers/nv10/nv10_state.c | 155 ++++---------------- src/gallium/drivers/nv10/nv10_state_emit.c | 216 ++++++++++++++++++++++++++-- 9 files changed, 395 insertions(+), 244 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index dc202086d2c..880afe6ce08 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -2804,6 +2804,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV10TCL_VERTEX_WGH_1F 0x00000ce4 #define NV10TCL_EDGEFLAG_ENABLE 0x00000cec #define NV10TCL_VERTEX_ARRAY_VALIDATE 0x00000cf0 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(x) (0x00000d00+((x)*8)) +#define NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET__SIZE 0x00000008 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(x) (0x00000d04+((x)*8)) +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT__SIZE 0x00000008 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_STRIDE_MASK 0x0000ff00 #define NV10TCL_VERTEX_ARRAY_OFFSET_POS 0x00000d00 #define NV10TCL_VERTEX_ARRAY_FORMAT_POS 0x00000d04 #define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_SHIFT 0 @@ -4249,6 +4259,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_MASK 0x0000000f #define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT 0x00000002 #define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_UBYTE 0x00000004 +#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_USHORT 0x00000005 #define NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_SHIFT 4 #define NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_MASK 0x000000f0 #define NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT 8 @@ -4963,12 +4974,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4)) #define NV40TCL_VP_UPLOAD_INST__SIZE 0x00000004 #define NV40TCL_CLIP_PLANE_ENABLE 0x00001478 -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE0 (1 << 2) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE1 (1 << 6) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE2 (1 << 10) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE3 (1 << 14) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE4 (1 << 18) -#define NV40TCL_CLIP_PLANE_ENABLE_PLANE5 (1 << 22) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE0 (1 << 1) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE1 (1 << 5) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE2 (1 << 9) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE3 (1 << 13) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE4 (1 << 17) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE5 (1 << 21) #define NV40TCL_POLYGON_STIPPLE_ENABLE 0x0000147c #define NV40TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) #define NV40TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 @@ -4990,6 +5001,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40TCL_VTXFMT_TYPE_MASK 0x0000000f #define NV40TCL_VTXFMT_TYPE_FLOAT 0x00000002 #define NV40TCL_VTXFMT_TYPE_UBYTE 0x00000004 +#define NV40TCL_VTXFMT_TYPE_USHORT 0x00000005 #define NV40TCL_VTXFMT_SIZE_SHIFT 4 #define NV40TCL_VTXFMT_SIZE_MASK 0x000000f0 #define NV40TCL_VTXFMT_STRIDE_SHIFT 8 diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 14042fb2fbb..42c496b9590 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -12,13 +12,6 @@ nv10_flush(struct pipe_context *pipe, unsigned flags, { struct nv10_context *nv10 = nv10_context(pipe); - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(celsius, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(celsius, 0x1fd8, 1); - OUT_RING (1); - } - FIRE_RING(fence); } @@ -26,36 +19,21 @@ static void nv10_destroy(struct pipe_context *pipe) { struct nv10_context *nv10 = nv10_context(pipe); - struct nouveau_winsys *nvws = nv10->nvws; if (nv10->draw) draw_destroy(nv10->draw); - nvws->res_free(&nv10->vertprog.exec_heap); - nvws->res_free(&nv10->vertprog.data_heap); - - nvws->notifier_free(&nv10->sync); - - nvws->grobj_free(&nv10->celsius); - - free(nv10); + FREE(nv10); } -static boolean -nv10_init_hwctx(struct nv10_context *nv10, int celsius_class) +static void nv10_init_hwctx(struct nv10_context *nv10) { - struct nouveau_winsys *nvws = nv10->nvws; - int ret; + struct nv10_screen *screen = nv10->screen; + struct nouveau_winsys *nvws = screen->nvws; int i; - ret = nvws->grobj_alloc(nvws, celsius_class, &nv10->celsius); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); - OUT_RING (nv10->sync->handle); + OUT_RING (screen->sync->handle); BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2); OUT_RING (nvws->channel->vram->handle); OUT_RING (nvws->channel->gart->handle); @@ -90,7 +68,7 @@ nv10_init_hwctx(struct nv10_context *nv10, int celsius_class) BEGIN_RING(celsius, NV10TCL_NOP, 1); OUT_RING (0); - if (celsius_class != NV10TCL) { + if (nv10->screen->celsius->grclass != NV10TCL) { /* For nv11, nv17 */ BEGIN_RING(celsius, 0x120, 3); OUT_RING (0); @@ -243,72 +221,44 @@ nv10_init_hwctx(struct nv10_context *nv10, int celsius_class) FIRE_RING (NULL); - return TRUE; } struct pipe_context * -nv10_create(struct pipe_screen *screen, unsigned pctx_id) +nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) { - struct pipe_winsys *pipe_winsys = screen->winsys; - struct nouveau_winsys *nvws = nv10_screen(screen)->nvws; - unsigned chipset = nv10_screen(screen)->chipset; + struct nv10_screen *screen = nv10_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; struct nv10_context *nv10; - int celsius_class = 0, ret; - - if (chipset>=0x20) - celsius_class=NV11TCL; - else if (chipset>=0x17) - celsius_class=NV17TCL; - else if (chipset>=0x11) - celsius_class=NV11TCL; - else - celsius_class=NV10TCL; - - nv10 = CALLOC_STRUCT(nv10_context); + unsigned chipset = screen->chipset; + struct nouveau_winsys *nvws = screen->nvws; + + nv10 = CALLOC(1, sizeof(struct nv10_context)); if (!nv10) return NULL; + nv10->screen = screen; + nv10->pctx_id = pctx_id; + nv10->chipset = chipset; nv10->nvws = nvws; - /* Notifier for sync purposes */ - ret = nvws->notifier_alloc(nvws, 1, &nv10->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv10_destroy(&nv10->pipe); - return NULL; - } - - /* Vtxprog resources */ - if (nvws->res_init(&nv10->vertprog.exec_heap, 0, 512) || - nvws->res_init(&nv10->vertprog.data_heap, 0, 256)) { - nv10_destroy(&nv10->pipe); - return NULL; - } - - /* Static celsius initialisation */ - if (!nv10_init_hwctx(nv10, celsius_class)) { - nv10_destroy(&nv10->pipe); - return NULL; - } - - /* Pipe context setup */ - nv10->pipe.winsys = pipe_winsys; - + nv10->pipe.winsys = ws; + nv10->pipe.screen = pscreen; nv10->pipe.destroy = nv10_destroy; - nv10->pipe.draw_arrays = nv10_draw_arrays; nv10->pipe.draw_elements = nv10_draw_elements; nv10->pipe.clear = nv10_clear; - nv10->pipe.flush = nv10_flush; nv10_init_surface_functions(nv10); nv10_init_state_functions(nv10); + nv10_init_miptree_functions(nv10); nv10->draw = draw_create(); assert(nv10->draw); draw_set_rasterize_stage(nv10->draw, nv10_draw_vbuf_stage(nv10)); + nv10_init_hwctx(nv10); + return &nv10->pipe; } diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 386138556e5..61eb4e6a937 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -11,7 +11,7 @@ #include "nouveau/nouveau_gldefs.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv10_context *ctx = nv10 + struct nv10_screen *ctx = nv10->screen #include "nouveau/nouveau_push.h" #include "nv10_state.h" @@ -24,17 +24,27 @@ #define NV10_NEW_VERTPROG (1 << 1) #define NV10_NEW_FRAGPROG (1 << 2) #define NV10_NEW_ARRAYS (1 << 3) -#define NV10_NEW_VBO (1 << 4) +#define NV10_NEW_VTXFMT (1 << 4) +#define NV10_NEW_BLEND (1 << 5) +#define NV10_NEW_BLENDCOL (1 << 6) +#define NV10_NEW_RAST (1 << 7) +#define NV10_NEW_DSA (1 << 8) +#define NV10_NEW_VIEWPORT (1 << 9) +#define NV10_NEW_SCISSOR (1 << 9) +#define NV10_NEW_FRAMEBUFFER (1 << 10) + +#include "nv10_screen.h" struct nv10_context { struct pipe_context pipe; + struct nouveau_winsys *nvws; + struct nv10_screen *screen; + unsigned pctx_id; struct draw_context *draw; int chipset; - struct nouveau_grobj *celsius; - struct nouveau_notifier *sync; uint32_t dirty; @@ -49,6 +59,14 @@ struct nv10_context { struct pipe_buffer *zeta; uint32_t lma_offset; + struct nv10_blend_state *blend; + struct pipe_blend_color *blend_color; + struct nv10_rasterizer_state *rast; + struct nv10_depth_stencil_alpha_state *dsa; + struct pipe_viewport_state *viewport; + struct pipe_scissor_state *scissor; + struct pipe_framebuffer_state *framebuffer; + struct { struct pipe_buffer *buffer; uint32_t format; @@ -91,7 +109,9 @@ nv10_context(struct pipe_context *pipe) extern void nv10_init_state_functions(struct nv10_context *nv10); extern void nv10_init_surface_functions(struct nv10_context *nv10); -extern void nv10_init_miptree_functions(struct pipe_screen *screen); +extern void nv10_init_miptree_functions(struct nv10_context *nv10); + +extern void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen); /* nv10_clear.c */ extern void nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps, @@ -111,6 +131,7 @@ extern void nv10_fragtex_bind(struct nv10_context *); /* nv10_prim_vbuf.c */ struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ); +extern void nv10_vtxbuf_bind(struct nv10_context* nv10); /* nv10_state.c and friends */ extern void nv10_emit_hw_state(struct nv10_context *nv10); diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index 7b7f39b80c9..4dfc675a6b9 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -69,7 +69,7 @@ nv10_miptree_create(struct pipe_screen *screen, struct pipe_texture *pt) mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, mt->total_size); if (!mt->buffer) { - free(mt); + FREE(mt); return NULL; } @@ -90,12 +90,19 @@ nv10_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) pipe_buffer_reference(ws, &nv10mt->buffer, NULL); for (l = 0; l <= mt->last_level; l++) { if (nv10mt->level[l].image_offset) - free(nv10mt->level[l].image_offset); + FREE(nv10mt->level[l].image_offset); } - free(nv10mt); + FREE(nv10mt); } } +static void +nv10_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, + uint face, uint levels) +{ +} + + static struct pipe_surface * nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) @@ -122,13 +129,16 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, return ps; } -void -nv10_init_miptree_functions(struct pipe_screen *screen) + +void nv10_init_miptree_functions(struct nv10_context *nv10) { - struct nv10_screen *nv10screen = nv10_screen(screen); + nv10->pipe.texture_update = nv10_miptree_update; +} - nv10screen->screen.texture_create = nv10_miptree_create; - nv10screen->screen.texture_release = nv10_miptree_release; - nv10screen->screen.get_tex_surface = nv10_miptree_surface_get; +void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv10_miptree_create; + pscreen->texture_release = nv10_miptree_release; + pscreen->get_tex_surface = nv10_miptree_surface_get; } diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index 15268912234..bbcfe1a2fdd 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -67,6 +67,17 @@ struct nv10_vbuf_render { }; +void nv10_vtxbuf_bind( struct nv10_context* nv10 ) +{ + int i; + for(i = 0; i < 8; i++) { + BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1); + OUT_RING(0/*nv10->vtxbuf*/); + BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1); + OUT_RING(0/*XXX*/); + } +} + /** * Basically a cast wrapper. */ @@ -100,7 +111,7 @@ nv10_vbuf_render_allocate_vertices( struct vbuf_render *render, assert(!nv10_render->buffer); nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); - nv10->dirty |= NV10_NEW_VBO; + nv10->dirty |= NV10_NEW_ARRAYS; return winsys->buffer_map(winsys, nv10_render->buffer, @@ -139,8 +150,8 @@ nv10_vbuf_render_draw( struct vbuf_render *render, } while (nr_indices) { - // XXX too big ? - push = MIN2(nr_indices, 2047 * 2); + // XXX too big/small ? check the size + push = MIN2(nr_indices, 1200 * 2); BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 4d8e0b05ccd..80676ead1a9 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -116,35 +116,71 @@ nv10_screen_is_format_supported(struct pipe_screen *screen, } static void -nv10_screen_destroy(struct pipe_screen *screen) +nv10_screen_destroy(struct pipe_screen *pscreen) { - FREE(screen); + struct nv10_screen *screen = nv10_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->celsius); + + FREE(pscreen); } struct pipe_screen * -nv10_screen_create(struct pipe_winsys *winsys, struct nouveau_winsys *nvws, +nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, unsigned chipset) { - struct nv10_screen *nv10screen = CALLOC_STRUCT(nv10_screen); + struct nv10_screen *screen = CALLOC_STRUCT(nv10_screen); + unsigned celsius_class; + int ret; - if (!nv10screen) + if (!screen) + return NULL; + screen->chipset = chipset; + screen->nvws = nvws; + + /* 3D object */ + if (chipset>=0x20) + celsius_class=NV11TCL; + else if (chipset>=0x17) + celsius_class=NV17TCL; + else if (chipset>=0x11) + celsius_class=NV11TCL; + else + celsius_class=NV10TCL; + + if (!celsius_class) { + NOUVEAU_ERR("Unknown nv1x chipset: nv%02x\n", chipset); return NULL; + } + + ret = nvws->grobj_alloc(nvws, celsius_class, &screen->celsius); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv10_screen_destroy(&screen->pipe); + return NULL; + } - nv10screen->chipset = chipset; - nv10screen->nvws = nvws; + screen->pipe.winsys = ws; + screen->pipe.destroy = nv10_screen_destroy; - nv10screen->screen.winsys = winsys; + screen->pipe.get_name = nv10_screen_get_name; + screen->pipe.get_vendor = nv10_screen_get_vendor; + screen->pipe.get_param = nv10_screen_get_param; + screen->pipe.get_paramf = nv10_screen_get_paramf; - nv10screen->screen.destroy = nv10_screen_destroy; + screen->pipe.is_format_supported = nv10_screen_is_format_supported; - nv10screen->screen.get_name = nv10_screen_get_name; - nv10screen->screen.get_vendor = nv10_screen_get_vendor; - nv10screen->screen.get_param = nv10_screen_get_param; - nv10screen->screen.get_paramf = nv10_screen_get_paramf; - nv10screen->screen.is_format_supported = - nv10_screen_is_format_supported; + nv10_screen_init_miptree_functions(&screen->pipe); - nv10_init_miptree_functions(&nv10screen->screen); - return &nv10screen->screen; + return &screen->pipe; } diff --git a/src/gallium/drivers/nv10/nv10_screen.h b/src/gallium/drivers/nv10/nv10_screen.h index ac8c04072ab..4192fe11ef4 100644 --- a/src/gallium/drivers/nv10/nv10_screen.h +++ b/src/gallium/drivers/nv10/nv10_screen.h @@ -4,10 +4,14 @@ #include "pipe/p_screen.h" struct nv10_screen { - struct pipe_screen screen; + struct pipe_screen pipe; struct nouveau_winsys *nvws; unsigned chipset; + + /* HW graphics objects */ + struct nouveau_grobj *celsius; + struct nouveau_notifier *sync; }; static INLINE struct nv10_screen * diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 1ff01de1060..12722709f6b 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -35,6 +35,8 @@ static void nv10_vertex_layout(struct pipe_context* pipe) } } draw_compute_vertex_size(&vinfo); + + nv10->dirty |= NV10_NEW_VTXFMT; } static void * @@ -62,27 +64,19 @@ nv10_blend_state_create(struct pipe_context *pipe, } static void -nv10_blend_state_bind(struct pipe_context *pipe, void *hwcso) +nv10_blend_state_bind(struct pipe_context *pipe, void *blend) { struct nv10_context *nv10 = nv10_context(pipe); - struct nv10_blend_state *cb = hwcso; - - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); - OUT_RING (cb->d_enable); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); - OUT_RING (cb->b_enable); - OUT_RING (cb->b_srcfunc); - OUT_RING (cb->b_dstfunc); + nv10->blend = (struct nv10_blend_state*)blend; - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (cb->c_mask); + nv10->dirty |= NV10_NEW_BLEND; } static void nv10_blend_state_delete(struct pipe_context *pipe, void *hwcso) { - free(hwcso); + FREE(hwcso); } @@ -255,7 +249,7 @@ nv10_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) static void nv10_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { - free(hwcso); + FREE(hwcso); } static void @@ -347,43 +341,19 @@ nv10_rasterizer_state_create(struct pipe_context *pipe, } static void -nv10_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +nv10_rasterizer_state_bind(struct pipe_context *pipe, void *rast) { struct nv10_context *nv10 = nv10_context(pipe); - struct nv10_rasterizer_state *rs = hwcso; - - BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); - OUT_RING (rs->shade_model); - OUT_RING (rs->line_width); - - - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (rs->point_size); - - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (rs->poly_mode_front); - OUT_RING (rs->poly_mode_back); - - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (rs->cull_face); - OUT_RING (rs->front_face); + nv10->rast = (struct nv10_rasterizer_state*)rast; - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (rs->line_smooth_en); - OUT_RING (rs->poly_smooth_en); - - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (rs->cull_face_en); - -/* BEGIN_RING(celsius, NV10TCL_POINT_SPRITE, 1); - OUT_RING (rs->point_sprite);*/ + nv10->dirty |= NV10_NEW_RAST; } static void nv10_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) { - free(hwcso); + FREE(hwcso); } static void * @@ -415,25 +385,19 @@ nv10_depth_stencil_alpha_state_create(struct pipe_context *pipe, } static void -nv10_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +nv10_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa) { struct nv10_context *nv10 = nv10_context(pipe); - struct nv10_depth_stencil_alpha_state *hw = hwcso; - - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 3); - OUT_RINGp ((uint32_t *)&hw->depth, 3); - BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); - OUT_RING (hw->stencil.enable); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(hw->stencil.wmask), 7); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 3); - OUT_RINGp ((uint32_t *)&hw->alpha.enabled, 3); + + nv10->dsa = (struct nv10_depth_stencil_alpha_state*)dsa; + + nv10->dirty |= NV10_NEW_DSA; } static void nv10_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) { - free(hwcso); + FREE(hwcso); } static void * @@ -461,11 +425,11 @@ nv10_vp_state_bind(struct pipe_context *pipe, void *hwcso) static void nv10_vp_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nv10_context *nv10 = nv10_context(pipe); + //struct nv10_context *nv10 = nv10_context(pipe); struct nv10_vertex_program *vp = hwcso; //nv10_vertprog_destroy(nv10, vp); - free(vp); + FREE(vp); } static void * @@ -499,7 +463,7 @@ nv10_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv10_fragment_program *fp = hwcso; nv10_fragprog_destroy(nv10, fp); - free(fp); + FREE(fp); } static void @@ -508,11 +472,9 @@ nv10_set_blend_color(struct pipe_context *pipe, { struct nv10_context *nv10 = nv10_context(pipe); - BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0)); + nv10->blend_color = (struct pipe_blend_color*)bcol; + + nv10->dirty |= NV10_NEW_BLENDCOL; } static void @@ -542,58 +504,10 @@ nv10_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct nv10_context *nv10 = nv10_context(pipe); - struct pipe_surface *rt, *zeta; - uint32_t rt_format, w, h; - int i, colour_format = 0, zeta_format = 0; - - w = fb->cbufs[0]->width; - h = fb->cbufs[0]->height; - colour_format = fb->cbufs[0]->format; - rt = fb->cbufs[0]; - - if (fb->zsbuf) { - if (colour_format) { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } else { - w = fb->zsbuf->width; - h = fb->zsbuf->height; - } - - zeta_format = fb->zsbuf->format; - zeta = fb->zsbuf; - } - rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR; + nv10->framebuffer = (struct pipe_framebuffer_state*)fb; - switch (colour_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format |= NV10TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING ( (rt->pitch * rt->cpp) | ( (zeta->pitch * zeta->cpp) << 16) ); - nv10->rt[0] = rt->buffer; - - if (zeta_format) - { - nv10->zeta = zeta->buffer; - } - - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); - OUT_RING (rt_format); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0); - OUT_RING (((h - 1) << 16) | 0); + nv10->dirty |= NV10_NEW_FRAMEBUFFER; } static void @@ -609,10 +523,9 @@ nv10_set_scissor_state(struct pipe_context *pipe, { struct nv10_context *nv10 = nv10_context(pipe); - // XXX -/* BEGIN_RING(celsius, NV10TCL_SCISSOR_HORIZ, 2); - OUT_RING (((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ + nv10->scissor = (struct pipe_scissor_state*)s; + + nv10->dirty |= NV10_NEW_SCISSOR; } static void @@ -621,15 +534,9 @@ nv10_set_viewport_state(struct pipe_context *pipe, { struct nv10_context *nv10 = nv10_context(pipe); -/* OUT_RINGf (vpt->translate[0]); - OUT_RINGf (vpt->translate[1]); - OUT_RINGf (vpt->translate[2]); - OUT_RINGf (vpt->translate[3]);*/ - BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4); - OUT_RINGf (vpt->scale[0]); - OUT_RINGf (vpt->scale[1]); - OUT_RINGf (vpt->scale[2]); - OUT_RINGf (vpt->scale[3]); + nv10->viewport = (struct pipe_viewport_state*)vpt; + + nv10->dirty |= NV10_NEW_VIEWPORT; } static void diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 8bf0bd2d683..0c524963896 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -1,14 +1,174 @@ +#include "pipe/p_util.h" + #include "nv10_context.h" #include "nv10_state.h" +static void nv10_state_emit_blend(struct nv10_context* nv10) +{ + struct nv10_blend_state *b = nv10->blend; + + BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); + OUT_RING (b->d_enable); + + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); + OUT_RING (b->b_enable); + OUT_RING (b->b_srcfunc); + OUT_RING (b->b_dstfunc); + + BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (b->c_mask); +} + +static void nv10_state_emit_blend_color(struct nv10_context* nv10) +{ + struct pipe_blend_color *c = nv10->blend_color; + + BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); + OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + (float_to_ubyte(c->color[0]) << 16)| + (float_to_ubyte(c->color[1]) << 8) | + (float_to_ubyte(c->color[2]) << 0)); +} + +static void nv10_state_emit_rast(struct nv10_context* nv10) +{ + struct nv10_rasterizer_state *r = nv10->rast; + + BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); + OUT_RING (r->shade_model); + OUT_RING (r->line_width); + + + BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (r->point_size); + + BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (r->poly_mode_front); + OUT_RING (r->poly_mode_back); + + + BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (r->cull_face); + OUT_RING (r->front_face); + + BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (r->line_smooth_en); + OUT_RING (r->poly_smooth_en); + + BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (r->cull_face_en); +} + +static void nv10_state_emit_dsa(struct nv10_context* nv10) +{ + struct nv10_depth_stencil_alpha_state *d = nv10->dsa; + + BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 3); + OUT_RINGp ((uint32_t *)&d->depth, 3); + BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); + OUT_RING (d->stencil.enable); + BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); + OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 3); + OUT_RINGp ((uint32_t *)&d->alpha.enabled, 3); +} + +static void nv10_state_emit_viewport(struct nv10_context* nv10) +{ + struct pipe_viewport_state *vpt = nv10->viewport; + +/* OUT_RINGf (vpt->translate[0]); + OUT_RINGf (vpt->translate[1]); + OUT_RINGf (vpt->translate[2]); + OUT_RINGf (vpt->translate[3]);*/ + BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4); + OUT_RINGf (vpt->scale[0]); + OUT_RINGf (vpt->scale[1]); + OUT_RINGf (vpt->scale[2]); + OUT_RINGf (vpt->scale[3]); +} + +static void nv10_state_emit_scissor(struct nv10_context* nv10) +{ + // XXX this is so not working +/* struct pipe_scissor_state *s = nv10->scissor; + BEGIN_RING(celsius, NV10TCL_SCISSOR_HORIZ, 2); + OUT_RING (((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void nv10_state_emit_framebuffer(struct nv10_context* nv10) +{ + struct pipe_framebuffer_state* fb = nv10->framebuffer; + struct pipe_surface *rt, *zeta; + uint32_t rt_format, w, h; + int colour_format = 0, zeta_format = 0; + + w = fb->cbufs[0]->width; + h = fb->cbufs[0]->height; + colour_format = fb->cbufs[0]->format; + rt = fb->cbufs[0]; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV10TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); + OUT_RING ( (rt->pitch * rt->cpp) | ( (zeta->pitch * zeta->cpp) << 16) ); + nv10->rt[0] = rt->buffer; + + if (zeta_format) + { + nv10->zeta = zeta->buffer; + } + + BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); + OUT_RING ((w << 16) | 0); + OUT_RING ((h << 16) | 0); + OUT_RING (rt_format); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (((w - 1) << 16) | 0); + OUT_RING (((h - 1) << 16) | 0); +} + void nv10_emit_hw_state(struct nv10_context *nv10) { int i; + if (nv10->dirty & NV10_NEW_VERTPROG) { + //nv10_vertprog_bind(nv10, nv10->vertprog.current); + nv10->dirty &= ~NV10_NEW_VERTPROG; + } + if (nv10->dirty & NV10_NEW_FRAGPROG) { nv10_fragprog_bind(nv10, nv10->fragprog.current); /*XXX: clear NV10_NEW_FRAGPROG if no new program uploaded */ + nv10->dirty_samplers |= (1<<10); + nv10->dirty_samplers = 0; } if (nv10->dirty_samplers || (nv10->dirty & NV10_NEW_FRAGPROG)) { @@ -16,16 +176,50 @@ nv10_emit_hw_state(struct nv10_context *nv10) nv10->dirty &= ~NV10_NEW_FRAGPROG; } - if (nv10->dirty & NV10_NEW_VERTPROG) { - //nv10_vertprog_bind(nv10, nv10->vertprog.current); - nv10->dirty &= ~NV10_NEW_VERTPROG; + if (nv10->dirty & NV10_NEW_ARRAYS) { + nv10->dirty &= ~NV10_NEW_ARRAYS; + // array state will be put here once it's not emitted at each frame } - if (nv10->dirty & NV10_NEW_VBO) { - + if (nv10->dirty & NV10_NEW_VTXFMT) { + nv10->dirty &= ~NV10_NEW_VTXFMT; + nv10_vtxbuf_bind(nv10); } - nv10->dirty_samplers = 0; + if (nv10->dirty & NV10_NEW_BLEND) { + nv10->dirty &= ~NV10_NEW_BLEND; + nv10_state_emit_blend(nv10); + } + + if (nv10->dirty & NV10_NEW_BLENDCOL) { + nv10->dirty &= ~NV10_NEW_BLENDCOL; + nv10_state_emit_blend_color(nv10); + } + + if (nv10->dirty & NV10_NEW_RAST) { + nv10->dirty &= ~NV10_NEW_RAST; + nv10_state_emit_rast(nv10); + } + + if (nv10->dirty & NV10_NEW_DSA) { + nv10->dirty &= ~NV10_NEW_DSA; + nv10_state_emit_dsa(nv10); + } + + if (nv10->dirty & NV10_NEW_VIEWPORT) { + nv10->dirty &= ~NV10_NEW_VIEWPORT; + nv10_state_emit_viewport(nv10); + } + + if (nv10->dirty & NV10_NEW_SCISSOR) { + nv10->dirty &= ~NV10_NEW_SCISSOR; + nv10_state_emit_scissor(nv10); + } + + if (nv10->dirty & NV10_NEW_FRAMEBUFFER) { + nv10->dirty &= ~NV10_NEW_FRAMEBUFFER; + nv10_state_emit_framebuffer(nv10); + } /* Emit relocs for every referenced buffer. * This is to ensure the bufmgr has an accurate idea of how @@ -48,10 +242,16 @@ nv10_emit_hw_state(struct nv10_context *nv10) BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1); OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_OFFSET, 1); - OUT_RELOCl(nv10->zeta+lma_offset);*/ +/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(nv10->zeta + lma_offset);*/ } + /* Vertex buffer */ + BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv10->fp_samplers & (1 << i))) -- cgit v1.2.3 From 8f26e975ca6341cb3366a18beb352b5cdcaee2bc Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Wed, 2 Apr 2008 15:09:32 +0200 Subject: nv10: set rasterizer state. --- src/gallium/drivers/nv10/nv10_state.c | 4 ++++ src/gallium/drivers/nv10/nv10_state.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 12722709f6b..182f6857f5b 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -282,6 +282,8 @@ nv10_rasterizer_state_create(struct pipe_context *pipe, */ rs = malloc(sizeof(struct nv10_rasterizer_state)); + rs->templ = cso; + rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01; rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; @@ -347,6 +349,8 @@ nv10_rasterizer_state_bind(struct pipe_context *pipe, void *rast) nv10->rast = (struct nv10_rasterizer_state*)rast; + draw_set_rasterizer_state(nv10->draw, (nv10->rast ? nv10->rast->templ : NULL)); + nv10->dirty |= NV10_NEW_RAST; } diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h index 9bda8a7d6a3..3ca501d1356 100644 --- a/src/gallium/drivers/nv10/nv10_state.h +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -39,6 +39,8 @@ struct nv10_rasterizer_state { uint32_t cull_face_en; uint32_t point_sprite; + + const struct pipe_rasterizer_state *templ; }; struct nv10_vertex_program_exec { -- cgit v1.2.3 From fb19b3393fbee26f7bb88b572b3d0cc2943f2edc Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Wed, 2 Apr 2008 18:26:49 +0200 Subject: nv10: fix some more state, work on the vertex emission code. --- src/gallium/auxiliary/draw/draw_vertex_fetch.c | 2 + src/gallium/drivers/nv10/nv10_context.h | 21 +++++---- src/gallium/drivers/nv10/nv10_fragtex.c | 4 ++ src/gallium/drivers/nv10/nv10_prim_vbuf.c | 7 ++- src/gallium/drivers/nv10/nv10_state.c | 60 ++++++-------------------- src/gallium/drivers/nv10/nv10_state_emit.c | 39 ++++++++++++++--- src/gallium/drivers/nv10/nv10_vbo.c | 2 + 7 files changed, 68 insertions(+), 67 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c index 11f99babf65..9a9ac4ee451 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c @@ -508,6 +508,7 @@ void draw_update_vertex_fetch( struct draw_context *draw ) draw->vertex_fetch.nr_attrs = nr_attrs; draw->vertex_fetch.fetch_func = generic_vertex_fetch; + printf("pouet vertex fetch %x\n",draw->vertex_fetch.fetch_func); switch (nr_attrs) { case 2: @@ -524,5 +525,6 @@ void draw_update_vertex_fetch( struct draw_context *draw ) default: break; } + printf("pouet vertex fetch %x\n",draw->vertex_fetch.fetch_func); } diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 61eb4e6a937..8c13d6897f7 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -21,17 +21,16 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); -#define NV10_NEW_VERTPROG (1 << 1) -#define NV10_NEW_FRAGPROG (1 << 2) -#define NV10_NEW_ARRAYS (1 << 3) -#define NV10_NEW_VTXFMT (1 << 4) -#define NV10_NEW_BLEND (1 << 5) -#define NV10_NEW_BLENDCOL (1 << 6) -#define NV10_NEW_RAST (1 << 7) -#define NV10_NEW_DSA (1 << 8) -#define NV10_NEW_VIEWPORT (1 << 9) -#define NV10_NEW_SCISSOR (1 << 9) -#define NV10_NEW_FRAMEBUFFER (1 << 10) +#define NV10_NEW_VERTPROG (1 << 0) +#define NV10_NEW_FRAGPROG (1 << 1) +#define NV10_NEW_VTXARRAYS (1 << 2) +#define NV10_NEW_BLEND (1 << 3) +#define NV10_NEW_BLENDCOL (1 << 4) +#define NV10_NEW_RAST (1 << 5) +#define NV10_NEW_DSA (1 << 6) +#define NV10_NEW_VIEWPORT (1 << 7) +#define NV10_NEW_SCISSOR (1 << 8) +#define NV10_NEW_FRAMEBUFFER (1 << 9) #include "nv10_screen.h" diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c index a4bf1082845..67e0b4bd45d 100644 --- a/src/gallium/drivers/nv10/nv10_fragtex.c +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -72,6 +72,7 @@ nv10_fragtex_format(uint pipe_format) static void nv10_fragtex_build(struct nv10_context *nv10, int unit) { +#if 0 struct nv10_sampler_state *ps = nv10->tex_sampler[unit]; struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; struct pipe_texture *pt = &nv10mt->base; @@ -115,11 +116,13 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) OUT_RING (ps->filt | 0x2000 /* magic */); OUT_RING ((pt->width[0] << 16) | pt->height[0]); OUT_RING (ps->bcol); +#endif } void nv10_fragtex_bind(struct nv10_context *nv10) { +#if 0 struct nv10_fragment_program *fp = nv10->fragprog.active; unsigned samplers, unit; @@ -141,5 +144,6 @@ nv10_fragtex_bind(struct nv10_context *nv10) } nv10->fp_samplers = fp->samplers; +#endif } diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index bbcfe1a2fdd..412ee9a23f9 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -94,6 +94,9 @@ nv10_vbuf_render_get_vertex_info( struct vbuf_render *render ) { struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); struct nv10_context *nv10 = nv10_render->nv10; + + nv10_emit_hw_state(nv10); + return &nv10->vertex_info; } @@ -111,7 +114,7 @@ nv10_vbuf_render_allocate_vertices( struct vbuf_render *render, assert(!nv10_render->buffer); nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); - nv10->dirty |= NV10_NEW_ARRAYS; + nv10->dirty |= NV10_NEW_VTXARRAYS; return winsys->buffer_map(winsys, nv10_render->buffer, @@ -137,6 +140,8 @@ nv10_vbuf_render_draw( struct vbuf_render *render, struct nv10_context *nv10 = nv10_render->nv10; int push, i; + nv10_emit_hw_state(nv10); + BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 182f6857f5b..924cf803acb 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -1,3 +1,4 @@ +#include "draw/draw_context.h" #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" @@ -7,38 +8,6 @@ #include "nv10_context.h" #include "nv10_state.h" -static void nv10_vertex_layout(struct pipe_context* pipe) -{ - struct nv10_context *nv10 = nv10_context(pipe); - struct nv10_fragment_program *fp = nv10->fragprog.current; - uint32_t src = 0; - int i; - struct vertex_info vinfo; - - memset(&vinfo, 0, sizeof(vinfo)); - - for (i = 0; i < fp->info.num_inputs; i++) { - switch (fp->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); - break; - case TGSI_SEMANTIC_COLOR: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); - break; - default: - case TGSI_SEMANTIC_GENERIC: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); - break; - case TGSI_SEMANTIC_FOG: - draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); - break; - } - } - draw_compute_vertex_size(&vinfo); - - nv10->dirty |= NV10_NEW_VTXFMT; -} - static void * nv10_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) @@ -406,34 +375,29 @@ nv10_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) static void * nv10_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) + const struct pipe_shader_state *templ) { - struct nv10_vertex_program *vp; - - vp = CALLOC(1, sizeof(struct nv10_vertex_program)); - vp->pipe = cso; + struct nv10_context *nv10 = nv10_context(pipe); - return (void *)vp; + return draw_create_vertex_shader(nv10->draw, templ); } static void -nv10_vp_state_bind(struct pipe_context *pipe, void *hwcso) +nv10_vp_state_bind(struct pipe_context *pipe, void *shader) { struct nv10_context *nv10 = nv10_context(pipe); - struct nv10_vertex_program *vp = hwcso; - nv10->vertprog.current = vp; + draw_bind_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader); + nv10->dirty |= NV10_NEW_VERTPROG; } static void -nv10_vp_state_delete(struct pipe_context *pipe, void *hwcso) +nv10_vp_state_delete(struct pipe_context *pipe, void *shader) { - //struct nv10_context *nv10 = nv10_context(pipe); - struct nv10_vertex_program *vp = hwcso; + struct nv10_context *nv10 = nv10_context(pipe); - //nv10_vertprog_destroy(nv10, vp); - FREE(vp); + draw_delete_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader); } static void * @@ -550,7 +514,7 @@ nv10_set_vertex_buffers(struct pipe_context *pipe, unsigned count, struct nv10_context *nv10 = nv10_context(pipe); memcpy(nv10->vtxbuf, vb, sizeof(*vb) * count); - nv10->dirty |= NV10_NEW_ARRAYS; + nv10->dirty |= NV10_NEW_VTXARRAYS; } static void @@ -560,7 +524,7 @@ nv10_set_vertex_elements(struct pipe_context *pipe, unsigned count, struct nv10_context *nv10 = nv10_context(pipe); memcpy(nv10->vtxelt, ve, sizeof(*ve) * count); - nv10->dirty |= NV10_NEW_ARRAYS; + nv10->dirty |= NV10_NEW_VTXARRAYS; } void diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 0c524963896..134e52bd622 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -154,6 +154,35 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) OUT_RING (((h - 1) << 16) | 0); } +static void nv10_vertex_layout(struct nv10_context *nv10) +{ + struct nv10_fragment_program *fp = nv10->fragprog.current; + uint32_t src = 0; + int i; + struct vertex_info vinfo; + + memset(&vinfo, 0, sizeof(vinfo)); + + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + case TGSI_SEMANTIC_COLOR: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + default: + case TGSI_SEMANTIC_GENERIC: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + case TGSI_SEMANTIC_FOG: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + } + } + draw_compute_vertex_size(&vinfo); +} + void nv10_emit_hw_state(struct nv10_context *nv10) { @@ -176,13 +205,9 @@ nv10_emit_hw_state(struct nv10_context *nv10) nv10->dirty &= ~NV10_NEW_FRAGPROG; } - if (nv10->dirty & NV10_NEW_ARRAYS) { - nv10->dirty &= ~NV10_NEW_ARRAYS; - // array state will be put here once it's not emitted at each frame - } - - if (nv10->dirty & NV10_NEW_VTXFMT) { - nv10->dirty &= ~NV10_NEW_VTXFMT; + if (nv10->dirty & NV10_NEW_VTXARRAYS) { + nv10->dirty &= ~NV10_NEW_VTXARRAYS; + nv10_vertex_layout(nv10); nv10_vtxbuf_bind(nv10); } diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 025ad6de4bd..0f5902602f6 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -18,6 +18,8 @@ boolean nv10_draw_elements( struct pipe_context *pipe, struct draw_context *draw = nv10->draw; unsigned i; + nv10_emit_hw_state(nv10); + /* * Map vertex buffers */ -- cgit v1.2.3 From 7f21b63a988a041bca120751c795f6f6abf0f2bd Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Thu, 3 Apr 2008 04:07:16 +0200 Subject: nv10: fix more vertex stuff --- src/gallium/drivers/nv10/nv10_context.h | 8 ++++---- src/gallium/drivers/nv10/nv10_state.c | 13 +++++++++++-- src/gallium/drivers/nv10/nv10_state_emit.c | 10 ++++++++-- src/gallium/drivers/nv10/nv10_vbo.c | 2 ++ 4 files changed, 25 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 8c13d6897f7..63d33ef7c91 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -65,6 +65,8 @@ struct nv10_context { struct pipe_viewport_state *viewport; struct pipe_scissor_state *scissor; struct pipe_framebuffer_state *framebuffer; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + struct vertex_info vertex_info; struct { struct pipe_buffer *buffer; @@ -77,8 +79,7 @@ struct nv10_context { unsigned delta; } vb[16]; - struct vertex_info vertex_info; - struct { +/* struct { struct nouveau_resource *exec_heap; struct nouveau_resource *data_heap; @@ -86,9 +87,8 @@ struct nv10_context { struct nv10_vertex_program *active; struct nv10_vertex_program *current; - struct pipe_buffer *constant_buf; } vertprog; - +*/ struct { struct nv10_fragment_program *active; diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 924cf803acb..1d9a202dd7c 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -449,6 +449,9 @@ static void nv10_set_clip_state(struct pipe_context *pipe, const struct pipe_clip_state *clip) { + struct nv10_context *nv10 = nv10_context(pipe); + + draw_set_clip_state(nv10->draw, clip); } static void @@ -458,11 +461,11 @@ nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, struct nv10_context *nv10 = nv10_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv10->vertprog.constant_buf = buf->buffer; + nv10->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; nv10->dirty |= NV10_NEW_VERTPROG; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv10->fragprog.constant_buf = buf->buffer; + nv10->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; nv10->dirty |= NV10_NEW_FRAGPROG; } } @@ -504,6 +507,8 @@ nv10_set_viewport_state(struct pipe_context *pipe, nv10->viewport = (struct pipe_viewport_state*)vpt; + draw_set_viewport_state(nv10->draw, &nv10->viewport); + nv10->dirty |= NV10_NEW_VIEWPORT; } @@ -515,6 +520,8 @@ nv10_set_vertex_buffers(struct pipe_context *pipe, unsigned count, memcpy(nv10->vtxbuf, vb, sizeof(*vb) * count); nv10->dirty |= NV10_NEW_VTXARRAYS; + + draw_set_vertex_buffers(nv10->draw, count, vb); } static void @@ -525,6 +532,8 @@ nv10_set_vertex_elements(struct pipe_context *pipe, unsigned count, memcpy(nv10->vtxelt, ve, sizeof(*ve) * count); nv10->dirty |= NV10_NEW_VTXARRAYS; + + draw_set_vertex_elements(nv10->draw, count, ve); } void diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 134e52bd622..18566986b06 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -136,8 +136,14 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) assert(0); } - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING ( (rt->pitch * rt->cpp) | ( (zeta->pitch * zeta->cpp) << 16) ); + if (zeta) { + BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); + OUT_RING ( (rt->pitch * rt->cpp) | ( (zeta->pitch * zeta->cpp) << 16) ); + } else { + BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); + OUT_RING ( (rt->pitch * rt->cpp) ); + } + nv10->rt[0] = rt->buffer; if (zeta_format) diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 0f5902602f6..3a4f49e1564 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -44,6 +44,8 @@ boolean nv10_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } + draw_set_mapped_constant_buffer(draw, nv10->constbuf[PIPE_SHADER_VERTEX]); + /* draw! */ draw_arrays(nv10->draw, prim, start, count); -- cgit v1.2.3 From 8ed894bd17bd6f426a0d87f7113f23043cda3bc3 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Thu, 3 Apr 2008 04:20:22 +0200 Subject: nv10: emit dummy zeta size when no zbuffer is used. --- src/gallium/drivers/nv10/nv10_state_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 18566986b06..edc194588d1 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -141,7 +141,7 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) OUT_RING ( (rt->pitch * rt->cpp) | ( (zeta->pitch * zeta->cpp) << 16) ); } else { BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING ( (rt->pitch * rt->cpp) ); + OUT_RING ( (rt->pitch * rt->cpp) | ( (rt->pitch * rt->cpp) << 16) ); } nv10->rt[0] = rt->buffer; -- cgit v1.2.3 From 73322bba5c7102f0e100c9a07273a7a87705cf55 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 28 Mar 2008 23:51:24 +1100 Subject: nv40: need to resubmit buffers if pushbuf gets flushed during draw --- src/gallium/drivers/nouveau/nouveau_util.h | 64 +++++++++++++++ src/gallium/drivers/nv40/nv40_vbo.c | 120 ++++++++++++++++++----------- 2 files changed, 140 insertions(+), 44 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nouveau_util.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h new file mode 100644 index 00000000000..c92041ebeba --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -0,0 +1,64 @@ +#ifndef __NOUVEAU_UTIL_H__ +#define __NOUVEAU_UTIL_H__ + +/* Determine how many vertices can be pushed into the command stream. + * Where the remaining space isn't large enough to represent all verices, + * split the buffer at primitive boundaries. + * + * Returns a count of vertices that can be rendered, and an index to + * restart drawing at after a flush. + */ +static INLINE unsigned +nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp, + unsigned mode, unsigned start, unsigned count, + unsigned *restart) +{ + int max, adj = 0; + + max = remaining - overhead; + if (max < 0) + return 0; + + max *= vpp; + if (max >= count) + return count; + + switch (mode) { + case PIPE_PRIM_POINTS: + break; + case PIPE_PRIM_LINES: + max = max & 1; + break; + case PIPE_PRIM_TRIANGLES: + max = max - (max % 3); + break; + case PIPE_PRIM_QUADS: + max = max & 3; + break; + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_LINE_STRIP: + if (max < 2) + max = 0; + adj = 1; + break; + case PIPE_PRIM_POLYGON: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + if (max < 3) + max = 0; + adj = 2; + break; + case PIPE_PRIM_QUAD_STRIP: + if (max < 4) + max = 0; + adj = 3; + break; + default: + assert(0); + } + + *restart = start + max - adj; + return max; +} + +#endif diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index bc53924a676..33d3efb58d0 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -7,6 +7,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" #define FORCE_SWTNL 0 @@ -170,44 +171,60 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib, } boolean -nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, - unsigned count) +nv40_draw_arrays(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - unsigned nr; + struct nouveau_channel *chan = nv40->nvws->channel; + unsigned restart; nv40_vbo_set_idxbuf(nv40, NULL, 0); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { return nv40_draw_elements_swtnl(pipe, NULL, 0, mode, start, count); } - nv40_state_emit(nv40); - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + while (count) { + unsigned vc, nr; - nr = (count & 0xff); - if (nr) { - BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); - start += nr; - } + nv40_state_emit(nv40); - nr = count >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } - nr -= push; + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); - BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); - while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); - start += 0x100; + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; } - } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } pipe->flush(pipe, 0, NULL); return TRUE; @@ -329,35 +346,50 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - unsigned nr; + struct nouveau_channel *chan = nv40->nvws->channel; + unsigned restart; - nv40_state_emit(nv40); + while (count) { + unsigned nr, vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + nv40_state_emit(nv40); - nr = (count & 0xff); - if (nr) { - BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); - start += nr; - } + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } - nr = count >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; - nr -= push; + nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); - while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); - start += 0x100; + BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } } - } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } return TRUE; } -- cgit v1.2.3 From 7e9b83ac0ac59298f1b983e6a9aed3a8f2ccb147 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 29 Mar 2008 00:30:04 +1100 Subject: nv40: convert the inline idxbuf paths also --- src/gallium/drivers/nv40/nv40_vbo.c | 159 +++++++++++++++++++++++++----------- 1 file changed, 111 insertions(+), 48 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 33d3efb58d0..8cc977a19a1 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -232,69 +232,139 @@ nv40_draw_arrays(struct pipe_context *pipe, static INLINE void nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, - unsigned start, unsigned count) + unsigned mode, unsigned start, unsigned count) { - uint8_t *elts = (uint8_t *)ib + start; - int push, i; - - if (count & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); - elts++; count--; - } + struct nouveau_channel *chan = nv40->nvws->channel; while (count) { - push = MIN2(count, 2047 * 2); + uint8_t *elts = (uint8_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); - count -= push; - elts += push; + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; } } static INLINE void nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, - unsigned start, unsigned count) + unsigned mode, unsigned start, unsigned count) { - uint16_t *elts = (uint16_t *)ib + start; - int push, i; - - if (count & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); - elts++; count--; - } + struct nouveau_channel *chan = nv40->nvws->channel; while (count) { - push = MIN2(count, 2047 * 2); + uint16_t *elts = (uint16_t *)ib + start; + unsigned vc, push, restart; - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + nv40_state_emit(nv40); - count -= push; - elts += push; + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; } } static INLINE void nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, - unsigned start, unsigned count) + unsigned mode, unsigned start, unsigned count) { - uint32_t *elts = (uint32_t *)ib + start; - int push; + struct nouveau_channel *chan = nv40->nvws->channel; while (count) { - push = MIN2(count, 2047); + uint32_t *elts = (uint32_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + while (vc) { + push = MIN2(vc, 2047); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (elts, push); - count -= push; - elts += push; + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; } } @@ -315,29 +385,22 @@ nv40_draw_elements_inline(struct pipe_context *pipe, return FALSE; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); - switch (ib_size) { case 1: - nv40_draw_elements_u08(nv40, map, start, count); + nv40_draw_elements_u08(nv40, map, mode, start, count); break; case 2: - nv40_draw_elements_u16(nv40, map, start, count); + nv40_draw_elements_u16(nv40, map, mode, start, count); break; case 4: - nv40_draw_elements_u32(nv40, map, start, count); + nv40_draw_elements_u32(nv40, map, mode, start, count); break; default: NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); break; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); - ws->buffer_unmap(ws, ib); - return TRUE; } -- cgit v1.2.3 From 6fbc50e013f1ac7684d8d63d9433f6dd72b4c1cb Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 3 Apr 2008 08:21:56 +1000 Subject: nv40: static attribs -> stateobj --- src/gallium/drivers/nv40/nv40_context.h | 3 +- src/gallium/drivers/nv40/nv40_state_emit.c | 3 +- src/gallium/drivers/nv40/nv40_vbo.c | 62 ++++++++++++++++-------------- 3 files changed, 38 insertions(+), 30 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 525eef8d63d..a1b5c88a063 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -56,7 +56,8 @@ enum nv40_state_index { NV40_STATE_VERTTEX3 = 30, NV40_STATE_VTXBUF = 31, NV40_STATE_VTXFMT = 32, - NV40_STATE_MAX = 33 + NV40_STATE_VTXATTR = 33, + NV40_STATE_MAX = 34 }; #include "nv40_screen.h" diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 722b9f31e6d..c742e4f421e 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -82,7 +82,8 @@ nv40_state_emit(struct nv40_context *nv40) if (!(states & (1ULL << i))) continue; so_ref (state->hw[i], &nv40->screen->state[i]); - so_emit(nv40->nvws, nv40->screen->state[i]); + if (state->hw[i]) + so_emit(nv40->nvws, nv40->screen->state[i]); states &= ~(1ULL << i); } diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 8cc977a19a1..e5ee5f3c470 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -109,11 +109,12 @@ nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, } static boolean -nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib, - struct pipe_vertex_element *ve, +nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, + int attrib, struct pipe_vertex_element *ve, struct pipe_vertex_buffer *vb) { struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_grobj *curie = nv40->screen->curie; unsigned type, ncomp; void *map; @@ -128,31 +129,28 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, int attrib, { float *v = map; - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); switch (ncomp) { case 4: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(v[2]); - OUT_RINGf(v[3]); + so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + so_data (so, fui(v[3])); break; case 3: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(v[2]); - OUT_RINGf(1.0); + so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); break; case 2: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(0.0); - OUT_RINGf(1.0); + so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); break; case 1: - OUT_RINGf(v[0]); - OUT_RINGf(0.0); - OUT_RINGf(0.0); - OUT_RINGf(1.0); + so_method(so, curie, 0x1e40 + (attrib * 4), 1); + so_data (so, fui(v[0])); break; default: ws->buffer_unmap(ws, vb->buffer); @@ -487,7 +485,8 @@ static boolean nv40_vbo_validate(struct nv40_context *nv40) { struct nv40_vertex_program *vp = nv40->vertprog; - struct nouveau_stateobj *vtxbuf, *vtxfmt; + struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; + struct nouveau_grobj *curie = nv40->screen->curie; struct pipe_buffer *ib = nv40->idxbuf; unsigned ib_format = nv40->idxbuf_format; unsigned inputs, hw, num_hw; @@ -503,9 +502,9 @@ nv40_vbo_validate(struct nv40_context *nv40) num_hw++; vtxbuf = so_new(20, 18); - so_method(vtxbuf, nv40->screen->curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); + so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); vtxfmt = so_new(17, 0); - so_method(vtxfmt, nv40->screen->curie, NV40TCL_VTXFMT(0), num_hw); + so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), num_hw); inputs = vp->ir; for (hw = 0; hw < num_hw; hw++) { @@ -522,10 +521,15 @@ nv40_vbo_validate(struct nv40_context *nv40) ve = &nv40->vtxelt[hw]; vb = &nv40->vtxbuf[ve->vertex_buffer_index]; - if (!vb->pitch && nv40_vbo_static_attrib(nv40, hw, ve, vb)) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); - continue; + if (!vb->pitch) { + if (!sattr) + sattr = so_new(16 * 5, 0); + + if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); + continue; + } } if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { @@ -543,19 +547,21 @@ nv40_vbo_validate(struct nv40_context *nv40) } if (ib) { - so_method(vtxbuf, nv40->screen->curie, NV40TCL_IDXBUF_ADDRESS, 2); + so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2); so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, 0, NV40TCL_IDXBUF_FORMAT_DMA1); } - so_method(vtxbuf, nv40->screen->curie, 0x1710, 1); + so_method(vtxbuf, curie, 0x1710, 1); so_data (vtxbuf, 0); so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF); so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]); nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT); + so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR); return FALSE; } -- cgit v1.2.3 From 0b57662fa6feb3d4571e4a3bc3a2243547595816 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 3 Apr 2008 09:39:00 +1000 Subject: nv40: remove redundant state_emit() calls --- src/gallium/drivers/nv40/nv40_vbo.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index e5ee5f3c470..8c446936ea1 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -375,8 +375,6 @@ nv40_draw_elements_inline(struct pipe_context *pipe, struct pipe_winsys *ws = pipe->winsys; void *map; - nv40_state_emit(nv40); - map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { NOUVEAU_ERR("failed mapping ib\n"); @@ -468,7 +466,6 @@ nv40_draw_elements(struct pipe_context *pipe, return nv40_draw_elements_swtnl(pipe, NULL, 0, mode, start, count); } - nv40_state_emit(nv40); if (idxbuf) { nv40_draw_elements_vbo(pipe, mode, start, count); -- cgit v1.2.3 From 217d37940771dd02ff1aa365105eca2c7a09d623 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 2 Apr 2008 14:01:42 -0600 Subject: cell: minor texture improvements Precompute tiles_per_row. Use ushort multiplies in a few places. New comments. --- src/gallium/drivers/cell/spu/spu_main.c | 2 ++ src/gallium/drivers/cell/spu/spu_main.h | 3 ++- src/gallium/drivers/cell/spu/spu_texture.c | 32 ++++++++++++++++++++---------- 3 files changed, 26 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 1ab1c40379a..e04ffeb9b16 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -343,6 +343,8 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].width = width; spu.texture[unit].height = height; + spu.texture[unit].tiles_per_row = width / TILE_SIZE; + spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; spu.texture[unit].tex_size_mask = (vector unsigned int) { width - 1, height - 1, 0, 0 }; diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index e9e39cbeab3..e962e1426c6 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -105,7 +105,8 @@ struct spu_framebuffer { struct spu_texture { void *start; - uint width, height; + ushort width, height; + ushort tiles_per_row; vector float tex_size; vector unsigned int tex_size_mask; /**< == int(size - 1) */ vector unsigned int tex_size_x_mask; /**< == int(size - 1) */ diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index ceab2469803..e9a2754e571 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -51,22 +51,25 @@ invalidate_tex_cache(void) static uint get_texel(uint unit, vec_uint4 coordinate) { + const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; + ushort x = spu_extract(coordinate, 0); + ushort y = spu_extract(coordinate, 1); + unsigned tile_offset = sizeof(tile_t) + * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE)); + ushort texel_offset = (ushort) 4 + * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE)); vec_uint4 tmp; - unsigned x = spu_extract(coordinate, 0); - unsigned y = spu_extract(coordinate, 1); - const unsigned tiles_per_row = spu.texture[unit].width / TILE_SIZE; - unsigned tile_offset = sizeof(tile_t) * ((y / TILE_SIZE * tiles_per_row) - + (x / TILE_SIZE)); - unsigned texel_offset = 4 * (((y % TILE_SIZE) * TILE_SIZE) - + (x % TILE_SIZE)); spu_dcache_fetch_unaligned((qword *) & tmp, - spu.texture[unit].start + tile_offset + texel_offset, + texture_ea + tile_offset + texel_offset, 4); return spu_extract(tmp, 0); } +/** + * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... + */ static void get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) { @@ -76,7 +79,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) const qword offset_x = si_andi((qword) x, 0x1f); const qword offset_y = si_andi((qword) y, 0x1f); - const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].width / TILE_SIZE); + const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); const qword tile_size = (qword) spu_splats(sizeof(tile_t)); qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); @@ -97,6 +100,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) texture_ea + spu_extract(offset, 3), 4); } + /** * Get texture sample at texcoord. * XXX this is extremely primitive for now. @@ -126,17 +130,25 @@ sample_texture_bilinear(uint unit, vector float texcoord) vec_uint4 texels[4]; + /* setup texcoords for quad: + * +-----+-----+ + * |x0,y0|x1,y1| + * +-----+-----+ + * |x2,y2|x3,y3| + * +-----+-----+ + */ vec_uint4 x = spu_splats(spu_extract(itc, 0)); vec_uint4 y = spu_splats(spu_extract(itc, 1)); - x = spu_add(x, offset_x); y = spu_add(y, offset_y); + /* GL_REPEAT wrap mode: */ x = spu_and(x, spu.texture[unit].tex_size_x_mask); y = spu_and(y, spu.texture[unit].tex_size_y_mask); get_four_texels(unit, x, y, texels); + /* integer A8R8G8B8 to float texel conversion */ vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0)); vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0)); vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0)); -- cgit v1.2.3 From a7504ad587ee8cbfa9958ad23321a691ce0823d3 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Wed, 2 Apr 2008 14:30:28 -0600 Subject: cell: added some comments/ideas about better texture sampling --- src/gallium/drivers/cell/spu/spu_texture.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index e9a2754e571..5051774f00c 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -48,9 +48,16 @@ invalidate_tex_cache(void) } +/** + * XXX look into getting texels for all four pixels in a quad at once. + */ static uint get_texel(uint unit, vec_uint4 coordinate) { + /* + * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as + * SIMD since X and Y are already in a SIMD register. + */ const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; ushort x = spu_extract(coordinate, 0); ushort y = spu_extract(coordinate, 1); @@ -69,6 +76,16 @@ get_texel(uint unit, vec_uint4 coordinate) /** * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... + * + * NOTE: in the typical case of bilinear filtering, the four texels + * are in a 2x2 group so we could get by with just two dcache fetches + * (two side-by-side texels per fetch). But when bilinear filtering + * wraps around a texture edge, we'll probably need code like we have + * now. + * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time, + * it's quite likely that the four pixels in a quad will need some of the + * same texels. So look into doing texture fetches for four pixels at + * a time. */ static void get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) @@ -103,7 +120,6 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) /** * Get texture sample at texcoord. - * XXX this is extremely primitive for now. */ vector float sample_texture_nearest(uint unit, vector float texcoord) -- cgit v1.2.3 From fbb6cc7842ec8a59b60018233275babc4deb6765 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 4 Apr 2008 12:39:45 +1000 Subject: nouveau: in some cases don't create the buffer in local mem initially. --- src/gallium/drivers/nouveau/nouveau_winsys.h | 5 +++++ src/gallium/drivers/nv10/nv10_screen.c | 3 +++ src/gallium/drivers/nv30/nv30_screen.c | 3 +++ src/gallium/drivers/nv40/nv40_miptree.c | 4 +++- src/gallium/drivers/nv40/nv40_screen.c | 8 ++++++++ src/gallium/drivers/nv50/nv50_screen.c | 3 +++ src/gallium/winsys/dri/nouveau/nouveau_context.c | 11 ++++++++++- src/gallium/winsys/dri/nouveau/nouveau_context.h | 5 +++++ src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c | 2 ++ .../winsys/dri/nouveau/nouveau_winsys_pipe.c | 22 ++++++++++++++++++++-- 10 files changed, 62 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 2a5305f7ce4..fbde7adc6ad 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -13,6 +13,11 @@ #include "nouveau/nouveau_resource.h" #include "nouveau/nouveau_pushbuf.h" +#define NOUVEAU_CAP_HW_VTXBUF (0xbeef0000) +#define NOUVEAU_CAP_HW_IDXBUF (0xbeef0001) + +#define NOUVEAU_BUFFER_USAGE_TEXTURE (1 << 16) + struct nouveau_winsys { struct nouveau_context *nv; diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 80676ead1a9..45fbde62ea2 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -50,6 +50,9 @@ nv10_screen_get_param(struct pipe_screen *screen, int param) return 0; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 12; + case NOUVEAU_CAP_HW_VTXBUF: + case NOUVEAU_CAP_HW_IDXBUF: + return 0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 3ca50e4fbf4..c7487b37bc0 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -50,6 +50,9 @@ nv30_screen_get_param(struct pipe_screen *screen, int param) return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; + case NOUVEAU_CAP_HW_VTXBUF: + case NOUVEAU_CAP_HW_IDXBUF: + return 0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 502edc16298..1b192172232 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -68,7 +68,9 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) nv40_miptree_layout(mt); - mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->buffer = ws->buffer_create(ws, 256, + PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE, mt->total_size); if (!mt->buffer) { FREE(mt); diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index e98005f749f..a408d7262f3 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -27,6 +27,8 @@ nv40_screen_get_vendor(struct pipe_screen *pscreen) static int nv40_screen_get_param(struct pipe_screen *pscreen, int param) { + struct nv40_screen *screen = nv40_screen(pscreen); + switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return 16; @@ -54,6 +56,12 @@ nv40_screen_get_param(struct pipe_screen *pscreen, int param) return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; + case NOUVEAU_CAP_HW_VTXBUF: + return 1; + case NOUVEAU_CAP_HW_IDXBUF: + if (screen->curie->grclass == NV40TCL) + return 1; + return 0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 586373a5c4b..adb724b9b7a 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -63,6 +63,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; + case NOUVEAU_CAP_HW_VTXBUF: + case NOUVEAU_CAP_HW_IDXBUF: + return 0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c index cf1d83b18f4..aaeaebd2718 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.c @@ -7,6 +7,7 @@ #include "state_tracker/st_context.h" #include "pipe/p_defines.h" #include "pipe/p_context.h" +#include "pipe/p_screen.h" #include "nouveau_context.h" #include "nouveau_dri.h" @@ -133,7 +134,7 @@ nouveau_context_create(const __GLcontextModes *glVis, /* G80 */ break; default: - NOUVEAU_ERR("Unsupported chipset: NV%02x\n", nv->chipset); + NOUVEAU_ERR("Unsupported chipset: NV%02x\n", (int)nv->chipset); return GL_FALSE; } @@ -255,9 +256,17 @@ nouveau_context_create(const __GLcontextModes *glVis, } if (!getenv("NOUVEAU_FORCE_SOFTPIPE")) { + struct pipe_screen *pscreen; + pipe = nouveau_pipe_create(nv); if (!pipe) NOUVEAU_ERR("Couldn't create hw pipe\n"); + pscreen = nvc->pscreen; + + nv->cap.hw_vertex_buffer = + pscreen->get_param(pscreen, NOUVEAU_CAP_HW_VTXBUF); + nv->cap.hw_index_buffer = + pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF); } if (!pipe) { diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.h b/src/gallium/winsys/dri/nouveau/nouveau_context.h index 92f551855a4..acb58fab448 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.h @@ -56,6 +56,11 @@ struct nouveau_context { struct nouveau_screen *nv_screen; struct pipe_surface *frontbuffer; + struct { + int hw_vertex_buffer; + int hw_index_buffer; + } cap; + /* Hardware context */ struct nouveau_channel_context *nvc; int pctx_id; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c b/src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c index 2e3ac5492fc..78919bdee8c 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c @@ -137,6 +137,8 @@ nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min) if (nvpb->base.remaining == nvpb->size) return 0; + nouveau_fence_flush(chan); + nvpb->size -= nvpb->base.remaining; nvchan->dma->cur += nvpb->size; nvchan->dma->free -= nvpb->size; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c index b1bf9c521aa..849e38d22b6 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c @@ -79,9 +79,10 @@ nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, unsigned usage, unsigned size) { struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; - struct nouveau_device *dev = nvpws->nv->nv_screen->device; + struct nouveau_context *nv = nvpws->nv; + struct nouveau_device *dev = nv->nv_screen->device; struct nouveau_pipe_buffer *nvbuf; - uint32_t flags = 0; + uint32_t flags; nvbuf = calloc(1, sizeof(*nvbuf)); if (!nvbuf) @@ -92,6 +93,23 @@ nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, nvbuf->base.size = size; flags = NOUVEAU_BO_LOCAL; + + if (usage & PIPE_BUFFER_USAGE_PIXEL) { + if (usage & NOUVEAU_BUFFER_USAGE_TEXTURE) + flags |= NOUVEAU_BO_GART; + flags |= NOUVEAU_BO_VRAM; + } + + if (usage & PIPE_BUFFER_USAGE_VERTEX) { + if (nv->cap.hw_vertex_buffer) + flags |= NOUVEAU_BO_GART; + } + + if (usage & PIPE_BUFFER_USAGE_INDEX) { + if (nv->cap.hw_index_buffer) + flags |= NOUVEAU_BO_GART; + } + if (nouveau_bo_new(dev, flags, alignment, size, &nvbuf->bo)) { free(nvbuf); return NULL; -- cgit v1.2.3 From a45a12e757a8f4d41daea2a3f632d4772ff69e38 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 4 Apr 2008 13:02:37 +1000 Subject: nv40: have test for hw idxbuf in single place --- src/gallium/drivers/nv40/nv40_vbo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 8c446936ea1..ec88470b317 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -75,6 +75,7 @@ static boolean nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, unsigned ib_size) { + struct pipe_screen *pscreen = &nv40->screen->pipe; unsigned type; if (!ib) { @@ -83,8 +84,7 @@ nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, return FALSE; } - /* No support for 8bit indices, no support at all on 0x4497 chips */ - if (nv40->screen->curie->grclass == NV44TCL || ib_size == 1) + if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) return FALSE; switch (ib_size) { -- cgit v1.2.3 From 2946a5a012f494bad280a0ecf082d81ed4e89c3b Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 4 Apr 2008 20:32:06 +1000 Subject: nv40: kill some warnings --- src/gallium/drivers/nv40/nv40_state.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 5dc2991212d..997beca883a 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -421,7 +421,6 @@ static void nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_rasterizer_state *rsso = hwcso; nv40->rasterizer = hwcso; nv40->dirty |= NV40_NEW_RAST; @@ -527,7 +526,6 @@ static void nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_vertex_program *vp = hwcso; nv40->vertprog = hwcso; nv40->dirty |= NV40_NEW_VERTPROG; -- cgit v1.2.3 From 2655f6901289bcfe3835cf28d7b9eefa242045b8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 7 Apr 2008 20:10:40 +1000 Subject: nv40: implement user clip planes It turns out the user planes handed to the driver are already in clip space. Hence, we no longer need to transform incoming vertices before computing the clip distance, and no longer need to change the interface provided by gallium. Yay :) The clip state change handling could be better, but this works. --- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.h | 4 - src/gallium/drivers/nv40/nv40_draw.c | 2 +- src/gallium/drivers/nv40/nv40_state.h | 3 + src/gallium/drivers/nv40/nv40_state_clip.c | 22 ---- src/gallium/drivers/nv40/nv40_state_emit.c | 8 +- src/gallium/drivers/nv40/nv40_vertprog.c | 159 +++++++++++++++-------------- 7 files changed, 87 insertions(+), 112 deletions(-) delete mode 100644 src/gallium/drivers/nv40/nv40_state_clip.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 3369a21574f..9c8eadf7e44 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -14,7 +14,6 @@ DRIVER_SOURCES = \ nv40_screen.c \ nv40_state.c \ nv40_state_blend.c \ - nv40_state_clip.c \ nv40_state_emit.c \ nv40_state_fb.c \ nv40_state_rasterizer.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index a1b5c88a063..436351b6bcc 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -76,9 +76,6 @@ enum nv40_state_index { #define NV40_NEW_ARRAYS (1 << 11) #define NV40_NEW_UCP (1 << 12) -#define NV40_FALLBACK_TNL (1 << 0) -#define NV40_FALLBACK_RAST (1 << 1) - struct nv40_rasterizer_state { struct pipe_rasterizer_state pipe; struct nouveau_stateobj *so; @@ -203,7 +200,6 @@ extern void nv40_fragtex_bind(struct nv40_context *); extern boolean nv40_state_validate(struct nv40_context *nv40); extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); extern void nv40_state_emit(struct nv40_context *nv40); -extern struct nv40_state_entry nv40_state_clip; extern struct nv40_state_entry nv40_state_rasterizer; extern struct nv40_state_entry nv40_state_scissor; extern struct nv40_state_entry nv40_state_stipple; diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 9cd8fa6a497..7f008aca3a8 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -236,7 +236,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, if (!nv40_state_validate_swtnl(nv40)) return FALSE; - nv40->dirty &= ~(1ULL << NV40_STATE_VTXBUF); + nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); nv40_state_emit(nv40); for (i = 0; i < PIPE_ATTRIB_MAX; i++) { diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index e018464c9f8..2b4225deb2e 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -29,6 +29,9 @@ struct nv40_vertex_program { struct draw_vertex_shader *draw; boolean translated; + + struct pipe_clip_state ucp; + struct nv40_vertex_program_exec *insns; unsigned nr_insns; struct nv40_vertex_program_data *consts; diff --git a/src/gallium/drivers/nv40/nv40_state_clip.c b/src/gallium/drivers/nv40/nv40_state_clip.c deleted file mode 100644 index c52390f9edb..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_clip.c +++ /dev/null @@ -1,22 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_clip_validate(struct nv40_context *nv40) -{ - - if (nv40->render_mode == HW) { - nv40->fallback_swtnl &= ~NV40_NEW_UCP; - if (nv40->clip.nr) - nv40->fallback_swtnl |= NV40_NEW_UCP; - } - - return FALSE; -} - -struct nv40_state_entry nv40_state_clip = { - .validate = nv40_state_clip_validate, - .dirty = { - .pipe = NV40_NEW_UCP, - .hw = 0 - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index c742e4f421e..864dfc2e0c3 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -5,7 +5,6 @@ static struct nv40_state_entry *render_states[] = { &nv40_state_framebuffer, &nv40_state_rasterizer, - &nv40_state_clip, &nv40_state_scissor, &nv40_state_stipple, &nv40_state_fragprog, @@ -22,7 +21,6 @@ static struct nv40_state_entry *render_states[] = { static struct nv40_state_entry *swtnl_states[] = { &nv40_state_framebuffer, &nv40_state_rasterizer, - &nv40_state_clip, &nv40_state_scissor, &nv40_state_stipple, &nv40_state_fragprog, @@ -127,8 +125,7 @@ nv40_state_validate(struct nv40_context *nv40) nv40->pipe.flush(&nv40->pipe, 0, NULL); nv40->dirty |= (NV40_NEW_VIEWPORT | NV40_NEW_VERTPROG | - NV40_NEW_ARRAYS | - NV40_NEW_UCP); + NV40_NEW_ARRAYS); nv40->render_mode = HW; } @@ -153,8 +150,7 @@ nv40_state_validate_swtnl(struct nv40_context *nv40) nv40->pipe.flush(&nv40->pipe, 0, NULL); nv40->dirty |= (NV40_NEW_VIEWPORT | NV40_NEW_VERTPROG | - NV40_NEW_ARRAYS | - NV40_NEW_UCP); + NV40_NEW_ARRAYS); nv40->render_mode = SWTNL; } diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 08d3f387e09..e10250528e2 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -52,6 +52,8 @@ struct nv40_vpc { struct nv40_sreg *imm; unsigned nr_imm; + + unsigned hpos_idx; }; static struct nv40_sreg @@ -423,11 +425,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, int ai = -1, ci = -1, ii = -1; int i; - struct { - struct nv40_sreg dst; - unsigned m; - } clip; - if (finst->Instruction.Opcode == TGSI_OPCODE_END) return TRUE; @@ -501,47 +498,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); - /* If writing to clip distance regs, need to modify instruction to - * change which component is written to. On NV40 the clip regs - * are the unused components (yzw) of FOGC/PSZ. - */ - clip.dst = none; - if (dst.type == NV40SR_OUTPUT && - dst.index >= NV40_VP_INST_DEST_CLIP(0) && - dst.index <= NV40_VP_INST_DEST_CLIP(5)) { - unsigned n = dst.index - NV40_VP_INST_DEST_CLIP(0); - unsigned m[] = - { MASK_Y, MASK_Z, MASK_W, MASK_Y, MASK_Z, MASK_W }; - - /* Some instructions we can get away with swizzling and/or - * changing the writemask. Others, we'll use a temp reg. - */ - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_DST: - case TGSI_OPCODE_EXP: - case TGSI_OPCODE_LIT: - case TGSI_OPCODE_LOG: - case TGSI_OPCODE_XPD: - clip.dst = dst; - clip.m = m[n]; - dst = temp(vpc); - break; - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - case TGSI_OPCODE_POW: - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - mask = m[n]; - break; - default: - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) - src[i] = swz(src[i], X, X, X, X); - mask = m[n]; - break; - } - } - switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); @@ -639,11 +595,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, return FALSE; } - if (clip.dst.type != NV40SR_NONE) { - arith(vpc, 0, OP_MOV, clip.dst, clip.m, - swz(dst, X, X, X, X), none, none); - } - release_temps(vpc); return TRUE; } @@ -658,6 +609,7 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, switch (fdec->Semantic.SemanticName) { case TGSI_SEMANTIC_POSITION: hw = NV40_VP_INST_DEST_POS; + vpc->hpos_idx = idx; break; case TGSI_SEMANTIC_COLOR: if (fdec->Semantic.SemanticIndex == 0) { @@ -695,15 +647,6 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, return FALSE; } break; -#if 0 - case TGSI_SEMANTIC_CLIP: - if (fdec->Semantic.SemanticIndex >= 6) { - NOUVEAU_ERR("bad clip distance index\n"); - return FALSE; - } - hw = NV40_VP_INST_DEST_CLIP(fdec->Semantic.SemanticIndex); - break; -#endif default: NOUVEAU_ERR("bad output semantic\n"); return FALSE; @@ -748,6 +691,10 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) } break; #endif + case TGSI_FILE_OUTPUT: + if (!nv40_vertprog_parse_decl_output(vpc, fdec)) + return FALSE; + break; default: break; } @@ -803,6 +750,8 @@ nv40_vertprog_translate(struct nv40_context *nv40, { struct tgsi_parse_context parse; struct nv40_vpc *vpc = NULL; + struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + int i; vpc = CALLOC(1, sizeof(struct nv40_vpc)); if (!vpc) @@ -814,26 +763,21 @@ nv40_vertprog_translate(struct nv40_context *nv40, return; } + /* Redirect post-transform vertex position to a temp if user clip + * planes are enabled. We need to append code the the vtxprog + * to handle clip planes later. + */ + if (vp->ucp.nr) { + vpc->r_result[vpc->hpos_idx] = temp(vpc); + vpc->r_temps_discard = 0; + } + tgsi_parse_init(&parse, vp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_OUTPUT: - if (!nv40_vertprog_parse_decl_output(vpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; case TGSI_TOKEN_TYPE_IMMEDIATE: { const struct tgsi_full_immediate *imm; @@ -862,6 +806,39 @@ nv40_vertprog_translate(struct nv40_context *nv40, } } + /* Write out HPOS if it was redirected to a temp earlier */ + if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { + struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, + NV40_VP_INST_DEST_POS); + struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + + arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); + } + + /* Insert code to handle user clip planes */ + for (i = 0; i < vp->ucp.nr; i++) { + struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, + NV40_VP_INST_DEST_CLIP(i)); + struct nv40_sreg ceqn = constant(vpc, -1, + nv40->clip.ucp[i][0], + nv40->clip.ucp[i][1], + nv40->clip.ucp[i][2], + nv40->clip.ucp[i][3]); + struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + unsigned mask; + + switch (i) { + case 0: case 3: mask = MASK_Y; break; + case 1: case 4: mask = MASK_Z; break; + case 2: case 5: mask = MASK_W; break; + default: + NOUVEAU_ERR("invalid clip dist #%d\n", i); + goto out_err; + } + + arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); + } + vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; vp->translated = TRUE; out_err: @@ -883,6 +860,12 @@ nv40_vertprog_validate(struct nv40_context *nv40) if (nv40->render_mode == HW) { vp = nv40->vertprog; constbuf = nv40->constbuf[PIPE_SHADER_VERTEX]; + + if ((nv40->dirty & NV40_NEW_UCP) || + memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) { + nv40_vertprog_destroy(nv40, vp); + memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp)); + } } else { vp = nv40->swtnl.vertprog; constbuf = NULL; @@ -1045,16 +1028,36 @@ check_gpu_resources: void nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) { - if (vp->nr_consts) - FREE(vp->consts); - if (vp->nr_insns) + struct nouveau_winsys *nvws = nv40->screen->nvws; + + vp->translated = FALSE; + + if (vp->nr_insns) { FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nvws->res_free(&vp->exec); + vp->exec_start = 0; + nvws->res_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = vp->clip_ctrl = 0; + so_ref(NULL, &vp->so); } struct nv40_state_entry nv40_state_vertprog = { .validate = nv40_vertprog_validate, .dirty = { - .pipe = NV40_NEW_VERTPROG, + .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP, .hw = NV40_STATE_VERTPROG, } }; -- cgit v1.2.3 From c7daa68ca312cc98abe351be2fef8d8246929627 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 7 Apr 2008 21:59:12 -0600 Subject: gallium: begin reworking quad stages for multiple color outputs --- src/gallium/drivers/softpipe/sp_context.h | 2 - src/gallium/drivers/softpipe/sp_fs_llvm.c | 16 +- src/gallium/drivers/softpipe/sp_headers.h | 4 +- src/gallium/drivers/softpipe/sp_quad.c | 9 - src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 5 +- src/gallium/drivers/softpipe/sp_quad_blend.c | 1192 +++++++++++---------- src/gallium/drivers/softpipe/sp_quad_bufloop.c | 4 +- src/gallium/drivers/softpipe/sp_quad_colormask.c | 59 +- src/gallium/drivers/softpipe/sp_quad_coverage.c | 15 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 60 +- src/gallium/drivers/softpipe/sp_quad_output.c | 39 +- 11 files changed, 734 insertions(+), 671 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index dc9d0e6d5d6..9724d185d15 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -138,8 +138,6 @@ struct softpipe_context { struct draw_stage *vbuf; struct softpipe_vbuf_render *vbuf_render; - uint current_cbuf; /**< current color buffer being written to */ - struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; struct softpipe_tile_cache *zsbuf_cache; diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 07d058155f2..6e1d9280bb4 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -99,19 +99,19 @@ shade_quad_llvm(struct quad_stage *qs, allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] == TGSI_SEMANTIC_COLOR); for (i = 0; i < QUAD_SIZE; ++i) { - quad->outputs.color[0][i] = dests[i][qss->colorOutSlot][0]; - quad->outputs.color[1][i] = dests[i][qss->colorOutSlot][1]; - quad->outputs.color[2][i] = dests[i][qss->colorOutSlot][2]; - quad->outputs.color[3][i] = dests[i][qss->colorOutSlot][3]; + quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0]; + quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1]; + quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2]; + quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3]; } } #if DLLVM for (int i = 0; i < QUAD_SIZE; ++i) { debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, - quad->outputs.color[0][i], - quad->outputs.color[1][i], - quad->outputs.color[2][i], - quad->outputs.color[3][i]); + quad->outputs.color[0][0][i], + quad->outputs.color[0][1][i], + quad->outputs.color[0][2][i], + quad->outputs.color[0][3][i]); } #endif diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h index 9cf82221333..3d9ede69bbd 100644 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -31,6 +31,7 @@ #ifndef SP_HEADERS_H #define SP_HEADERS_H +#include "pipe/p_state.h" #include "tgsi/exec/tgsi_exec.h" #define PRIM_POINT 1 @@ -66,7 +67,8 @@ struct quad_header { unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ struct { - float color[NUM_CHANNELS][QUAD_SIZE]; /* rrrr, gggg, bbbb, aaaa */ + /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ + float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; float depth[QUAD_SIZE]; } outputs; diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 8603c1a3677..bc83d78ea16 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -76,15 +76,6 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp_push_quad_first( sp, sp->quad.blend ); } - if (sp->framebuffer.num_cbufs == 1) { - /* the usual case: write to exactly one colorbuf */ - sp->current_cbuf = 0; - } - else { - /* insert bufloop stage */ - sp_push_quad_first( sp, sp->quad.bufloop ); - } - if (sp->depth_stencil->depth.occlusion_count) { sp_push_quad_first( sp, sp->quad.occlusion ); } diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 318916fe301..7a42b08ef53 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -16,7 +16,8 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_context *softpipe = qs->softpipe; const float ref = softpipe->depth_stencil->alpha.ref; unsigned passMask = 0x0, j; - const float *aaaa = quad->outputs.color[3]; + const uint cbuf = 0; /* only output[0].alpha is tested */ + const float *aaaa = quad->outputs.color[cbuf][3]; switch (softpipe->depth_stencil->alpha.func) { case PIPE_FUNC_NEVER: @@ -25,7 +26,7 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) case PIPE_FUNC_LESS: /* * If mask were an array [4] we could do this SIMD-style: - * passMask = (quad->outputs.color[3] <= vec4(ref)); + * passMask = (quad->outputs.color[0][3] <= vec4(ref)); */ for (j = 0; j < QUAD_SIZE; j++) { if (aaaa[j] < ref) { diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 0b79cfa1ed2..8be8025f40c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -101,114 +101,119 @@ static void logicop_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - float dest[4][QUAD_SIZE]; - ubyte src[4][4], dst[4][4], res[4][4]; - uint *src4 = (uint *) src; - uint *dst4 = (uint *) dst; - uint *res4 = (uint *) res; - struct softpipe_cached_tile * - tile = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[softpipe->current_cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color; - uint i, j; - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float dest[4][QUAD_SIZE]; + ubyte src[4][4], dst[4][4], res[4][4]; + uint *src4 = (uint *) src; + uint *dst4 = (uint *) dst; + uint *res4 = (uint *) res; + struct softpipe_cached_tile * + tile = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color[cbuf]; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } } - } - /* convert to ubyte */ - for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */ - - UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */ - } + /* convert to ubyte */ + for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */ + UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */ + + UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */ + UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */ + } - switch (softpipe->blend->logicop_func) { - case PIPE_LOGICOP_CLEAR: - for (j = 0; j < 4; j++) - res4[j] = 0; - break; - case PIPE_LOGICOP_NOR: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] | dst4[j]); - break; - case PIPE_LOGICOP_AND_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_COPY_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j]; - break; - case PIPE_LOGICOP_AND_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & ~dst4[j]; - break; - case PIPE_LOGICOP_INVERT: - for (j = 0; j < 4; j++) - res4[j] = ~dst4[j]; - break; - case PIPE_LOGICOP_XOR: - for (j = 0; j < 4; j++) - res4[j] = dst4[j] ^ src4[j]; - break; - case PIPE_LOGICOP_NAND: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] & dst4[j]); - break; - case PIPE_LOGICOP_AND: - for (j = 0; j < 4; j++) - res4[j] = src4[j] & dst4[j]; - break; - case PIPE_LOGICOP_EQUIV: - for (j = 0; j < 4; j++) - res4[j] = ~(src4[j] ^ dst4[j]); - break; - case PIPE_LOGICOP_NOOP: - for (j = 0; j < 4; j++) - res4[j] = dst4[j]; - break; - case PIPE_LOGICOP_OR_INVERTED: - for (j = 0; j < 4; j++) - res4[j] = ~src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_COPY: - for (j = 0; j < 4; j++) - res4[j] = src4[j]; - break; - case PIPE_LOGICOP_OR_REVERSE: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | ~dst4[j]; - break; - case PIPE_LOGICOP_OR: - for (j = 0; j < 4; j++) - res4[j] = src4[j] | dst4[j]; - break; - case PIPE_LOGICOP_SET: - for (j = 0; j < 4; j++) - res4[j] = ~0; - break; - default: - assert(0); - } + switch (softpipe->blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: + for (j = 0; j < 4; j++) + res4[j] = 0; + break; + case PIPE_LOGICOP_NOR: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] | dst4[j]); + break; + case PIPE_LOGICOP_AND_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_COPY_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j]; + break; + case PIPE_LOGICOP_AND_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & ~dst4[j]; + break; + case PIPE_LOGICOP_INVERT: + for (j = 0; j < 4; j++) + res4[j] = ~dst4[j]; + break; + case PIPE_LOGICOP_XOR: + for (j = 0; j < 4; j++) + res4[j] = dst4[j] ^ src4[j]; + break; + case PIPE_LOGICOP_NAND: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] & dst4[j]); + break; + case PIPE_LOGICOP_AND: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_EQUIV: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] ^ dst4[j]); + break; + case PIPE_LOGICOP_NOOP: + for (j = 0; j < 4; j++) + res4[j] = dst4[j]; + break; + case PIPE_LOGICOP_OR_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_COPY: + for (j = 0; j < 4; j++) + res4[j] = src4[j]; + break; + case PIPE_LOGICOP_OR_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | ~dst4[j]; + break; + case PIPE_LOGICOP_OR: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_SET: + for (j = 0; j < 4; j++) + res4[j] = ~0; + break; + default: + assert(0); + } - for (j = 0; j < 4; j++) { - quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]); - quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]); - quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]); - quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]); + for (j = 0; j < 4; j++) { + quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]); + quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]); + quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]); + quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]); + } } /* pass quad to next stage */ @@ -221,504 +226,511 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) static void blend_quad(struct quad_stage *qs, struct quad_header *quad) { - struct softpipe_context *softpipe = qs->softpipe; static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; - float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[softpipe->current_cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color; - uint i, j; - - if (softpipe->blend->logicop_enable) { - logicop_quad(qs, quad); - return; - } - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color[cbuf]; + uint i, j; + + if (softpipe->blend->logicop_enable) { + logicop_quad(qs, quad); + return; } - } - /* - * Compute src/first term RGB - */ - switch (softpipe->blend->rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[0], quadColor[0]); /* R */ - VEC4_COPY(source[1], quadColor[1]); /* G */ - VEC4_COPY(source[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - { - const float *alpha = quadColor[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ - VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ - VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - { - const float *alpha = dest[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - { - const float *alpha = quadColor[3]; - float diff[4]; - VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(source[0], alpha, diff); /* R */ - VEC4_MIN(source[1], alpha, diff); /* G */ - VEC4_MIN(source[2], alpha, diff); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], comp); /* R */ - VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], comp); /* G */ - VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float alpha[4]; - VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_SRC1_ALPHA: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[0], zero); /* R */ - VEC4_COPY(source[1], zero); /* G */ - VEC4_COPY(source[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, quadColor[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - { - float inv_comp[4]; - /* R */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(source[0], quadColor[0], inv_comp); - /* G */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(source[1], quadColor[1], inv_comp); - /* B */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(source[2], quadColor[2], inv_comp); - } - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_alpha[4]; - VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - assert(0); /* to do */ - break; - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - assert(0); /* to do */ - break; - default: - assert(0); - } - - /* - * Compute src/first term A - */ - switch (softpipe->blend->alpha_src_factor) { - case PIPE_BLENDFACTOR_ONE: - VEC4_COPY(source[3], quadColor[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - { - const float *alpha = quadColor[3]; - VEC4_MUL(source[3], quadColor[3], alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - { - const float *alpha = quadColor[3]; - float diff[4]; - VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(source[3], alpha, diff); /* A */ - } - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(source[3], quadColor[3], comp); /* A */ + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(source[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, quadColor[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - /* A */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(source[3], quadColor[3], inv_comp); - } - break; - default: - assert(0); - } - - - /* - * Compute dest/second term RGB - */ - switch (softpipe->blend->rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ - VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ - VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ - VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ - VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[0], dest[0], comp); /* R */ - VEC4_MUL(dest[1], dest[1], comp); /* G */ - VEC4_MUL(dest[2], dest[2], comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[0], zero); /* R */ - VEC4_COPY(dest[1], zero); /* G */ - VEC4_COPY(dest[2], zero); /* B */ - break; - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float one_minus_alpha[QUAD_SIZE]; - VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ - VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ - VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[3]); /* A */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[0]); /* R */ - VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ - VEC4_SUB(inv_comp, one, dest[1]); /* G */ - VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ - VEC4_SUB(inv_comp, one, dest[2]); /* B */ - VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ - } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - { - float inv_comp[4]; - /* R */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); - VEC4_MUL(dest[0], dest[0], inv_comp); - /* G */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); - VEC4_MUL(dest[1], dest[1], inv_comp); - /* B */ - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); - VEC4_MUL(dest[2], dest[2], inv_comp); + + /* + * Compute src/first term RGB + */ + switch (softpipe->blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + { + const float *alpha = dest[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(source[0], alpha, diff); /* R */ + VEC4_MIN(source[1], alpha, diff); /* G */ + VEC4_MIN(source[2], alpha, diff); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float alpha[4]; + VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(source[0], quadColor[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(source[1], quadColor[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(source[2], quadColor[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_alpha[4]; + VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + assert(0); /* to do */ + break; + default: + assert(0); } - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[0], dest[0], inv_comp); - VEC4_MUL(dest[1], dest[1], inv_comp); - VEC4_MUL(dest[2], dest[2], inv_comp); + + /* + * Compute src/first term A + */ + switch (softpipe->blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(source[3], alpha, diff); /* A */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(source[3], quadColor[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + /* A */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[3], quadColor[3], inv_comp); + } + break; + default: + assert(0); } - break; - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - /* XXX what are these? */ - assert(0); - break; - default: - assert(0); - } - - /* - * Compute dest/second term A - */ - switch (softpipe->blend->alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* dest = dest * 1 NO-OP, leave dest as-is */ - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_SRC_ALPHA: - VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - assert(0); /* illegal */ - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - { - float comp[4]; - VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ - VEC4_MUL(dest[3], dest[3], comp); /* A */ + + + /* + * Compute dest/second term RGB + */ + switch (softpipe->blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ + VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ + VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ + VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ + VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[3]); /* A */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(dest[0], dest[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(dest[1], dest[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + default: + assert(0); } - break; - case PIPE_BLENDFACTOR_ZERO: - VEC4_COPY(dest[3], zero); /* A */ - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - { - float one_minus_alpha[QUAD_SIZE]; - VEC4_SUB(one_minus_alpha, one, quadColor[3]); - VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + + /* + * Compute dest/second term A + */ + switch (softpipe->blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[3], dest[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[3], dest[3], inv_comp); + } + break; + default: + assert(0); } - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + + /* + * Combine RGB terms + */ + switch (softpipe->blend->rgb_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */ + VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */ + VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */ + VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */ + VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + break; + default: + assert(0); } - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - { - float inv_comp[4]; - VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); - VEC4_MUL(dest[3], dest[3], inv_comp); + + /* + * Combine A terms + */ + switch (softpipe->blend->alpha_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + break; + default: + assert(0); } - break; - default: - assert(0); - } - - /* - * Combine RGB terms - */ - switch (softpipe->blend->rgb_func) { - case PIPE_BLEND_ADD: - VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */ - VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */ - VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */ - VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */ - VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */ - VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */ - VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ - VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ - VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ - break; - default: - assert(0); - } - - /* - * Combine A terms - */ - switch (softpipe->blend->alpha_func) { - case PIPE_BLEND_ADD: - VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_SUBTRACT: - VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */ - break; - case PIPE_BLEND_MIN: - VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ - break; - case PIPE_BLEND_MAX: - VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ - break; - default: - assert(0); - } - + + } /* cbuf loop */ + /* pass blended quad to next stage */ qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c index 2ae4e22a7de..b3db428ef18 100644 --- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -13,7 +13,7 @@ static void cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - float tmp[4][QUAD_SIZE]; + float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE]; unsigned i; assert(sizeof(quad->outputs.color) == sizeof(tmp)); @@ -30,7 +30,9 @@ cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { /* set current cbuffer */ +#if 0 /* obsolete & going away */ softpipe->current_cbuf = i; +#endif /* pass blended quad to next stage */ qs->next->run(qs->next, quad); diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index 1f09d900ca8..7fe080990bd 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -47,38 +47,43 @@ static void colormask_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - float dest[4][QUAD_SIZE]; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[softpipe->current_cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color; - uint i, j; - - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); - for (i = 0; i < 4; i++) { - dest[i][j] = tile->data.color[y][x][i]; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color[cbuf]; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } } - } - /* R */ - if (!(softpipe->blend->colormask & PIPE_MASK_R)) - COPY_4V(quadColor[0], dest[0]); + /* R */ + if (!(softpipe->blend->colormask & PIPE_MASK_R)) + COPY_4V(quadColor[0], dest[0]); - /* G */ - if (!(softpipe->blend->colormask & PIPE_MASK_G)) - COPY_4V(quadColor[1], dest[1]); + /* G */ + if (!(softpipe->blend->colormask & PIPE_MASK_G)) + COPY_4V(quadColor[1], dest[1]); - /* B */ - if (!(softpipe->blend->colormask & PIPE_MASK_B)) - COPY_4V(quadColor[2], dest[2]); + /* B */ + if (!(softpipe->blend->colormask & PIPE_MASK_B)) + COPY_4V(quadColor[2], dest[2]); - /* A */ - if (!(softpipe->blend->colormask & PIPE_MASK_A)) - COPY_4V(quadColor[3], dest[3]); + /* A */ + if (!(softpipe->blend->colormask & PIPE_MASK_A)) + COPY_4V(quadColor[3], dest[3]); + } /* pass quad to next stage */ qs->next->run(qs->next, quad); diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index b3d3fae22fd..dd5ebb22961 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -50,12 +50,17 @@ coverage_quad(struct quad_stage *qs, struct quad_header *quad) if ((softpipe->rasterizer->poly_smooth && quad->prim == PRIM_TRI) || (softpipe->rasterizer->line_smooth && quad->prim == PRIM_LINE) || (softpipe->rasterizer->point_smooth && quad->prim == PRIM_POINT)) { - float (*quadColor)[4] = quad->outputs.color; - unsigned j; - for (j = 0; j < QUAD_SIZE; j++) { - assert(quad->coverage[j] >= 0.0); - assert(quad->coverage[j] <= 1.0); + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float (*quadColor)[4] = quad->outputs.color[cbuf]; + unsigned j; + for (j = 0; j < QUAD_SIZE; j++) { + assert(quad->coverage[j] >= 0.0); + assert(quad->coverage[j] <= 1.0); quadColor[3][j] *= quad->coverage[j]; + } } } diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index a73df31383f..9a20c056a22 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -88,22 +88,64 @@ shade_quad( &qss->machine, quad ); - /* store result color */ +#if 0 /* XXX multi color outputs - untested */ + /* store outputs */ + boolean z_written = FALSE; + { + const ubyte *sem_name = softpipe->fs->info.output_semantic_name; + const ubyte *sem_index = softpipe->fs->info.output_semantic_index; + const uint n = qss->stage.softpipe->fs->info.num_outputs; + uint i; + for (i = 0; i < n; i++) { + switch (sem_name[i]) { + case TGSI_SEMANTIC_COLOR: + { + uint cbuf = sem_index[i]; + memcpy(quad->outputs.color[cbuf], + &machine->Outputs[i].xyzw[0].f[0], + sizeof(quad->outputs.color[0]) ); + } + break; + case TGSI_SEMANTIC_POSITION: + { + uint j; + for (j = 0; j < 4; j++) { + quad->outputs.depth[j] = machine->Outputs[0].xyzw[2].f[j]; + } + z_written = TRUE; + } + break; + } + } + } + + if (!z_written) { + /* compute Z values now, as in the quad earlyz stage */ + /* XXX we should really only do this if the earlyz stage is not used */ + const float fx = (float) quad->x0; + const float fy = (float) quad->y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->outputs.depth[0] = z0; + quad->outputs.depth[1] = z0 + dzdx; + quad->outputs.depth[2] = z0 + dzdy; + quad->outputs.depth[3] = z0 + dzdx + dzdy; + } +#endif + + /* store result color(s) */ if (qss->colorOutSlot >= 0) { /* XXX need to handle multiple color outputs someday */ - assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] + assert(softpipe->fs->info.output_semantic_name[qss->colorOutSlot] == TGSI_SEMANTIC_COLOR); memcpy( - quad->outputs.color, + quad->outputs.color[0], &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], - sizeof( quad->outputs.color ) ); + sizeof( quad->outputs.color[0] ) ); } - /* - * XXX the following code for updating quad->outputs.depth - * isn't really needed if we did early z testing. - */ - /* store result Z */ if (qss->depthOutSlot >= 0) { /* output[slot] is new Z */ diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index cfe8f118081..40083138a43 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -34,31 +34,36 @@ /** - * Write quad to framebuffer, taking mask into account. - * - * Note that surfaces support only full quad reads and writes. + * Last step of quad processing: write quad colors to the framebuffer, + * taking mask into account. */ static void output_quad(struct quad_stage *qs, struct quad_header *quad) { - struct softpipe_context *softpipe = qs->softpipe; - struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, - softpipe->cbuf_cache[softpipe->current_cbuf], - quad->x0, quad->y0); /* in-tile pos: */ const int itx = quad->x0 % TILE_SIZE; const int ity = quad->y0 % TILE_SIZE; - float (*quadColor)[4] = quad->outputs.color; - int i, j; - /* get/swizzle dest colors */ - for (j = 0; j < QUAD_SIZE; j++) { - if (quad->mask & (1 << j)) { - int x = itx + (j & 1); - int y = ity + (j >> 1); - for (i = 0; i < 4; i++) { /* loop over color chans */ - tile->data.color[y][x][i] = quadColor[i][j]; + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->x0, quad->y0); + float (*quadColor)[4] = quad->outputs.color[cbuf]; + int i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } } } } -- cgit v1.2.3 From 17f640990350823f530257ee1b1fa7fa1f83493f Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 7 Apr 2008 22:00:41 -0600 Subject: gallium: get rid of bufloop quad stage --- src/gallium/drivers/softpipe/Makefile | 1 - src/gallium/drivers/softpipe/SConscript | 1 - src/gallium/drivers/softpipe/sp_context.c | 2 -- src/gallium/drivers/softpipe/sp_context.h | 1 - src/gallium/drivers/softpipe/sp_quad.h | 1 - 5 files changed, 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index f32db35d587..19dfd8c1a7a 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -17,7 +17,6 @@ C_SOURCES = \ sp_quad.c \ sp_quad_alpha_test.c \ sp_quad_blend.c \ - sp_quad_bufloop.c \ sp_quad_colormask.c \ sp_quad_coverage.c \ sp_quad_depth_test.c \ diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 88c21ee3b0f..06931fa8d8b 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -16,7 +16,6 @@ softpipe = env.ConvenienceLibrary( 'sp_prim_vbuf.c', 'sp_quad_alpha_test.c', 'sp_quad_blend.c', - 'sp_quad_bufloop.c', 'sp_quad.c', 'sp_quad_colormask.c', 'sp_quad_coverage.c', diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index e298ed37c36..8c84ddbe196 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -98,7 +98,6 @@ static void softpipe_destroy( struct pipe_context *pipe ) softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); softpipe->quad.coverage->destroy( softpipe->quad.coverage ); - softpipe->quad.bufloop->destroy( softpipe->quad.bufloop ); softpipe->quad.blend->destroy( softpipe->quad.blend ); softpipe->quad.colormask->destroy( softpipe->quad.colormask ); softpipe->quad.output->destroy( softpipe->quad.output ); @@ -207,7 +206,6 @@ softpipe_create( struct pipe_screen *screen, softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); - softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe); softpipe->quad.blend = sp_quad_blend_stage(softpipe); softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); softpipe->quad.output = sp_quad_output_stage(softpipe); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 9724d185d15..6f0b234d893 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -124,7 +124,6 @@ struct softpipe_context { struct quad_stage *depth_test; struct quad_stage *occlusion; struct quad_stage *coverage; - struct quad_stage *bufloop; struct quad_stage *blend; struct quad_stage *colormask; struct quad_stage *output; diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h index f1e0281764f..08513cb95f1 100644 --- a/src/gallium/drivers/softpipe/sp_quad.h +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -58,7 +58,6 @@ struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ); struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ); struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe ); struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ); struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ); struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); -- cgit v1.2.3 From 28cf8c8fdcbd2817b93f27cad2bba3b4fb8aecf5 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 8 Apr 2008 21:07:14 -0600 Subject: gallium: keep track of num_vertex_attribs/buffers for shorter loops --- src/gallium/drivers/softpipe/sp_context.h | 2 ++ src/gallium/drivers/softpipe/sp_draw_arrays.c | 22 +++++++++------------- src/gallium/drivers/softpipe/sp_state_vertex.c | 2 ++ 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 6f0b234d893..2442bd1eb00 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -83,6 +83,8 @@ struct softpipe_context { unsigned num_samplers; unsigned num_textures; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; /* Counter for occlusion queries. Note this supports overlapping * queries. diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index ab54050d3fe..61bcf518993 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -125,14 +125,12 @@ softpipe_draw_elements(struct pipe_context *pipe, /* * Map vertex buffers */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (sp->vertex_buffer[i].buffer) { - void *buf - = pipe->winsys->buffer_map(pipe->winsys, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(draw, i, buf); - } + for (i = 0; i < sp->num_vertex_buffers; i++) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { @@ -153,11 +151,9 @@ softpipe_draw_elements(struct pipe_context *pipe, /* * unmap vertex/index buffers - will cause draw module to flush */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (sp->vertex_buffer[i].buffer) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); - } + for (i = 0; i < sp->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); } if (indexBuffer) { draw_set_mapped_element_buffer(draw, 0, NULL); diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index e0230e16a4e..46b6991195d 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -47,6 +47,7 @@ softpipe_set_vertex_elements(struct pipe_context *pipe, memcpy(softpipe->vertex_element, attribs, count * sizeof(struct pipe_vertex_element)); + softpipe->num_vertex_elements = count; softpipe->dirty |= SP_NEW_VERTEX; @@ -64,6 +65,7 @@ softpipe_set_vertex_buffers(struct pipe_context *pipe, assert(count <= PIPE_MAX_ATTRIBS); memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0])); + softpipe->num_vertex_buffers = count; softpipe->dirty |= SP_NEW_VERTEX; -- cgit v1.2.3 From 1f888abf16ce4de00231505b8d1bc68426b04e8f Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 8 Apr 2008 21:14:58 -0600 Subject: i915: keep track of num_vertex_attribs/buffers for shorter loops --- src/gallium/drivers/i915simple/i915_context.c | 22 +++++++++------------- src/gallium/drivers/i915simple/i915_context.h | 2 ++ src/gallium/drivers/i915simple/i915_state.c | 3 +++ 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index fee33d82de4..58a5854f0d9 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -65,14 +65,12 @@ i915_draw_elements( struct pipe_context *pipe, /* * Map vertex buffers */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (i915->vertex_buffer[i].buffer) { - void *buf - = pipe->winsys->buffer_map(pipe->winsys, - i915->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(draw, i, buf); - } + for (i = 0; i < i915->num_vertex_buffers; i++) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + i915->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { @@ -96,11 +94,9 @@ i915_draw_elements( struct pipe_context *pipe, /* * unmap vertex/index buffers */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (i915->vertex_buffer[i].buffer) { - pipe->winsys->buffer_unmap(pipe->winsys, i915->vertex_buffer[i].buffer); - draw_set_mapped_vertex_buffer(draw, i, NULL); - } + for (i = 0; i < i915->num_vertex_buffers; i++) { + pipe->winsys->buffer_unmap(pipe->winsys, i915->vertex_buffer[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); } if (indexBuffer) { pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 8e707ea574a..38e6a348845 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -238,6 +238,8 @@ struct i915_context unsigned num_samplers; unsigned num_textures; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; unsigned *batch_start; diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 4404bc45901..24d31ad740e 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -694,6 +694,8 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe, struct i915_context *i915 = i915_context(pipe); memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); + i915->num_vertex_buffers = count; + /* pass-through to draw module */ draw_set_vertex_buffers(i915->draw, count, buffers); } @@ -703,6 +705,7 @@ static void i915_set_vertex_elements(struct pipe_context *pipe, const struct pipe_vertex_element *elements) { struct i915_context *i915 = i915_context(pipe); + i915->num_vertex_elements = count; /* pass-through to draw module */ draw_set_vertex_elements(i915->draw, count, elements); } -- cgit v1.2.3 From 7e57a9e8bba322b2ba8a02eec4b79c90e7052738 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Tue, 8 Apr 2008 21:18:21 -0600 Subject: cell: keep track of num_vertex_attribs/buffers for shorter loops --- src/gallium/drivers/cell/ppu/cell_context.h | 2 ++ src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 22 +++++++++------------- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 2 ++ 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 7f656a97447..8df41c1d4ca 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -104,7 +104,9 @@ struct cell_context uint num_textures; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + uint num_vertex_buffers; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + uint num_vertex_elements; ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; ubyte *zsbuf_map; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index b896252f817..36af5be5f02 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -123,14 +123,12 @@ cell_draw_elements(struct pipe_context *pipe, /* * Map vertex buffers */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (sp->vertex_buffer[i].buffer) { - void *buf = pipe->winsys->buffer_map(pipe->winsys, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); - draw_set_mapped_vertex_buffer(draw, i, buf); - } + for (i = 0; i < sp->num_vertex_buffers; i++) { + void *buf = pipe->winsys->buffer_map(pipe->winsys, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); + draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { @@ -151,11 +149,9 @@ cell_draw_elements(struct pipe_context *pipe, /* * unmap vertex/index buffers - will cause draw module to flush */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (sp->vertex_buffer[i].buffer) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); - } + for (i = 0; i < sp->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); } if (indexBuffer) { draw_set_mapped_element_buffer(draw, 0, NULL); diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 6c83b8dc722..114684c2a33 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -45,6 +45,7 @@ cell_set_vertex_elements(struct pipe_context *pipe, assert(count <= PIPE_MAX_ATTRIBS); memcpy(cell->vertex_element, elements, count * sizeof(elements[0])); + cell->num_vertex_elements = count; cell->dirty |= CELL_NEW_VERTEX; @@ -62,6 +63,7 @@ cell_set_vertex_buffers(struct pipe_context *pipe, assert(count <= PIPE_MAX_ATTRIBS); memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0])); + cell->num_vertex_buffers = count; cell->dirty |= CELL_NEW_VERTEX; -- cgit v1.2.3 From bdfcce47921cdd808740ee26e6781837351bad98 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 9 Apr 2008 13:58:51 +1000 Subject: nv40: use vb/ve counts rather than shader inputs for related loops --- src/gallium/drivers/nv40/nv40_draw.c | 9 ++------- src/gallium/drivers/nv40/nv40_vbo.c | 25 ++++--------------------- 2 files changed, 6 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 7f008aca3a8..dd5cc8fc998 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -239,9 +239,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); nv40_state_emit(nv40); - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { - if (!nv40->vtxbuf[i].buffer) - continue; + for (i = 0; i < nv40->vtxbuf_nr; i++) { map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(nv40->draw, i, map); @@ -262,11 +260,8 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, draw_arrays(nv40->draw, mode, start, count); - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { - if (!nv40->vtxbuf[i].buffer) - continue; + for (i = 0; i < nv40->vtxbuf_nr; i++) ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer); - } if (idxbuf) ws->buffer_unmap(ws, idxbuf); diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index ec88470b317..06374184b12 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -481,40 +481,23 @@ nv40_draw_elements(struct pipe_context *pipe, static boolean nv40_vbo_validate(struct nv40_context *nv40) { - struct nv40_vertex_program *vp = nv40->vertprog; struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; struct nouveau_grobj *curie = nv40->screen->curie; struct pipe_buffer *ib = nv40->idxbuf; unsigned ib_format = nv40->idxbuf_format; - unsigned inputs, hw, num_hw; unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - - inputs = vp->ir; - for (hw = 0; hw < 16 && inputs; hw++) { - if (inputs & (1 << hw)) { - num_hw = hw; - inputs &= ~(1 << hw); - } - } - num_hw++; + int hw; vtxbuf = so_new(20, 18); - so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); + so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); vtxfmt = so_new(17, 0); - so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), num_hw); + so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); - inputs = vp->ir; - for (hw = 0; hw < num_hw; hw++) { + for (hw = 0; hw < nv40->vtxelt_nr; hw++) { struct pipe_vertex_element *ve; struct pipe_vertex_buffer *vb; unsigned type, ncomp; - if (!(inputs & (1 << hw))) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); - continue; - } - ve = &nv40->vtxelt[hw]; vb = &nv40->vtxbuf[ve->vertex_buffer_index]; -- cgit v1.2.3 From f7e475280a0b98484a8c1a98f18b2733532486b4 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 9 Apr 2008 14:17:48 -0600 Subject: gallium: fix bug in PIPE_BLENDFACTOR_INV_DST_ALPHA case --- src/gallium/drivers/softpipe/sp_quad_blend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 8be8025f40c..802472df45f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -561,7 +561,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) case PIPE_BLENDFACTOR_INV_DST_ALPHA: { float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[3]); /* A */ + VEC4_SUB(inv_comp, one, dest[3]); /* A */ VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ -- cgit v1.2.3 From c95dcc49629b72b95826e87e067d7a48753605fb Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Tue, 8 Apr 2008 17:59:28 +0100 Subject: remove usage of vertex_header --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 6 - src/gallium/auxiliary/draw/draw_vbuf.c | 20 - src/gallium/auxiliary/draw/draw_vertex.c | 5 - src/gallium/auxiliary/draw/draw_vertex.h | 2 - src/gallium/auxiliary/draw/draw_vf.c | 56 +- src/gallium/drivers/softpipe/Makefile | 1 + src/gallium/drivers/softpipe/SConscript | 1 + src/gallium/drivers/softpipe/sp_prim_setup.c | 1189 +-------------------- src/gallium/drivers/softpipe/sp_prim_setup.h | 6 + src/gallium/drivers/softpipe/sp_prim_vbuf.c | 223 ++-- src/gallium/drivers/softpipe/sp_setup.c | 1249 +++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_setup.h | 53 + src/gallium/drivers/softpipe/sp_state_derived.c | 13 +- 13 files changed, 1480 insertions(+), 1344 deletions(-) create mode 100644 src/gallium/drivers/softpipe/sp_setup.c create mode 100644 src/gallium/drivers/softpipe/sp_setup.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 0806076956d..e4e144ef194 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -212,12 +212,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, case EMIT_1F: feme->fetch[i].emit = emit_R32_FLOAT; break; - case EMIT_HEADER: - feme->fetch[i].ptr = (const ubyte *)&zero; - feme->fetch[i].pitch = 0; - feme->fetch[i].fetch = fetch_R32_FLOAT; - feme->fetch[i].emit = emit_R32_FLOAT; - break; case EMIT_1F_PSIZE: feme->fetch[i].ptr = (const ubyte *)&feme->draw->rasterizer->point_size; feme->fetch[i].pitch = 0; diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_vbuf.c index f83b441e939..e3216ff711d 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_vbuf.c @@ -126,13 +126,6 @@ dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) case EMIT_OMIT: debug_printf("EMIT_OMIT:"); break; - case EMIT_ALL: - assert(i == 0); - assert(j == 0); - debug_printf("EMIT_ALL:\t"); - for(k = 0; k < vinfo->size*4; ++k) - debug_printf("%02x ", *data++); - break; case EMIT_1F: debug_printf("EMIT_1F:\t"); debug_printf("%f ", *(float *)data); data += sizeof(float); @@ -217,19 +210,6 @@ emit_vertex( struct vbuf_stage *vbuf, case EMIT_OMIT: /* no-op */ break; - case EMIT_ALL: - /* just copy the whole vertex as-is to the vbuf */ - assert(i == 0); - assert(j == 0); - memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4); - vbuf->vertex_ptr += vinfo->size; - count += vinfo->size; - break; - case EMIT_HEADER: - memcpy(vbuf->vertex_ptr, vertex, sizeof(*vertex)); - *vbuf->vertex_ptr += sizeof(*vertex) / 4; - count += sizeof(*vertex) / 4; - break; case EMIT_1F: *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); count++; diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 970adc95e74..168036eee84 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -52,9 +52,6 @@ draw_compute_vertex_size(struct vertex_info *vinfo) switch (vinfo->emit[i]) { case EMIT_OMIT: break; - case EMIT_HEADER: - vinfo->size += sizeof(struct vertex_header) / 4; - break; case EMIT_4UB: /* fall-through */ case EMIT_1F_PSIZE: @@ -71,8 +68,6 @@ draw_compute_vertex_size(struct vertex_info *vinfo) case EMIT_4F: vinfo->size += 4; break; - case EMIT_ALL: - /* fall-through */ default: assert(0); } diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index abd2017ed33..65818463cae 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -47,8 +47,6 @@ */ enum attrib_emit { EMIT_OMIT, /**< don't emit the attribute */ - EMIT_ALL, /**< emit whole post-xform vertex, w/ header */ - EMIT_HEADER, /**< emit vertex_header struct (XXX temp?) */ EMIT_1F, EMIT_1F_PSIZE, /**< insert constant point size */ EMIT_2F, diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index 7bb34ace7aa..9d0154c50dc 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -205,7 +205,7 @@ void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, const struct vertex_info *vinfo, float point_size ) { - unsigned i, j, k; + unsigned i, j; struct draw_vf_attr *a = vf->attr; struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS]; unsigned count = 0; /* for debug/sanity */ @@ -217,60 +217,6 @@ void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, case EMIT_OMIT: /* no-op */ break; - case EMIT_ALL: { - /* just copy the whole vertex as-is to the vbuf */ - unsigned s = vinfo->size; - assert(i == 0); - assert(j == 0); - /* copy the vertex header */ - /* XXX: we actually don't copy the header, just pad it */ - attrs[nr_attrs].attrib = 0; - attrs[nr_attrs].format = DRAW_EMIT_PAD; - attrs[nr_attrs].offset = offsetof(struct vertex_header, data); - s -= offsetof(struct vertex_header, data)/4; - count += offsetof(struct vertex_header, data)/4; - nr_attrs++; - /* copy the vertex data */ - for(k = 0; k < (s & ~0x3); k += 4) { - attrs[nr_attrs].attrib = k/4; - attrs[nr_attrs].format = DRAW_EMIT_4F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 4; - } - /* tail */ - /* XXX: actually, this shouldn't be needed */ - attrs[nr_attrs].attrib = k/4; - attrs[nr_attrs].offset = 0; - switch(s & 0x3) { - case 0: - break; - case 1: - attrs[nr_attrs].format = DRAW_EMIT_1F; - nr_attrs++; - count += 1; - break; - case 2: - attrs[nr_attrs].format = DRAW_EMIT_2F; - nr_attrs++; - count += 2; - break; - case 3: - attrs[nr_attrs].format = DRAW_EMIT_3F; - nr_attrs++; - count += 3; - break; - } - break; - } - case EMIT_HEADER: - /* XXX emit new DRAW_EMIT_HEADER attribute??? */ - attrs[nr_attrs].attrib = 0; - attrs[nr_attrs].format = DRAW_EMIT_PAD; - attrs[nr_attrs].offset = offsetof(struct vertex_header, data); - count += offsetof(struct vertex_header, data)/4; - nr_attrs++; - break; case EMIT_1F: attrs[nr_attrs].attrib = j; attrs[nr_attrs].format = DRAW_EMIT_1F; diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 19dfd8c1a7a..120bdfd9dd2 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -27,6 +27,7 @@ C_SOURCES = \ sp_quad_stencil.c \ sp_quad_stipple.c \ sp_screen.c \ + sp_setup.c \ sp_state_blend.c \ sp_state_clip.c \ sp_state_derived.c \ diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 06931fa8d8b..c1f7daa8ab3 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -14,6 +14,7 @@ softpipe = env.ConvenienceLibrary( 'sp_flush.c', 'sp_prim_setup.c', 'sp_prim_vbuf.c', + 'sp_setup.c', 'sp_quad_alpha_test.c', 'sp_quad_blend.c', 'sp_quad.c', diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index c7eb12b3bb7..6fe463b74c7 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -26,7 +26,9 @@ **************************************************************************/ /** - * \brief Primitive rasterization/rendering (points, lines, triangles) + * \brief A draw stage that drives our triangle setup routines from + * within the draw pipeline. One of two ways to drive setup, the + * other being in sp_prim_vbuf.c. * * \author Keith Whitwell <keith@tungstengraphics.com> * \author Brian Paul @@ -34,29 +36,12 @@ #include "sp_context.h" -#include "sp_headers.h" -#include "sp_quad.h" +#include "sp_setup.h" #include "sp_state.h" #include "sp_prim_setup.h" #include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" - -#define DEBUG_VERTS 0 -#define DEBUG_FRAGS 0 - -/** - * Triangle edge info - */ -struct edge { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ - float dxdy; /**< dx/dy */ - float sx, sy; /**< first sample point coord */ - int lines; /**< number of lines on this edge */ -}; - /** * Triangle setup info (derived from draw_stage). @@ -65,39 +50,7 @@ struct edge { struct setup_stage { struct draw_stage stage; /**< This must be first (base class) */ - struct softpipe_context *softpipe; - - /* Vertices are just an array of floats making up each attribute in - * turn. Currently fixed at 4 floats, but should change in time. - * Codegen will help cope with this. - */ - const struct vertex_header *vmax; - const struct vertex_header *vmid; - const struct vertex_header *vmin; - const struct vertex_header *vprovoke; - - struct edge ebot; - struct edge etop; - struct edge emaj; - - float oneoverarea; - - struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; - struct tgsi_interp_coef posCoef; /* For Z, W */ - struct quad_header quad; - - struct { - int left[2]; /**< [0] = row0, [1] = row1 */ - int right[2]; - int y; - unsigned y_flags; - unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ - } span; - -#if DEBUG_FRAGS - uint numFragsEmitted; /**< per primitive */ - uint numFragsWritten; /**< per primitive */ -#endif + struct setup_context *setup; }; @@ -111,1112 +64,50 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) } -/** - * Clip setup->quad against the scissor/surface bounds. - */ -static INLINE void -quad_clip(struct setup_stage *setup) -{ - const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (setup->quad.x0 >= maxx || - setup->quad.y0 >= maxy || - setup->quad.x0 + 1 < minx || - setup->quad.y0 + 1 < miny) { - /* totally clipped */ - setup->quad.mask = 0x0; - return; - } - if (setup->quad.x0 < minx) - setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup->quad.y0 < miny) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup->quad.x0 == maxx - 1) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup->quad.y0 == maxy - 1) - setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); -} - - -/** - * Emit a quad (pass to next stage) with clipping. - */ -static INLINE void -clip_emit_quad(struct setup_stage *setup) -{ - quad_clip(setup); - if (setup->quad.mask) { - struct softpipe_context *sp = setup->softpipe; - sp->quad.first->run(sp->quad.first, &setup->quad); - } -} - - -/** - * Emit a quad (pass to next stage). No clipping is done. - */ -static INLINE void -emit_quad( struct setup_stage *setup, int x, int y, unsigned mask ) -{ - struct softpipe_context *sp = setup->softpipe; - setup->quad.x0 = x; - setup->quad.y0 = y; - setup->quad.mask = mask; -#if DEBUG_FRAGS - if (mask & 1) setup->numFragsEmitted++; - if (mask & 2) setup->numFragsEmitted++; - if (mask & 4) setup->numFragsEmitted++; - if (mask & 8) setup->numFragsEmitted++; -#endif - sp->quad.first->run(sp->quad.first, &setup->quad); -#if DEBUG_FRAGS - mask = setup->quad.mask; - if (mask & 1) setup->numFragsWritten++; - if (mask & 2) setup->numFragsWritten++; - if (mask & 4) setup->numFragsWritten++; - if (mask & 8) setup->numFragsWritten++; -#endif -} - - -/** - * Given an X or Y coordinate, return the block/quad coordinate that it - * belongs to. - */ -static INLINE int block( int x ) -{ - return x & ~1; -} - - -/** - * Compute mask which indicates which pixels in the 2x2 quad are actually inside - * the triangle's bounds. - * - * this is pretty nasty... may need to rework flush_spans again to - * fix it, if possible. - */ -static unsigned calculate_mask( struct setup_stage *setup, int x ) -{ - unsigned mask = 0x0; - - if (x >= setup->span.left[0] && x < setup->span.right[0]) - mask |= MASK_TOP_LEFT; - - if (x >= setup->span.left[1] && x < setup->span.right[1]) - mask |= MASK_BOTTOM_LEFT; - - if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0]) - mask |= MASK_TOP_RIGHT; - - if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1]) - mask |= MASK_BOTTOM_RIGHT; - - return mask; -} - - -/** - * Render a horizontal span of quads - */ -static void flush_spans( struct setup_stage *setup ) -{ - int minleft, maxright; - int x; - - switch (setup->span.y_flags) { - case 0x3: - /* both odd and even lines written (both quad rows) */ - minleft = MIN2(setup->span.left[0], setup->span.left[1]); - maxright = MAX2(setup->span.right[0], setup->span.right[1]); - break; - - case 0x1: - /* only even line written (quad top row) */ - minleft = setup->span.left[0]; - maxright = setup->span.right[0]; - break; - - case 0x2: - /* only odd line written (quad bottom row) */ - minleft = setup->span.left[1]; - maxright = setup->span.right[1]; - break; - - default: - return; - } - - /* XXX this loop could be moved into the above switch cases and - * calculate_mask() could be simplified a bit... - */ - for (x = block(minleft); x <= block(maxright); x += 2) { - emit_quad( setup, x, setup->span.y, - calculate_mask( setup, x ) ); - } - - setup->span.y = 0; - setup->span.y_flags = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; -} - -#if DEBUG_VERTS -static void print_vertex(const struct setup_stage *setup, - const struct vertex_header *v) -{ - int i; - debug_printf("Vertex: (%p)\n", v); - for (i = 0; i < setup->quad.nr_attrs; i++) { - debug_printf(" %d: %f %f %f %f\n", i, - v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); - } -} -#endif - -static boolean setup_sort_vertices( struct setup_stage *setup, - const struct prim_header *prim ) -{ - const struct vertex_header *v0 = prim->v[0]; - const struct vertex_header *v1 = prim->v[1]; - const struct vertex_header *v2 = prim->v[2]; - -#if DEBUG_VERTS - debug_printf("Triangle:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); - print_vertex(setup, v2); -#endif - - setup->vprovoke = v2; - - /* determine bottom to top order of vertices */ - { - float y0 = v0->data[0][1]; - float y1 = v1->data[0][1]; - float y2 = v2->data[0][1]; - if (y0 <= y1) { - if (y1 <= y2) { - /* y0<=y1<=y2 */ - setup->vmin = v0; - setup->vmid = v1; - setup->vmax = v2; - } - else if (y2 <= y0) { - /* y2<=y0<=y1 */ - setup->vmin = v2; - setup->vmid = v0; - setup->vmax = v1; - } - else { - /* y0<=y2<=y1 */ - setup->vmin = v0; - setup->vmid = v2; - setup->vmax = v1; - } - } - else { - if (y0 <= y2) { - /* y1<=y0<=y2 */ - setup->vmin = v1; - setup->vmid = v0; - setup->vmax = v2; - } - else if (y2 <= y1) { - /* y2<=y1<=y0 */ - setup->vmin = v2; - setup->vmid = v1; - setup->vmax = v0; - } - else { - /* y1<=y2<=y0 */ - setup->vmin = v1; - setup->vmid = v2; - setup->vmax = v0; - } - } - } - - setup->ebot.dx = setup->vmid->data[0][0] - setup->vmin->data[0][0]; - setup->ebot.dy = setup->vmid->data[0][1] - setup->vmin->data[0][1]; - setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; - setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; - setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0]; - setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1]; - - /* - * Compute triangle's area. Use 1/area to compute partial - * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. - */ - { - const float area = (setup->emaj.dx * setup->ebot.dy - - setup->ebot.dx * setup->emaj.dy); - - setup->oneoverarea = 1.0f / area; - /* - debug_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup->oneoverarea, area, prim->det ); - */ - } - - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test - */ - setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); - - return TRUE; -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex->data[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. - * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_coeff( struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - assert(i <= 3); - - coef->dadx[i] = 0; - coef->dady[i] = 0; - /* need provoking vertex info! - */ - coef->a0[i] = setup->vprovoke->data[vertSlot][i]; -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static void tri_linear_coeff( struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; - float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - coef->a0[i] = (setup->vmin->data[vertSlot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void tri_persp_coeff( struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - /* premultiply by 1/w (v->data[0][3] is always W): - */ - float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; - float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3]; - float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; - float botda = mida - mina; - float majda = maxa - mina; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - /* - debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, - setup->vmin->data[vertSlot][i], - setup->vmid->data[vertSlot][i], - setup->vmax->data[vertSlot][i] - ); - */ - assert(i <= 3); - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (mina - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial, though Y has to be inverted for OpenGL. - * Z and W are copied from posCoef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. - */ static void -setup_fragcoord_coeff(struct setup_stage *setup, uint slot) -{ - /*X*/ - setup->coef[slot].a0[0] = 0; - setup->coef[slot].dadx[0] = 1.0; - setup->coef[slot].dady[0] = 0.0; - /*Y*/ - if (setup->softpipe->rasterizer->origin_lower_left) { - /* y=0=bottom */ - const int winHeight = setup->softpipe->framebuffer.height; - setup->coef[slot].a0[1] = (float) (winHeight - 1); - setup->coef[slot].dady[1] = -1.0; - } - else { - /* y=0=top */ - setup->coef[slot].a0[1] = 0.0; - setup->coef[slot].dady[1] = 1.0; - } - setup->coef[slot].dadx[1] = 0.0; - /*Z*/ - setup->coef[slot].a0[2] = setup->posCoef.a0[2]; - setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; - setup->coef[slot].dady[2] = setup->posCoef.dady[2]; - /*W*/ - setup->coef[slot].a0[3] = setup->posCoef.a0[3]; - setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; - setup->coef[slot].dady[3] = setup->posCoef.dady[3]; -} - - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. - */ -static void setup_tri_coefficients( struct setup_stage *setup ) -{ - struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; - const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); - uint fragSlot; - - /* z and w are done by linear interpolation: - */ - tri_linear_coeff(setup, &setup->posCoef, 0, 2); - tri_linear_coeff(setup, &setup->posCoef, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; - uint j; - - switch (vinfo->interp_mode[fragSlot]) { - case INTERP_CONSTANT: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { - /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; - setup->coef[fragSlot].dadx[1] = 0.0; - setup->coef[fragSlot].dady[1] = 0.0; - } - } -} - - - -static void setup_tri_edges( struct setup_stage *setup ) -{ - float vmin_x = setup->vmin->data[0][0] + 0.5f; - float vmid_x = setup->vmid->data[0][0] + 0.5f; - - float vmin_y = setup->vmin->data[0][1] - 0.5f; - float vmid_y = setup->vmid->data[0][1] - 0.5f; - float vmax_y = setup->vmax->data[0][1] - 0.5f; - - setup->emaj.sy = CEILF(vmin_y); - setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy); - setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; - setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; - - setup->etop.sy = CEILF(vmid_y); - setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy); - setup->etop.dxdy = setup->etop.dx / setup->etop.dy; - setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; - - setup->ebot.sy = CEILF(vmin_y); - setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy); - setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; - setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; -} - - -/** - * Render the upper or lower half of a triangle. - * Scissoring/cliprect is applied here too. - */ -static void subtriangle( struct setup_stage *setup, - struct edge *eleft, - struct edge *eright, - unsigned lines ) -{ - const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - int y, start_y, finish_y; - int sy = (int)eleft->sy; - - assert((int)eleft->sy == (int) eright->sy); - - /* clip top/bottom */ - start_y = sy; - finish_y = sy + lines; - - if (start_y < miny) - start_y = miny; - - if (finish_y > maxy) - finish_y = maxy; - - start_y -= sy; - finish_y -= sy; - - /* - debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); - */ - - for (y = start_y; y < finish_y; y++) { - - /* avoid accumulating adds as floats don't have the precision to - * accurately iterate large triangle edges that way. luckily we - * can just multiply these days. - * - * this is all drowned out by the attribute interpolation anyway. - */ - int left = (int)(eleft->sx + y * eleft->dxdy); - int right = (int)(eright->sx + y * eright->dxdy); - - /* clip left/right */ - if (left < minx) - left = minx; - if (right > maxx) - right = maxx; - - if (left < right) { - int _y = sy + y; - if (block(_y) != setup->span.y) { - flush_spans(setup); - setup->span.y = block(_y); - } - - setup->span.left[_y&1] = left; - setup->span.right[_y&1] = right; - setup->span.y_flags |= 1<<(_y&1); - } - } - - - /* save the values so that emaj can be restarted: - */ - eleft->sx += lines * eleft->dxdy; - eright->sx += lines * eright->dxdy; - eleft->sy += lines; - eright->sy += lines; -} - - -/** - * Do setup for triangle rasterization, then render the triangle. - */ -static void setup_tri( struct draw_stage *stage, - struct prim_header *prim ) +do_tri(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); - /* - debug_printf("%s\n", __FUNCTION__ ); - */ - -#if DEBUG_FRAGS - setup->numFragsEmitted = 0; - setup->numFragsWritten = 0; -#endif - - setup_sort_vertices( setup, prim ); - setup_tri_coefficients( setup ); - setup_tri_edges( setup ); - - setup->quad.prim = PRIM_TRI; - - setup->span.y = 0; - setup->span.y_flags = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ - - /* init_constant_attribs( setup ); */ - - if (setup->oneoverarea < 0.0) { - /* emaj on left: - */ - subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); - subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); - } - else { - /* emaj on right: - */ - subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); - subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); - } - - flush_spans( setup ); - -#if DEBUG_FRAGS - printf("Tri: %u frags emitted, %u written\n", - setup->numFragsEmitted, - setup->numFragsWritten); -#endif -} - - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. - */ -static void -line_linear_coeff(struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (setup->vmin->data[vertSlot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a line. - */ -static void -line_persp_coeff(struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - /* XXX double-check/verify this arithmetic */ - const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; - const float a1 = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; - const float da = a1 - a0; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (setup->vmin->data[vertSlot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); -} - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmax are initialized. - */ -static INLINE void -setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) -{ - struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; - const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); - uint fragSlot; - - /* use setup->vmin, vmax to point to vertices */ - setup->vprovoke = prim->v[1]; - setup->vmin = prim->v[0]; - setup->vmax = prim->v[1]; - - setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; - setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; - /* NOTE: this is not really 1/area */ - setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx + - setup->emaj.dy * setup->emaj.dy); - - /* z and w are done by linear interpolation: - */ - line_linear_coeff(setup, &setup->posCoef, 0, 2); - line_linear_coeff(setup, &setup->posCoef, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; - uint j; - - switch (vinfo->interp_mode[fragSlot]) { - case INTERP_CONSTANT: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { - /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; - setup->coef[fragSlot].dadx[1] = 0.0; - setup->coef[fragSlot].dady[1] = 0.0; - } - } + setup_tri( setup->setup, + prim->det, + prim->v[0]->data, + prim->v[1]->data, + prim->v[2]->data ); } - -/** - * Plot a pixel in a line segment. - */ -static INLINE void -plot(struct setup_stage *setup, int x, int y) -{ - const int iy = y & 1; - const int ix = x & 1; - const int quadX = x - ix; - const int quadY = y - iy; - const int mask = (1 << ix) << (2 * iy); - - if (quadX != setup->quad.x0 || - quadY != setup->quad.y0) - { - /* flush prev quad, start new quad */ - - if (setup->quad.x0 != -1) - clip_emit_quad(setup); - - setup->quad.x0 = quadX; - setup->quad.y0 = quadY; - setup->quad.mask = 0x0; - } - - setup->quad.mask |= mask; -} - - -/** - * Do setup for line rasterization, then render the line. - * Single-pixel width, no stipple, etc. We rely on the 'draw' module - * to handle stippling and wide lines. - */ static void -setup_line(struct draw_stage *stage, struct prim_header *prim) +do_line(struct draw_stage *stage, struct prim_header *prim) { - const struct vertex_header *v0 = prim->v[0]; - const struct vertex_header *v1 = prim->v[1]; struct setup_stage *setup = setup_stage( stage ); - int x0 = (int) v0->data[0][0]; - int x1 = (int) v1->data[0][0]; - int y0 = (int) v0->data[0][1]; - int y1 = (int) v1->data[0][1]; - int dx = x1 - x0; - int dy = y1 - y0; - int xstep, ystep; - if (dx == 0 && dy == 0) - return; - - setup_line_coefficients(setup, prim); - - if (dx < 0) { - dx = -dx; /* make positive */ - xstep = -1; - } - else { - xstep = 1; - } - - if (dy < 0) { - dy = -dy; /* make positive */ - ystep = -1; - } - else { - ystep = 1; - } - - assert(dx >= 0); - assert(dy >= 0); - - setup->quad.x0 = setup->quad.y0 = -1; - setup->quad.mask = 0x0; - setup->quad.prim = PRIM_LINE; - /* XXX temporary: set coverage to 1.0 so the line appears - * if AA mode happens to be enabled. - */ - setup->quad.coverage[0] = - setup->quad.coverage[1] = - setup->quad.coverage[2] = - setup->quad.coverage[3] = 1.0; - - if (dx > dy) { - /*** X-major line ***/ - int i; - const int errorInc = dy + dy; - int error = errorInc - dx; - const int errorDec = error - dx; - - for (i = 0; i < dx; i++) { - plot(setup, x0, y0); - - x0 += xstep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - y0 += ystep; - } - } - } - else { - /*** Y-major line ***/ - int i; - const int errorInc = dx + dx; - int error = errorInc - dy; - const int errorDec = error - dy; - - for (i = 0; i < dy; i++) { - plot(setup, x0, y0); - - y0 += ystep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - x0 += xstep; - } - } - } - - /* draw final quad */ - if (setup->quad.mask) { - clip_emit_quad(setup); - } + setup_line( setup->setup, + prim->v[0]->data, + prim->v[1]->data ); } - static void -point_persp_coeff(struct setup_stage *setup, - const struct vertex_header *vert, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - assert(i <= 3); - coef->dadx[i] = 0.0F; - coef->dady[i] = 0.0F; - coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3]; -} - - -/** - * Do setup for point rasterization, then render the point. - * Round or square points... - * XXX could optimize a lot for 1-pixel points. - */ -static void -setup_point(struct draw_stage *stage, struct prim_header *prim) +do_point(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); - struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; - const struct vertex_header *v0 = prim->v[0]; - const int sizeAttr = setup->softpipe->psize_slot; - const float size - = sizeAttr > 0 ? v0->data[sizeAttr][0] - : setup->softpipe->rasterizer->point_size; - const float halfSize = 0.5F * size; - const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; - const float x = v0->data[0][0]; /* Note: data[0] is always position */ - const float y = v0->data[0][1]; - const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); - uint fragSlot; - - /* For points, all interpolants are constant-valued. - * However, for point sprites, we'll need to setup texcoords appropriately. - * XXX: which coefficients are the texcoords??? - * We may do point sprites as textured quads... - * - * KW: We don't know which coefficients are texcoords - ultimately - * the choice of what interpolation mode to use for each attribute - * should be determined by the fragment program, using - * per-attribute declaration statements that include interpolation - * mode as a parameter. So either the fragment program will have - * to be adjusted for pointsprite vs normal point behaviour, or - * otherwise a special interpolation mode will have to be defined - * which matches the required behaviour for point sprites. But - - * the latter is not a feature of normal hardware, and as such - * probably should be ruled out on that basis. - */ - setup->vprovoke = prim->v[0]; - - /* setup Z, W */ - const_coeff(setup, &setup->posCoef, 0, 2); - const_coeff(setup, &setup->posCoef, 0, 3); - - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; - uint j; - - switch (vinfo->interp_mode[fragSlot]) { - case INTERP_CONSTANT: - /* fall-through */ - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - point_persp_coeff(setup, setup->vprovoke, - &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { - /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; - setup->coef[fragSlot].dadx[1] = 0.0; - setup->coef[fragSlot].dady[1] = 0.0; - } - } - - setup->quad.prim = PRIM_POINT; - - if (halfSize <= 0.5 && !round) { - /* special case for 1-pixel points */ - const int ix = ((int) x) & 1; - const int iy = ((int) y) & 1; - setup->quad.x0 = (int) x - ix; - setup->quad.y0 = (int) y - iy; - setup->quad.mask = (1 << ix) << (2 * iy); - clip_emit_quad(setup); - } - else { - if (round) { - /* rounded points */ - const int ixmin = block((int) (x - halfSize)); - const int ixmax = block((int) (x + halfSize)); - const int iymin = block((int) (y - halfSize)); - const int iymax = block((int) (y + halfSize)); - const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ - const float rmax = halfSize + 0.7071F; - const float rmin2 = MAX2(0.0F, rmin * rmin); - const float rmax2 = rmax * rmax; - const float cscale = 1.0F / (rmax2 - rmin2); - int ix, iy; - - for (iy = iymin; iy <= iymax; iy += 2) { - for (ix = ixmin; ix <= ixmax; ix += 2) { - float dx, dy, dist2, cover; - - setup->quad.mask = 0x0; - - dx = (ix + 0.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_TOP_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_TOP_RIGHT; - } - dx = (ix + 0.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_BOTTOM_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_BOTTOM_RIGHT; - } - - if (setup->quad.mask) { - setup->quad.x0 = ix; - setup->quad.y0 = iy; - clip_emit_quad(setup); - } - } - } - } - else { - /* square points */ - const int xmin = (int) (x + 0.75 - halfSize); - const int ymin = (int) (y + 0.25 - halfSize); - const int xmax = xmin + (int) size; - const int ymax = ymin + (int) size; - /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ - const int ixmin = block(xmin); - const int ixmax = block(xmax - 1); - const int iymin = block(ymin); - const int iymax = block(ymax - 1); - int ix, iy; - - /* - debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); - */ - for (iy = iymin; iy <= iymax; iy += 2) { - uint rowMask = 0xf; - if (iy < ymin) { - /* above the top edge */ - rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - } - if (iy + 1 >= ymax) { - /* below the bottom edge */ - rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); - } - - for (ix = ixmin; ix <= ixmax; ix += 2) { - uint mask = rowMask; - - if (ix < xmin) { - /* fragment is past left edge of point, turn off left bits */ - mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - } - if (ix + 1 >= xmax) { - /* past the right edge */ - mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - } - - setup->quad.mask = mask; - setup->quad.x0 = ix; - setup->quad.y0 = iy; - clip_emit_quad(setup); - } - } - } - } + setup_point( setup->setup, + prim->v[0]->data ); } + static void setup_begin( struct draw_stage *stage ) { struct setup_stage *setup = setup_stage(stage); - struct softpipe_context *sp = setup->softpipe; - const struct sp_fragment_shader *fs = setup->softpipe->fs; - uint i; - - if (sp->dirty) { - softpipe_update_derived(sp); - } - /* Mark surfaces as defined now */ - for (i = 0; i < sp->framebuffer.num_cbufs; i++){ - if (sp->framebuffer.cbufs[i]) { - sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; - } - } - if (sp->framebuffer.zsbuf) { - sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; - } + setup_prepare( setup->setup ); - setup->quad.nr_attrs = fs->info.num_inputs; - - sp->quad.first->begin(sp->quad.first); - - stage->point = setup_point; - stage->line = setup_line; - stage->tri = setup_tri; + stage->point = do_point; + stage->line = do_line; + stage->tri = do_tri; } @@ -1269,19 +160,29 @@ static void render_destroy( struct draw_stage *stage ) */ struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) { - struct setup_stage *setup = CALLOC_STRUCT(setup_stage); + struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - setup->softpipe = softpipe; - setup->stage.draw = softpipe->draw; - setup->stage.point = setup_first_point; - setup->stage.line = setup_first_line; - setup->stage.tri = setup_first_tri; - setup->stage.flush = setup_flush; - setup->stage.reset_stipple_counter = reset_stipple_counter; - setup->stage.destroy = render_destroy; + sstage->setup = setup_create_context(softpipe); + sstage->stage.draw = softpipe->draw; + sstage->stage.point = setup_first_point; + sstage->stage.line = setup_first_line; + sstage->stage.tri = setup_first_tri; + sstage->stage.flush = setup_flush; + sstage->stage.reset_stipple_counter = reset_stipple_counter; + sstage->stage.destroy = render_destroy; - setup->quad.coef = setup->coef; - setup->quad.posCoef = &setup->posCoef; + return (struct draw_stage *)sstage; +} - return &setup->stage; +struct setup_context * +sp_draw_setup_context( struct draw_stage *stage ) +{ + struct setup_stage *ssetup = setup_stage(stage); + return ssetup->setup; +} + +void +sp_draw_flush( struct draw_stage *stage ) +{ + stage->flush( stage, 0 ); } diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h index f3e8a79dd9e..49bdd98ed87 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.h +++ b/src/gallium/drivers/softpipe/sp_prim_setup.h @@ -69,6 +69,12 @@ typedef void (*vbuf_draw_func)( struct pipe_context *pipe, extern struct draw_stage * sp_draw_render_stage( struct softpipe_context *softpipe ); +extern struct setup_context * +sp_draw_setup_context( struct draw_stage * ); + +extern void +sp_draw_flush( struct draw_stage * ); + extern struct draw_stage * sp_draw_vbuf_stage( struct draw_context *draw_context, diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index d940718ed2b..025a0113bf1 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /** @@ -30,7 +30,7 @@ * softpipe rendering pipeline. * Probably not desired in general, but useful for testing/debuggin. * Enabled/Disabled with SP_VBUF env var. - * + * * Authors * Brian Paul */ @@ -39,14 +39,16 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_prim_vbuf.h" +#include "sp_prim_setup.h" +#include "sp_setup.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" #include "draw/draw_vbuf.h" #define SP_MAX_VBUF_INDEXES 1024 #define SP_MAX_VBUF_SIZE 4096 +typedef const float (*cptrf4)[4]; /** * Subclass of vbuf_render. @@ -69,7 +71,6 @@ softpipe_vbuf_render(struct vbuf_render *vbr) } - static const struct vertex_info * sp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -91,7 +92,7 @@ sp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void -sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, +sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, unsigned vertex_size, unsigned vertices_used) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); @@ -114,7 +115,7 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) else { return FALSE; } - + } @@ -122,22 +123,19 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) * Recalculate prim's determinant. * XXX is this needed? */ -static void -calc_det(struct prim_header *header) +static float +calc_det( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) { - /* Window coords: */ - const float *v0 = header->v[0]->data[0]; - const float *v1 = header->v[1]->data[0]; - const float *v2 = header->v[2]->data[0]; - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0] - v2[0]; - const float ey = v0[1] - v2[1]; - const float fx = v1[0] - v2[0]; - const float fy = v1[1] - v2[1]; - + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + /* det = cross(e,f).z */ - header->det = ex * fy - ey * fx; + return ex * fy - ey * fx; } @@ -147,48 +145,51 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; struct draw_stage *setup = softpipe->setup; - struct prim_header prim; unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i, j; void *vertex_buffer = cvbr->vertex_buffer; - - prim.det = 0; - prim.reset_line_stipple = 0; - prim.edgeflags = 0; - prim.pad = 0; + cptrf4 v[3]; + struct setup_context *setup_ctx = sp_draw_setup_context(setup); switch (cvbr->prim) { case PIPE_PRIM_TRIANGLES: for (i = 0; i < nr_indices; i += 3) { - for (j = 0; j < 3; j++) - prim.v[j] = (struct vertex_header *)((char *)vertex_buffer + - indices[i+j] * vertex_size); - - calc_det(&prim); - setup->tri( setup, &prim ); + for (j = 0; j < 3; j++) + v[j] = (cptrf4)((char *)vertex_buffer + + indices[i+j] * vertex_size); + + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2]); } break; case PIPE_PRIM_LINES: for (i = 0; i < nr_indices; i += 2) { - for (j = 0; j < 2; j++) - prim.v[j] = (struct vertex_header *)((char *)vertex_buffer + - indices[i+j] * vertex_size); - - setup->line( setup, &prim ); + for (j = 0; j < 2; j++) + v[j] = (cptrf4)((char *)vertex_buffer + + indices[i+j] * vertex_size); + + setup_line( setup_ctx, + v[0], + v[1] ); } break; case PIPE_PRIM_POINTS: for (i = 0; i < nr_indices; i++) { - prim.v[0] = (struct vertex_header *)((char *)vertex_buffer + - indices[i] * vertex_size); - setup->point( setup, &prim ); + v[0] = (cptrf4)((char *)vertex_buffer + + indices[i] * vertex_size); + + setup_point( setup_ctx, + v[0] ); } break; } - setup->flush( setup, 0 ); + sp_draw_flush( setup ); } @@ -202,111 +203,131 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; struct draw_stage *setup = softpipe->setup; - struct prim_header prim; const void *vertex_buffer = cvbr->vertex_buffer; const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i; - - prim.det = 0; - prim.reset_line_stipple = 0; - prim.edgeflags = 0; - prim.pad = 0; + struct setup_context *setup_ctx = sp_draw_setup_context(setup); + cptrf4 v[3]; #define VERTEX(I) \ - (struct vertex_header *) ((char *) vertex_buffer + (I) * vertex_size) + (cptrf4) ((char *) vertex_buffer + (I) * vertex_size) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - prim.v[0] = VERTEX(i); - setup->point( setup, &prim ); + v[0] = VERTEX(i); + setup_point( setup_ctx, v[0] ); } break; case PIPE_PRIM_LINES: assert(nr % 2 == 0); for (i = 0; i < nr; i += 2) { - prim.v[0] = VERTEX(i); - prim.v[1] = VERTEX(i + 1); - setup->line( setup, &prim ); + v[0] = VERTEX(i); + v[1] = VERTEX(i + 1); + setup_line( setup_ctx, v[0], v[1] ); } break; case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i++) { - prim.v[0] = VERTEX(i - 1); - prim.v[1] = VERTEX(i); - setup->line( setup, &prim ); + v[0] = VERTEX(i - 1); + v[1] = VERTEX(i); + setup_line( setup_ctx, v[0], v[1] ); } break; case PIPE_PRIM_TRIANGLES: assert(nr % 3 == 0); for (i = 0; i < nr; i += 3) { - prim.v[0] = VERTEX(i + 0); - prim.v[1] = VERTEX(i + 1); - prim.v[2] = VERTEX(i + 2); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i + 0); + v[1] = VERTEX(i + 1); + v[2] = VERTEX(i + 2); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_TRIANGLE_STRIP: assert(nr >= 3); for (i = 2; i < nr; i++) { - prim.v[0] = VERTEX(i - 2); - prim.v[1] = VERTEX(i - 1); - prim.v[2] = VERTEX(i); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i - 2); + v[1] = VERTEX(i - 1); + v[2] = VERTEX(i); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_TRIANGLE_FAN: assert(nr >= 3); for (i = 2; i < nr; i++) { - prim.v[0] = VERTEX(0); - prim.v[1] = VERTEX(i - 1); - prim.v[2] = VERTEX(i); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(0); + v[1] = VERTEX(i - 1); + v[2] = VERTEX(i); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_QUADS: assert(nr % 4 == 0); for (i = 0; i < nr; i += 4) { - prim.v[0] = VERTEX(i + 0); - prim.v[1] = VERTEX(i + 1); - prim.v[2] = VERTEX(i + 2); - calc_det(&prim); - setup->tri( setup, &prim ); - - prim.v[0] = VERTEX(i + 0); - prim.v[1] = VERTEX(i + 2); - prim.v[2] = VERTEX(i + 3); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i + 0); + v[1] = VERTEX(i + 1); + v[2] = VERTEX(i + 2); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); + + v[0] = VERTEX(i + 0); + v[1] = VERTEX(i + 2); + v[2] = VERTEX(i + 3); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_QUAD_STRIP: assert(nr >= 4); for (i = 2; i < nr; i += 2) { - prim.v[0] = VERTEX(i - 2); - prim.v[1] = VERTEX(i); - prim.v[2] = VERTEX(i + 1); - calc_det(&prim); - setup->tri( setup, &prim ); - - prim.v[0] = VERTEX(i - 2); - prim.v[1] = VERTEX(i + 1); - prim.v[2] = VERTEX(i - 1); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i - 2); + v[1] = VERTEX(i); + v[2] = VERTEX(i + 1); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); + + v[0] = VERTEX(i - 2); + v[1] = VERTEX(i + 1); + v[2] = VERTEX(i - 1); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_POLYGON: /* draw as tri fan */ for (i = 2; i < nr; i++) { - prim.v[0] = VERTEX(0); - prim.v[1] = VERTEX(i - 1); - prim.v[2] = VERTEX(i); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(0); + v[1] = VERTEX(i - 1); + v[2] = VERTEX(i); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; default: diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c new file mode 100644 index 00000000000..48617a66ed2 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -0,0 +1,1249 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Primitive rasterization/rendering (points, lines, triangles) + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Brian Paul + */ + +#include "sp_setup.h" + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + +#define DEBUG_VERTS 0 +#define DEBUG_FRAGS 0 + +/** + * Triangle edge info + */ +struct edge { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_context { + struct softpipe_context *softpipe; + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. + */ + const float (*vmax)[4]; + const float (*vmid)[4]; + const float (*vmin)[4]; + const float (*vprovoke)[4]; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneoverarea; + + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef posCoef; /* For Z, W */ + struct quad_header quad; + + struct { + int left[2]; /**< [0] = row0, [1] = row1 */ + int right[2]; + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; + +#if DEBUG_FRAGS + uint numFragsEmitted; /**< per primitive */ + uint numFragsWritten; /**< per primitive */ +#endif +}; + + + + + +/** + * Recalculate prim's determinant. + * XXX is this needed? + */ +static INLINE float +calc_det( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + return ex * fy - ey * fx; +} + +/** + * Clip setup->quad against the scissor/surface bounds. + */ +static INLINE void +quad_clip(struct setup_context *setup) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + + if (setup->quad.x0 >= maxx || + setup->quad.y0 >= maxy || + setup->quad.x0 + 1 < minx || + setup->quad.y0 + 1 < miny) { + /* totally clipped */ + setup->quad.mask = 0x0; + return; + } + if (setup->quad.x0 < minx) + setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (setup->quad.y0 < miny) + setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (setup->quad.x0 == maxx - 1) + setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (setup->quad.y0 == maxy - 1) + setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} + + +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad(struct setup_context *setup) +{ + quad_clip(setup); + if (setup->quad.mask) { + struct softpipe_context *sp = setup->softpipe; + sp->quad.first->run(sp->quad.first, &setup->quad); + } +} + + +/** + * Emit a quad (pass to next stage). No clipping is done. + */ +static INLINE void +emit_quad( struct setup_context *setup, int x, int y, unsigned mask ) +{ + struct softpipe_context *sp = setup->softpipe; + setup->quad.x0 = x; + setup->quad.y0 = y; + setup->quad.mask = mask; +#if DEBUG_FRAGS + if (mask & 1) setup->numFragsEmitted++; + if (mask & 2) setup->numFragsEmitted++; + if (mask & 4) setup->numFragsEmitted++; + if (mask & 8) setup->numFragsEmitted++; +#endif + sp->quad.first->run(sp->quad.first, &setup->quad); +#if DEBUG_FRAGS + mask = setup->quad.mask; + if (mask & 1) setup->numFragsWritten++; + if (mask & 2) setup->numFragsWritten++; + if (mask & 4) setup->numFragsWritten++; + if (mask & 8) setup->numFragsWritten++; +#endif +} + + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int block( int x ) +{ + return x & ~1; +} + + +/** + * Compute mask which indicates which pixels in the 2x2 quad are actually inside + * the triangle's bounds. + * + * this is pretty nasty... may need to rework flush_spans again to + * fix it, if possible. + */ +static unsigned calculate_mask( struct setup_context *setup, int x ) +{ + unsigned mask = 0x0; + + if (x >= setup->span.left[0] && x < setup->span.right[0]) + mask |= MASK_TOP_LEFT; + + if (x >= setup->span.left[1] && x < setup->span.right[1]) + mask |= MASK_BOTTOM_LEFT; + + if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0]) + mask |= MASK_TOP_RIGHT; + + if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1]) + mask |= MASK_BOTTOM_RIGHT; + + return mask; +} + + +/** + * Render a horizontal span of quads + */ +static void flush_spans( struct setup_context *setup ) +{ + int minleft, maxright; + int x; + + switch (setup->span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = MIN2(setup->span.left[0], setup->span.left[1]); + maxright = MAX2(setup->span.right[0], setup->span.right[1]); + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = setup->span.left[0]; + maxright = setup->span.right[0]; + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = setup->span.left[1]; + maxright = setup->span.right[1]; + break; + + default: + return; + } + + /* XXX this loop could be moved into the above switch cases and + * calculate_mask() could be simplified a bit... + */ + for (x = block(minleft); x <= block(maxright); x += 2) { + emit_quad( setup, x, setup->span.y, + calculate_mask( setup, x ) ); + } + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; +} + +#if DEBUG_VERTS +static void print_vertex(const struct setup_context *setup, + const float (*v)[4]) +{ + int i; + debug_printf("Vertex: (%p)\n", v); + for (i = 0; i < setup->quad.nr_attrs; i++) { + debug_printf(" %d: %f %f %f %f\n", i, + v[i][0], v[i][1], v[i][2], v[i][3]); + } +} +#endif + +static boolean setup_sort_vertices( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ +#if DEBUG_VERTS + debug_printf("Triangle:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); + print_vertex(setup, v2); +#endif + + setup->vprovoke = v2; + + /* determine bottom to top order of vertices */ + { + float y0 = v0[0][1]; + float y1 = v1[0][1]; + float y2 = v2[0][1]; + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup->vmin = v0; + setup->vmid = v1; + setup->vmax = v2; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup->vmin = v2; + setup->vmid = v0; + setup->vmax = v1; + } + else { + /* y0<=y2<=y1 */ + setup->vmin = v0; + setup->vmid = v2; + setup->vmax = v1; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup->vmin = v1; + setup->vmid = v0; + setup->vmax = v2; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup->vmin = v2; + setup->vmid = v1; + setup->vmax = v0; + } + else { + /* y1<=y2<=y0 */ + setup->vmin = v1; + setup->vmid = v2; + setup->vmax = v0; + } + } + } + + setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; + setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; + setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; + setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; + setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + * + * The area will be the same as prim->det, but the sign may be + * different depending on how the vertices get sorted above. + * + * To determine whether the primitive is front or back facing we + * use the prim->det value because its sign is correct. + */ + { + const float area = (setup->emaj.dx * setup->ebot.dy - + setup->ebot.dx * setup->emaj.dy); + + setup->oneoverarea = 1.0f / area; + /* + debug_printf("%s one-over-area %f area %f det %f\n", + __FUNCTION__, setup->oneoverarea, area, det ); + */ + } + + /* We need to know if this is a front or back-facing triangle for: + * - the GLSL gl_FrontFacing fragment attribute (bool) + * - two-sided stencil test + */ + setup->quad.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex[slot][i]. + * The result will be put into setup->coef[slot].a0[i]. + * \param slot which attribute slot + * \param i which component of the slot (0..3) + */ +static void const_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + + coef->dadx[i] = 0; + coef->dady[i] = 0; + + /* need provoking vertex info! + */ + coef->a0[i] = setup->vprovoke[vertSlot][i]; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void tri_linear_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; + float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); + + /* + debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", + slot, "xyzw"[i], + setup->coef[slot].a0[i], + setup->coef[slot].dadx[i], + setup->coef[slot].dady[i]); + */ +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void tri_persp_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* premultiply by 1/w (v[0][3] is always W): + */ + float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; + float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; + float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + float botda = mida - mina; + float majda = maxa - mina; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + /* + debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, + setup->vmin[vertSlot][i], + setup->vmid[vertSlot][i], + setup->vmax[vertSlot][i] + ); + */ + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (mina - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from posCoef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coeff(struct setup_context *setup, uint slot) +{ + /*X*/ + setup->coef[slot].a0[0] = 0; + setup->coef[slot].dadx[0] = 1.0; + setup->coef[slot].dady[0] = 0.0; + /*Y*/ + if (setup->softpipe->rasterizer->origin_lower_left) { + /* y=0=bottom */ + const int winHeight = setup->softpipe->framebuffer.height; + setup->coef[slot].a0[1] = (float) (winHeight - 1); + setup->coef[slot].dady[1] = -1.0; + } + else { + /* y=0=top */ + setup->coef[slot].a0[1] = 0.0; + setup->coef[slot].dady[1] = 1.0; + } + setup->coef[slot].dadx[1] = 0.0; + /*Z*/ + setup->coef[slot].a0[2] = setup->posCoef.a0[2]; + setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[slot].dady[2] = setup->posCoef.dady[2]; + /*W*/ + setup->coef[slot].a0[3] = setup->posCoef.a0[3]; + setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[slot].dady[3] = setup->posCoef.dady[3]; +} + + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. + */ +static void setup_tri_coefficients( struct setup_context *setup ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* z and w are done by linear interpolation: + */ + tri_linear_coeff(setup, &setup->posCoef, 0, 2); + tri_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + + +static void setup_tri_edges( struct setup_context *setup ) +{ + float vmin_x = setup->vmin[0][0] + 0.5f; + float vmid_x = setup->vmid[0][0] + 0.5f; + + float vmin_y = setup->vmin[0][1] - 0.5f; + float vmid_y = setup->vmid[0][1] - 0.5f; + float vmax_y = setup->vmax[0][1] - 0.5f; + + setup->emaj.sy = CEILF(vmin_y); + setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy); + setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; + setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; + + setup->etop.sy = CEILF(vmid_y); + setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy); + setup->etop.dxdy = setup->etop.dx / setup->etop.dy; + setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; + + setup->ebot.sy = CEILF(vmin_y); + setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy); + setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; + setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. + */ +static void subtriangle( struct setup_context *setup, + struct edge *eleft, + struct edge *eright, + unsigned lines ) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + assert((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup->span.y) { + flush_spans(setup); + setup->span.y = block(_y); + } + + setup->span.left[_y&1] = left; + setup->span.right[_y&1] = right; + setup->span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Do setup for triangle rasterization, then render the triangle. + */ +void setup_tri( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* + debug_printf("%s\n", __FUNCTION__ ); + */ + +#if DEBUG_FRAGS + setup->numFragsEmitted = 0; + setup->numFragsWritten = 0; +#endif + + setup_sort_vertices( setup, calc_det(v0, v1, v2), + v0, v1, v2 ); + setup_tri_coefficients( setup ); + setup_tri_edges( setup ); + + setup->quad.prim = PRIM_TRI; + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; + /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ + + /* init_constant_attribs( setup ); */ + + if (setup->oneoverarea < 0.0) { + /* emaj on left: + */ + subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); + subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); + } + else { + /* emaj on right: + */ + subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); + subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); + } + + flush_spans( setup ); + +#if DEBUG_FRAGS + printf("Tri: %u frags emitted, %u written\n", + setup->numFragsEmitted, + setup->numFragsWritten); +#endif +} + + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a line. + */ +static void +line_linear_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a line. + */ +static void +line_persp_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* XXX double-check/verify this arithmetic */ + const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; + const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + const float da = a1 - a0; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmax are initialized. + */ +static INLINE void +setup_line_coefficients(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* use setup->vmin, vmax to point to vertices */ + setup->vprovoke = v1; + setup->vmin = v0; + setup->vmax = v1; + + setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; + setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + /* NOTE: this is not really 1/area */ + setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx + + setup->emaj.dy * setup->emaj.dy); + + /* z and w are done by linear interpolation: + */ + line_linear_coeff(setup, &setup->posCoef, 0, 2); + line_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + +/** + * Plot a pixel in a line segment. + */ +static INLINE void +plot(struct setup_context *setup, int x, int y) +{ + const int iy = y & 1; + const int ix = x & 1; + const int quadX = x - ix; + const int quadY = y - iy; + const int mask = (1 << ix) << (2 * iy); + + if (quadX != setup->quad.x0 || + quadY != setup->quad.y0) + { + /* flush prev quad, start new quad */ + + if (setup->quad.x0 != -1) + clip_emit_quad(setup); + + setup->quad.x0 = quadX; + setup->quad.y0 = quadY; + setup->quad.mask = 0x0; + } + + setup->quad.mask |= mask; +} + + +/** + * Do setup for line rasterization, then render the line. + * Single-pixel width, no stipple, etc. We rely on the 'draw' module + * to handle stippling and wide lines. + */ +void +setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + int x0 = (int) v0[0][0]; + int x1 = (int) v1[0][0]; + int y0 = (int) v0[0][1]; + int y1 = (int) v1[0][1]; + int dx = x1 - x0; + int dy = y1 - y0; + int xstep, ystep; + + if (dx == 0 && dy == 0) + return; + + setup_line_coefficients(setup, v0, v1); + + if (dx < 0) { + dx = -dx; /* make positive */ + xstep = -1; + } + else { + xstep = 1; + } + + if (dy < 0) { + dy = -dy; /* make positive */ + ystep = -1; + } + else { + ystep = 1; + } + + assert(dx >= 0); + assert(dy >= 0); + + setup->quad.x0 = setup->quad.y0 = -1; + setup->quad.mask = 0x0; + setup->quad.prim = PRIM_LINE; + /* XXX temporary: set coverage to 1.0 so the line appears + * if AA mode happens to be enabled. + */ + setup->quad.coverage[0] = + setup->quad.coverage[1] = + setup->quad.coverage[2] = + setup->quad.coverage[3] = 1.0; + + if (dx > dy) { + /*** X-major line ***/ + int i; + const int errorInc = dy + dy; + int error = errorInc - dx; + const int errorDec = error - dx; + + for (i = 0; i < dx; i++) { + plot(setup, x0, y0); + + x0 += xstep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + y0 += ystep; + } + } + } + else { + /*** Y-major line ***/ + int i; + const int errorInc = dx + dx; + int error = errorInc - dy; + const int errorDec = error - dy; + + for (i = 0; i < dy; i++) { + plot(setup, x0, y0); + + y0 += ystep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + x0 += xstep; + } + } + } + + /* draw final quad */ + if (setup->quad.mask) { + clip_emit_quad(setup); + } +} + + +static void +point_persp_coeff(struct setup_context *setup, + const float (*vert)[4], + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + coef->dadx[i] = 0.0F; + coef->dady[i] = 0.0F; + coef->a0[i] = vert[vertSlot][i] * vert[0][3]; +} + + +/** + * Do setup for point rasterization, then render the point. + * Round or square points... + * XXX could optimize a lot for 1-pixel points. + */ +void +setup_point( struct setup_context *setup, + const float (*v0)[4] ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const int sizeAttr = setup->softpipe->psize_slot; + const float size + = sizeAttr > 0 ? v0[sizeAttr][0] + : setup->softpipe->rasterizer->point_size; + const float halfSize = 0.5F * size; + const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; + const float x = v0[0][0]; /* Note: data[0] is always position */ + const float y = v0[0][1]; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* For points, all interpolants are constant-valued. + * However, for point sprites, we'll need to setup texcoords appropriately. + * XXX: which coefficients are the texcoords??? + * We may do point sprites as textured quads... + * + * KW: We don't know which coefficients are texcoords - ultimately + * the choice of what interpolation mode to use for each attribute + * should be determined by the fragment program, using + * per-attribute declaration statements that include interpolation + * mode as a parameter. So either the fragment program will have + * to be adjusted for pointsprite vs normal point behaviour, or + * otherwise a special interpolation mode will have to be defined + * which matches the required behaviour for point sprites. But - + * the latter is not a feature of normal hardware, and as such + * probably should be ruled out on that basis. + */ + setup->vprovoke = v0; + + /* setup Z, W */ + const_coeff(setup, &setup->posCoef, 0, 2); + const_coeff(setup, &setup->posCoef, 0, 3); + + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + /* fall-through */ + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + point_persp_coeff(setup, setup->vprovoke, + &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } + + setup->quad.prim = PRIM_POINT; + + if (halfSize <= 0.5 && !round) { + /* special case for 1-pixel points */ + const int ix = ((int) x) & 1; + const int iy = ((int) y) & 1; + setup->quad.x0 = (int) x - ix; + setup->quad.y0 = (int) y - iy; + setup->quad.mask = (1 << ix) << (2 * iy); + clip_emit_quad(setup); + } + else { + if (round) { + /* rounded points */ + const int ixmin = block((int) (x - halfSize)); + const int ixmax = block((int) (x + halfSize)); + const int iymin = block((int) (y - halfSize)); + const int iymax = block((int) (y + halfSize)); + const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ + const float rmax = halfSize + 0.7071F; + const float rmin2 = MAX2(0.0F, rmin * rmin); + const float rmax2 = rmax * rmax; + const float cscale = 1.0F / (rmax2 - rmin2); + int ix, iy; + + for (iy = iymin; iy <= iymax; iy += 2) { + for (ix = ixmin; ix <= ixmax; ix += 2) { + float dx, dy, dist2, cover; + + setup->quad.mask = 0x0; + + dx = (ix + 0.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_TOP_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_TOP_RIGHT; + } + + dx = (ix + 0.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_BOTTOM_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_BOTTOM_RIGHT; + } + + if (setup->quad.mask) { + setup->quad.x0 = ix; + setup->quad.y0 = iy; + clip_emit_quad(setup); + } + } + } + } + else { + /* square points */ + const int xmin = (int) (x + 0.75 - halfSize); + const int ymin = (int) (y + 0.25 - halfSize); + const int xmax = xmin + (int) size; + const int ymax = ymin + (int) size; + /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ + const int ixmin = block(xmin); + const int ixmax = block(xmax - 1); + const int iymin = block(ymin); + const int iymax = block(ymax - 1); + int ix, iy; + + /* + debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); + */ + for (iy = iymin; iy <= iymax; iy += 2) { + uint rowMask = 0xf; + if (iy < ymin) { + /* above the top edge */ + rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + } + if (iy + 1 >= ymax) { + /* below the bottom edge */ + rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + } + + for (ix = ixmin; ix <= ixmax; ix += 2) { + uint mask = rowMask; + + if (ix < xmin) { + /* fragment is past left edge of point, turn off left bits */ + mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + } + if (ix + 1 >= xmax) { + /* past the right edge */ + mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + } + + setup->quad.mask = mask; + setup->quad.x0 = ix; + setup->quad.y0 = iy; + clip_emit_quad(setup); + } + } + } + } +} + +void setup_prepare( struct setup_context *setup ) +{ + struct softpipe_context *sp = setup->softpipe; + unsigned i; + + if (sp->dirty) { + softpipe_update_derived(sp); + } + + /* Mark surfaces as defined now */ + for (i = 0; i < sp->framebuffer.num_cbufs; i++){ + if (sp->framebuffer.cbufs[i]) { + sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + } + } + if (sp->framebuffer.zsbuf) { + sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + } + + { + const struct sp_fragment_shader *fs = setup->softpipe->fs; + setup->quad.nr_attrs = fs->info.num_inputs; + sp->quad.first->begin(sp->quad.first); + } +} + + + +void setup_destroy_context( struct setup_context *setup ) +{ + FREE( setup ); +} + + +/** + * Create a new primitive setup/render stage. + */ +struct setup_context *setup_create_context( struct softpipe_context *softpipe ) +{ + struct setup_context *setup = CALLOC_STRUCT(setup_context); + + setup->softpipe = softpipe; + + setup->quad.coef = setup->coef; + setup->quad.posCoef = &setup->posCoef; + + return setup; +} diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h new file mode 100644 index 00000000000..3133fc2a3d5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef SP_SETUP_H +#define SP_SETUP_H + +struct setup_context; +struct softpipe_context; + +void setup_tri( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ); + +void +setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]); + +void +setup_point( struct setup_context *setup, + const float (*v0)[4] ); + + +struct setup_context *setup_create_context( struct softpipe_context *softpipe ); +void setup_prepare( struct setup_context *setup ); +void setup_destroy_context( struct setup_context *setup ); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 14abb20eeb2..f10a1fa4718 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -71,25 +71,16 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * simply emit the whole post-xform vertex as-is: */ struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; -#if 0 - vinfo_vbuf->num_attribs = 0; - /* special-case to allow memcpy of whole vertex */ - draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); - /* size in dwords or floats */ - vinfo_vbuf->size = 4 * draw_num_vs_outputs(softpipe->draw) - + sizeof(struct vertex_header) / 4; -#else - /* for pass-through mode, we need a more explicit list of attribs */ const uint num = draw_num_vs_outputs(softpipe->draw); uint i; + /* No longer any need to try and emit draw vertex_header info. + */ vinfo_vbuf->num_attribs = 0; - draw_emit_vertex_attr(vinfo_vbuf, EMIT_HEADER, INTERP_NONE, 0); for (i = 0; i < num; i++) { draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } draw_compute_vertex_size(vinfo_vbuf); -#endif } /* -- cgit v1.2.3 From b1de9c948f9794db97ed7d34b8cbcdccc206ea77 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 11 Apr 2008 21:37:28 +0200 Subject: nv30: Set pipe status on clear --- src/gallium/drivers/nv30/nv30_clear.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c index 71f413588ee..8c3ca204d58 100644 --- a/src/gallium/drivers/nv30/nv30_clear.c +++ b/src/gallium/drivers/nv30/nv30_clear.c @@ -9,4 +9,5 @@ nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_CLEAR; } -- cgit v1.2.3 From 4c498c1b0c38a3ba93649fa4435937d15689bae1 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 11 Apr 2008 21:37:41 +0200 Subject: nv30: Do flip --- src/gallium/drivers/nv30/nv30_surface.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c index b20a3dd4c15..b22211ac86e 100644 --- a/src/gallium/drivers/nv30/nv30_surface.c +++ b/src/gallium/drivers/nv30/nv30_surface.c @@ -42,8 +42,20 @@ nv30_surface_copy(struct pipe_context *pipe, unsigned do_flip, struct nv30_context *nv30 = nv30_context(pipe); struct nouveau_winsys *nvws = nv30->nvws; - nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, - width, height); + if (do_flip) { + /*XXX: This dodgyness will do for now for correctness. But, + * need to investigate whether the 2D engine is able to + * manage a flip (perhaps SIFM?), if not, use the 3D engine + */ + desty += height; + while (height--) { + nvws->surface_copy(nvws, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + } else { + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); + } } static void -- cgit v1.2.3 From e3cf0cd6a9f3f072594e5712763b98ce7e579bcf Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 11 Apr 2008 14:18:07 -0600 Subject: gallium: implement immediates (aka literals) for SSE fragment shaders --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 82 +++++++++++++++++++++++++++-- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 4 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 10 ++-- 3 files changed, 88 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 93813f36cbe..748c53dbfc9 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -225,6 +225,15 @@ get_coef_base( void ) return get_output_base(); } +static struct x86_reg +get_immediate_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DI ); +} + + /** * Data access helpers. */ @@ -238,6 +247,16 @@ get_argument( (index + 1) * 4 ); } +static struct x86_reg +get_immediate( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_immediate_base(), + (vec * 4 + chan) * 4 ); +} + static struct x86_reg get_const( unsigned vec, @@ -572,6 +591,25 @@ emit_const( SHUF( 0, 0, 0, 0 ) ); } +static void +emit_immediate( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_movss( + func, + make_xmm( xmm ), + get_immediate( vec, chan ) ); + emit_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + + /** * Copy a shader input to xmm register * \param xmm the destination xmm register @@ -1191,6 +1229,14 @@ emit_fetch( swizzle ); break; + case TGSI_FILE_IMMEDIATE: + emit_immediate( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + case TGSI_FILE_INPUT: emit_inputf( func, @@ -2343,15 +2389,21 @@ tgsi_emit_sse2( * DECLARATION and INSTRUCTION phase. * GP register holding the output argument is aliased with the coeff argument, * as outputs are not needed in the DECLARATION phase. + * + * \param tokens the TGSI input shader + * \param func the output SSE code/function + * \param immediates buffer to place immediates, later passed to SSE func */ unsigned tgsi_emit_sse2_fs( struct tgsi_token *tokens, - struct x86_function *func ) + struct x86_function *func, + float (*immediates)[4]) { struct tgsi_parse_context parse; boolean instruction_phase = FALSE; unsigned ok = 1; + uint num_immediates = 0; DUMP_START(); @@ -2362,6 +2414,7 @@ tgsi_emit_sse2_fs( func, get_input_base(), get_argument( 0 ) ); + /* skipping outputs argument here */ emit_mov( func, get_const_base(), @@ -2374,6 +2427,10 @@ tgsi_emit_sse2_fs( func, get_coef_base(), get_argument( 4 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 5 ) ); tgsi_parse_init( &parse, tokens ); @@ -2407,9 +2464,26 @@ tgsi_emit_sse2_fs( break; case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX implement this */ - ok = 0; - debug_printf("failed to emit immediate value to SSE\n"); + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for( i = 0; i < size; i++ ) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } +#if 0 + debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", + num_immediates, + immediates[num_immediates][0], + immediates[num_immediates][1], + immediates[num_immediates][2], + immediates[num_immediates][3]); +#endif + num_immediates++; + } break; default: diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index 63b8ef39112..fde06047fe8 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -16,7 +16,9 @@ tgsi_emit_sse2( unsigned tgsi_emit_sse2_fs( struct tgsi_token *tokens, - struct x86_function *function ); + struct x86_function *function, + float (*immediates)[4] + ); #if defined __cplusplus } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 8095d662ee6..9c7948264fe 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -51,7 +51,8 @@ typedef void (XSTDCALL *codegen_function)( struct tgsi_exec_vector *output, float (*constant)[4], struct tgsi_exec_vector *temporary, - const struct tgsi_interp_coef *coef + const struct tgsi_interp_coef *coef, + float (*immediates)[4] //, const struct tgsi_exec_vector *quadPos ); @@ -60,6 +61,7 @@ struct sp_sse_fragment_shader { struct sp_fragment_shader base; struct x86_function sse2_program; codegen_function func; + float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; }; @@ -96,7 +98,8 @@ fs_sse_run( struct sp_fragment_shader *base, machine->Outputs, machine->Consts, machine->Temps, - machine->InterpCoefs + machine->InterpCoefs, + shader->immediates // , &machine->QuadPos ); @@ -129,7 +132,8 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, x86_init_func( &shader->sse2_program ); - if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program )) { + if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program, + shader->immediates)) { FREE(shader); return NULL; } -- cgit v1.2.3 From 593cf5a6b55eb9b490a2aee2c3850d2d493fc4df Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 11 Apr 2008 15:27:00 -0600 Subject: gallium: merge the tgsi_emit_sse2() and tgsi_emit_sse2_fs() functions. The two functions were mostly the same. We can look at the shader header info to determine if it's a vertex or fragment shader. --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 201 +++++++++++----------------- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 7 - src/gallium/drivers/softpipe/sp_fs_sse.c | 4 +- 3 files changed, 80 insertions(+), 132 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index e55fae6047b..c37e201b2bc 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2313,104 +2313,25 @@ emit_declaration( } } -unsigned -tgsi_emit_sse2( - struct tgsi_token *tokens, - struct x86_function *func, - float (*immediates)[4] ) -{ - struct tgsi_parse_context parse; - unsigned ok = 1; - uint num_immediates = 0; - - DUMP_START(); - - func->csr = func->store; - - emit_mov( - func, - get_input_base(), - get_argument( 0 ) ); - emit_mov( - func, - get_output_base(), - get_argument( 1 ) ); - emit_mov( - func, - get_const_base(), - get_argument( 2 ) ); - emit_mov( - func, - get_temp_base(), - get_argument( 3 ) ); - emit_mov( - func, - get_immediate_base(), - get_argument( 4 ) ); - - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - ok = emit_instruction( - func, - &parse.FullToken.FullInstruction ); - - if (!ok) { - debug_printf("failed to translate tgsi opcode %d to SSE\n", - parse.FullToken.FullInstruction.Instruction.Opcode ); - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* simply copy the immediate values into the next immediates[] slot */ - { - const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; - uint i; - assert(size <= 4); - assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); - for( i = 0; i < size; i++ ) { - immediates[num_immediates][i] = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; - } - num_immediates++; - } - break; - - default: - assert( 0 ); - ok = 0; - break; - } - } - - tgsi_parse_free( &parse ); - - DUMP_END(); - - return ok; -} /** - * Fragment shaders are responsible for interpolating shader inputs. Because on - * x86 we have only 4 GP registers, and here we have 5 shader arguments (input, - * output, const, temp and coef), the code is split into two phases -- - * DECLARATION and INSTRUCTION phase. - * GP register holding the output argument is aliased with the coeff argument, - * as outputs are not needed in the DECLARATION phase. + * Translate a TGSI vertex/fragment shader to SSE2 code. + * Slightly different things are done for vertex vs. fragment shaders. + * + * Note that fragment shaders are responsible for interpolating shader + * inputs. Because on x86 we have only 4 GP registers, and here we + * have 5 shader arguments (input, output, const, temp and coef), the + * code is split into two phases -- DECLARATION and INSTRUCTION phase. + * GP register holding the output argument is aliased with the coeff + * argument, as outputs are not needed in the DECLARATION phase. * * \param tokens the TGSI input shader * \param func the output SSE code/function * \param immediates buffer to place immediates, later passed to SSE func + * \param return 1 for success, 0 if translation failed */ unsigned -tgsi_emit_sse2_fs( +tgsi_emit_sse2( struct tgsi_token *tokens, struct x86_function *func, float (*immediates)[4]) @@ -2424,50 +2345,84 @@ tgsi_emit_sse2_fs( func->csr = func->store; - /* DECLARATION phase, do not load output argument. */ - emit_mov( - func, - get_input_base(), - get_argument( 0 ) ); - /* skipping outputs argument here */ - emit_mov( - func, - get_const_base(), - get_argument( 2 ) ); - emit_mov( - func, - get_temp_base(), - get_argument( 3 ) ); - emit_mov( - func, - get_coef_base(), - get_argument( 4 ) ); - emit_mov( - func, - get_immediate_base(), - get_argument( 5 ) ); - tgsi_parse_init( &parse, tokens ); + /* + * Different function args for vertex/fragment shaders: + */ + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + /* DECLARATION phase, do not load output argument. */ + emit_mov( + func, + get_input_base(), + get_argument( 0 ) ); + /* skipping outputs argument here */ + emit_mov( + func, + get_const_base(), + get_argument( 2 ) ); + emit_mov( + func, + get_temp_base(), + get_argument( 3 ) ); + emit_mov( + func, + get_coef_base(), + get_argument( 4 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 5 ) ); + } + else { + assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + + emit_mov( + func, + get_input_base(), + get_argument( 0 ) ); + emit_mov( + func, + get_output_base(), + get_argument( 1 ) ); + emit_mov( + func, + get_const_base(), + get_argument( 2 ) ); + emit_mov( + func, + get_temp_base(), + get_argument( 3 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 4 ) ); + } + while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - emit_declaration( - func, - &parse.FullToken.FullDeclaration ); + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration( + func, + &parse.FullToken.FullDeclaration ); + } break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if( !instruction_phase ) { - /* INSTRUCTION phase, overwrite coeff with output. */ - instruction_phase = TRUE; - emit_mov( - func, - get_output_base(), - get_argument( 1 ) ); + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + if( !instruction_phase ) { + /* INSTRUCTION phase, overwrite coeff with output. */ + instruction_phase = TRUE; + emit_mov( + func, + get_output_base(), + get_argument( 1 ) ); + } } + ok = emit_instruction( func, &parse.FullToken.FullInstruction ); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index d1190727d03..d56bf7f98ab 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -15,13 +15,6 @@ tgsi_emit_sse2( float (*immediates)[4] ); -unsigned -tgsi_emit_sse2_fs( - struct tgsi_token *tokens, - struct x86_function *function, - float (*immediates)[4] - ); - #if defined __cplusplus } #endif diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 9c7948264fe..5ef02a71426 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -132,8 +132,8 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, x86_init_func( &shader->sse2_program ); - if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program, - shader->immediates)) { + if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program, + shader->immediates)) { FREE(shader); return NULL; } -- cgit v1.2.3 From 186277ee928a7c9ad8a31776f93e502613ad94fd Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 11 Apr 2008 23:39:29 +0200 Subject: nv30: Move some structures and functions from context to screen --- src/gallium/drivers/nv30/nv30_context.c | 185 ++-------------------------- src/gallium/drivers/nv30/nv30_context.h | 71 +++++++++-- src/gallium/drivers/nv30/nv30_query.c | 23 ++-- src/gallium/drivers/nv30/nv30_screen.c | 202 +++++++++++++++++++++++++++---- src/gallium/drivers/nv30/nv30_screen.h | 17 ++- src/gallium/drivers/nv30/nv30_vertprog.c | 31 ++++- 6 files changed, 303 insertions(+), 226 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index b8e8b86d99a..d38713e98ad 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -26,198 +26,36 @@ static void nv30_destroy(struct pipe_context *pipe) { struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_winsys *nvws = nv30->nvws; if (nv30->draw) draw_destroy(nv30->draw); - - nvws->res_free(&nv30->vertprog.exec_heap); - nvws->res_free(&nv30->vertprog.data_heap); - - nvws->res_free(&nv30->query_heap); - nvws->notifier_free(&nv30->query); - - nvws->notifier_free(&nv30->sync); - - nvws->grobj_free(&nv30->rankine); - FREE(nv30); } -static boolean -nv30_init_hwctx(struct nv30_context *nv30, int rankine_class) -{ - struct nouveau_winsys *nvws = nv30->nvws; - int ret; - int i; - - ret = nvws->grobj_alloc(nvws, rankine_class, &nv30->rankine); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - BEGIN_RING(rankine, NV34TCL_DMA_NOTIFY, 1); - OUT_RING (nv30->sync->handle); - BEGIN_RING(rankine, NV34TCL_DMA_TEXTURE0, 2); - OUT_RING (nvws->channel->vram->handle); - OUT_RING (nvws->channel->gart->handle); - BEGIN_RING(rankine, NV34TCL_DMA_COLOR1, 1); - OUT_RING (nvws->channel->vram->handle); - BEGIN_RING(rankine, NV34TCL_DMA_COLOR0, 2); - OUT_RING (nvws->channel->vram->handle); - OUT_RING (nvws->channel->vram->handle); - BEGIN_RING(rankine, NV34TCL_DMA_VTXBUF0, 2); - OUT_RING (nvws->channel->vram->handle); - OUT_RING (nvws->channel->gart->handle); -/* BEGIN_RING(rankine, NV34TCL_DMA_FENCE, 2); - OUT_RING (0); - OUT_RING (nv30->query->handle);*/ - BEGIN_RING(rankine, NV34TCL_DMA_IN_MEMORY7, 1); - OUT_RING (nvws->channel->vram->handle); - BEGIN_RING(rankine, NV34TCL_DMA_IN_MEMORY8, 1); - OUT_RING (nvws->channel->vram->handle); - - for (i=1; i<8; i++) { - BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); - } - - BEGIN_RING(rankine, 0x220, 1); - OUT_RING (1); - - BEGIN_RING(rankine, 0x03b0, 1); - OUT_RING (0x00100000); - BEGIN_RING(rankine, 0x1454, 1); - OUT_RING (0); - BEGIN_RING(rankine, 0x1d80, 1); - OUT_RING (3); - BEGIN_RING(rankine, 0x1450, 1); - OUT_RING (0x00030004); - - /* NEW */ - BEGIN_RING(rankine, 0x1e98, 1); - OUT_RING (0); - BEGIN_RING(rankine, 0x17e0, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x3f800000); - BEGIN_RING(rankine, 0x1f80, 16); - OUT_RING (0); OUT_RING (0); OUT_RING (0); OUT_RING (0); - OUT_RING (0); OUT_RING (0); OUT_RING (0); OUT_RING (0); - OUT_RING (0x0000ffff); - OUT_RING (0); OUT_RING (0); OUT_RING (0); OUT_RING (0); - OUT_RING (0); OUT_RING (0); OUT_RING (0); - - BEGIN_RING(rankine, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); - - BEGIN_RING(rankine, 0x1d88, 1); - OUT_RING (0x00001200); - - BEGIN_RING(rankine, NV34TCL_RC_ENABLE, 1); - OUT_RING (0); - - BEGIN_RING(rankine, NV34TCL_DEPTH_RANGE_NEAR, 2); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - - BEGIN_RING(rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); - OUT_RING (0xffff0000); - - /* enables use of vp rather than fixed-function somehow */ - BEGIN_RING(rankine, 0x1e94, 1); - OUT_RING (0x13); - - FIRE_RING (NULL); - return TRUE; -} - -#define NV30TCL_CHIPSET_3X_MASK 0x00000003 -#define NV34TCL_CHIPSET_3X_MASK 0x00000010 -#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 - struct pipe_context * -nv30_create(struct pipe_screen *screen, unsigned pctx_id) +nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) { - struct pipe_winsys *pipe_winsys = screen->winsys; - struct nouveau_winsys *nvws = nv30_screen(screen)->nvws; - unsigned chipset = nv30_screen(screen)->chipset; + struct nv30_screen *screen = nv30_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; struct nv30_context *nv30; - int rankine_class = 0, ret; + unsigned chipset = screen->chipset; + struct nouveau_winsys *nvws = screen->nvws; - if ((chipset & 0xf0) != 0x30) { - NOUVEAU_ERR("Not a NV3X chipset\n"); - return NULL; - } - - if (NV30TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) { - rankine_class = 0x0397; - } else if (NV34TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) { - rankine_class = 0x0697; - } else if (NV35TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) { - rankine_class = 0x0497; - } else { - NOUVEAU_ERR("Unknown NV3X chipset: NV%02x\n", chipset); - return NULL; - } - - nv30 = CALLOC_STRUCT(nv30_context); + nv30 = CALLOC(1, sizeof(struct nv30_context)); if (!nv30) return NULL; + nv30->screen = screen; + nv30->pctx_id = pctx_id; + nv30->chipset = chipset; nv30->nvws = nvws; - /* Notifier for sync purposes */ - ret = nvws->notifier_alloc(nvws, 1, &nv30->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv30_destroy(&nv30->pipe); - return NULL; - } - - /* Query objects */ - ret = nvws->notifier_alloc(nvws, 32, &nv30->query); - if (ret) { - NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv30_destroy(&nv30->pipe); - return NULL; - } - - ret = nvws->res_init(&nv30->query_heap, 0, 32); - if (ret) { - NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv30_destroy(&nv30->pipe); - return NULL; - } - - /* Vtxprog resources */ - if (nvws->res_init(&nv30->vertprog.exec_heap, 0, 256) || - nvws->res_init(&nv30->vertprog.data_heap, 0, 256)) { - nv30_destroy(&nv30->pipe); - return NULL; - } - - /* Static rankine initialisation */ - if (!nv30_init_hwctx(nv30, rankine_class)) { - nv30_destroy(&nv30->pipe); - return NULL; - } - - /* Pipe context setup */ - nv30->pipe.winsys = pipe_winsys; - nv30->pipe.screen = screen; - + nv30->pipe.winsys = ws; + nv30->pipe.screen = pscreen; nv30->pipe.destroy = nv30_destroy; - nv30->pipe.draw_arrays = nv30_draw_arrays; nv30->pipe.draw_elements = nv30_draw_elements; nv30->pipe.clear = nv30_clear; - nv30->pipe.flush = nv30_flush; nv30_init_query_functions(nv30); @@ -226,7 +64,6 @@ nv30_create(struct pipe_screen *screen, unsigned pctx_id) nv30_init_miptree_functions(nv30); nv30->draw = draw_create(); - assert(nv30->draw); draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30)); return &nv30->pipe; diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index c1cc3eca1e3..180969731bf 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -11,8 +11,9 @@ #include "nouveau/nouveau_gldefs.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv30_context *ctx = nv30 + struct nv30_screen *ctx = nv30->screen #include "nouveau/nouveau_push.h" +#include "nouveau/nouveau_stateobj.h" #include "nv30_state.h" @@ -21,23 +22,70 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); -#define NV30_NEW_VERTPROG (1 << 1) -#define NV30_NEW_FRAGPROG (1 << 2) -#define NV30_NEW_ARRAYS (1 << 3) +enum nv30_state_index { + NV30_STATE_FB = 0, + NV30_STATE_VIEWPORT = 1, + NV30_STATE_BLEND = 2, + NV30_STATE_RAST = 3, + NV30_STATE_ZSA = 4, + NV30_STATE_BCOL = 5, + NV30_STATE_CLIP = 6, + NV30_STATE_SCISSOR = 7, + NV30_STATE_STIPPLE = 8, + NV30_STATE_FRAGPROG = 9, + NV30_STATE_VERTPROG = 10, + NV30_STATE_FRAGTEX0 = 11, + NV30_STATE_FRAGTEX1 = 12, + NV30_STATE_FRAGTEX2 = 13, + NV30_STATE_FRAGTEX3 = 14, + NV30_STATE_FRAGTEX4 = 15, + NV30_STATE_FRAGTEX5 = 16, + NV30_STATE_FRAGTEX6 = 17, + NV30_STATE_FRAGTEX7 = 18, + NV30_STATE_FRAGTEX8 = 19, + NV30_STATE_FRAGTEX9 = 20, + NV30_STATE_FRAGTEX10 = 21, + NV30_STATE_FRAGTEX11 = 22, + NV30_STATE_FRAGTEX12 = 23, + NV30_STATE_FRAGTEX13 = 24, + NV30_STATE_FRAGTEX14 = 25, + NV30_STATE_FRAGTEX15 = 26, + NV30_STATE_VERTTEX0 = 27, + NV30_STATE_VERTTEX1 = 28, + NV30_STATE_VERTTEX2 = 29, + NV30_STATE_VERTTEX3 = 30, + NV30_STATE_VTXBUF = 31, + NV30_STATE_VTXFMT = 32, + NV30_STATE_VTXATTR = 33, + NV30_STATE_MAX = 34 +}; + +#include "nv30_screen.h" + +#define NV30_NEW_BLEND (1 << 0) +#define NV30_NEW_RAST (1 << 1) +#define NV30_NEW_ZSA (1 << 2) +#define NV30_NEW_SAMPLER (1 << 3) +#define NV30_NEW_FB (1 << 4) +#define NV30_NEW_STIPPLE (1 << 5) +#define NV30_NEW_SCISSOR (1 << 6) +#define NV30_NEW_VIEWPORT (1 << 7) +#define NV30_NEW_BCOL (1 << 8) +#define NV30_NEW_VERTPROG (1 << 9) +#define NV30_NEW_FRAGPROG (1 << 10) +#define NV30_NEW_ARRAYS (1 << 11) +#define NV30_NEW_UCP (1 << 12) struct nv30_context { struct pipe_context pipe; + struct nouveau_winsys *nvws; + struct nv30_screen *screen; + unsigned pctx_id; struct draw_context *draw; int chipset; - struct nouveau_grobj *rankine; - struct nouveau_notifier *sync; - - /* query objects */ - struct nouveau_notifier *query; - struct nouveau_resource *query_heap; uint32_t dirty; @@ -63,9 +111,6 @@ struct nv30_context { } vb[16]; struct { - struct nouveau_resource *exec_heap; - struct nouveau_resource *data_heap; - struct nv30_vertex_program *active; struct nv30_vertex_program *current; diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c index 0c2d941562c..d40d75f2640 100644 --- a/src/gallium/drivers/nv30/nv30_query.c +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -1,5 +1,4 @@ #include "pipe/p_context.h" -#include "pipe/p_util.h" #include "nv30_context.h" @@ -10,7 +9,7 @@ struct nv30_query { uint64_t result; }; -static inline struct nv30_query * +static INLINE struct nv30_query * nv30_query(struct pipe_query *pipe) { return (struct nv30_query *)pipe; @@ -46,9 +45,18 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - if (nv30->nvws->res_alloc(nv30->query_heap, 1, NULL, &q->object)) + /* Happens when end_query() is called, then another begin_query() + * without querying the result in-between. For now we'll wait for + * the existing query to notify completion, but it could be better. + */ + if (q->object) { + uint64 tmp; + pipe->get_query_result(pipe, pq, 1, &tmp); + } + + if (nv30->nvws->res_alloc(nv30->screen->query_heap, 1, NULL, &q->object)) assert(0); - nv30->nvws->notifier_reset(nv30->query, q->object->start); + nv30->nvws->notifier_reset(nv30->screen->query, q->object->start); BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); OUT_RING (1); @@ -83,16 +91,17 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!q->ready) { unsigned status; - status = nvws->notifier_status(nv30->query, q->object->start); + status = nvws->notifier_status(nv30->screen->query, + q->object->start); if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { if (wait == FALSE) return FALSE; - nvws->notifier_wait(nv30->query, q->object->start, + nvws->notifier_wait(nv30->screen->query, q->object->start, NV_NOTIFY_STATE_STATUS_COMPLETED, 0); } - q->result = nvws->notifier_retval(nv30->query, + q->result = nvws->notifier_retval(nv30->screen->query, q->object->start); q->ready = TRUE; nvws->res_free(&q->object); diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index c7487b37bc0..ce6c9ec523a 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -4,24 +4,28 @@ #include "nv30_context.h" #include "nv30_screen.h" +#define NV30TCL_CHIPSET_3X_MASK 0x00000003 +#define NV34TCL_CHIPSET_3X_MASK 0x00000010 +#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 + static const char * -nv30_screen_get_name(struct pipe_screen *screen) +nv30_screen_get_name(struct pipe_screen *pscreen) { - struct nv30_screen *nv30screen = nv30_screen(screen); + struct nv30_screen *screen = nv30_screen(pscreen); static char buffer[128]; - snprintf(buffer, sizeof(buffer), "NV%02X", nv30screen->chipset); + snprintf(buffer, sizeof(buffer), "NV%02X", screen->chipset); return buffer; } static const char * -nv30_screen_get_vendor(struct pipe_screen *screen) +nv30_screen_get_vendor(struct pipe_screen *pscreen) { return "nouveau"; } static int -nv30_screen_get_param(struct pipe_screen *screen, int param) +nv30_screen_get_param(struct pipe_screen *pscreen, int param) { switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: @@ -60,7 +64,7 @@ nv30_screen_get_param(struct pipe_screen *screen, int param) } static float -nv30_screen_get_paramf(struct pipe_screen *screen, int param) +nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) { switch (param) { case PIPE_CAP_MAX_LINE_WIDTH: @@ -82,8 +86,8 @@ nv30_screen_get_paramf(struct pipe_screen *screen, int param) } static boolean -nv30_screen_is_format_supported(struct pipe_screen *screen, - enum pipe_format format, uint type) +nv30_screen_surface_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, uint type) { switch (type) { case PIPE_SURFACE: @@ -122,36 +126,184 @@ nv30_screen_is_format_supported(struct pipe_screen *screen, } static void -nv30_screen_destroy(struct pipe_screen *screen) +nv30_screen_destroy(struct pipe_screen *pscreen) { - FREE(screen); + struct nv30_screen *screen = nv30_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->res_free(&screen->vp_exec_heap); + nvws->res_free(&screen->vp_data_heap); + nvws->res_free(&screen->query_heap); + nvws->notifier_free(&screen->query); + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->rankine); + + FREE(pscreen); } struct pipe_screen * -nv30_screen_create(struct pipe_winsys *winsys, struct nouveau_winsys *nvws, +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, unsigned chipset) { - struct nv30_screen *nv30screen = CALLOC_STRUCT(nv30_screen); + struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); + struct nouveau_stateobj *so; + unsigned rankine_class = 0; + int ret, i; + + if (!screen) + return NULL; + screen->chipset = chipset; + screen->nvws = nvws; + + /* 3D object */ + switch (chipset & 0xf0) { + case 0x30: + if (NV30TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) + rankine_class = 0x0397; + else + if (NV34TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) + rankine_class = 0x0697; + else + if (NV35TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) + rankine_class = 0x0497; + break; + default: + break; + } - if (!nv30screen) + if (!rankine_class) { + NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", chipset); return NULL; + } + + ret = nvws->grobj_alloc(nvws, rankine_class, &screen->rankine); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + /* Query objects */ + ret = nvws->notifier_alloc(nvws, 32, &screen->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->res_init(&screen->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&screen->vp_exec_heap, 0, 256) || + nvws->res_init(&screen->vp_data_heap, 0, 256)) { + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static rankine initialisation */ + so = so_new(128, 0); + so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); +/* so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, screen->query->handle);*/ + so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1); + so_data (so, nvws->channel->vram->handle); + + for (i=1; i<8; i++) { + so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); + so_data (so, 0); + so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); + so_data (so, 0); + } + + so_method(so, screen->rankine, 0x220, 1); + so_data (so, 1); + + so_method(so, screen->rankine, 0x03b0, 1); + so_data (so, 0x00100000); + so_method(so, screen->rankine, 0x1454, 1); + so_data (so, 0); + so_method(so, screen->rankine, 0x1d80, 1); + so_data (so, 3); + so_method(so, screen->rankine, 0x1450, 1); + so_data (so, 0x00030004); + + /* NEW */ + so_method(so, screen->rankine, 0x1e98, 1); + so_data (so, 0); + so_method(so, screen->rankine, 0x17e0, 3); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_method(so, screen->rankine, 0x1f80, 16); + for (i=0; i<16; i++) { + so_data (so, (i==8) ? 0x0000ffff : 0); + } + + so_method(so, screen->rankine, 0x120, 3); + so_data (so, 0); + so_data (so, 1); + so_data (so, 2); + + so_method(so, screen->rankine, 0x1d88, 1); + so_data (so, 0x00001200); + + so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1); + so_data (so, 0); + + so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + + so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); + so_data (so, 0xffff0000); + + /* enables use of vp rather than fixed-function somehow */ + so_method(so, screen->rankine, 0x1e94, 1); + so_data (so, 0x13); - nv30screen->chipset = chipset; - nv30screen->nvws = nvws; + so_emit(nvws, so); + so_ref(NULL, &so); + nvws->push_flush(nvws, 0, NULL); - nv30screen->screen.winsys = winsys; + screen->pipe.winsys = ws; + screen->pipe.destroy = nv30_screen_destroy; - nv30screen->screen.destroy = nv30_screen_destroy; + screen->pipe.get_name = nv30_screen_get_name; + screen->pipe.get_vendor = nv30_screen_get_vendor; + screen->pipe.get_param = nv30_screen_get_param; + screen->pipe.get_paramf = nv30_screen_get_paramf; - nv30screen->screen.get_name = nv30_screen_get_name; - nv30screen->screen.get_vendor = nv30_screen_get_vendor; - nv30screen->screen.get_param = nv30_screen_get_param; - nv30screen->screen.get_paramf = nv30_screen_get_paramf; - nv30screen->screen.is_format_supported = - nv30_screen_is_format_supported; + screen->pipe.is_format_supported = nv30_screen_surface_format_supported; - nv30_screen_init_miptree_functions(&nv30screen->screen); + nv30_screen_init_miptree_functions(&screen->pipe); - return &nv30screen->screen; + return &screen->pipe; } diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h index f878f81e11f..56f8776a173 100644 --- a/src/gallium/drivers/nv30/nv30_screen.h +++ b/src/gallium/drivers/nv30/nv30_screen.h @@ -4,10 +4,25 @@ #include "pipe/p_screen.h" struct nv30_screen { - struct pipe_screen screen; + struct pipe_screen pipe; struct nouveau_winsys *nvws; unsigned chipset; + + /* HW graphics objects */ + struct nouveau_grobj *rankine; + struct nouveau_notifier *sync; + + /* Query object resources */ + struct nouveau_notifier *query; + struct nouveau_resource *query_heap; + + /* Vtxprog resources */ + struct nouveau_resource *vp_exec_heap; + struct nouveau_resource *vp_data_heap; + + /* Current 3D state of channel */ + struct nouveau_stateobj *state[NV30_STATE_MAX]; }; static INLINE struct nv30_screen * diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index fe1a467565d..71aea3a59cb 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -1,7 +1,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_parse.h" @@ -654,7 +653,7 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) /* Allocate hw vtxprog exec slots */ if (!vp->exec) { - struct nouveau_resource *heap = nv30->vertprog.exec_heap; + struct nouveau_resource *heap = nv30->screen->vp_exec_heap; uint vplen = vp->nr_insns; if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { @@ -674,7 +673,7 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) /* Allocate hw vtxprog const slots */ if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nv30->vertprog.data_heap; + struct nouveau_resource *heap = nv30->screen->vp_data_heap; if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { while (heap->next && heap->size < vp->nr_consts) { @@ -789,9 +788,29 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) void nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp) { - if (vp->nr_consts) - FREE(vp->consts); - if (vp->nr_insns) + struct nouveau_winsys *nvws = nv30->screen->nvws; + + vp->translated = FALSE; + + if (vp->nr_insns) { FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nvws->res_free(&vp->exec); + vp->exec_start = 0; + nvws->res_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + /* vp->ir = vp->or = vp->clip_ctrl = 0; + so_ref(NULL, &vp->so); */ } -- cgit v1.2.3 From 7f811f2c42937f254ae1b11e5b0ece765a8ea31b Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Apr 2008 22:16:42 +0200 Subject: nv30: Update miptree stuff --- src/gallium/drivers/nv30/nv30_miptree.c | 37 +++++++++++++++++---------------- 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index afb05fdd2bf..10ab46e19a5 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -4,7 +4,6 @@ #include "pipe/p_inlines.h" #include "nv30_context.h" -#include "nv30_screen.h" static void nv30_miptree_layout(struct nv30_miptree *nv30mt) @@ -13,7 +12,7 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) boolean swizzled = FALSE; uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; uint offset = 0; - int nr_faces, l, f; + int nr_faces, l, f, pitch; if (pt->target == PIPE_TEXTURE_CUBE) { nr_faces = 6; @@ -24,17 +23,17 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) nr_faces = 1; } + pitch = pt->width[0]; for (l = 0; l <= pt->last_level; l++) { pt->width[l] = width; pt->height[l] = height; pt->depth[l] = depth; if (swizzled) - nv30mt->level[l].pitch = pt->width[l] * pt->cpp; - else - nv30mt->level[l].pitch = pt->width[0] * pt->cpp; - nv30mt->level[l].pitch = (nv30mt->level[l].pitch + 63) & ~63; + pitch = pt->width[l]; + pitch = (pitch + 63) & ~63; + nv30mt->level[l].pitch = pitch * pt->cpp; nv30mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); @@ -55,9 +54,9 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) } static struct pipe_texture * -nv30_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) +nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { - struct pipe_winsys *ws = screen->winsys; + struct pipe_winsys *ws = pscreen->winsys; struct nv30_miptree *mt; mt = MALLOC(sizeof(struct nv30_miptree)); @@ -65,24 +64,26 @@ nv30_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) return NULL; mt->base = *pt; mt->base.refcount = 1; - mt->base.screen = screen; + mt->base.screen = pscreen; nv30_miptree_layout(mt); - mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, - mt->total_size); + mt->buffer = ws->buffer_create(ws, 256, + PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE, + mt->total_size); if (!mt->buffer) { FREE(mt); return NULL; } - + return &mt->base; } static void -nv30_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) +nv30_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) { - struct pipe_winsys *ws = screen->winsys; + struct pipe_winsys *ws = pscreen->winsys; struct pipe_texture *mt = *pt; *pt = NULL; @@ -106,10 +107,10 @@ nv30_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, } static struct pipe_surface * -nv30_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) +nv30_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) { - struct pipe_winsys *ws = screen->winsys; + struct pipe_winsys *ws = pscreen->winsys; struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; struct pipe_surface *ps; @@ -146,6 +147,6 @@ nv30_screen_init_miptree_functions(struct pipe_screen *pscreen) { pscreen->texture_create = nv30_miptree_create; pscreen->texture_release = nv30_miptree_release; - pscreen->get_tex_surface = nv30_miptree_surface_get; + pscreen->get_tex_surface = nv30_miptree_surface; } -- cgit v1.2.3 From 8e7326832a7420154fc0d526ac682494db1be160 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Mon, 14 Apr 2008 11:32:50 +0100 Subject: softpipe: do our own culling, don't rely on the draw module. May not always happen due to passthrough modes, etc. --- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 21 +++++++++++--- src/gallium/drivers/softpipe/sp_setup.c | 44 +++++++++++++++++------------ 2 files changed, 43 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 025a0113bf1..74cd675908c 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -120,8 +120,9 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) /** - * Recalculate prim's determinant. - * XXX is this needed? + * Recalculate prim's determinant. This is needed as we don't have + * get this information through the vbuf_render interface & we must + * calculate it here. */ static float calc_det( const float (*v0)[4], @@ -144,12 +145,21 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; - struct draw_stage *setup = softpipe->setup; unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i, j; void *vertex_buffer = cvbr->vertex_buffer; cptrf4 v[3]; - struct setup_context *setup_ctx = sp_draw_setup_context(setup); + + /* XXX: break this dependency - make setup_context live under + * softpipe, rename the old "setup" draw stage to something else. + */ + struct draw_stage *setup = softpipe->setup; + struct setup_context *setup_ctx = sp_draw_setup_context(softpipe->setup); + + /* XXX: call this from allocate_vertices: + */ + setup_prepare( setup_ctx ); + switch (cvbr->prim) { case PIPE_PRIM_TRIANGLES: @@ -189,6 +199,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) break; } + /* XXX: why are we calling this??? If we had to call something, it + * would be a function in sp_setup.c: + */ sp_draw_flush( setup ); } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 48617a66ed2..5a307888504 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -97,31 +97,35 @@ struct setup_context { uint numFragsEmitted; /**< per primitive */ uint numFragsWritten; /**< per primitive */ #endif + + unsigned winding; /* which winding to cull */ }; -/** - * Recalculate prim's determinant. - * XXX is this needed? - */ -static INLINE float -calc_det( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +static boolean cull_tri( struct setup_context *setup, + float det ) { - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - return ex * fy - ey * fx; + if (det != 0) + { + /* if (det < 0 then Z points toward camera and triangle is + * counter-clockwise winding. + */ + unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; + + if ((winding & setup->winding) == 0) + return FALSE; + } + + /* Culled: + */ + return TRUE; } + + /** * Clip setup->quad against the scissor/surface bounds. */ @@ -709,8 +713,10 @@ void setup_tri( struct setup_context *setup, setup->numFragsWritten = 0; #endif - setup_sort_vertices( setup, calc_det(v0, v1, v2), - v0, v1, v2 ); + if (cull_tri( setup, det )) + return; + + setup_sort_vertices( setup, det, v0, v1, v2 ); setup_tri_coefficients( setup ); setup_tri_edges( setup ); @@ -1223,6 +1229,8 @@ void setup_prepare( struct setup_context *setup ) setup->quad.nr_attrs = fs->info.num_inputs; sp->quad.first->begin(sp->quad.first); } + + setup->winding = sp->rasterizer->cull_mode; } -- cgit v1.2.3 From 871d39ec8c168fa58d8758013e99da63fa58111d Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Mon, 14 Apr 2008 16:18:00 +0100 Subject: softpipe: calculate determinant for all triangles, don't rely on draw module to do it --- src/gallium/drivers/softpipe/sp_prim_setup.c | 1 - src/gallium/drivers/softpipe/sp_prim_vbuf.c | 28 ---------------------------- src/gallium/drivers/softpipe/sp_setup.c | 26 +++++++++++++++++++++++++- src/gallium/drivers/softpipe/sp_setup.h | 10 +++++----- 4 files changed, 30 insertions(+), 35 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 6fe463b74c7..0ddb06764af 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -71,7 +71,6 @@ do_tri(struct draw_stage *stage, struct prim_header *prim) struct setup_stage *setup = setup_stage( stage ); setup_tri( setup->setup, - prim->det, prim->v[0]->data, prim->v[1]->data, prim->v[2]->data ); diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 74cd675908c..4fed19ecb6d 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -119,25 +119,6 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) } -/** - * Recalculate prim's determinant. This is needed as we don't have - * get this information through the vbuf_render interface & we must - * calculate it here. - */ -static float -calc_det( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - return ex * fy - ey * fx; -} static void @@ -169,7 +150,6 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) indices[i+j] * vertex_size); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2]); @@ -254,7 +234,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i + 1); v[2] = VERTEX(i + 2); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -267,7 +246,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i - 1); v[2] = VERTEX(i); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -280,7 +258,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i - 1); v[2] = VERTEX(i); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -293,7 +270,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i + 1); v[2] = VERTEX(i + 2); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -302,7 +278,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i + 2); v[2] = VERTEX(i + 3); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -315,7 +290,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i); v[2] = VERTEX(i + 1); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -324,7 +298,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i + 1); v[2] = VERTEX(i - 1); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -337,7 +310,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i - 1); v[2] = VERTEX(i); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 5a307888504..0164d0588d6 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -695,15 +695,37 @@ static void subtriangle( struct setup_context *setup, } +/** + * Recalculate prim's determinant. This is needed as we don't have + * get this information through the vbuf_render interface & we must + * calculate it here. + */ +static float +calc_det( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + return ex * fy - ey * fx; +} + + /** * Do setup for triangle rasterization, then render the triangle. */ void setup_tri( struct setup_context *setup, - float det, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { + float det = calc_det(v0, v1, v2); + /* debug_printf("%s\n", __FUNCTION__ ); */ @@ -713,6 +735,8 @@ void setup_tri( struct setup_context *setup, setup->numFragsWritten = 0; #endif + + if (cull_tri( setup, det )) return; diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h index 3133fc2a3d5..d54f3344288 100644 --- a/src/gallium/drivers/softpipe/sp_setup.h +++ b/src/gallium/drivers/softpipe/sp_setup.h @@ -30,11 +30,11 @@ struct setup_context; struct softpipe_context; -void setup_tri( struct setup_context *setup, - float det, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ); +void +setup_tri( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ); void setup_line(struct setup_context *setup, -- cgit v1.2.3 From f58ab8e75c082a0aea36ed63fe7e21fb5fac14b6 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 14 Apr 2008 10:56:56 -0600 Subject: gallium: take reduced prim, fill modes into account when culling --- src/gallium/drivers/softpipe/sp_context.h | 2 ++ src/gallium/drivers/softpipe/sp_draw_arrays.c | 15 +++++++++++++++ src/gallium/drivers/softpipe/sp_setup.c | 11 ++++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 2442bd1eb00..0e1d5e561d9 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -105,6 +105,8 @@ struct softpipe_context { int psize_slot; + unsigned reduced_api_prim; /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */ + #if 0 /* Stipple derived state: */ diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 61bcf518993..421509495a0 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -78,6 +78,20 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) } +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -115,6 +129,7 @@ softpipe_draw_elements(struct pipe_context *pipe, assert(0); #endif + sp->reduced_api_prim = reduced_prim[mode]; if (sp->dirty) softpipe_update_derived( sp ); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 0164d0588d6..813d7031083 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1254,7 +1254,16 @@ void setup_prepare( struct setup_context *setup ) sp->quad.first->begin(sp->quad.first); } - setup->winding = sp->rasterizer->cull_mode; + if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && + sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && + sp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { + /* we'll do culling */ + setup->winding = sp->rasterizer->cull_mode; + } + else { + /* 'draw' will do culling */ + setup->winding = PIPE_WINDING_NONE; + } } -- cgit v1.2.3 From 5b8fa518476868530d748ce6d03674e9cca3d89f Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 14 Apr 2008 23:55:36 +0900 Subject: gallium: Don't assume snprintf are always available. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 9 ++-- src/gallium/auxiliary/util/p_debug.c | 7 +-- src/gallium/auxiliary/util/u_snprintf.c | 16 +++--- src/gallium/auxiliary/util/u_string.h | 63 ++++++++++++++++++++++ .../drivers/i915simple/i915_fpc_translate.c | 3 +- src/gallium/drivers/i915simple/i915_screen.c | 3 +- src/gallium/drivers/i965simple/brw_screen.c | 3 +- src/gallium/include/pipe/p_format.h | 4 +- src/gallium/include/pipe/p_util.h | 8 --- 9 files changed, 88 insertions(+), 28 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_string.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index cb3573ceb66..ff6a2c4194d 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -30,6 +30,7 @@ #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_string.h" #include "tgsi_dump.h" #include "tgsi_parse.h" #include "tgsi_build.h" @@ -147,7 +148,7 @@ gen_dump_uix( { char str[36]; - sprintf( str, "0x%x", ui ); + util_snprintf( str, sizeof(str), "0x%x", ui ); gen_dump_str( dump, str ); } @@ -158,7 +159,7 @@ gen_dump_uid( { char str[16]; - sprintf( str, "%u", ui ); + util_snprintf( str, sizeof(str), "%u", ui ); gen_dump_str( dump, str ); } @@ -169,7 +170,7 @@ gen_dump_sid( { char str[16]; - sprintf( str, "%d", si ); + util_snprintf( str, sizeof(str), "%d", si ); gen_dump_str( dump, str ); } @@ -180,7 +181,7 @@ gen_dump_flt( { char str[48]; - sprintf( str, "%10.4f", flt ); + util_snprintf( str, sizeof(str), "%10.4f", flt ); gen_dump_str( dump, str ); } diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 090e3b7794d..f9366467cd5 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -39,6 +39,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "util/u_string.h" #ifdef WIN32 @@ -60,7 +61,7 @@ void _debug_vprintf(const char *format, va_list ap) /* EngDebugPrint does not handle float point arguments, so we need to use * our own vsnprintf implementation */ char buf[512 + 1]; - vsnprintf(buf, sizeof(buf), format, ap); + util_vsnprintf(buf, sizeof(buf), format, ap); _EngDebugPrint("%s", buf); #else /* TODO: Implement debug print for WINCE */ @@ -311,7 +312,7 @@ debug_dump_enum(const struct debug_named_value *names, ++names; } - snprintf(rest, sizeof(rest), "0x%08lx", value); + util_snprintf(rest, sizeof(rest), "0x%08lx", value); return rest; } @@ -344,7 +345,7 @@ debug_dump_flags(const struct debug_named_value *names, else first = 0; - snprintf(rest, sizeof(rest), "0x%08lx", value); + util_snprintf(rest, sizeof(rest), "0x%08lx", value); strncat(output, rest, sizeof(output)); } diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c index 48426abcb7a..c4f4bbd30c2 100644 --- a/src/gallium/auxiliary/util/u_snprintf.c +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -166,8 +166,8 @@ #include <config.h> #else #ifdef WIN32 -#define vsnprintf rpl_vsnprintf -#define snprintf rpl_snprintf +#define vsnprintf util_vsnprintf +#define snprintf util_snprintf #define HAVE_VSNPRINTF 0 #define HAVE_SNPRINTF 0 #define HAVE_VASPRINTF 1 /* not needed */ @@ -445,7 +445,7 @@ static UINTMAX_T myround(LDOUBLE); static LDOUBLE mypow10(int); int -rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) +util_vsnprintf(char *str, size_t size, const char *format, va_list args) { LDOUBLE fvalue; INTMAX_T value; @@ -1404,7 +1404,7 @@ mymemcpy(void *dst, void *src, size_t len) #endif /* NEED_MYMEMCPY */ int -rpl_vasprintf(char **ret, const char *format, va_list ap) +util_vasprintf(char **ret, const char *format, va_list ap) { size_t size; int len; @@ -1422,10 +1422,10 @@ rpl_vasprintf(char **ret, const char *format, va_list ap) #if !HAVE_SNPRINTF #if HAVE_STDARG_H int -rpl_snprintf(char *str, size_t size, const char *format, ...) +util_snprintf(char *str, size_t size, const char *format, ...) #else int -rpl_snprintf(va_alist) va_dcl +util_snprintf(va_alist) va_dcl #endif /* HAVE_STDARG_H */ { #if !HAVE_STDARG_H @@ -1449,10 +1449,10 @@ rpl_snprintf(va_alist) va_dcl #if !HAVE_ASPRINTF #if HAVE_STDARG_H int -rpl_asprintf(char **ret, const char *format, ...) +util_asprintf(char **ret, const char *format, ...) #else int -rpl_asprintf(va_alist) va_dcl +util_asprintf(va_alist) va_dcl #endif /* HAVE_STDARG_H */ { #if !HAVE_STDARG_H diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h new file mode 100644 index 00000000000..b99d4e8021c --- /dev/null +++ b/src/gallium/auxiliary/util/u_string.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Platform independent functions for string manipulation. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef U_STRING_H_ +#define U_STRING_H_ + +#ifndef WIN32 +#include <stdio.h> +#endif +#include <stddef.h> +#include <stdarg.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef WIN32 +int util_vsnprintf(char *, size_t, const char *, va_list); +int util_snprintf(char *str, size_t size, const char *format, ...); +#else +#define util_vsnprintf vsnprintf +#define util_snprintf snprintf +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_STRING_H_ */ diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 7b4fca5db1b..3ccf74c72c4 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -33,6 +33,7 @@ #include "i915_fpc.h" #include "pipe/p_shader_tokens.h" +#include "util/u_string.h" #include "tgsi/util/tgsi_parse.h" #include "tgsi/util/tgsi_dump.h" @@ -122,7 +123,7 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) debug_printf("i915_program_error: "); va_start( args, msg ); - vsprintf( buffer, msg, args ); + util_vsnprintf( buffer, sizeof(buffer), msg, args ); va_end( args ); debug_printf(buffer); debug_printf("\n"); diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 839b98c0cec..9ae594ce54c 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -28,6 +28,7 @@ #include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_string.h" #include "i915_reg.h" #include "i915_context.h" @@ -78,7 +79,7 @@ i915_get_name( struct pipe_screen *pscreen ) break; } - sprintf(buffer, "i915 (chipset: %s)", chipset); + util_snprintf(buffer, sizeof(buffer), "i915 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index 5be369fe521..6845c7abde2 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -28,6 +28,7 @@ #include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_string.h" #include "brw_context.h" #include "brw_screen.h" @@ -66,7 +67,7 @@ brw_get_name( struct pipe_screen *screen ) break; } - sprintf(buffer, "i965 (chipset: %s)", chipset); + util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 9e0f91f202b..ef9e3a3d6c0 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -28,7 +28,7 @@ #ifndef PIPE_FORMAT_H #define PIPE_FORMAT_H -#include <stdio.h> /* for sprintf */ +#include "util/u_string.h" #include "p_compiler.h" #include "p_debug.h" @@ -367,7 +367,7 @@ static INLINE char *pf_sprint_name( char *str, enum pipe_format format ) strcat( str, "S" ); break; } - sprintf( &str[strlen( str )], "%u", size * scale ); + util_snprintf( &str[strlen( str )], 32, "%u", size * scale ); } if (i != 0) { strcat( str, "_" ); diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 8e3aaee496f..dbca080a4b9 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -138,14 +138,6 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define GETENV( X ) debug_get_option( X, NULL ) -#ifdef WIN32 -int rpl_vsnprintf(char *, size_t, const char *, va_list); -int rpl_snprintf(char *str, size_t size, const char *format, ...); -#define vsnprintf rpl_vsnprintf -#define snprintf rpl_snprintf -#endif - - /** * Return memory on given byte alignment */ -- cgit v1.2.3 From fdd794dcfa33482bdabe7c04ec9df655e0c69bfc Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 14 Apr 2008 20:55:14 -0600 Subject: gallium: fix PIPE_CAP_MAX_RENDER_TARGETS query --- src/gallium/drivers/softpipe/sp_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 1850a1ced33..7dacb1c4613 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -68,7 +68,7 @@ softpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_POINT_SPRITE: return 1; case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; + return PIPE_MAX_COLOR_BUFS; case PIPE_CAP_OCCLUSION_QUERY: return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: -- cgit v1.2.3 From d3878b070b7b5084526b65499737cc686a6039b6 Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 14 Apr 2008 21:01:40 -0600 Subject: gallium: enable new quad output code, remove old code --- src/gallium/drivers/softpipe/sp_quad_fs.c | 36 ------------------------------- 1 file changed, 36 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 9a20c056a22..8dbdbe57649 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -88,7 +88,6 @@ shade_quad( &qss->machine, quad ); -#if 0 /* XXX multi color outputs - untested */ /* store outputs */ boolean z_written = FALSE; { @@ -133,41 +132,6 @@ shade_quad( quad->outputs.depth[2] = z0 + dzdy; quad->outputs.depth[3] = z0 + dzdx + dzdy; } -#endif - - /* store result color(s) */ - if (qss->colorOutSlot >= 0) { - /* XXX need to handle multiple color outputs someday */ - assert(softpipe->fs->info.output_semantic_name[qss->colorOutSlot] - == TGSI_SEMANTIC_COLOR); - memcpy( - quad->outputs.color[0], - &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], - sizeof( quad->outputs.color[0] ) ); - } - - /* store result Z */ - if (qss->depthOutSlot >= 0) { - /* output[slot] is new Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; - } - } - else { - /* compute Z values now, as in the quad earlyz stage */ - /* XXX we should really only do this if the earlyz stage is not used */ - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; - const float dzdx = quad->posCoef->dadx[2]; - const float dzdy = quad->posCoef->dady[2]; - const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - - quad->outputs.depth[0] = z0; - quad->outputs.depth[1] = z0 + dzdx; - quad->outputs.depth[2] = z0 + dzdy; - quad->outputs.depth[3] = z0 + dzdx + dzdy; - } /* shader may cull fragments */ if( quad->mask ) { -- cgit v1.2.3 From 7d2085bd7d9378703bfff3d974536ff5e9fd9b30 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 15 Apr 2008 13:25:28 +1000 Subject: nouveau: PIPE_ATTRIB_MAX -> PIPE_MAX_ATTRIBS --- src/gallium/drivers/nv10/nv10_context.h | 4 ++-- src/gallium/drivers/nv10/nv10_vbo.c | 4 ++-- src/gallium/drivers/nv30/nv30_context.h | 4 ++-- src/gallium/drivers/nv40/nv40_context.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 63d33ef7c91..b80f36ad34c 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -96,8 +96,8 @@ struct nv10_context { struct pipe_buffer *constant_buf; } fragprog; - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; }; static INLINE struct nv10_context * diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 3a4f49e1564..2a334e137d9 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -23,7 +23,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, /* * Map vertex buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv10->vtxbuf[i].buffer) { void *buf = pipe->winsys->buffer_map(pipe->winsys, @@ -52,7 +52,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, /* * unmap vertex/index buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv10->vtxbuf[i].buffer) { pipe->winsys->buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 180969731bf..1a016405e62 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -124,8 +124,8 @@ struct nv30_context { struct pipe_buffer *constant_buf; } fragprog; - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; }; static INLINE struct nv30_context * diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 436351b6bcc..2f10540ff0c 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -151,9 +151,9 @@ struct nv40_context { unsigned nr_samplers; unsigned nr_textures; unsigned dirty_samplers; - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; - struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; unsigned vtxelt_nr; }; -- cgit v1.2.3 From 50bbbbe581edd6b8d4fe9f8ba7f134e17dc80a0b Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 15 Apr 2008 14:58:32 +0900 Subject: gallium: Remove middle of scope declarations. --- src/gallium/drivers/softpipe/sp_quad_fs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 8dbdbe57649..625d0f9b489 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -77,7 +77,8 @@ shade_quad( struct quad_shade_stage *qss = quad_shade_stage( qs ); struct softpipe_context *softpipe = qs->softpipe; struct tgsi_exec_machine *machine = &qss->machine; - + boolean z_written; + /* Consts do not require 16 byte alignment. */ machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; @@ -89,7 +90,7 @@ shade_quad( quad ); /* store outputs */ - boolean z_written = FALSE; + z_written = FALSE; { const ubyte *sem_name = softpipe->fs->info.output_semantic_name; const ubyte *sem_index = softpipe->fs->info.output_semantic_index; -- cgit v1.2.3 From 8b607f42d094e61432c5718b8baa0a68268ec150 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Wed, 16 Apr 2008 12:20:39 +0100 Subject: softpipe: call setup_prepare earlier so that vertex info is correct when queried --- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 4fed19ecb6d..e063fe82efc 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -106,6 +106,16 @@ static boolean sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + + /* XXX: break this dependency - make setup_context live under + * softpipe, rename the old "setup" draw stage to something else. + */ + struct setup_context *setup_ctx = sp_draw_setup_context(cvbr->softpipe->setup); + + setup_prepare( setup_ctx ); + + + if (prim == PIPE_PRIM_TRIANGLES || prim == PIPE_PRIM_LINES || prim == PIPE_PRIM_POINTS) { @@ -136,10 +146,6 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) */ struct draw_stage *setup = softpipe->setup; struct setup_context *setup_ctx = sp_draw_setup_context(softpipe->setup); - - /* XXX: call this from allocate_vertices: - */ - setup_prepare( setup_ctx ); switch (cvbr->prim) { -- cgit v1.2.3 From ce454d2192918ae4b2535d0e76c68ebde3c4653f Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 17 Apr 2008 11:13:05 -0600 Subject: gallium: assert that we're not deleting the currently bound shader --- src/gallium/drivers/softpipe/sp_state_fs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 4eefd1d61f5..2921066ce36 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -82,7 +82,10 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) void softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { + struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state = fs; + + assert(fs != softpipe->fs); state->delete( state ); } -- cgit v1.2.3 From f631bebe1a05c8af863e514546763433343b7c53 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Fri, 18 Apr 2008 17:34:55 +0100 Subject: softpipe: fix const-related compiler warnings --- src/gallium/drivers/softpipe/sp_fs_sse.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 5ef02a71426..f857d26143e 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -49,7 +49,7 @@ typedef void (XSTDCALL *codegen_function)( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, - float (*constant)[4], + const float (*constant)[4], struct tgsi_exec_vector *temporary, const struct tgsi_interp_coef *coef, float (*immediates)[4] @@ -67,9 +67,9 @@ struct sp_sse_fragment_shader { static void -fs_sse_prepare( struct sp_fragment_shader *base, - struct tgsi_exec_machine *machine, - struct tgsi_sampler *samplers ) +fs_sse_prepare( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct tgsi_sampler *samplers ) { } @@ -80,9 +80,9 @@ fs_sse_prepare( struct sp_fragment_shader *base, * TODO: process >1 quad at a time */ static unsigned -fs_sse_run( struct sp_fragment_shader *base, - struct tgsi_exec_machine *machine, - struct quad_header *quad ) +fs_sse_run( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct quad_header *quad ) { struct sp_sse_fragment_shader *shader = (struct sp_sse_fragment_shader *) base; -- cgit v1.2.3 From c717f1fbe25f16ce6e607b0f7319ce74f9ae99b4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Sat, 19 Apr 2008 00:43:58 +0100 Subject: softpipe: don't need to trim prims --- src/gallium/drivers/softpipe/sp_draw_arrays.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 421509495a0..778291dded7 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -118,17 +118,6 @@ softpipe_draw_elements(struct pipe_context *pipe, struct draw_context *draw = sp->draw; unsigned i; - /* first, check that the primitive is not malformed. It is the - * state tracker's responsibility to do send only correctly formed - * primitives down. It currently isn't doing that though... - */ -#if 1 - count = draw_trim_prim( mode, count ); -#else - if (!draw_validate_prim( mode, count )) - assert(0); -#endif - sp->reduced_api_prim = reduced_prim[mode]; if (sp->dirty) -- cgit v1.2.3 From 43452886e2e33e33bdc57abe7e0b4af0abbbd2b1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Sat, 19 Apr 2008 00:44:09 +0100 Subject: cell: don't need to trim prims --- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 36af5be5f02..6e08cf6fe88 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -101,17 +101,6 @@ cell_draw_elements(struct pipe_context *pipe, struct draw_context *draw = sp->draw; unsigned i; - /* first, check that the primitive is not malformed. It is the - * state tracker's responsibility to do send only correctly formed - * primitives down. It currently isn't doing that though... - */ -#if 1 - count = draw_trim_prim( mode, count ); -#else - if (!draw_validate_prim( mode, count )) - assert(0); -#endif - if (sp->dirty) cell_update_derived( sp ); -- cgit v1.2.3 From 507fbe2d327efb8d608ce8e07436b97321560808 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Sat, 19 Apr 2008 15:29:27 +0100 Subject: draw: move some pipeline-specific code & state to draw_pipe.[ch] --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_context.c | 141 +++++-------------- src/gallium/auxiliary/draw/draw_pipe.c | 161 ++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_pipe.h | 110 +++++++++++++++ src/gallium/auxiliary/draw/draw_pipe_aaline.c | 1 + src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 1 + src/gallium/auxiliary/draw/draw_pipe_clip.c | 11 +- src/gallium/auxiliary/draw/draw_pipe_cull.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 1 + src/gallium/auxiliary/draw/draw_pipe_offset.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_twoside.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 1 + src/gallium/auxiliary/draw/draw_pipe_validate.c | 17 +-- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 1 + src/gallium/auxiliary/draw/draw_pipe_wide_line.c | 1 + src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 5 +- src/gallium/auxiliary/draw/draw_private.h | 132 ++++-------------- src/gallium/auxiliary/draw/draw_pt.c | 18 --- src/gallium/auxiliary/draw/draw_pt_pipeline.c | 1 + src/gallium/auxiliary/draw/draw_pt_post_vs.c | 11 ++ src/gallium/auxiliary/draw/draw_vertex.c | 2 - src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_prim_emit.c | 5 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 2 +- src/mesa/state_tracker/st_cb_feedback.c | 2 +- src/mesa/state_tracker/st_cb_rasterpos.c | 2 +- 28 files changed, 377 insertions(+), 262 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pipe.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 62f46c6db1e..4fffd11b770 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -5,6 +5,7 @@ LIBNAME = draw C_SOURCES = \ draw_context.c \ + draw_pipe.c \ draw_pipe_aaline.c \ draw_pipe_aapoint.c \ draw_pipe_clip.c \ diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 6012bc155ef..fa6791fa0b2 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -35,6 +35,8 @@ #include "draw_context.h" #include "draw_vbuf.h" #include "draw_vs.h" +#include "draw_pt.h" +#include "draw_pipe.h" struct draw_context *draw_create( void ) @@ -49,32 +51,6 @@ struct draw_context *draw_create( void ) draw->use_sse = FALSE; #endif - /* create pipeline stages */ - draw->pipeline.wide_line = draw_wide_line_stage( draw ); - draw->pipeline.wide_point = draw_wide_point_stage( draw ); - draw->pipeline.stipple = draw_stipple_stage( draw ); - draw->pipeline.unfilled = draw_unfilled_stage( draw ); - draw->pipeline.twoside = draw_twoside_stage( draw ); - draw->pipeline.offset = draw_offset_stage( draw ); - draw->pipeline.clip = draw_clip_stage( draw ); - draw->pipeline.flatshade = draw_flatshade_stage( draw ); - draw->pipeline.cull = draw_cull_stage( draw ); - draw->pipeline.validate = draw_validate_stage( draw ); - draw->pipeline.first = draw->pipeline.validate; - - if (!draw->pipeline.wide_line || - !draw->pipeline.wide_point || - !draw->pipeline.stipple || - !draw->pipeline.unfilled || - !draw->pipeline.twoside || - !draw->pipeline.offset || - !draw->pipeline.clip || - !draw->pipeline.flatshade || - !draw->pipeline.cull || - !draw->pipeline.validate) - goto fail; - - ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); @@ -83,11 +59,6 @@ struct draw_context *draw_create( void ) ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */ draw->nr_planes = 6; - /* these defaults are oriented toward the needs of softpipe */ - draw->wide_point_threshold = 1000000.0; /* infinity */ - draw->wide_line_threshold = 1.0; - draw->line_stipple = TRUE; - draw->point_sprite = TRUE; draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ @@ -100,6 +71,8 @@ struct draw_context *draw_create( void ) draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + if (!draw_pipeline_init( draw )) + goto fail; if (!draw_pt_init( draw )) goto fail; @@ -117,39 +90,13 @@ void draw_destroy( struct draw_context *draw ) if (!draw) return; - if (draw->pipeline.wide_line) - draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); - if (draw->pipeline.wide_point) - draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); - if (draw->pipeline.stipple) - draw->pipeline.stipple->destroy( draw->pipeline.stipple ); - if (draw->pipeline.unfilled) - draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); - if (draw->pipeline.twoside) - draw->pipeline.twoside->destroy( draw->pipeline.twoside ); - if (draw->pipeline.offset) - draw->pipeline.offset->destroy( draw->pipeline.offset ); - if (draw->pipeline.clip) - draw->pipeline.clip->destroy( draw->pipeline.clip ); - if (draw->pipeline.flatshade) - draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); - if (draw->pipeline.cull) - draw->pipeline.cull->destroy( draw->pipeline.cull ); - if (draw->pipeline.validate) - draw->pipeline.validate->destroy( draw->pipeline.validate ); - if (draw->pipeline.aaline) - draw->pipeline.aaline->destroy( draw->pipeline.aaline ); - if (draw->pipeline.aapoint) - draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); - if (draw->pipeline.pstipple) - draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); - if (draw->pipeline.rasterize) - draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); if (draw->machine.Inputs) align_free(draw->machine.Inputs); + if (draw->machine.Outputs) align_free(draw->machine.Outputs); + tgsi_exec_machine_free_data(&draw->machine); /* Not so fast -- we're just borrowing this at the moment. @@ -158,6 +105,7 @@ void draw_destroy( struct draw_context *draw ) draw->render->destroy( draw->render ); */ + draw_pipeline_destroy( draw ); draw_pt_destroy( draw ); FREE( draw ); @@ -284,7 +232,7 @@ void draw_wide_point_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->wide_point_threshold = threshold; + draw->pipeline.wide_point_threshold = threshold; } @@ -296,7 +244,7 @@ void draw_wide_line_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->wide_line_threshold = threshold; + draw->pipeline.wide_line_threshold = threshold; } @@ -307,7 +255,7 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->line_stipple = enable; + draw->pipeline.line_stipple = enable; } @@ -318,7 +266,7 @@ void draw_enable_point_sprites(struct draw_context *draw, boolean enable) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->point_sprite = enable; + draw->pipeline.point_sprite = enable; } @@ -373,36 +321,6 @@ draw_num_vs_outputs(struct draw_context *draw) } -/** - * Allocate space for temporary post-transform vertices, such as for clipping. - */ -void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) -{ - assert(!stage->tmp); - - stage->nr_tmps = nr; - - if (nr) { - ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); - unsigned i; - - stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); - - for (i = 0; i < nr; i++) - stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); - } -} - - -void draw_free_temp_verts( struct draw_stage *stage ) -{ - if (stage->tmp) { - FREE( stage->tmp[0] ); - FREE( stage->tmp ); - stage->tmp = NULL; - } -} - boolean draw_use_sse(struct draw_context *draw) { @@ -410,23 +328,6 @@ boolean draw_use_sse(struct draw_context *draw) } -void draw_reset_vertex_ids(struct draw_context *draw) -{ - struct draw_stage *stage = draw->pipeline.first; - - while (stage) { - unsigned i; - - for (i = 0; i < stage->nr_tmps; i++) - stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; - - stage = stage->next; - } - - draw_pt_reset_vertex_ids(draw); -} - - void draw_set_render( struct draw_context *draw, struct vbuf_render *render ) { @@ -467,3 +368,23 @@ draw_set_mapped_element_buffer( struct draw_context *draw, draw->user.elts = elements; draw->user.eltSize = eltSize; } + + + +/* Revamp me please: + */ +void draw_do_flush( struct draw_context *draw, unsigned flags ) +{ + if (!draw->flushing) + { + draw->flushing = TRUE; + + if (flags >= DRAW_FLUSH_STATE_CHANGE) { + draw->pipeline.first->flush( draw->pipeline.first, flags ); + draw->pipeline.first = draw->pipeline.validate; + draw->reduced_prim = ~0; /* is reduced_prim needed any more? */ + } + + draw->flushing = FALSE; + } +} diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c new file mode 100644 index 00000000000..9d62cb2c650 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -0,0 +1,161 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_util.h" +#include "draw/draw_private.h" +#include "draw/draw_pipe.h" + + +boolean draw_pipeline_init( struct draw_context *draw ) +{ + /* create pipeline stages */ + draw->pipeline.wide_line = draw_wide_line_stage( draw ); + draw->pipeline.wide_point = draw_wide_point_stage( draw ); + draw->pipeline.stipple = draw_stipple_stage( draw ); + draw->pipeline.unfilled = draw_unfilled_stage( draw ); + draw->pipeline.twoside = draw_twoside_stage( draw ); + draw->pipeline.offset = draw_offset_stage( draw ); + draw->pipeline.clip = draw_clip_stage( draw ); + draw->pipeline.flatshade = draw_flatshade_stage( draw ); + draw->pipeline.cull = draw_cull_stage( draw ); + draw->pipeline.validate = draw_validate_stage( draw ); + draw->pipeline.first = draw->pipeline.validate; + + if (!draw->pipeline.wide_line || + !draw->pipeline.wide_point || + !draw->pipeline.stipple || + !draw->pipeline.unfilled || + !draw->pipeline.twoside || + !draw->pipeline.offset || + !draw->pipeline.clip || + !draw->pipeline.flatshade || + !draw->pipeline.cull || + !draw->pipeline.validate) + return FALSE; + + /* these defaults are oriented toward the needs of softpipe */ + draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */ + draw->pipeline.wide_line_threshold = 1.0; + draw->pipeline.line_stipple = TRUE; + draw->pipeline.point_sprite = TRUE; + + return TRUE; +} + + +void draw_pipeline_destroy( struct draw_context *draw ) +{ + if (draw->pipeline.wide_line) + draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); + if (draw->pipeline.wide_point) + draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); + if (draw->pipeline.stipple) + draw->pipeline.stipple->destroy( draw->pipeline.stipple ); + if (draw->pipeline.unfilled) + draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); + if (draw->pipeline.twoside) + draw->pipeline.twoside->destroy( draw->pipeline.twoside ); + if (draw->pipeline.offset) + draw->pipeline.offset->destroy( draw->pipeline.offset ); + if (draw->pipeline.clip) + draw->pipeline.clip->destroy( draw->pipeline.clip ); + if (draw->pipeline.flatshade) + draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); + if (draw->pipeline.cull) + draw->pipeline.cull->destroy( draw->pipeline.cull ); + if (draw->pipeline.validate) + draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (draw->pipeline.aaline) + draw->pipeline.aaline->destroy( draw->pipeline.aaline ); + if (draw->pipeline.aapoint) + draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); + if (draw->pipeline.pstipple) + draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); + if (draw->pipeline.rasterize) + draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); +} + + + + +/* This is only used for temporary verts. + */ +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +/** + * Allocate space for temporary post-transform vertices, such as for clipping. + */ +void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) +{ + assert(!stage->tmp); + + stage->nr_tmps = nr; + + if (nr) { + ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); + unsigned i; + + stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); + + for (i = 0; i < nr; i++) + stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); + } +} + + +void draw_free_temp_verts( struct draw_stage *stage ) +{ + if (stage->tmp) { + FREE( stage->tmp[0] ); + FREE( stage->tmp ); + stage->tmp = NULL; + } +} + +void draw_reset_vertex_ids(struct draw_context *draw) +{ + struct draw_stage *stage = draw->pipeline.first; + + while (stage) { + unsigned i; + + for (i = 0; i < stage->nr_tmps; i++) + stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; + + stage = stage->next; + } + + draw_pt_reset_vertex_ids(draw); +} + + diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h new file mode 100644 index 00000000000..5b97ca5c8cf --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef DRAW_PIPE_H +#define DRAW_PIPE_H + +#include "pipe/p_compiler.h" +#include "draw_private.h" /* for sizeof(vertex_header) */ + + + +/** + * Base class for all primitive drawing stages. + */ +struct draw_stage +{ + struct draw_context *draw; /**< parent context */ + + struct draw_stage *next; /**< next stage in pipeline */ + + struct vertex_header **tmp; /**< temp vert storage, such as for clipping */ + unsigned nr_tmps; + + void (*point)( struct draw_stage *, + struct prim_header * ); + + void (*line)( struct draw_stage *, + struct prim_header * ); + + void (*tri)( struct draw_stage *, + struct prim_header * ); + + void (*flush)( struct draw_stage *, + unsigned flags ); + + void (*reset_stipple_counter)( struct draw_stage * ); + + void (*destroy)( struct draw_stage * ); +}; + + +extern struct draw_stage *draw_unfilled_stage( struct draw_context *context ); +extern struct draw_stage *draw_twoside_stage( struct draw_context *context ); +extern struct draw_stage *draw_offset_stage( struct draw_context *context ); +extern struct draw_stage *draw_clip_stage( struct draw_context *context ); +extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); +extern struct draw_stage *draw_cull_stage( struct draw_context *context ); +extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); +extern struct draw_stage *draw_validate_stage( struct draw_context *context ); + + +extern void draw_free_temp_verts( struct draw_stage *stage ); + +extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); + + + + +/** + * Get a writeable copy of a vertex. + * \param stage drawing stage info + * \param vert the vertex to copy (source) + * \param idx index into stage's tmp[] array to put the copy (dest) + * \return pointer to the copied vertex + */ +static INLINE struct vertex_header * +dup_vert( struct draw_stage *stage, + const struct vertex_header *vert, + unsigned idx ) +{ + struct vertex_header *tmp = stage->tmp[idx]; + const uint vsize = sizeof(struct vertex_header) + + stage->draw->num_vs_outputs * 4 * sizeof(float); + memcpy(tmp, vert, vsize); + tmp->vertex_id = UNDEFINED_VERTEX_ID; + return tmp; +} + +#endif diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index e8d2a45102c..24bc87d4f8e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -43,6 +43,7 @@ #include "draw_context.h" #include "draw_private.h" +#include "draw_pipe.h" /** diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index e84d380e500..9f878f6c02f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -49,6 +49,7 @@ #include "draw_context.h" #include "draw_vs.h" +#include "draw_pipe.h" /* diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 0ac3a240e5e..6780f275d96 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -35,8 +35,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "draw_context.h" #include "draw_vs.h" +#include "draw_pipe.h" #ifndef IS_NEGATIVE @@ -204,7 +204,14 @@ static void emit_poly( struct draw_stage *stage, } } - +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} /* Clip a triangle against the viewport and user clip planes. diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 8177b0ac86e..c406f89d056 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -35,7 +35,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" -#include "draw_private.h" +#include "draw_pipe.h" struct cull_stage { diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 54baa1fbc98..bdb8b49dc4d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -31,6 +31,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" +#include "draw_pipe.h" /** subclass of draw_stage */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index dbc676deae4..dbdece45bb7 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -33,7 +33,7 @@ */ #include "pipe/p_util.h" -#include "draw_private.h" +#include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 4dddb72906f..4903ba21336 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -44,7 +44,7 @@ #include "tgsi/util/tgsi_dump.h" #include "draw_context.h" -#include "draw_private.h" +#include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 506f33512c8..49429ee9e16 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -39,7 +39,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_pipe.h" /** Subclass of draw_stage */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 01d905c1534..09a9d23d57e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -32,7 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" - +#include "draw_pipe.h" struct twoside_stage { struct draw_stage stage; diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index b07860cd9ea..31e24f6a147 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -36,6 +36,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "draw_private.h" +#include "draw_pipe.h" struct unfilled_stage { diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index e163e078f0f..ffddc2f62c2 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -31,6 +31,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "draw_private.h" +#include "draw_pipe.h" static boolean points( unsigned prim ) { @@ -66,11 +67,11 @@ draw_need_pipeline(const struct draw_context *draw, if (lines(prim)) { /* line stipple */ - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) + if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) return TRUE; /* wide lines */ - if (draw->rasterizer->line_width > draw->wide_line_threshold) + if (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold) return TRUE; /* AA lines */ @@ -81,7 +82,7 @@ draw_need_pipeline(const struct draw_context *draw, if (points(prim)) { /* large points */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) + if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) return TRUE; /* AA points */ @@ -89,7 +90,7 @@ draw_need_pipeline(const struct draw_context *draw, return TRUE; /* point sprites */ - if (draw->rasterizer->point_sprite && draw->point_sprite) + if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite) return TRUE; } @@ -145,15 +146,15 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) stage->next = next; /* drawing wide lines? */ - wide_lines = (draw->rasterizer->line_width > draw->wide_line_threshold + wide_lines = (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold && !draw->rasterizer->line_smooth); /* drawing large points? */ - if (draw->rasterizer->point_sprite && draw->point_sprite) + if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite) wide_points = TRUE; else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) wide_points = FALSE; - else if (draw->rasterizer->point_size > draw->wide_point_threshold) + else if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) wide_points = TRUE; else wide_points = FALSE; @@ -186,7 +187,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.wide_point; } - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) { + if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) { draw->pipeline.stipple->next = next; next = draw->pipeline.stipple; precalc_flat = 1; /* only needed for lines really */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 30dceeb43de..c835727e323 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -40,6 +40,7 @@ #include "draw_vbuf.h" #include "draw_private.h" #include "draw_vertex.h" +#include "draw_pipe.h" #include "translate/translate.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 9a168ce8bdc..329b5d0fb02 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" +#include "draw_pipe.h" struct wideline_stage { diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 3d0add0c1af..7a439178a0e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" +#include "draw_pipe.h" struct widepoint_stage { @@ -203,8 +204,8 @@ static void widepoint_first_point( struct draw_stage *stage, } /* XXX we won't know the real size if it's computed by the vertex shader! */ - if ((draw->rasterizer->point_size > draw->wide_point_threshold) || - (draw->rasterizer->point_sprite && draw->point_sprite)) { + if ((draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) || + (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)) { stage->point = widepoint_point; } else { diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 37ffdbf9028..b2b2f82b8ff 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -51,12 +51,11 @@ struct pipe_context; struct gallivm_prog; struct gallivm_cpu_engine; - -struct draw_pt_middle_end; -struct draw_pt_front_end; struct draw_vertex_shader; +struct draw_context; +struct draw_stage; +struct vbuf_render; -#define MAX_SHADER_VERTICES 128 /** * Basic vertex info. @@ -70,17 +69,14 @@ struct vertex_header { float clip[4]; - float data[][4]; /* Note variable size */ + /* This will probably become float (*data)[4] soon: + */ + float data[][4]; }; /* NOTE: It should match vertex_id size above */ #define UNDEFINED_VERTEX_ID 0xffff -/* XXX This is too large */ -#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) -#define MAX_VERTEX_ALLOCATION ((MAX_VERTEX_SIZE + 0x0f) & ~0x0f) - - /** * Basic info for a point/line/triangle primitive. @@ -95,41 +91,6 @@ struct prim_header { -struct draw_context; - -/** - * Base class for all primitive drawing stages. - */ -struct draw_stage -{ - struct draw_context *draw; /**< parent context */ - - struct draw_stage *next; /**< next stage in pipeline */ - - struct vertex_header **tmp; /**< temp vert storage, such as for clipping */ - unsigned nr_tmps; - - void (*point)( struct draw_stage *, - struct prim_header * ); - - void (*line)( struct draw_stage *, - struct prim_header * ); - - void (*tri)( struct draw_stage *, - struct prim_header * ); - - void (*flush)( struct draw_stage *, - unsigned flags ); - - void (*reset_stipple_counter)( struct draw_stage * ); - - void (*destroy)( struct draw_stage * ); -}; - - - -struct vbuf_render; - #define PT_SHADE 0x1 #define PT_CLIPTEST 0x2 @@ -161,6 +122,12 @@ struct draw_context struct draw_stage *wide_line; struct draw_stage *wide_point; struct draw_stage *rasterize; + + float wide_point_threshold; /**< convert pnts to tris if larger than this */ + float wide_line_threshold; /**< convert lines to tris if wider than this */ + boolean line_stipple; /**< do line stipple? */ + boolean point_sprite; /**< convert points to quads for sprites? */ + } pipeline; @@ -225,10 +192,6 @@ struct draw_context float plane[12][4]; unsigned nr_planes; - float wide_point_threshold; /**< convert pnts to tris if larger than this */ - float wide_line_threshold; /**< convert lines to tris if wider than this */ - boolean line_stipple; /**< do line stipple? */ - boolean point_sprite; /**< convert points to quads for sprites? */ boolean use_sse; /* If a prim stage introduces new vertex attributes, they'll be stored here @@ -252,44 +215,29 @@ struct draw_context -extern struct draw_stage *draw_unfilled_stage( struct draw_context *context ); -extern struct draw_stage *draw_twoside_stage( struct draw_context *context ); -extern struct draw_stage *draw_offset_stage( struct draw_context *context ); -extern struct draw_stage *draw_clip_stage( struct draw_context *context ); -extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); -extern struct draw_stage *draw_cull_stage( struct draw_context *context ); -extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); -extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); -extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); -extern struct draw_stage *draw_validate_stage( struct draw_context *context ); - - -extern void draw_free_temp_verts( struct draw_stage *stage ); - -extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); extern void draw_reset_vertex_ids( struct draw_context *draw ); -extern int draw_vertex_cache_check_space( struct draw_context *draw, - unsigned nr_verts ); -extern void draw_vertex_cache_invalidate( struct draw_context *draw ); -extern void draw_vertex_cache_unreference( struct draw_context *draw ); -extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ); +/******************************************************************************* + * Vertex processing (was passthrough) code: + */ +boolean draw_pt_init( struct draw_context *draw ); +void draw_pt_destroy( struct draw_context *draw ); +void draw_pt_reset_vertex_ids( struct draw_context *draw ); -extern void draw_update_vertex_fetch( struct draw_context *draw ); +/******************************************************************************* + * Primitive processing (pipelnie) code: + */ -extern boolean draw_need_pipeline(const struct draw_context *draw, - unsigned prim ); +boolean draw_pipeline_init( struct draw_context *draw ); +void draw_pipeline_destroy( struct draw_context *draw ); +boolean draw_need_pipeline(const struct draw_context *draw, + unsigned prim ); -/* Passthrough mode (second attempt): - */ -boolean draw_pt_init( struct draw_context *draw ); -void draw_pt_destroy( struct draw_context *draw ); -void draw_pt_reset_vertex_ids( struct draw_context *draw ); #define DRAW_FLUSH_STATE_CHANGE 0x8 #define DRAW_FLUSH_BACKEND 0x10 @@ -301,36 +249,6 @@ boolean draw_get_edgeflag( struct draw_context *draw, unsigned idx ); -/** - * Get a writeable copy of a vertex. - * \param stage drawing stage info - * \param vert the vertex to copy (source) - * \param idx index into stage's tmp[] array to put the copy (dest) - * \return pointer to the copied vertex - */ -static INLINE struct vertex_header * -dup_vert( struct draw_stage *stage, - const struct vertex_header *vert, - unsigned idx ) -{ - struct vertex_header *tmp = stage->tmp[idx]; - const uint vsize = sizeof(struct vertex_header) - + stage->draw->num_vs_outputs * 4 * sizeof(float); - memcpy(tmp, vert, vsize); - tmp->vertex_id = UNDEFINED_VERTEX_ID; - return tmp; -} - -static INLINE float -dot4(const float *a, const float *b) -{ - float result = (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); - - return result; -} #endif /* DRAW_PRIVATE_H */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index f153a3ee2cc..965269251f7 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -165,21 +165,3 @@ draw_arrays(struct draw_context *draw, unsigned prim, draw_pt_arrays(draw, prim, start, count); } - -/* Revamp me please: - */ -void draw_do_flush( struct draw_context *draw, unsigned flags ) -{ - if (!draw->flushing) - { - draw->flushing = TRUE; - - if (flags >= DRAW_FLUSH_STATE_CHANGE) { - draw->pipeline.first->flush( draw->pipeline.first, flags ); - draw->pipeline.first = draw->pipeline.validate; - draw->reduced_prim = ~0; - } - - draw->flushing = FALSE; - } -} diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c index 1a9a3adb03d..922344e4488 100644 --- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_pipeline.c @@ -35,6 +35,7 @@ #include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_pipe.h" static void do_point( struct draw_context *draw, const char *v0 ) diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index b3ecec6ece7..581f044dae1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -44,6 +44,17 @@ struct pt_post_vs { +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} + + + static INLINE unsigned compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) { diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 168036eee84..a42adaafb1a 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -72,6 +72,4 @@ draw_compute_vertex_size(struct vertex_info *vinfo) assert(0); } } - - assert(vinfo->size * 4 <= MAX_VERTEX_SIZE); } diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 58a5854f0d9..4bef21619c2 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -142,7 +142,7 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, */ i915->draw = draw_create(); assert(i915->draw); - if (GETENV("I915_VBUF")) { + if (!GETENV("I915_NO_VBUF")) { draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); } else { diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index b6fb0a6d88f..9ffa4601380 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" #include "pipe/p_util.h" #include "i915_context.h" @@ -78,9 +78,6 @@ emit_hw_vertex( struct i915_context *i915, const uint j = vinfo->src_index[i]; const float *attrib = vertex->data[j]; switch (vinfo->emit[i]) { - case EMIT_OMIT: - /* no-op */ - break; case EMIT_1F: OUT_BATCH( fui(attrib[0]) ); count++; diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 0ddb06764af..feb35d492a3 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -39,7 +39,7 @@ #include "sp_setup.h" #include "sp_state.h" #include "sp_prim_setup.h" -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" #include "draw/draw_vertex.h" #include "pipe/p_util.h" diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c index 605bfee7434..1b50792bd1c 100644 --- a/src/mesa/state_tracker/st_cb_feedback.c +++ b/src/mesa/state_tracker/st_cb_feedback.c @@ -56,7 +56,7 @@ #include "cso_cache/cso_cache.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" /** diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 2ed228778e0..3cb7b68bea5 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -46,7 +46,7 @@ #include "st_cb_rasterpos.h" #include "st_draw.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" #include "shader/prog_instruction.h" #include "vbo/vbo.h" -- cgit v1.2.3 From e7bdf047f28ea9b928b3890c37d7d20db1e67e96 Mon Sep 17 00:00:00 2001 From: Peter Winters <peter@whoei.org> Date: Sun, 20 Apr 2008 16:07:04 +0200 Subject: nv10: fix random stuff --- src/gallium/drivers/nv10/nv10_context.c | 36 ++++++++++++++++++++++++++--- src/gallium/drivers/nv10/nv10_context.h | 5 +++- src/gallium/drivers/nv10/nv10_prim_vbuf.c | 4 ++-- src/gallium/drivers/nv10/nv10_state.c | 20 +++++++++------- src/gallium/drivers/nv10/nv10_state_emit.c | 37 +++++++++++++++++------------- 5 files changed, 72 insertions(+), 30 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 42c496b9590..bbd307d5d98 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -12,6 +12,8 @@ nv10_flush(struct pipe_context *pipe, unsigned flags, { struct nv10_context *nv10 = nv10_context(pipe); + draw_flush(nv10->draw); + FIRE_RING(fence); } @@ -31,6 +33,7 @@ static void nv10_init_hwctx(struct nv10_context *nv10) struct nv10_screen *screen = nv10->screen; struct nouveau_winsys *nvws = screen->nvws; int i; + float projectionmatrix[16]; BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); OUT_RING (screen->sync->handle); @@ -93,13 +96,21 @@ static void nv10_init_hwctx(struct nv10_context *nv10) BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2); OUT_RING (0); OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_RC_OUT_ALPHA(0), 6); + + BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12); + OUT_RING (0x30141010); + OUT_RING (0); + OUT_RING (0x20040000); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); OUT_RING (0x00000c00); OUT_RING (0); OUT_RING (0x00000c00); OUT_RING (0x18000000); - OUT_RING (0x300c0000); - OUT_RING (0x00001c80); + OUT_RING (0x300e0300); + OUT_RING (0x0c091c80); + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); OUT_RING (0); BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2); @@ -219,6 +230,25 @@ static void nv10_init_hwctx(struct nv10_context *nv10) BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1); OUT_RING (1); + memset(projectionmatrix, 0, sizeof(projectionmatrix)); + BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16); + projectionmatrix[0*4+0] = 1.0; + projectionmatrix[1*4+1] = 1.0; + projectionmatrix[2*4+2] = 1.0; + projectionmatrix[3*4+3] = 1.0; + for (i=0;i<16;i++) { + OUT_RINGf (projectionmatrix[i]); + } + + BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); + OUT_RING (0.0); + OUT_RINGf (16777216.0); + + BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4); + OUT_RINGf (-2048.0); + OUT_RINGf (-2048.0); + OUT_RINGf (16777215.0 * 0.5); + OUT_RING (0); FIRE_RING (NULL); } diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index b80f36ad34c..1b794c18724 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -65,7 +65,10 @@ struct nv10_context { struct pipe_viewport_state *viewport; struct pipe_scissor_state *scissor; struct pipe_framebuffer_state *framebuffer; - struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + + //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + float *constbuf[PIPE_SHADER_TYPES][32][4]; + struct vertex_info vertex_info; struct { diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index 412ee9a23f9..930536b9468 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -205,8 +205,8 @@ nv10_vbuf_render_create( struct nv10_context *nv10 ) nv10_render->nv10 = nv10; - nv10_render->base.max_vertex_buffer_bytes = 1024*1024; - nv10_render->base.max_indices = 64*1024; + nv10_render->base.max_vertex_buffer_bytes = 16*1024; + nv10_render->base.max_indices = 1024; nv10_render->base.get_vertex_info = nv10_vbuf_render_get_vertex_info; nv10_render->base.allocate_vertices = nv10_vbuf_render_allocate_vertices; nv10_render->base.set_primitive = nv10_vbuf_render_set_primitive; diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 1d9a202dd7c..4dcb9a31ab1 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -459,14 +459,18 @@ nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, const struct pipe_constant_buffer *buf ) { struct nv10_context *nv10 = nv10_context(pipe); + struct pipe_winsys *ws = pipe->winsys; - if (shader == PIPE_SHADER_VERTEX) { - nv10->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; - nv10->dirty |= NV10_NEW_VERTPROG; - } else - if (shader == PIPE_SHADER_FRAGMENT) { - nv10->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; - nv10->dirty |= NV10_NEW_FRAGPROG; + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + if (buf) { + void *mapped; + if (buf->size && (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + { + memcpy(nv10->constbuf[shader], mapped, buf->size); + ws->buffer_unmap(ws, buf->buffer); + } } } @@ -507,7 +511,7 @@ nv10_set_viewport_state(struct pipe_context *pipe, nv10->viewport = (struct pipe_viewport_state*)vpt; - draw_set_viewport_state(nv10->draw, &nv10->viewport); + draw_set_viewport_state(nv10->draw, nv10->viewport); nv10->dirty |= NV10_NEW_VIEWPORT; } diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index edc194588d1..b7ae57d500a 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -63,29 +63,34 @@ static void nv10_state_emit_dsa(struct nv10_context* nv10) { struct nv10_depth_stencil_alpha_state *d = nv10->dsa; - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 3); - OUT_RINGp ((uint32_t *)&d->depth, 3); + BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (d->depth.func); + + BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (d->depth.write_enable); + + BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (d->depth.test_enable); + +#if 0 BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); OUT_RING (d->stencil.enable); BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 3); - OUT_RINGp ((uint32_t *)&d->alpha.enabled, 3); +#endif + + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (d->alpha.enabled); + + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (d->alpha.func); + + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1); + OUT_RING (d->alpha.ref); } static void nv10_state_emit_viewport(struct nv10_context* nv10) { - struct pipe_viewport_state *vpt = nv10->viewport; - -/* OUT_RINGf (vpt->translate[0]); - OUT_RINGf (vpt->translate[1]); - OUT_RINGf (vpt->translate[2]); - OUT_RINGf (vpt->translate[3]);*/ - BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4); - OUT_RINGf (vpt->scale[0]); - OUT_RINGf (vpt->scale[1]); - OUT_RINGf (vpt->scale[2]); - OUT_RINGf (vpt->scale[3]); } static void nv10_state_emit_scissor(struct nv10_context* nv10) @@ -100,7 +105,7 @@ static void nv10_state_emit_scissor(struct nv10_context* nv10) static void nv10_state_emit_framebuffer(struct nv10_context* nv10) { struct pipe_framebuffer_state* fb = nv10->framebuffer; - struct pipe_surface *rt, *zeta; + struct pipe_surface *rt, *zeta = NULL; uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; -- cgit v1.2.3 From b20acef90695d6e5975f538b6e9cb812b05f0cf6 Mon Sep 17 00:00:00 2001 From: Peter Winters <peter@whoei.org> Date: Sun, 20 Apr 2008 22:48:50 +0200 Subject: nv10: enable viewport clipping --- src/gallium/drivers/nv10/nv10_state_emit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index b7ae57d500a..41422c88825 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -161,8 +161,8 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) OUT_RING ((h << 16) | 0); OUT_RING (rt_format); BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0); - OUT_RING (((h - 1) << 16) | 0); + OUT_RING (((w - 1) << 16) | 0 | 0x08000800); + OUT_RING (((h - 1) << 16) | 0 | 0x08000800); } static void nv10_vertex_layout(struct nv10_context *nv10) -- cgit v1.2.3 From 09b668615852eb28cb6289baf84faaf3b6ccc3c2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Tue, 22 Apr 2008 10:57:06 +0100 Subject: softpipe: make NUM_ENTRIES 32 so that div/mods are easier --- src/gallium/drivers/softpipe/sp_tile_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 19f71887e7e..edafd93d8b8 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -39,7 +39,7 @@ #include "sp_surface.h" #include "sp_tile_cache.h" -#define NUM_ENTRIES 30 +#define NUM_ENTRIES 32 /** XXX move these */ -- cgit v1.2.3 From a5b87f249ef79b1a8d8b9dbe72879b7ac9eb133c Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Tue, 22 Apr 2008 11:01:41 +0100 Subject: softpipe: implement SP_NO_RAST env var --- src/gallium/drivers/softpipe/sp_clear.c | 3 +++ src/gallium/drivers/softpipe/sp_context.c | 3 +++ src/gallium/drivers/softpipe/sp_context.h | 2 ++ src/gallium/drivers/softpipe/sp_setup.c | 10 ++++++++++ 4 files changed, 18 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index 39aed151c73..12367068917 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -49,6 +49,9 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, struct softpipe_context *softpipe = softpipe_context(pipe); uint i; + if (softpipe->no_rast) + return; + #if 0 softpipe_update_derived(softpipe); /* not needed?? */ #endif diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 8c84ddbe196..200fb415acf 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -219,6 +219,9 @@ softpipe_create( struct pipe_screen *screen, assert(softpipe->draw); softpipe->setup = sp_draw_render_stage(softpipe); + if (GETENV( "SP_NO_RAST" ) != NULL) + softpipe->no_rast = TRUE; + if (GETENV( "SP_VBUF" ) != NULL) { sp_init_vbuf(softpipe); } diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 0e1d5e561d9..b3e2b2e4355 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -86,6 +86,8 @@ struct softpipe_context { unsigned num_vertex_elements; unsigned num_vertex_buffers; + boolean no_rast; + /* Counter for occlusion queries. Note this supports overlapping * queries. */ diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 813d7031083..0bf2b2dc3ed 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -726,6 +726,9 @@ void setup_tri( struct setup_context *setup, { float det = calc_det(v0, v1, v2); + if (setup->softpipe->no_rast) + return; + /* debug_printf("%s\n", __FUNCTION__ ); */ @@ -934,6 +937,9 @@ setup_line(struct setup_context *setup, int dy = y1 - y0; int xstep, ystep; + if (setup->softpipe->no_rast) + return; + if (dx == 0 && dy == 0) return; @@ -1052,6 +1058,10 @@ setup_point( struct setup_context *setup, const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; + + if (softpipe->no_rast) + return; + /* For points, all interpolants are constant-valued. * However, for point sprites, we'll need to setup texcoords appropriately. * XXX: which coefficients are the texcoords??? -- cgit v1.2.3 From f9b1d47d652778012fd35552ffc51717ac0b6f79 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Tue, 22 Apr 2008 15:15:21 +0100 Subject: softpipe: do something sensible on an error path, squash warning --- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 34da6356d70..5b63f979977 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -651,7 +651,9 @@ shadow_compare(uint compare_func, k = 0; break; default: + k = 0; assert(0); + break; } rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; -- cgit v1.2.3 From 1f0f029ba6f22ef4ada01fcdc153da91571a7963 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Tue, 22 Apr 2008 20:32:22 +0100 Subject: softpipe: squash warning --- src/gallium/drivers/softpipe/sp_state_fs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 2921066ce36..525cea9eed9 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -82,7 +82,6 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) void softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { - struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state = fs; assert(fs != softpipe->fs); -- cgit v1.2.3 From 4ebede8c7f43a83adfc73dcca783de2e9efcd9ba Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Tue, 22 Apr 2008 21:17:21 +0100 Subject: Revert "softpipe: squash warning" This reverts commit 1f0f029ba6f22ef4ada01fcdc153da91571a7963. Which broke the debug build. --- src/gallium/drivers/softpipe/sp_state_fs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 525cea9eed9..2921066ce36 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -82,6 +82,7 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) void softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { + struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state = fs; assert(fs != softpipe->fs); -- cgit v1.2.3 From 465bc9473a0122d8f66ac1b4a69459e9bd889799 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 22 Apr 2008 16:15:49 -0600 Subject: gallium: move the vertex print/debug code --- src/gallium/drivers/softpipe/sp_setup.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 0bf2b2dc3ed..7df8fc5f679 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -299,13 +299,6 @@ static boolean setup_sort_vertices( struct setup_context *setup, const float (*v1)[4], const float (*v2)[4] ) { -#if DEBUG_VERTS - debug_printf("Triangle:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); - print_vertex(setup, v2); -#endif - setup->vprovoke = v2; /* determine bottom to top order of vertices */ @@ -738,7 +731,12 @@ void setup_tri( struct setup_context *setup, setup->numFragsWritten = 0; #endif - +#if DEBUG_VERTS + debug_printf("Triangle:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); + print_vertex(setup, v2); +#endif if (cull_tri( setup, det )) return; -- cgit v1.2.3 From a3265958994ce4107da2a3954c04b496e29cd8aa Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 23 Apr 2008 12:41:42 +1000 Subject: nouveau: fix build --- src/gallium/drivers/nv30/nv30_draw.c | 2 +- src/gallium/drivers/nv40/nv40_draw.c | 2 +- src/gallium/drivers/nv50/nv50_draw.c | 2 +- src/gallium/drivers/nv50/nv50_state.c | 1 - 4 files changed, 3 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c index 8ec0835225b..aeeaf58f20a 100644 --- a/src/gallium/drivers/nv30/nv30_draw.c +++ b/src/gallium/drivers/nv30/nv30_draw.c @@ -1,4 +1,4 @@ -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" #include "pipe/p_util.h" #include "nv30_context.h" diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index dd5cc8fc998..a9a939af0c0 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -3,7 +3,7 @@ #include "draw/draw_context.h" #include "draw/draw_vertex.h" -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" #include "nv40_context.h" #define NV40_SHADER_NO_FUCKEDNESS diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c index 790408c6df8..d185d999502 100644 --- a/src/gallium/drivers/nv50/nv50_draw.c +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -1,4 +1,4 @@ -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" #include "pipe/p_util.h" #include "nv50_context.h" diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index a614ea03358..f42bae0c28d 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -113,7 +113,6 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_rasterizer_stateobj *rso = CALLOC_STRUCT(nv50_rasterizer_stateobj); - unsigned i; /*XXX: ignored * - light_twosize -- cgit v1.2.3 From 76d39f0c19ee0673b65d6ad09ab338c8b750251a Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 23 Apr 2008 11:22:18 -0600 Subject: gallium: more vertex debug code --- src/gallium/drivers/softpipe/sp_setup.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 7df8fc5f679..df7be01fcd5 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -719,6 +719,13 @@ void setup_tri( struct setup_context *setup, { float det = calc_det(v0, v1, v2); +#if DEBUG_VERTS + debug_printf("Setup triangle:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); + print_vertex(setup, v2); +#endif + if (setup->softpipe->no_rast) return; @@ -731,13 +738,6 @@ void setup_tri( struct setup_context *setup, setup->numFragsWritten = 0; #endif -#if DEBUG_VERTS - debug_printf("Triangle:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); - print_vertex(setup, v2); -#endif - if (cull_tri( setup, det )) return; @@ -935,6 +935,12 @@ setup_line(struct setup_context *setup, int dy = y1 - y0; int xstep, ystep; +#if DEBUG_VERTS + debug_printf("Setup line:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); +#endif + if (setup->softpipe->no_rast) return; @@ -1056,6 +1062,10 @@ setup_point( struct setup_context *setup, const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; +#if DEBUG_VERTS + debug_printf("Setup point:\n"); + print_vertex(setup, v0); +#endif if (softpipe->no_rast) return; -- cgit v1.2.3 From 2221cb9f74ceee826efb09840188711f408e5428 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 23 Apr 2008 11:23:12 -0600 Subject: gallium: fix broken PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE --- src/gallium/drivers/softpipe/sp_quad_blend.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 802472df45f..ca266ec8e6d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -294,11 +294,12 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: { const float *alpha = quadColor[3]; - float diff[4]; + float diff[4], temp[4]; VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(source[0], alpha, diff); /* R */ - VEC4_MIN(source[1], alpha, diff); /* G */ - VEC4_MIN(source[2], alpha, diff); /* B */ + VEC4_MIN(temp, alpha, diff); + VEC4_MUL(source[0], quadColor[0], temp); /* R */ + VEC4_MUL(source[1], quadColor[1], temp); /* G */ + VEC4_MUL(source[2], quadColor[2], temp); /* B */ } break; case PIPE_BLENDFACTOR_CONST_COLOR: @@ -426,12 +427,8 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - { - const float *alpha = quadColor[3]; - float diff[4]; - VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(source[3], alpha, diff); /* A */ - } + /* multiply alpha by 1.0 */ + VEC4_COPY(source[3], quadColor[3]); /* A */ break; case PIPE_BLENDFACTOR_CONST_COLOR: /* fall-through */ -- cgit v1.2.3 From 809bc8f9ad3667c297afa2652c6688f98d6451b0 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 23 Apr 2008 11:24:42 -0600 Subject: gallium: move logicop test outside of loop --- src/gallium/drivers/softpipe/sp_quad_blend.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index ca266ec8e6d..74c6bff84ae 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -232,6 +232,11 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_context *softpipe = qs->softpipe; uint cbuf; + if (softpipe->blend->logicop_enable) { + logicop_quad(qs, quad); + return; + } + /* loop over colorbuffer outputs */ for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; @@ -242,11 +247,6 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) float (*quadColor)[4] = quad->outputs.color[cbuf]; uint i, j; - if (softpipe->blend->logicop_enable) { - logicop_quad(qs, quad); - return; - } - /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); -- cgit v1.2.3 From 7342688286cc3b7c938af2dfeac22df4fa8c8464 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Wed, 23 Apr 2008 22:38:49 +0200 Subject: nv30: add stuff to init swtnl --- src/gallium/drivers/nv30/nv30_context.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index d38713e98ad..57b0e71dad7 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -63,7 +63,12 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) nv30_init_state_functions(nv30); nv30_init_miptree_functions(nv30); + /* Create, configure, and install fallback swtnl path */ nv30->draw = draw_create(); + draw_wide_point_threshold(nv30->draw, 9999999.0); + draw_wide_line_threshold(nv30->draw, 9999999.0); + draw_enable_line_stipple(nv30->draw, FALSE); + draw_enable_point_sprites(nv30->draw, FALSE); draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30)); return &nv30->pipe; -- cgit v1.2.3 From cefa367b5d909f26d943101efb042562de778c5f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Thu, 24 Apr 2008 17:07:45 +0200 Subject: i915: Fix for wrong texture in texobj with VBUF --- src/gallium/drivers/i915simple/i915_state.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 24d31ad740e..3d94b523660 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -549,6 +549,9 @@ static void i915_set_sampler_textures(struct pipe_context *pipe, !memcmp(i915->texture, texture, num * sizeof(struct pipe_texture *))) return; + /* Fixes wrong texture in texobj with VBUF */ + draw_flush(i915->draw); + for (i = 0; i < num; i++) pipe_texture_reference((struct pipe_texture **) &i915->texture[i], texture[i]); -- cgit v1.2.3 From 98165318621cd3e01919b2b9ff140ce7a4e12beb Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 24 Apr 2008 15:59:08 -0600 Subject: gallium: plug in softpipe_set_edgeflags() function --- src/gallium/drivers/softpipe/sp_context.c | 2 ++ src/gallium/drivers/softpipe/sp_draw_arrays.c | 9 +++++++++ src/gallium/drivers/softpipe/sp_state.h | 3 +++ 3 files changed, 14 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 200fb415acf..f2ce0fdc6e9 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -178,6 +178,8 @@ softpipe_create( struct pipe_screen *screen, softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; + softpipe->pipe.set_edgeflags = softpipe_set_edgeflags; + softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 778291dded7..6c58f9909da 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -170,3 +170,12 @@ softpipe_draw_elements(struct pipe_context *pipe, return TRUE; } + + +void +softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) +{ + struct softpipe_context *sp = softpipe_context(pipe); + draw_set_edgeflags(sp->draw, edgeflags); +} + diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 6e6501f5bc4..45056502b8e 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -171,6 +171,9 @@ boolean softpipe_draw_elements(struct pipe_context *pipe, unsigned indexSize, unsigned mode, unsigned start, unsigned count); +void +softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); + void softpipe_map_surfaces(struct softpipe_context *sp); -- cgit v1.2.3 From 2193578851b3b5a99c078b28187cf3158f4218f6 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 27 Apr 2008 18:12:14 +1000 Subject: nouveau: stub set_edgeflags for all nv pipe drivers --- src/gallium/drivers/nv10/nv10_context.c | 6 ++++++ src/gallium/drivers/nv30/nv30_context.c | 7 +++++++ src/gallium/drivers/nv40/nv40_context.c | 6 ++++++ src/gallium/drivers/nv50/nv50_context.c | 7 +++++++ 4 files changed, 26 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index bbd307d5d98..a25c082a5d1 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -253,6 +253,11 @@ static void nv10_init_hwctx(struct nv10_context *nv10) FIRE_RING (NULL); } +static void +nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + struct pipe_context * nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -274,6 +279,7 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) nv10->pipe.winsys = ws; nv10->pipe.screen = pscreen; nv10->pipe.destroy = nv10_destroy; + nv10->pipe.set_edgeflags = nv10_set_edgeflags; nv10->pipe.draw_arrays = nv10_draw_arrays; nv10->pipe.draw_elements = nv10_draw_elements; nv10->pipe.clear = nv10_clear; diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 57b0e71dad7..4b0892971ec 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -32,6 +32,12 @@ nv30_destroy(struct pipe_context *pipe) FREE(nv30); } + +static void +nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + struct pipe_context * nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -53,6 +59,7 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) nv30->pipe.winsys = ws; nv30->pipe.screen = pscreen; nv30->pipe.destroy = nv30_destroy; + nv30->pipe.set_edgeflags = nv30_set_edgeflags; nv30->pipe.draw_arrays = nv30_draw_arrays; nv30->pipe.draw_elements = nv30_draw_elements; nv30->pipe.clear = nv30_clear; diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index f9c93f7a2d7..f1d4f3c76f1 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -32,6 +32,11 @@ nv40_destroy(struct pipe_context *pipe) FREE(nv40); } +static void +nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + struct pipe_context * nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -53,6 +58,7 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) nv40->pipe.winsys = ws; nv40->pipe.screen = pscreen; nv40->pipe.destroy = nv40_destroy; + nv40->pipe.set_edgeflags = nv40_set_edgeflags; nv40->pipe.draw_arrays = nv40_draw_arrays; nv40->pipe.draw_elements = nv40_draw_elements; nv40->pipe.clear = nv40_clear; diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index e822d863945..6eb1878b847 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -24,6 +24,12 @@ nv50_destroy(struct pipe_context *pipe) free(nv50); } + +static void +nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + struct pipe_context * nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -42,6 +48,7 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) nv50->pipe.destroy = nv50_destroy; + nv50->pipe.set_edgeflags = nv50_set_edgeflags; nv50->pipe.draw_arrays = nv50_draw_arrays; nv50->pipe.draw_elements = nv50_draw_elements; nv50->pipe.clear = nv50_clear; -- cgit v1.2.3 From 58b1bcaa094ed07a54bd7e4cdbddbcdcf1f629a7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 28 Apr 2008 15:42:19 +1000 Subject: nv40: do full swtnl fallback when edge flags present. This isn't necessary, with some effort we can do this on the hw. However, until I encounter something "real" that uses them there's not a lot of point. --- src/gallium/drivers/nv40/nv40_context.c | 6 ------ src/gallium/drivers/nv40/nv40_context.h | 1 + src/gallium/drivers/nv40/nv40_state.c | 11 +++++++++++ src/gallium/drivers/nv40/nv40_state_emit.c | 1 + src/gallium/drivers/nv40/nv40_vbo.c | 5 +++++ 5 files changed, 18 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index f1d4f3c76f1..f9c93f7a2d7 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -32,11 +32,6 @@ nv40_destroy(struct pipe_context *pipe) FREE(nv40); } -static void -nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) -{ -} - struct pipe_context * nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -58,7 +53,6 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) nv40->pipe.winsys = ws; nv40->pipe.screen = pscreen; nv40->pipe.destroy = nv40_destroy; - nv40->pipe.set_edgeflags = nv40_set_edgeflags; nv40->pipe.draw_arrays = nv40_draw_arrays; nv40->pipe.draw_elements = nv40_draw_elements; nv40->pipe.clear = nv40_clear; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 2f10540ff0c..24e8cd23379 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -155,6 +155,7 @@ struct nv40_context { unsigned vtxbuf_nr; struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; unsigned vtxelt_nr; + const unsigned *edgeflags; }; static INLINE struct nv40_context * diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 997beca883a..2d921d2b8a4 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -680,6 +680,16 @@ nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, nv40->draw_dirty |= NV40_NEW_ARRAYS; } +static void +nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->edgeflags = bitfield; + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + void nv40_init_state_functions(struct nv40_context *nv40) { @@ -719,6 +729,7 @@ nv40_init_state_functions(struct nv40_context *nv40) nv40->pipe.set_scissor_state = nv40_set_scissor_state; nv40->pipe.set_viewport_state = nv40_set_viewport_state; + nv40->pipe.set_edgeflags = nv40_set_edgeflags; nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers; nv40->pipe.set_vertex_elements = nv40_set_vertex_elements; } diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 864dfc2e0c3..ab88dc416e5 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -167,6 +167,7 @@ nv40_state_validate_swtnl(struct nv40_context *nv40) draw_set_viewport_state(draw, &nv40->viewport); if (nv40->draw_dirty & NV40_NEW_ARRAYS) { + draw_set_edgeflags(draw, nv40->edgeflags); draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt); } diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 06374184b12..e5f9bd5668a 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -488,6 +488,11 @@ nv40_vbo_validate(struct nv40_context *nv40) unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; + if (nv40->edgeflags) { + nv40->fallback_swtnl |= NV40_NEW_ARRAYS; + return FALSE; + } + vtxbuf = so_new(20, 18); so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); vtxfmt = so_new(17, 0); -- cgit v1.2.3 From 58d3dff0d3115ddd5397b7f77b5bcf4f9ca616b6 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@ubuntu-vbox.(none)> Date: Mon, 28 Apr 2008 18:50:27 +0200 Subject: gallium: Generate SSE code to swizzle and unswizzle vs inputs and outputs. Change SSE_SWIZZLES #define to 0 to disable it. --- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 52 ++++++-- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 14 ++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 + src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 142 ++++++++++++++++++++- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 4 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- 7 files changed, 204 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index f0763dad8d7..4ec20493c4c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -109,9 +109,10 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vertex_shader; unsigned opt = fpme->opt; + unsigned alloc_count = align_int( fetch_count, 4 ); struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count); + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); if (!pipeline_verts) { /* Not much we can do here - just skip the rendering. diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index b1e9f671147..07f85bc448f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -47,14 +47,29 @@ #include "tgsi/util/tgsi_parse.h" #define SSE_MAX_VERTICES 4 +#define SSE_SWIZZLES 1 +#if SSE_SWIZZLES +typedef void (XSTDCALL *codegen_function) ( + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], + struct tgsi_exec_vector *temporary, + float (*immediates)[4], + const float (*aos_input)[4], + uint num_inputs, + uint input_stride, + float (*aos_output)[4], + uint num_outputs, + uint output_stride ); +#else typedef void (XSTDCALL *codegen_function) ( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, float (*constant)[4], struct tgsi_exec_vector *temporary, float (*immediates)[4] ); - +#endif struct draw_sse_vertex_shader { struct draw_vertex_shader base; @@ -91,12 +106,31 @@ vs_sse_run_linear( struct draw_vertex_shader *base, { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = shader->machine; - unsigned int i, j; - unsigned slot; + unsigned int i; for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); +#if SSE_SWIZZLES + /* run compiled shader + */ + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + shader->immediates, + input, + base->info.num_inputs, + input_stride, + output, + base->info.num_outputs, + output_stride ); + + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); +#else + unsigned int j, slot; + /* Swizzle inputs. */ for (j = 0; j < max_vertices; j++) { @@ -105,10 +139,10 @@ vs_sse_run_linear( struct draw_vertex_shader *base, machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; - } + } input = (const float (*)[4])((const char *)input + input_stride); - } + } /* run compiled shader */ @@ -118,7 +152,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base, machine->Temps, shader->immediates); - /* Unswizzle all output results. */ for (j = 0; j < max_vertices; j++) { @@ -127,10 +160,11 @@ vs_sse_run_linear( struct draw_vertex_shader *base, output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } + } output = (float (*)[4])((char *)output + output_stride); - } + } +#endif } } @@ -176,7 +210,7 @@ draw_create_vs_sse(struct draw_context *draw, x86_init_func( &vs->sse2_program ); if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, - &vs->sse2_program, vs->immediates )) + &vs->sse2_program, vs->immediates, SSE_SWIZZLES )) goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index e6cbe9967fa..d7e22305573 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -853,6 +853,20 @@ void sse_shufps( struct x86_function *p, emit_1ub(p, shuf); } +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x15 ); + emit_modrm( p, dst, src ); +} + +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x14 ); + emit_modrm( p, dst, src ); +} + void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 1962b07bc5b..ad79b1facf8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -203,6 +203,8 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, unsigned char shuf ); +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 9061e00b635..86ca16c246b 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -1788,7 +1788,6 @@ emit_instruction( break; case TGSI_OPCODE_RET: - case TGSI_OPCODE_END: #ifdef WIN32 emit_retw( func, 16 ); #else @@ -1796,6 +1795,9 @@ emit_instruction( #endif break; + case TGSI_OPCODE_END: + break; + case TGSI_OPCODE_SSG: return 0; break; @@ -2027,6 +2029,127 @@ emit_declaration( } } +static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_input; + struct x86_reg aos_input; + struct x86_reg num_inputs; + struct x86_reg temp; + unsigned char *inner_loop; + + soa_input = x86_make_reg( file_REG32, reg_AX ); + aos_input = x86_make_reg( file_REG32, reg_BX ); + num_inputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, soa_input, get_argument( soa + 1 ) ); + x86_mov( func, aos_input, get_argument( aos + 1 ) ); + x86_mov( func, num_inputs, get_argument( num + 1 ) ); + + inner_loop = x86_get_label( func ); + + x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_push( func, aos_input ); + sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_pop( func, aos_input ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); + sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); + sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); + sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); + + sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); + sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); + sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); + sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); + + /* Advance to next input */ + x86_mov_reg_imm( func, temp, 16 ); + x86_add( func, aos_input, temp ); + x86_mov_reg_imm( func, temp, 64 ); + x86_add( func, soa_input, temp ); + x86_dec( func, num_inputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); +} + +static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_output; + struct x86_reg aos_output; + struct x86_reg num_outputs; + struct x86_reg temp; + unsigned char *inner_loop; + + soa_output = x86_make_reg( file_REG32, reg_AX ); + aos_output = x86_make_reg( file_REG32, reg_BX ); + num_outputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, soa_output, get_argument( soa + 1 ) ); + x86_mov( func, aos_output, get_argument( aos + 1 ) ); + x86_mov( func, num_outputs, get_argument( num + 1 ) ); + + inner_loop = x86_get_label( func ); + + sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); + sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); + sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); + sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); + sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); + sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); + sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); + + x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_push( func, aos_output ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_pop( func, aos_output ); + + /* Advance to next output */ + x86_mov_reg_imm( func, temp, 16 ); + x86_add( func, aos_output, temp ); + x86_mov_reg_imm( func, temp, 64 ); + x86_add( func, soa_output, temp ); + x86_dec( func, num_outputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); +} /** * Translate a TGSI vertex/fragment shader to SSE2 code. @@ -2048,7 +2171,8 @@ unsigned tgsi_emit_sse2( const struct tgsi_token *tokens, struct x86_function *func, - float (*immediates)[4]) + float (*immediates)[4], + boolean do_swizzles ) { struct tgsi_parse_context parse; boolean instruction_phase = FALSE; @@ -2089,6 +2213,9 @@ tgsi_emit_sse2( else { assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + if (do_swizzles) + aos_to_soa( func, 5, 0, 6, 7 ); + x86_mov( func, get_input_base(), @@ -2176,6 +2303,17 @@ tgsi_emit_sse2( } } + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + if (do_swizzles) + soa_to_aos( func, 8, 1, 9, 10 ); + } + +#ifdef WIN32 + emit_retw( func, 16 ); +#else + emit_ret( func ); +#endif + tgsi_parse_free( &parse ); return ok; diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index 063287dc5e9..e66d1152836 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -12,8 +12,8 @@ unsigned tgsi_emit_sse2( const struct tgsi_token *tokens, struct x86_function *function, - float (*immediates)[4] - ); + float (*immediates)[4], + boolean do_swizzles ); #if defined __cplusplus } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index f857d26143e..4d569e1e22f 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -133,7 +133,7 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, x86_init_func( &shader->sse2_program ); if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program, - shader->immediates)) { + shader->immediates, FALSE )) { FREE(shader); return NULL; } -- cgit v1.2.3 From 480ab1b7893290505efba925ea95d3b173aa97d4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 29 Apr 2008 08:56:41 +1000 Subject: nv40: enable DXTn formats GL state tracker capable enough for progs/tests/texcompress2 at least. --- src/gallium/drivers/nv40/nv40_screen.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index a408d7262f3..c64c3b1c39d 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -39,7 +39,7 @@ nv40_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_GLSL: return 0; case PIPE_CAP_S3TC: - return 0; + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: @@ -118,12 +118,10 @@ nv40_screen_surface_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_U_A8_L8: case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_Z24S8_UNORM: -#if 0 /* state tracker not up to the task just yet. */ case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: -#endif return TRUE; default: break; -- cgit v1.2.3 From 62a29412b90008a247fd3b61f1b882df2e5e81c1 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 25 Apr 2008 16:50:56 -0600 Subject: gallium: test for new PIPE_ARCH_X86 --- src/gallium/drivers/softpipe/sp_context.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index f2ce0fdc6e9..edf91ecafa3 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -127,7 +127,7 @@ softpipe_create( struct pipe_screen *screen, struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); uint i; -#if defined(__i386__) || defined(__386__) +#ifdef PIPE_ARCH_X86 softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; #else softpipe->use_sse = FALSE; diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 4d569e1e22f..25fdfea4915 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -40,7 +40,7 @@ #include "tgsi/exec/tgsi_sse2.h" -#if defined(__i386__) || defined(__386__) +#ifdef PIPE_ARCH_X86 #include "rtasm/rtasm_x86sse.h" -- cgit v1.2.3 From 653da2d0698d18a8d3dcad1b1590437dee7bb403 Mon Sep 17 00:00:00 2001 From: Zack Rusin <zack@tungstengraphics.com> Date: Wed, 30 Apr 2008 14:05:49 -0400 Subject: plug a memleak, destroy setup context --- src/gallium/drivers/softpipe/sp_prim_setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index feb35d492a3..1cf9ffa632f 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -150,6 +150,8 @@ static void reset_stipple_counter( struct draw_stage *stage ) static void render_destroy( struct draw_stage *stage ) { + struct setup_stage *ssetup = setup_stage(stage); + setup_destroy_context(ssetup->setup); FREE( stage ); } -- cgit v1.2.3 From 18f4d962653cdbb76f08e8498fbbec6e1759f21e Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 30 Apr 2008 16:41:53 -0600 Subject: gallium: replace old PIPE_FORMAT_U_S8 with PIPE_FORMAT_S8_UNORM --- src/gallium/drivers/softpipe/sp_quad_stencil.c | 4 ++-- src/gallium/drivers/softpipe/sp_tile_cache.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index 92a0da00833..b4c7e942fa5 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -243,7 +243,7 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) stencilVals[j] = tile->data.depth32[y][x] & 0xff; } break; - case PIPE_FORMAT_U_S8: + case PIPE_FORMAT_S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { int x = quad->x0 % TILE_SIZE + (j & 1); int y = quad->y0 % TILE_SIZE + (j >> 1); @@ -311,7 +311,7 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) tile->data.depth32[y][x] = z24s8; } break; - case PIPE_FORMAT_U_S8: + case PIPE_FORMAT_S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { int x = quad->x0 % TILE_SIZE + (j & 1); int y = quad->y0 % TILE_SIZE + (j >> 1); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index edafd93d8b8..a88aad5d097 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -168,7 +168,7 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || ps->format == PIPE_FORMAT_Z16_UNORM || ps->format == PIPE_FORMAT_Z32_UNORM || - ps->format == PIPE_FORMAT_U_S8); + ps->format == PIPE_FORMAT_S8_UNORM); } } -- cgit v1.2.3 From 54f94a790e4488445347abcff9a636a9c440d7f9 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 30 Apr 2008 16:50:17 -0600 Subject: gallium: use the newer PIPE_FORMAT_x_UNORM format names --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 2 +- src/gallium/auxiliary/util/p_tile.c | 24 ++++++++-------- src/gallium/auxiliary/util/u_gen_mipmap.c | 8 +++--- src/gallium/drivers/i915simple/i915_screen.c | 8 +++--- .../drivers/i915simple/i915_state_sampler.c | 8 +++--- src/gallium/drivers/i965simple/brw_screen.c | 24 ++++++++-------- .../drivers/i965simple/brw_wm_surface_state.c | 8 +++--- src/mesa/state_tracker/st_cb_bitmap.c | 4 +-- src/mesa/state_tracker/st_format.c | 32 +++++++++++----------- 10 files changed, 60 insertions(+), 60 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index f05641dee66..f501b2aed41 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -392,7 +392,7 @@ aaline_create_texture(struct aaline_stage *aaline) memset(&texTemp, 0, sizeof(texTemp)); texTemp.target = PIPE_TEXTURE_2D; - texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = MAX_TEXTURE_LEVEL; texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index d1d63d73bea..c4de9d2698e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -417,7 +417,7 @@ pstip_create_texture(struct pstip_stage *pstip) memset(&texTemp, 0, sizeof(texTemp)); texTemp.target = PIPE_TEXTURE_2D; - texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = 0; texTemp.width[0] = 32; texTemp.height[0] = 32; diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 13175ca46e6..63e1cc6013a 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -385,7 +385,7 @@ z16_get_tile_rgba(ushort *src, -/*** PIPE_FORMAT_U_L8 ***/ +/*** PIPE_FORMAT_L8_UNORM ***/ static void l8_get_tile_rgba(ubyte *src, @@ -408,7 +408,7 @@ l8_get_tile_rgba(ubyte *src, } -/*** PIPE_FORMAT_U_A8 ***/ +/*** PIPE_FORMAT_A8_UNORM ***/ static void a8_get_tile_rgba(ubyte *src, @@ -476,7 +476,7 @@ r16g16b16a16_put_tile_rgba(short *dst, -/*** PIPE_FORMAT_U_I8 ***/ +/*** PIPE_FORMAT_I8_UNORM ***/ static void i8_get_tile_rgba(ubyte *src, @@ -499,7 +499,7 @@ i8_get_tile_rgba(ubyte *src, } -/*** PIPE_FORMAT_U_A8_L8 ***/ +/*** PIPE_FORMAT_A8L8_UNORM ***/ static void a8_l8_get_tile_rgba(ushort *src, @@ -708,16 +708,16 @@ pipe_get_tile_rgba(struct pipe_context *pipe, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: l8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: a8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: i8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: a8_l8_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); break; case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -787,16 +787,16 @@ pipe_put_tile_rgba(struct pipe_context *pipe, break; case PIPE_FORMAT_R8G8B8A8_UNORM: break; - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: /*l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: /*a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: /*i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: /*a8_l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ break; case PIPE_FORMAT_R16G16B16A16_SNORM: diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index b8dc6c66c01..0348629ab82 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -493,13 +493,13 @@ format_to_type_comps(enum pipe_format pformat, *datatype = USHORT_5_6_5; *comps = 3; return; - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: *datatype = UBYTE; *comps = 1; return; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: *datatype = UBYTE; *comps = 2; return; diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 9ae594ce54c..631642e1b6b 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -154,10 +154,10 @@ i915_is_format_supported( struct pipe_screen *screen, PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_U_L8, - PIPE_FORMAT_U_A8, - PIPE_FORMAT_U_I8, - PIPE_FORMAT_U_A8_L8, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_YCBCR, PIPE_FORMAT_YCBCR_REV, PIPE_FORMAT_S8Z24_UNORM, diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 84f6529a3ad..982eec4a1b0 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -131,13 +131,13 @@ static uint translate_texture_format(enum pipe_format pipeFormat) { switch (pipeFormat) { - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: return MAPSURF_8BIT | MT_8BIT_L8; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: return MAPSURF_8BIT | MT_8BIT_I8; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: return MAPSURF_8BIT | MT_8BIT_A8; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: return MAPSURF_16BIT | MT_16BIT_AY88; case PIPE_FORMAT_R5G6B5_UNORM: return MAPSURF_16BIT | MT_16BIT_RGB565; diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index 6845c7abde2..b700f7e4f5d 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -141,13 +141,13 @@ brw_is_format_supported( struct pipe_screen *screen, #if 0 /* XXX: This is broken -- rewrite if still needed. */ static const unsigned tex_supported[] = { - PIPE_FORMAT_U_R8_G8_B8_A8, - PIPE_FORMAT_U_A8_R8_G8_B8, - PIPE_FORMAT_U_R5_G6_B5, - PIPE_FORMAT_U_L8, - PIPE_FORMAT_U_A8, - PIPE_FORMAT_U_I8, - PIPE_FORMAT_U_L8_A8, + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_L8A8_UNORM, PIPE_FORMAT_YCBCR, PIPE_FORMAT_YCBCR_REV, PIPE_FORMAT_S8_Z24, @@ -157,16 +157,16 @@ brw_is_format_supported( struct pipe_screen *screen, /* Actually a lot more than this - add later: */ static const unsigned render_supported[] = { - PIPE_FORMAT_U_A8_R8_G8_B8, - PIPE_FORMAT_U_R5_G6_B5, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, }; /* */ static const unsigned z_stencil_supported[] = { - PIPE_FORMAT_U_Z16, - PIPE_FORMAT_U_Z32, - PIPE_FORMAT_S8_Z24, + PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_Z32_UNORM, + PIPE_FORMAT_S8Z24_UNORM, }; switch (type) { diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c index 853c743ccf4..69e56dc8bd3 100644 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -57,16 +57,16 @@ static unsigned translate_tex_target( enum pipe_texture_target target ) static unsigned translate_tex_format( enum pipe_format pipe_format ) { switch( pipe_format ) { - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: return BRW_SURFACEFORMAT_L8_UNORM; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: return BRW_SURFACEFORMAT_I8_UNORM; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: return BRW_SURFACEFORMAT_A8_UNORM; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: return BRW_SURFACEFORMAT_L8A8_UNORM; case PIPE_FORMAT_R8G8B8_UNORM: diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index f0a3b753570..ce8fefe703f 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -722,8 +722,8 @@ st_init_bitmap(struct st_context *st) st->bitmap.rasterizer.bypass_vs = 1; /* find a usable texture format */ - if (screen->is_format_supported(screen, PIPE_FORMAT_U_I8, PIPE_TEXTURE)) { - st->bitmap.tex_format = PIPE_FORMAT_U_I8; + if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM, PIPE_TEXTURE)) { + st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM; } else { /* XXX support more formats */ diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 8b5f84cd56d..9a385a04579 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -261,13 +261,13 @@ st_mesa_format_to_pipe_format(GLuint mesaFormat) case MESA_FORMAT_RGB565: return PIPE_FORMAT_R5G6B5_UNORM; case MESA_FORMAT_AL88: - return PIPE_FORMAT_U_A8_L8; + return PIPE_FORMAT_A8L8_UNORM; case MESA_FORMAT_A8: - return PIPE_FORMAT_U_A8; + return PIPE_FORMAT_A8_UNORM; case MESA_FORMAT_L8: - return PIPE_FORMAT_U_L8; + return PIPE_FORMAT_L8_UNORM; case MESA_FORMAT_I8: - return PIPE_FORMAT_U_I8; + return PIPE_FORMAT_I8_UNORM; case MESA_FORMAT_Z16: return PIPE_FORMAT_Z16_UNORM; case MESA_FORMAT_Z32: @@ -409,8 +409,8 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_ALPHA12: case GL_ALPHA16: case GL_COMPRESSED_ALPHA: - if (screen->is_format_supported( screen, PIPE_FORMAT_U_A8, surfType )) - return PIPE_FORMAT_U_A8; + if (screen->is_format_supported( screen, PIPE_FORMAT_A8_UNORM, surfType )) + return PIPE_FORMAT_A8_UNORM; return default_rgba_format( screen, surfType ); case 1: @@ -420,8 +420,8 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_LUMINANCE12: case GL_LUMINANCE16: case GL_COMPRESSED_LUMINANCE: - if (screen->is_format_supported( screen, PIPE_FORMAT_U_L8, surfType )) - return PIPE_FORMAT_U_L8; + if (screen->is_format_supported( screen, PIPE_FORMAT_L8_UNORM, surfType )) + return PIPE_FORMAT_L8_UNORM; return default_rgba_format( screen, surfType ); case 2: @@ -433,8 +433,8 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_LUMINANCE12_ALPHA12: case GL_LUMINANCE16_ALPHA16: case GL_COMPRESSED_LUMINANCE_ALPHA: - if (screen->is_format_supported( screen, PIPE_FORMAT_U_A8_L8, surfType )) - return PIPE_FORMAT_U_A8_L8; + if (screen->is_format_supported( screen, PIPE_FORMAT_A8L8_UNORM, surfType )) + return PIPE_FORMAT_A8L8_UNORM; return default_rgba_format( screen, surfType ); case GL_INTENSITY: @@ -443,8 +443,8 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_INTENSITY12: case GL_INTENSITY16: case GL_COMPRESSED_INTENSITY: - if (screen->is_format_supported( screen, PIPE_FORMAT_U_I8, surfType )) - return PIPE_FORMAT_U_I8; + if (screen->is_format_supported( screen, PIPE_FORMAT_I8_UNORM, surfType )) + return PIPE_FORMAT_I8_UNORM; return default_rgba_format( screen, surfType ); case GL_YCBCR_MESA: @@ -547,13 +547,13 @@ translate_gallium_format_to_mesa_format(enum pipe_format format) return &_mesa_texformat_argb4444; case PIPE_FORMAT_R5G6B5_UNORM: return &_mesa_texformat_rgb565; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: return &_mesa_texformat_al88; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: return &_mesa_texformat_a8; - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: return &_mesa_texformat_l8; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: return &_mesa_texformat_i8; case PIPE_FORMAT_Z16_UNORM: return &_mesa_texformat_z16; -- cgit v1.2.3 From c9ed86a96483063f3d6789ed16645a3dca77d726 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 1 May 2008 11:07:21 +0100 Subject: gallium: tex surface checkpoint --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 12 ++-- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 15 +++-- src/gallium/auxiliary/util/p_tile.c | 50 ++++++++++---- src/gallium/auxiliary/util/u_blit.c | 3 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 21 ++++-- src/gallium/drivers/failover/fo_context.c | 2 +- src/gallium/drivers/i915simple/i915_screen.c | 31 +++++++++ src/gallium/drivers/i915simple/i915_surface.c | 23 +++++-- src/gallium/drivers/i915simple/i915_texture.c | 13 ++-- src/gallium/drivers/i965simple/brw_surface.c | 51 ++++++--------- src/gallium/drivers/i965simple/brw_tex_layout.c | 2 +- src/gallium/drivers/softpipe/sp_context.c | 6 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 6 +- src/gallium/drivers/softpipe/sp_flush.c | 35 +++++----- src/gallium/drivers/softpipe/sp_screen.c | 22 ++++--- src/gallium/drivers/softpipe/sp_surface.c | 23 +++++-- src/gallium/drivers/softpipe/sp_texture.c | 87 ++++++++++++++++++------- src/gallium/drivers/softpipe/sp_texture.h | 1 - src/gallium/drivers/softpipe/sp_tile_cache.c | 36 ++++++---- src/gallium/drivers/softpipe/sp_tile_cache.h | 2 +- src/gallium/include/pipe/p_context.h | 6 -- src/gallium/include/pipe/p_inlines.h | 31 ++++----- src/gallium/include/pipe/p_screen.h | 17 ++++- src/gallium/include/pipe/p_state.h | 4 ++ src/gallium/include/pipe/p_util.h | 3 + src/gallium/include/pipe/p_winsys.h | 2 +- src/gallium/winsys/xlib/xm_winsys.c | 2 + src/mesa/state_tracker/st_atom_pixeltransfer.c | 9 +-- src/mesa/state_tracker/st_cb_accum.c | 12 ++-- src/mesa/state_tracker/st_cb_bitmap.c | 23 +++++-- src/mesa/state_tracker/st_cb_drawpixels.c | 56 +++++++++------- src/mesa/state_tracker/st_cb_fbo.c | 27 +++++--- src/mesa/state_tracker/st_cb_readpixels.c | 5 +- src/mesa/state_tracker/st_cb_texture.c | 48 +++++++------- src/mesa/state_tracker/st_gen_mipmap.c | 6 +- src/mesa/state_tracker/st_texture.c | 34 ++++++---- src/mesa/state_tracker/st_texture.h | 6 +- 37 files changed, 465 insertions(+), 267 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index f501b2aed41..6dc20f2c90a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -415,8 +415,11 @@ aaline_create_texture(struct aaline_stage *aaline) assert(aaline->texture->width[level] == aaline->texture->height[level]); - surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0); - data = pipe_surface_map(surface); + /* This texture is new, no need to flush. + */ + surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + data = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_WRITE); if (data == NULL) return FALSE; @@ -440,9 +443,8 @@ aaline_create_texture(struct aaline_stage *aaline) } /* unmap */ - pipe_surface_unmap(surface); - pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, aaline->texture, 0, (1 << level)); + screen->surface_unmap(screen, surface); + screen->tex_surface_release(screen, &surface); } return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index c4de9d2698e..3aa326acc7f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -376,8 +376,14 @@ pstip_update_texture(struct pstip_stage *pstip) uint i, j; ubyte *data; - surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0); - data = pipe_surface_map(surface); + /* XXX: want to avoid flushing just because we use stipple: + */ + pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL ); + + surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + data = screen->surface_map(screen, surface, + PIPE_BUFFER_USAGE_CPU_WRITE); /* * Load alpha texture. @@ -399,9 +405,8 @@ pstip_update_texture(struct pstip_stage *pstip) } /* unmap */ - pipe_surface_unmap(surface); - pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pstip->texture, 0, 0x1); + screen->surface_unmap(screen, surface); + screen->tex_surface_release(screen, &surface); } diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 63e1cc6013a..5728757d2fb 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -50,6 +50,7 @@ pipe_get_tile_raw(struct pipe_context *pipe, uint x, uint y, uint w, uint h, void *p, int dst_stride) { + struct pipe_screen *screen = pipe->screen; const uint cpp = ps->cpp; const ubyte *pSrc; const uint src_stride = ps->pitch * cpp; @@ -63,7 +64,11 @@ pipe_get_tile_raw(struct pipe_context *pipe, if (pipe_clip_tile(x, y, &w, &h, ps)) return; - pSrc = (const ubyte *) pipe_surface_map(ps) + (y * ps->pitch + x) * cpp; + pSrc = (const ubyte *) screen->surface_map(screen, ps, + PIPE_BUFFER_USAGE_CPU_READ); + assert(pSrc); /* XXX: proper error handling! */ + + pSrc += (y * ps->pitch + x) * cpp; pDest = (ubyte *) p; for (i = 0; i < h; i++) { @@ -72,7 +77,7 @@ pipe_get_tile_raw(struct pipe_context *pipe, pSrc += src_stride; } - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } @@ -86,6 +91,7 @@ pipe_put_tile_raw(struct pipe_context *pipe, uint x, uint y, uint w, uint h, const void *p, int src_stride) { + struct pipe_screen *screen = pipe->screen; const uint cpp = ps->cpp; const ubyte *pSrc; const uint dst_stride = ps->pitch * cpp; @@ -100,7 +106,11 @@ pipe_put_tile_raw(struct pipe_context *pipe, return; pSrc = (const ubyte *) p; - pDest = (ubyte *) pipe_surface_map(ps) + (y * ps->pitch + x) * cpp; + + pDest = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(pDest); /* XXX: proper error handling */ + + pDest += (y * ps->pitch + x) * cpp; for (i = 0; i < h; i++) { memcpy(pDest, pSrc, w * cpp); @@ -108,7 +118,7 @@ pipe_put_tile_raw(struct pipe_context *pipe, pSrc += src_stride; } - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } @@ -834,18 +844,26 @@ pipe_get_tile_z(struct pipe_context *pipe, uint x, uint y, uint w, uint h, uint *z) { + struct pipe_screen *screen = pipe->screen; const uint dstStride = w; + void *map; uint *pDest = z; uint i, j; if (pipe_clip_tile(x, y, &w, &h, ps)) return; + map = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + if (!map) { + assert(0); + return; + } + switch (ps->format) { case PIPE_FORMAT_Z32_UNORM: { const uint *pSrc - = (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + = (const uint *)map + (y * ps->pitch + x); for (i = 0; i < h; i++) { memcpy(pDest, pSrc, 4 * w); pDest += dstStride; @@ -857,7 +875,7 @@ pipe_get_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_X8Z24_UNORM: { const uint *pSrc - = (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + = (const uint *)map + (y * ps->pitch + x); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 24-bit Z to 32-bit Z */ @@ -871,7 +889,7 @@ pipe_get_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_Z16_UNORM: { const ushort *pSrc - = (const ushort *) pipe_surface_map(ps) + (y * ps->pitch + x); + = (const ushort *)map + (y * ps->pitch + x); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 16-bit Z to 32-bit Z */ @@ -886,7 +904,7 @@ pipe_get_tile_z(struct pipe_context *pipe, assert(0); } - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } @@ -896,17 +914,25 @@ pipe_put_tile_z(struct pipe_context *pipe, uint x, uint y, uint w, uint h, const uint *zSrc) { + struct pipe_screen *screen = pipe->screen; const uint srcStride = w; const uint *pSrc = zSrc; + void *map; uint i, j; if (pipe_clip_tile(x, y, &w, &h, ps)) return; + map = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + if (!map) { + assert(0); + return; + } + switch (ps->format) { case PIPE_FORMAT_Z32_UNORM: { - uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + uint *pDest = (uint *) map + (y * ps->pitch + x); for (i = 0; i < h; i++) { memcpy(pDest, pSrc, 4 * w); pDest += ps->pitch; @@ -917,7 +943,7 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: { - uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + uint *pDest = (uint *) map + (y * ps->pitch + x); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z (0 stencil) */ @@ -930,7 +956,7 @@ pipe_put_tile_z(struct pipe_context *pipe, break; case PIPE_FORMAT_Z16_UNORM: { - ushort *pDest = (ushort *) pipe_surface_map(ps) + (y * ps->pitch + x); + ushort *pDest = (ushort *) map + (y * ps->pitch + x); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 16-bit Z */ @@ -945,7 +971,7 @@ pipe_put_tile_z(struct pipe_context *pipe, assert(0); } - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 9e9912c6e46..257473ab26f 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -287,7 +287,8 @@ util_blit_pixels(struct blit_state *ctx, if (!tex) return; - texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0); + texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); /* load temp texture */ pipe->surface_copy(pipe, FALSE, diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 0348629ab82..6ed5503c9ae 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -586,8 +586,11 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, struct pipe_surface *srcSurf, *dstSurf; void *srcMap, *dstMap; - srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); - dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); srcMap = ((ubyte *) winsys->buffer_map(winsys, srcSurf->buffer, PIPE_BUFFER_USAGE_CPU_READ) @@ -626,8 +629,10 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, struct pipe_surface *srcSurf, *dstSurf; ubyte *srcMap, *dstMap; - srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); - dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); srcMap = ((ubyte *) winsys->buffer_map(winsys, srcSurf->buffer, PIPE_BUFFER_USAGE_CPU_READ) @@ -888,10 +893,14 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; + struct pipe_surface *surf = + screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_GPU_WRITE); + /* * Setup framebuffer / dest surface */ - fb.cbufs[0] = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + fb.cbufs[0] = surf; fb.width = pt->width[dstLevel]; fb.height = pt->height[dstLevel]; cso_set_framebuffer(ctx->cso, &fb); @@ -922,7 +931,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); /* need to signal that the texture has changed _after_ rendering to it */ - pipe->texture_update(pipe, pt, face, (1 << dstLevel)); + pipe_surface_reference( &surf, NULL ); } /* restore state we changed */ diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index cb95ba516f9..014a3e31d50 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -147,8 +147,8 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.texture_create = hw->texture_create; failover->pipe.texture_release = hw->texture_release; failover->pipe.get_tex_surface = hw->get_tex_surface; -#endif failover->pipe.texture_update = hw->texture_update; +#endif failover->pipe.flush = hw->flush; diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 631642e1b6b..dcd349e478b 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -200,6 +200,35 @@ i915_destroy_screen( struct pipe_screen *screen ) } +static void * +i915_surface_map( struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags ) +{ + char *map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + if (map == NULL) + return NULL; + + if (surface->texture && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) + { + /* Do something to notify contexts of a texture change. + */ + /* i915_screen(screen)->timestamp++; */ + } + + return map + surface->offset; +} + +static void +i915_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); +} + + + /** * Create a new i915_screen object */ @@ -243,6 +272,8 @@ i915_create_screen(struct pipe_winsys *winsys, uint pci_id) i915screen->screen.get_param = i915_get_param; i915screen->screen.get_paramf = i915_get_paramf; i915screen->screen.is_format_supported = i915_is_format_supported; + i915screen->screen.surface_map = i915_surface_map; + i915screen->screen.surface_unmap = i915_surface_unmap; i915_init_screen_texture_functions(&i915screen->screen); diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index f4fbedbe9bc..98367ac0739 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -51,17 +51,25 @@ i915_surface_copy(struct pipe_context *pipe, assert( dst->cpp == src->cpp ); if (0) { - pipe_copy_rect(pipe_surface_map(dst), + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + + const void *src_map = pipe->screen->surface_map( pipe->screen, + src, + PIPE_BUFFER_USAGE_CPU_READ ); + + pipe_copy_rect(dst_map, dst->cpp, dst->pitch, dstx, dsty, width, height, - pipe_surface_map(src), + src_map, do_flip ? -(int) src->pitch : src->pitch, srcx, do_flip ? 1 - srcy - height : srcy); - pipe_surface_unmap(src); - pipe_surface_unmap(dst); + pipe->screen->surface_unmap(pipe->screen, src); + pipe->screen->surface_unmap(pipe->screen, dst); } else { i915_copy_blit( i915_context(pipe), @@ -92,7 +100,10 @@ i915_surface_fill(struct pipe_context *pipe, { if (0) { unsigned i, j; - void *dst_map = pipe_surface_map(dst); + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + switch (dst->cpp) { case 1: { @@ -126,7 +137,7 @@ i915_surface_fill(struct pipe_context *pipe, break; } - pipe_surface_unmap( dst ); + pipe->screen->surface_unmap(pipe->screen, dst); } else { i915_fill_blit( i915_context(pipe), diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index c39e747705b..7b9359a0fea 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -541,13 +541,6 @@ i915_texture_release_screen(struct pipe_screen *screen, } -static void -i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, - uint face, uint levelsMask) -{ - /* no-op? */ -} - /* * XXX note: same as code in sp_surface.c @@ -555,7 +548,8 @@ i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, static struct pipe_surface * i915_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) + unsigned face, unsigned level, unsigned zslice, + unsigned flags) { struct i915_texture *tex = (struct i915_texture *)pt; struct pipe_winsys *ws = screen->winsys; @@ -586,6 +580,7 @@ i915_get_tex_surface_screen(struct pipe_screen *screen, ps->height = pt->height[level]; ps->pitch = tex->pitch; ps->offset = offset; + ps->usage = flags; } return ps; } @@ -594,7 +589,7 @@ i915_get_tex_surface_screen(struct pipe_screen *screen, void i915_init_texture_functions(struct i915_context *i915) { - i915->pipe.texture_update = i915_texture_update; +// i915->pipe.texture_update = i915_texture_update; } diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index c99a91dcf76..3e3736b2806 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -35,27 +35,6 @@ #include "util/p_tile.h" -/* Upload data to a rectangular sub-region. Lots of choices how to do this: - * - * - memcpy by span to current destination - * - upload data as new buffer and blit - * - * Currently always memcpy. - */ -static void -brw_surface_data(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - const void *src, unsigned src_pitch, - unsigned srcx, unsigned srcy, unsigned width, unsigned height) -{ - pipe_copy_rect(pipe_surface_map(dst) + dst->offset, - dst->cpp, dst->pitch, - dstx, dsty, width, height, src, src_pitch, srcx, srcy); - - pipe_surface_unmap(dst); -} - /* Assumes all values are within bounds -- no checking at this level - * do it higher up if required. @@ -72,17 +51,25 @@ brw_surface_copy(struct pipe_context *pipe, assert(dst->cpp == src->cpp); if (0) { - pipe_copy_rect(pipe_surface_map(dst) + dst->offset, + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + + const void *src_map = pipe->screen->surface_map( pipe->screen, + src, + PIPE_BUFFER_USAGE_CPU_READ ); + + pipe_copy_rect(dst_map, dst->cpp, dst->pitch, - dstx, dsty, - width, height, - pipe_surface_map(src) + src->offset, - do_flip ? -src->pitch : src->pitch, + dstx, dsty, + width, height, + src_map, + do_flip ? -(int) src->pitch : src->pitch, srcx, do_flip ? 1 - srcy - height : srcy); - pipe_surface_unmap(src); - pipe_surface_unmap(dst); + pipe->screen->surface_unmap(pipe->screen, src); + pipe->screen->surface_unmap(pipe->screen, dst); } else { brw_copy_blit(brw_context(pipe), @@ -113,7 +100,10 @@ brw_surface_fill(struct pipe_context *pipe, { if (0) { unsigned i, j; - void *dst_map = pipe_surface_map(dst); + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + switch (dst->cpp) { case 1: { @@ -147,7 +137,7 @@ brw_surface_fill(struct pipe_context *pipe, break; } - pipe_surface_unmap( dst ); + pipe->screen->surface_unmap(pipe->screen, dst); } else { brw_fill_blit(brw_context(pipe), @@ -164,7 +154,6 @@ brw_surface_fill(struct pipe_context *pipe, void brw_init_surface_functions(struct brw_context *brw) { - (void) brw_surface_data; /* silence warning */ brw->pipe.surface_copy = brw_surface_copy; brw->pipe.surface_fill = brw_surface_fill; } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index b580f98204c..ba4c4a7bcf5 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -407,7 +407,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, void brw_init_texture_functions(struct brw_context *brw) { - brw->pipe.texture_update = brw_texture_update; +// brw->pipe.texture_update = brw_texture_update; } diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index edf91ecafa3..ee74826763c 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -192,11 +192,11 @@ softpipe_create( struct pipe_screen *screen, * Must be before quad stage setup! */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - softpipe->cbuf_cache[i] = sp_create_tile_cache(); - softpipe->zsbuf_cache = sp_create_tile_cache(); + softpipe->cbuf_cache[i] = sp_create_tile_cache( screen ); + softpipe->zsbuf_cache = sp_create_tile_cache( screen ); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tile_cache(); + softpipe->tex_cache[i] = sp_create_tile_cache( screen ); /* setup quad rendering stages */ diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 6c58f9909da..355c120d187 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -50,7 +50,7 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) for (i = 0; i < 2; i++) { if (sp->constants[i].size) sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + PIPE_BUFFER_USAGE_GPU_READ); } draw_set_mapped_constant_buffer(sp->draw, @@ -133,14 +133,14 @@ softpipe_draw_elements(struct pipe_context *pipe, void *buf = pipe->winsys->buffer_map(pipe->winsys, sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + PIPE_BUFFER_USAGE_GPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + PIPE_BUFFER_USAGE_GPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); } else { diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 0625b69099b..e03994b63b7 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -50,25 +50,28 @@ softpipe_flush( struct pipe_context *pipe, draw_flush(softpipe->draw); - /* - flush the quad pipeline - * - flush the texture cache - * - flush the render cache - */ + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + for (i = 0; i < softpipe->num_textures; i++) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[i]); + } + } - for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) - if (softpipe->cbuf_cache[i]) - sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); + if (flags & PIPE_FLUSH_RENDER_CACHE) { + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) + if (softpipe->cbuf_cache[i]) + sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); - if (softpipe->zsbuf_cache) - sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); + if (softpipe->zsbuf_cache) + sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); - /* Need this call for hardware buffers before swapbuffers. - * - * there should probably be another/different flush-type function - * that's called before swapbuffers because we don't always want - * to unmap surfaces when flushing. - */ - softpipe_unmap_surfaces(softpipe); + /* Need this call for hardware buffers before swapbuffers. + * + * there should probably be another/different flush-type function + * that's called before swapbuffers because we don't always want + * to unmap surfaces when flushing. + */ + softpipe_unmap_surfaces(softpipe); + } if (fence) *fence = NULL; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 7dacb1c4613..e9926bf41f9 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -33,6 +33,7 @@ #include "sp_texture.h" #include "sp_winsys.h" +#include "sp_screen.h" static const char * @@ -137,6 +138,7 @@ softpipe_destroy_screen( struct pipe_screen *screen ) } + /** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no softpipe_screen). @@ -144,22 +146,22 @@ softpipe_destroy_screen( struct pipe_screen *screen ) struct pipe_screen * softpipe_create_screen(struct pipe_winsys *winsys) { - struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen); + struct softpipe_screen *screen = CALLOC_STRUCT(softpipe_screen); if (!screen) return NULL; - screen->winsys = winsys; + screen->base.winsys = winsys; - screen->destroy = softpipe_destroy_screen; + screen->base.destroy = softpipe_destroy_screen; - screen->get_name = softpipe_get_name; - screen->get_vendor = softpipe_get_vendor; - screen->get_param = softpipe_get_param; - screen->get_paramf = softpipe_get_paramf; - screen->is_format_supported = softpipe_is_format_supported; + screen->base.get_name = softpipe_get_name; + screen->base.get_vendor = softpipe_get_vendor; + screen->base.get_param = softpipe_get_param; + screen->base.get_paramf = softpipe_get_paramf; + screen->base.is_format_supported = softpipe_is_format_supported; - softpipe_init_screen_texture_funcs(screen); + softpipe_init_screen_texture_funcs(&screen->base); - return screen; + return &screen->base; } diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 653449c4f18..b5cc0535481 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -47,18 +47,27 @@ sp_surface_copy(struct pipe_context *pipe, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { assert( dst->cpp == src->cpp ); + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_GPU_WRITE ); - pipe_copy_rect(pipe_surface_map(dst), + const void *src_map = pipe->screen->surface_map( pipe->screen, + src, + PIPE_BUFFER_USAGE_GPU_READ ); + + assert(src_map && dst_map); + + pipe_copy_rect(dst_map, dst->cpp, dst->pitch, dstx, dsty, width, height, - pipe_surface_map(src), + src_map, do_flip ? -(int) src->pitch : src->pitch, srcx, do_flip ? 1 - srcy - height : srcy); - pipe_surface_unmap(src); - pipe_surface_unmap(dst); + pipe->screen->surface_unmap(pipe->screen, src); + pipe->screen->surface_unmap(pipe->screen, dst); } @@ -83,7 +92,9 @@ sp_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { unsigned i, j; - void *dst_map = pipe_surface_map(dst); + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_GPU_WRITE ); assert(dst->pitch > 0); assert(width <= dst->pitch); @@ -147,7 +158,7 @@ sp_surface_fill(struct pipe_context *pipe, break; } - pipe_surface_unmap( dst ); + pipe->screen->surface_unmap(pipe->screen, dst); } diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 256586ec886..ee3fa994f94 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -40,6 +40,7 @@ #include "sp_state.h" #include "sp_texture.h" #include "sp_tile_cache.h" +#include "sp_screen.h" /* Simple, maximally packed layout. @@ -116,19 +117,10 @@ softpipe_texture_release(struct pipe_screen *screen, if (!*pt) return; - /* - DBG("%s %p refcount will be %d\n", - __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); - */ if (--(*pt)->refcount <= 0) { struct softpipe_texture *spt = softpipe_texture(*pt); - /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); - */ - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); - FREE(spt); } *pt = NULL; @@ -138,7 +130,8 @@ softpipe_texture_release(struct pipe_screen *screen, static struct pipe_surface * softpipe_get_tex_surface(struct pipe_screen *screen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) + unsigned face, unsigned level, unsigned zslice, + unsigned usage) { struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = softpipe_texture(pt); @@ -157,6 +150,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->height = pt->height[level]; ps->pitch = ps->width; ps->offset = spt->level_offset[level]; + ps->usage = usage; if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * @@ -167,30 +161,74 @@ softpipe_get_tex_surface(struct pipe_screen *screen, assert(face == 0); assert(zslice == 0); } + + if (usage & (PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_WRITE)) { + /* XXX if writing to the texture, invalidate the texcache entries!!! */ + assert(0); + } } return ps; } -static void -softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture, - uint face, uint levelsMask) +static void +softpipe_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **s) { - struct softpipe_context *softpipe = softpipe_context(pipe); - uint unit; - for (unit = 0; unit < softpipe->num_textures; unit++) { - if (softpipe->texture[unit] == texture) { - sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); - } + /* Effectively do the texture_update work here - if texture images + * needed post-processing to put them into hardware layout, this is + * where it would happen. For softpipe, nothing to do. + */ + assert ((*s)->texture); + + screen->winsys->surface_release(screen->winsys, s); +} + + +static void * +softpipe_surface_map( struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags ) +{ + ubyte *map; + + if (flags & ~surface->usage) { + assert(0); + return NULL; + } + + map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + if (map == NULL) + return NULL; + + /* May want to different things here depending on read/write nature + * of the map: + */ + if (surface->texture && + (flags & PIPE_BUFFER_USAGE_GPU_WRITE)) + { + /* Do something to notify sharing contexts of a texture change. + * In softpipe, that would mean flushing the texture cache. + */ + softpipe_screen(screen)->timestamp++; } + + return map + surface->offset; +} + + +static void +softpipe_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); } void -softpipe_init_texture_funcs( struct softpipe_context *softpipe ) +softpipe_init_texture_funcs(struct softpipe_context *sp) { - softpipe->pipe.texture_update = softpipe_texture_update; } @@ -199,5 +237,10 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = softpipe_texture_create; screen->texture_release = softpipe_texture_release; + screen->get_tex_surface = softpipe_get_tex_surface; + screen->tex_surface_release = softpipe_tex_surface_release; + + screen->surface_map = softpipe_surface_map; + screen->surface_unmap = softpipe_surface_unmap; } diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index a7322144e6f..2ba093320dc 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -61,7 +61,6 @@ softpipe_texture(struct pipe_texture *pt) extern void softpipe_init_texture_funcs( struct softpipe_context *softpipe ); - extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index a88aad5d097..a3fd375a2d1 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -49,6 +49,7 @@ struct softpipe_tile_cache { + struct pipe_screen *screen; struct pipe_surface *surface; /**< the surface we're caching */ void *surface_map; struct pipe_texture *texture; /**< if caching a texture */ @@ -109,13 +110,14 @@ clear_clear_flag(uint *bitvec, int x, int y) struct softpipe_tile_cache * -sp_create_tile_cache(void) +sp_create_tile_cache( struct pipe_screen *screen ) { struct softpipe_tile_cache *tc; uint pos; tc = CALLOC_STRUCT( softpipe_tile_cache ); if (tc) { + tc->screen = screen; for (pos = 0; pos < NUM_ENTRIES; pos++) { tc->entries[pos].x = tc->entries[pos].y = -1; @@ -154,16 +156,17 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, assert(!tc->texture); if (tc->surface_map) { - /*assert(tc->surface != ps);*/ - pipe_surface_unmap(tc->surface); + tc->screen->surface_unmap(tc->screen, tc->surface); tc->surface_map = NULL; } pipe_surface_reference(&tc->surface, ps); - if (ps) { - if (tc->surface_map) - tc->surface_map = pipe_surface_map(ps); + if (tc->surface) { + if (tc->surface_map) /* XXX: this is always NULL!? */ + tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || ps->format == PIPE_FORMAT_Z16_UNORM || @@ -187,10 +190,13 @@ void sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc) { if (tc->surface && !tc->surface_map) - tc->surface_map = pipe_surface_map(tc->surface); + tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, + PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ); if (tc->tex_surf && !tc->tex_surf_map) - tc->tex_surf_map = pipe_surface_map(tc->tex_surf); + tc->tex_surf_map = tc->screen->surface_map(tc->screen, tc->tex_surf, + PIPE_BUFFER_USAGE_GPU_READ); } @@ -198,12 +204,12 @@ void sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc) { if (tc->surface_map) { - pipe_surface_unmap(tc->surface); + tc->screen->surface_unmap(tc->screen, tc->surface); tc->surface_map = NULL; } if (tc->tex_surf_map) { - pipe_surface_unmap(tc->tex_surf); + tc->screen->surface_unmap(tc->screen, tc->tex_surf); tc->tex_surf_map = NULL; } } @@ -224,7 +230,7 @@ sp_tile_cache_set_texture(struct pipe_context *pipe, pipe_texture_reference(&tc->texture, texture); if (tc->tex_surf_map) { - pipe_surface_unmap(tc->tex_surf); + tc->screen->surface_unmap(tc->screen, tc->tex_surf); tc->tex_surf_map = NULL; } pipe_surface_reference(&tc->tex_surf, NULL); @@ -514,10 +520,12 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, /* get new surface (view into texture) */ if (tc->tex_surf_map) - pipe_surface_unmap(tc->tex_surf); + tc->screen->surface_unmap(tc->screen, tc->tex_surf); - tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z); - tc->tex_surf_map = pipe_surface_map(tc->tex_surf); + tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z, + PIPE_BUFFER_USAGE_GPU_READ); + tc->tex_surf_map = screen->surface_map(screen, tc->tex_surf, + PIPE_BUFFER_USAGE_GPU_READ); tc->tex_face = face; tc->tex_level = level; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 2631e29a3a6..bc96c941f61 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -61,7 +61,7 @@ struct softpipe_cached_tile extern struct softpipe_tile_cache * -sp_create_tile_cache(void); +sp_create_tile_cache( struct pipe_screen *screen ); extern void sp_destroy_tile_cache(struct softpipe_tile_cache *tc); diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index f3a9c2cd8b2..0f68f592f77 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -198,12 +198,6 @@ struct pipe_context { /*@}*/ - /** Called when texture data is changed */ - void (*texture_update)(struct pipe_context *pipe, - struct pipe_texture *texture, - uint face, uint dirtyLevelsMask); - - /** Flush rendering (flags = bitmask of PIPE_FLUSH_x tokens) */ void (*flush)( struct pipe_context *pipe, unsigned flags, diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 8eb604e73f1..592c3c87c26 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -39,20 +39,6 @@ extern "C" { #endif -static INLINE void * -pipe_surface_map(struct pipe_surface *surface) -{ - return (char *)surface->winsys->buffer_map( surface->winsys, surface->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_CPU_READ ) - + surface->offset; -} - -static INLINE void -pipe_surface_unmap(struct pipe_surface *surface) -{ - surface->winsys->buffer_unmap( surface->winsys, surface->buffer ); -} /** * Set 'ptr' to point to 'surf' and update reference counting. @@ -66,9 +52,20 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) if (surf) surf->refcount++; - if (*ptr /* && --(*ptr)->refcount == 0 */) { - struct pipe_winsys *winsys = (*ptr)->winsys; - winsys->surface_release(winsys, ptr); + if (*ptr) { + + /* There are currently two sorts of surfaces... This needs to be + * fixed so that all surfaces are views into a texture. + */ + if ((*ptr)->texture) { + struct pipe_screen *screen = (*ptr)->texture->screen; + screen->tex_surface_release( screen, ptr ); + } + else { + struct pipe_winsys *winsys = (*ptr)->winsys; + winsys->surface_release(winsys, ptr); + } + assert(!*ptr); } diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 26ac99d287d..c080579c26e 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -96,7 +96,22 @@ struct pipe_screen { struct pipe_surface *(*get_tex_surface)(struct pipe_screen *, struct pipe_texture *texture, unsigned face, unsigned level, - unsigned zslice); + unsigned zslice, + unsigned usage ); + + /* Surfaces allocated by the above must be released here: + */ + void (*tex_surface_release)( struct pipe_screen *, + struct pipe_surface ** ); + + + void *(*surface_map)( struct pipe_screen *, + struct pipe_surface *surface, + unsigned flags ); + + void (*surface_unmap)( struct pipe_screen *, + struct pipe_surface *surface ); + }; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 912d84e7b99..62b05a403b3 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -273,7 +273,11 @@ struct pipe_surface unsigned pitch; /**< in pixels */ unsigned offset; /**< offset from start of buffer, in bytes */ unsigned refcount; + unsigned usage; /**< PIPE_BUFFER_USAGE_* */ + struct pipe_winsys *winsys; /**< winsys which owns/created the surface */ + + struct pipe_texture *texture; /**< optional texture into which this is a view */ }; diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 0e7e2466662..0d8ed167b2a 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -204,7 +204,10 @@ mem_dup(const void *src, uint size) #define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) #define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) +#ifndef Elements #define Elements(x) (sizeof(x)/sizeof((x)[0])) +#endif + #define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER)) /** diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h index 3005ec2d941..87a66b66d75 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/p_winsys.h @@ -90,7 +90,7 @@ struct pipe_winsys void (*surface_release)(struct pipe_winsys *ws, struct pipe_surface **s); - + /** * Buffer management. Buffer attributes are mostly fixed over its lifetime. * diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 8a89278cde6..fd2f56eff24 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -508,6 +508,7 @@ xm_surface_alloc_storage(struct pipe_winsys *winsys, surf->format = format; surf->cpp = pf_get_size(format); surf->pitch = round_up(width, alignment / surf->cpp); + surf->usage = flags; #ifdef GALLIUM_CELL /* XXX a bit of a hack */ height = round_up(height, TILE_SIZE); @@ -562,6 +563,7 @@ static void xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) { struct pipe_surface *surf = *s; + assert(!surf->texture); surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 76356bbad76..e7186a85da8 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -148,8 +148,10 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) uint *dest; uint i, j; - surface = screen->get_tex_surface(screen, pt, 0, 0, 0); - dest = (uint *) pipe_surface_map(surface); + surface = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + dest = (uint *) screen->surface_map(screen, surface, + PIPE_BUFFER_USAGE_CPU_WRITE); /* Pack four 1D maps into a 2D texture: * R map is placed horizontally, indexed by S, in channel 0 @@ -168,9 +170,8 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) } } - pipe_surface_unmap(surface); + screen->surface_unmap(screen, surface); pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pt, 0, 0x1); } diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 1636bed91a5..e4ef3e16b7d 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -106,13 +106,15 @@ st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) { struct st_renderbuffer *acc_strb = st_renderbuffer(rb); struct pipe_surface *acc_ps = acc_strb->surface; + struct pipe_screen *screen = ctx->st->pipe->screen; const GLint xpos = ctx->DrawBuffer->_Xmin; const GLint ypos = ctx->DrawBuffer->_Ymin; const GLint width = ctx->DrawBuffer->_Xmax - xpos; const GLint height = ctx->DrawBuffer->_Ymax - ypos; GLvoid *map; - map = pipe_surface_map(acc_ps); + map = screen->surface_map(screen, acc_ps, + PIPE_BUFFER_USAGE_CPU_WRITE); /* note acc_strb->format might not equal acc_ps->format */ switch (acc_strb->format) { @@ -140,7 +142,7 @@ st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) _mesa_problem(ctx, "unexpected format in st_clear_accum_buffer()"); } - pipe_surface_unmap(acc_ps); + screen->surface_unmap(screen, acc_ps); } @@ -150,10 +152,12 @@ accum_mad(GLcontext *ctx, GLfloat scale, GLfloat bias, GLint xpos, GLint ypos, GLint width, GLint height, struct st_renderbuffer *acc_strb) { + struct pipe_screen *screen = ctx->st->pipe->screen; struct pipe_surface *acc_ps = acc_strb->surface; GLvoid *map; - map = pipe_surface_map(acc_ps); + map = screen->surface_map(screen, acc_ps, + PIPE_BUFFER_USAGE_CPU_WRITE); /* note acc_strb->format might not equal acc_ps->format */ switch (acc_strb->format) { @@ -174,7 +178,7 @@ accum_mad(GLcontext *ctx, GLfloat scale, GLfloat bias, _mesa_problem(NULL, "unexpected format in st_clear_accum_buffer()"); } - pipe_surface_unmap(acc_ps); + screen->surface_unmap(screen, acc_ps); } diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index ce8fefe703f..873b765c2c2 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -327,10 +327,11 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, return NULL; } - surface = screen->get_tex_surface(screen, pt, 0, 0, 0); + surface = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); /* map texture surface */ - dest = pipe_surface_map(surface); + dest = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_WRITE); /* Put image into texture surface */ memset(dest, 0xff, height * surface->pitch); @@ -340,9 +341,8 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, _mesa_unmap_bitmap_pbo(ctx, unpack); /* Release surface */ - pipe_surface_unmap(surface); + screen->surface_unmap(screen, surface); pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pt, 0, 0x1); return pt; } @@ -544,8 +544,10 @@ reset_cache(struct st_context *st) /* Map the texture surface. * Subsequent glBitmap calls will write into the texture image. */ - cache->surf = screen->get_tex_surface(screen, cache->texture, 0, 0, 0); - cache->buffer = pipe_surface_map(cache->surf); + cache->surf = screen->get_tex_surface(screen, cache->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + cache->buffer = screen->surface_map(screen, cache->surf, + PIPE_BUFFER_USAGE_CPU_WRITE); /* init image to all 0xff */ memset(cache->buffer, 0xff, BITMAP_CACHE_WIDTH * BITMAP_CACHE_HEIGHT); @@ -562,6 +564,7 @@ st_flush_bitmap_cache(struct st_context *st) if (st->ctx->DrawBuffer) { struct bitmap_cache *cache = st->bitmap.cache; struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; assert(cache->xmin <= cache->xmax); /* @@ -574,12 +577,18 @@ st_flush_bitmap_cache(struct st_context *st) /* The texture surface has been mapped until now. * So unmap and release the texture surface before drawing. */ +#if 0 pipe_surface_unmap(cache->surf); pipe_surface_reference(&cache->surf, NULL); +#else + screen->surface_unmap(screen, cache->surf); + screen->tex_surface_release(screen, &cache->surf); +#endif +#if 0 /* XXX is this needed? */ pipe->texture_update(pipe, cache->texture, 0, 0x1); - +#endif draw_bitmap_quad(st->ctx, cache->xpos, cache->ypos, diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 65bfd6cfcc0..9ae53c95f86 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -362,10 +362,12 @@ make_texture(struct st_context *st, /* we'll do pixel transfer in a fragment shader */ ctx->_ImageTransferState = 0x0; - surface = screen->get_tex_surface(screen, pt, 0, 0, 0); + surface = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); /* map texture surface */ - dest = pipe_surface_map(surface); + dest = screen->surface_map(screen, surface, + PIPE_BUFFER_USAGE_CPU_WRITE); /* Put image into texture surface. * Note that the image is actually going to be upside down in @@ -384,9 +386,8 @@ make_texture(struct st_context *st, unpack); /* unmap */ - pipe_surface_unmap(surface); + screen->surface_unmap(screen, surface); pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pt, 0, 0x1); assert(success); @@ -731,6 +732,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, { struct st_context *st = ctx->st; struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_surface *ps = st->state.framebuffer.zsbuf; const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0; GLint skipPixels; @@ -739,7 +741,8 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); /* map the stencil buffer */ - stmap = pipe_surface_map(ps); + stmap = screen->surface_map(screen, ps, + PIPE_BUFFER_USAGE_CPU_WRITE); /* if width > MAX_WIDTH, have to process image in chunks */ skipPixels = 0; @@ -796,7 +799,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, } /* unmap the stencil buffer */ - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } @@ -869,6 +872,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, GLint dstx, GLint dsty) { struct st_renderbuffer *rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer); + struct pipe_screen *screen = ctx->st->pipe->screen; struct pipe_surface *psDraw = rbDraw->surface; ubyte *drawMap; ubyte *buffer; @@ -885,7 +889,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, &ctx->DefaultPacking, buffer); /* map the stencil buffer */ - drawMap = pipe_surface_map(psDraw); + drawMap = screen->surface_map(screen, psDraw, PIPE_BUFFER_USAGE_CPU_WRITE); /* draw */ /* XXX PixelZoom not handled yet */ @@ -925,7 +929,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, free(buffer); /* unmap the stencil buffer */ - pipe_surface_unmap(psDraw); + screen->surface_unmap(screen, psDraw); } @@ -994,13 +998,14 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, if (!pt) return; - psTex = screen->get_tex_surface(screen, pt, 0, 0, 0); - if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { srcy = ctx->DrawBuffer->Height - srcy - height; } if (srcFormat == texFormat) { + psTex = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE ); + /* copy source framebuffer surface into mipmap/texture */ pipe->surface_copy(pipe, FALSE, @@ -1009,21 +1014,26 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, psRead, srcx, srcy, width, height); } - else if (type == GL_COLOR) { - /* alternate path using get/put_tile() */ - GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + else { + psTex = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE ); - pipe_get_tile_rgba(pipe, psRead, srcx, srcy, width, height, buf); - pipe_put_tile_rgba(pipe, psTex, 0, 0, width, height, buf); + if (type == GL_COLOR) { + /* alternate path using get/put_tile() */ + GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - free(buf); - } - else { - /* GL_DEPTH */ - GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint)); - pipe_get_tile_z(pipe, psRead, srcx, srcy, width, height, buf); - pipe_put_tile_z(pipe, psTex, 0, 0, width, height, buf); - free(buf); + pipe_get_tile_rgba(pipe, psRead, srcx, srcy, width, height, buf); + pipe_put_tile_rgba(pipe, psTex, 0, 0, width, height, buf); + + free(buf); + } + else { + /* GL_DEPTH */ + GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint)); + pipe_get_tile_z(pipe, psRead, srcx, srcy, width, height, buf); + pipe_put_tile_z(pipe, psTex, 0, 0, width, height, buf); + free(buf); + } } /* draw textured quad */ diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index fc8a5ea7f62..7fdc0bddd67 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -91,9 +91,14 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, struct pipe_context *pipe = ctx->st->pipe; struct st_renderbuffer *strb = st_renderbuffer(rb); enum pipe_format pipeFormat; - GLbitfield flags = 0x0; /* XXX needed? */ + unsigned flags = (PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ); int ret; + pipe_surface_reference( &strb->surface, NULL ); + if (!strb->surface) { /* first time surface creation */ strb->surface = pipe->winsys->surface_alloc(pipe->winsys); @@ -103,11 +108,16 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, if (!strb->surface) return GL_FALSE; } +#if 0 else if (strb->surface->buffer) { /* release/discard the old surface buffer */ pipe_reference_buffer(pipe, &strb->surface->buffer, NULL); } - +#else + else { + assert(0); + } +#endif /* Determine surface format here */ if (strb->format != PIPE_FORMAT_NONE) { assert(strb->format != 0); @@ -368,7 +378,11 @@ st_render_texture(GLcontext *ctx, strb->surface = screen->get_tex_surface(screen, pt, att->CubeMapFace, att->TextureLevel, - att->Zoffset); + att->Zoffset, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); assert(strb->surface); assert(screen->is_format_supported(screen, strb->surface->format, PIPE_TEXTURE)); assert(screen->is_format_supported(screen, strb->surface->format, PIPE_SURFACE)); @@ -396,22 +410,19 @@ static void st_finish_render_texture(GLcontext *ctx, struct gl_renderbuffer_attachment *att) { + struct pipe_screen *screen = ctx->st->pipe->screen; struct st_renderbuffer *strb = st_renderbuffer(att->Renderbuffer); assert(strb); ctx->st->pipe->flush(ctx->st->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - ctx->st->pipe->texture_update(ctx->st->pipe, - st_get_texobj_texture(att->Texture), - att->CubeMapFace, 1 << att->TextureLevel); + screen->tex_surface_release( screen, &strb->surface ); /* printf("FINISH RENDER TO TEXTURE surf=%p\n", strb->surface); */ - pipe_surface_reference(&strb->surface, NULL); - _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); /* restore previous framebuffer state */ diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index ddbe36106c8..e242195e7a9 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -61,13 +61,14 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, GLvoid *pixels) { struct gl_framebuffer *fb = ctx->ReadBuffer; + struct pipe_screen *screen = ctx->st->pipe->screen; struct st_renderbuffer *strb = st_renderbuffer(fb->_StencilBuffer); struct pipe_surface *ps = strb->surface; ubyte *stmap; GLint j; /* map the stencil buffer */ - stmap = pipe_surface_map(ps); + stmap = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); /* width should never be > MAX_WIDTH since we did clipping earlier */ ASSERT(width <= MAX_WIDTH); @@ -124,7 +125,7 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, /* unmap the stencil buffer */ - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 981246221b1..05e0339e0e1 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -478,7 +478,6 @@ st_TexImage(GLcontext * ctx, struct gl_texture_image *texImage, GLsizei imageSize, int compressed) { - struct pipe_context *pipe = ctx->st->pipe; struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); GLint postConvWidth, postConvHeight; @@ -635,7 +634,8 @@ st_TexImage(GLcontext * ctx, return; if (stImage->pt) { - texImage->Data = st_texture_image_map(ctx->st, stImage, 0); + texImage->Data = st_texture_image_map(ctx->st, stImage, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); dstRowStride = stImage->surface->pitch * stImage->surface->cpp; } else { @@ -684,8 +684,9 @@ st_TexImage(GLcontext * ctx, } if (stImage->pt && i < depth) { - st_texture_image_unmap(stImage); - texImage->Data = st_texture_image_map(ctx->st, stImage, i); + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = st_texture_image_map(ctx->st, stImage, i, + PIPE_BUFFER_USAGE_CPU_WRITE); src += srcImageStride; } } @@ -694,13 +695,10 @@ st_TexImage(GLcontext * ctx, _mesa_unmap_teximage_pbo(ctx, unpack); if (stImage->pt) { - st_texture_image_unmap(stImage); + st_texture_image_unmap(ctx->st, stImage); texImage->Data = NULL; } - if (stObj->pt) - pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { ctx->Driver.GenerateMipmap(ctx, target, texObj); } @@ -793,7 +791,8 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level, /* Image is stored in hardware format in a buffer managed by the * kernel. Need to explicitly map and unmap it. */ - texImage->Data = st_texture_image_map(ctx->st, stImage, 0); + texImage->Data = st_texture_image_map(ctx->st, stImage, 0, + PIPE_BUFFER_USAGE_CPU_READ); texImage->RowStride = stImage->surface->pitch; } else { @@ -823,8 +822,9 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level, } if (stImage->pt && i < depth) { - st_texture_image_unmap(stImage); - texImage->Data = st_texture_image_map(ctx->st, stImage, i); + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = st_texture_image_map(ctx->st, stImage, i, + PIPE_BUFFER_USAGE_CPU_READ); dest += dstImageStride; } } @@ -833,7 +833,7 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level, /* Unmap */ if (stImage->pt) { - st_texture_image_unmap(stImage); + st_texture_image_unmap(ctx->st, stImage); texImage->Data = NULL; } } @@ -874,8 +874,6 @@ st_TexSubimage(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - struct pipe_context *pipe = ctx->st->pipe; - struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); GLuint dstRowStride; GLuint srcImageStride = _mesa_image_image_stride(packing, width, height, @@ -897,7 +895,8 @@ st_TexSubimage(GLcontext * ctx, * from uploading the buffer under us. */ if (stImage->pt) { - texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset); + texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset, + PIPE_BUFFER_USAGE_CPU_WRITE); dstRowStride = stImage->surface->pitch * stImage->surface->cpp; } @@ -922,8 +921,9 @@ st_TexSubimage(GLcontext * ctx, if (stImage->pt && i < depth) { /* map next slice of 3D texture */ - st_texture_image_unmap(stImage); - texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset + i); + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset + i, + PIPE_BUFFER_USAGE_CPU_WRITE); src += srcImageStride; } } @@ -935,11 +935,9 @@ st_TexSubimage(GLcontext * ctx, _mesa_unmap_teximage_pbo(ctx, packing); if (stImage->pt) { - st_texture_image_unmap(stImage); + st_texture_image_unmap(ctx->st, stImage); texImage->Data = NULL; } - - pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); } @@ -1058,7 +1056,8 @@ fallback_copy_texsubimage(GLcontext *ctx, src_surf = strb->surface; - dest_surf = screen->get_tex_surface(screen, pt, face, level, destZ); + dest_surf = screen->get_tex_surface(screen, pt, face, level, destZ, + PIPE_BUFFER_USAGE_CPU_WRITE); assert(width <= MAX_WIDTH); @@ -1119,7 +1118,6 @@ do_copy_texsubimage(GLcontext *ctx, struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level); struct st_texture_image *stImage = st_texture_image(texImage); - struct st_texture_object *stObj = st_texture_object(texObj); GLenum baseFormat = texImage->InternalFormat; struct gl_framebuffer *fb = ctx->ReadBuffer; struct st_renderbuffer *strb; @@ -1157,7 +1155,8 @@ do_copy_texsubimage(GLcontext *ctx, dest_format = stImage->pt->format; dest_surface = screen->get_tex_surface(screen, stImage->pt, stImage->face, - stImage->level, destZ); + stImage->level, destZ, + PIPE_BUFFER_USAGE_CPU_WRITE); if (ctx->_ImageTransferState == 0x0 && strb->surface->buffer && @@ -1223,8 +1222,6 @@ do_copy_texsubimage(GLcontext *ctx, pipe_surface_reference(&dest_surface, NULL); - pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { ctx->Driver.GenerateMipmap(ctx, target, texObj); } @@ -1529,7 +1526,6 @@ st_finalize_texture(GLcontext *ctx, if (stImage && stObj->pt != stImage->pt) { copy_image_data_to_texture(ctx->st, stObj, level, stImage); *needFlush = GL_TRUE; - pipe->texture_update(pipe, stObj->pt, face, (1 << level)); } } } diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 1a0e19c2f92..cfacfdd04cd 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -123,8 +123,10 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, const ubyte *srcData; ubyte *dstData; - srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); - dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); srcData = (ubyte *) pipe_buffer_map(pipe, srcSurf->buffer, PIPE_BUFFER_USAGE_CPU_READ) diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index f68bef1207c..482a054f64c 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -184,25 +184,30 @@ st_texture_image_offset(const struct pipe_texture * pt, */ GLubyte * st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, - GLuint zoffset) + GLuint zoffset, + GLuint flags ) { struct pipe_screen *screen = st->pipe->screen; struct pipe_texture *pt = stImage->pt; DBG("%s \n", __FUNCTION__); stImage->surface = screen->get_tex_surface(screen, pt, stImage->face, - stImage->level, zoffset); + stImage->level, zoffset, + flags); - return pipe_surface_map(stImage->surface); + return screen->surface_map(screen, stImage->surface, flags); } void -st_texture_image_unmap(struct st_texture_image *stImage) +st_texture_image_unmap(struct st_context *st, + struct st_texture_image *stImage) { + struct pipe_screen *screen = st->pipe->screen; + DBG("%s\n", __FUNCTION__); - pipe_surface_unmap(stImage->surface); + screen->surface_unmap(screen, stImage->surface); pipe_surface_reference(&stImage->surface, NULL); } @@ -224,12 +229,15 @@ st_surface_data(struct pipe_context *pipe, const void *src, unsigned src_pitch, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - pipe_copy_rect(pipe_surface_map(dst), + struct pipe_screen *screen = pipe->screen; + void *map = screen->surface_map(screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE); + + pipe_copy_rect(map, dst->cpp, dst->pitch, dstx, dsty, width, height, src, src_pitch, srcx, srcy); - pipe_surface_unmap(dst); + screen->surface_unmap(screen, dst); } @@ -256,7 +264,8 @@ st_texture_image_data(struct pipe_context *pipe, if(dst->compressed) height /= 4; - dst_surface = screen->get_tex_surface(screen, dst, face, level, i); + dst_surface = screen->get_tex_surface(screen, dst, face, level, i, + PIPE_BUFFER_USAGE_CPU_WRITE); st_surface_data(pipe, dst_surface, 0, 0, /* dstx, dsty */ @@ -265,7 +274,7 @@ st_texture_image_data(struct pipe_context *pipe, 0, 0, /* source x, y */ dst->width[level], height); /* width, height */ - pipe_surface_reference(&dst_surface, NULL); + screen->tex_surface_release(screen, &dst_surface); srcUB += src_image_pitch * dst->cpp; } @@ -304,8 +313,11 @@ st_texture_image_copy(struct pipe_context *pipe, assert(src->width[srcLevel] == width); assert(src->height[srcLevel] == height); - dst_surface = screen->get_tex_surface(screen, dst, face, dstLevel, i); - src_surface = screen->get_tex_surface(screen, src, face, srcLevel, i); + dst_surface = screen->get_tex_surface(screen, dst, face, dstLevel, i, + PIPE_BUFFER_USAGE_GPU_WRITE); + + src_surface = screen->get_tex_surface(screen, src, face, srcLevel, i, + PIPE_BUFFER_USAGE_GPU_READ); pipe->surface_copy(pipe, FALSE, diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index 7abccb3a69f..f6d5733e210 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -121,10 +121,12 @@ st_texture_match_image(const struct pipe_texture *pt, extern GLubyte * st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, - GLuint zoffset); + GLuint zoffset, + GLuint flags); extern void -st_texture_image_unmap(struct st_texture_image *stImage); +st_texture_image_unmap(struct st_context *st, + struct st_texture_image *stImage); /* Return pointers to each 2d slice within an image. Indexed by depth -- cgit v1.2.3 From 27e46611f04108765fa99890822a474820d5c563 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 1 May 2008 11:28:47 +0100 Subject: softpipe: use CPU flags for mapping But when creating surfaces, adjust incoming flags from GPU->CPU usage. --- src/gallium/drivers/softpipe/sp_draw_arrays.c | 6 ++--- src/gallium/drivers/softpipe/sp_surface.c | 6 ++--- src/gallium/drivers/softpipe/sp_texture.c | 34 ++++++++++++++++++++++++--- src/gallium/drivers/softpipe/sp_tile_cache.c | 14 +++++------ 4 files changed, 44 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 355c120d187..6c58f9909da 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -50,7 +50,7 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) for (i = 0; i < 2; i++) { if (sp->constants[i].size) sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, - PIPE_BUFFER_USAGE_GPU_READ); + PIPE_BUFFER_USAGE_CPU_READ); } draw_set_mapped_constant_buffer(sp->draw, @@ -133,14 +133,14 @@ softpipe_draw_elements(struct pipe_context *pipe, void *buf = pipe->winsys->buffer_map(pipe->winsys, sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_GPU_READ); + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, - PIPE_BUFFER_USAGE_GPU_READ); + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); } else { diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index b5cc0535481..b82b1a8f37d 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -49,11 +49,11 @@ sp_surface_copy(struct pipe_context *pipe, assert( dst->cpp == src->cpp ); void *dst_map = pipe->screen->surface_map( pipe->screen, dst, - PIPE_BUFFER_USAGE_GPU_WRITE ); + PIPE_BUFFER_USAGE_CPU_WRITE ); const void *src_map = pipe->screen->surface_map( pipe->screen, src, - PIPE_BUFFER_USAGE_GPU_READ ); + PIPE_BUFFER_USAGE_CPU_READ ); assert(src_map && dst_map); @@ -94,7 +94,7 @@ sp_surface_fill(struct pipe_context *pipe, unsigned i, j; void *dst_map = pipe->screen->surface_map( pipe->screen, dst, - PIPE_BUFFER_USAGE_GPU_WRITE ); + PIPE_BUFFER_USAGE_CPU_WRITE ); assert(dst->pitch > 0); assert(width <= dst->pitch); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index ee3fa994f94..2b31cd4f251 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -151,6 +151,23 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->pitch = ps->width; ps->offset = spt->level_offset[level]; ps->usage = usage; + + /* Because we are softpipe, anything that the state tracker + * thought was going to be done with the GPU will actually get + * done with the CPU. Let's adjust the flags to take that into + * account. + */ + if (ps->usage & PIPE_BUFFER_USAGE_GPU_WRITE) + ps->usage |= PIPE_BUFFER_USAGE_CPU_WRITE; + + if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ) + ps->usage |= PIPE_BUFFER_USAGE_CPU_READ; + + + pipe_texture_reference(&ps->texture, pt); + ps->face = face; + ps->level = level; + ps->zslice = zslice; if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * @@ -164,8 +181,18 @@ softpipe_get_tex_surface(struct pipe_screen *screen, if (usage & (PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE)) { - /* XXX if writing to the texture, invalidate the texcache entries!!! */ - assert(0); + /* XXX if writing to the texture, invalidate the texcache entries!!! + * + * Actually, no. Flushing dependent contexts is still done + * explicitly and separately. Hardware drivers won't insert + * FLUSH commands into a command stream at this point, + * neither should softpipe try to flush caches. + * + * Those contexts could be living in separate threads & doing + * all sorts of unrelated stuff... Context<->texture + * dependency tracking needs to happen elsewhere. + */ + /* assert(0); */ } } return ps; @@ -181,6 +208,7 @@ softpipe_tex_surface_release(struct pipe_screen *screen, * where it would happen. For softpipe, nothing to do. */ assert ((*s)->texture); + pipe_texture_reference(&(*s)->texture, NULL); screen->winsys->surface_release(screen->winsys, s); } @@ -206,7 +234,7 @@ softpipe_surface_map( struct pipe_screen *screen, * of the map: */ if (surface->texture && - (flags & PIPE_BUFFER_USAGE_GPU_WRITE)) + (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) { /* Do something to notify sharing contexts of a texture change. * In softpipe, that would mean flushing the texture cache. diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index a3fd375a2d1..142faf50745 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -165,8 +165,8 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, if (tc->surface) { if (tc->surface_map) /* XXX: this is always NULL!? */ tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, - PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || ps->format == PIPE_FORMAT_Z16_UNORM || @@ -191,12 +191,12 @@ sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc) { if (tc->surface && !tc->surface_map) tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, - PIPE_BUFFER_USAGE_GPU_WRITE | - PIPE_BUFFER_USAGE_GPU_READ); + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_CPU_READ); if (tc->tex_surf && !tc->tex_surf_map) tc->tex_surf_map = tc->screen->surface_map(tc->screen, tc->tex_surf, - PIPE_BUFFER_USAGE_GPU_READ); + PIPE_BUFFER_USAGE_CPU_READ); } @@ -523,9 +523,9 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, tc->screen->surface_unmap(tc->screen, tc->tex_surf); tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z, - PIPE_BUFFER_USAGE_GPU_READ); + PIPE_BUFFER_USAGE_CPU_READ); tc->tex_surf_map = screen->surface_map(screen, tc->tex_surf, - PIPE_BUFFER_USAGE_GPU_READ); + PIPE_BUFFER_USAGE_CPU_READ); tc->tex_face = face; tc->tex_level = level; -- cgit v1.2.3 From 228aaa6cab9ebb32eb23b85fc8a5f05c1dbe975a Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 1 May 2008 12:21:48 +0100 Subject: softpipe: missing file --- src/gallium/drivers/softpipe/sp_screen.h | 58 ++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 src/gallium/drivers/softpipe/sp_screen.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.h b/src/gallium/drivers/softpipe/sp_screen.h new file mode 100644 index 00000000000..3d4bfd3e840 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_screen.h @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_SCREEN_H +#define SP_SCREEN_H + +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" + + + +struct softpipe_screen { + struct pipe_screen base; + + /* Increments whenever textures are modified. Contexts can track + * this. + */ + unsigned timestamp; +}; + + + + +static INLINE struct softpipe_screen * +softpipe_screen( struct pipe_screen *pipe ) +{ + return (struct softpipe_screen *)pipe; +} + + +#endif /* SP_SCREEN_H */ -- cgit v1.2.3 From f30285e99c1e158971855b12331df3da38555004 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 1 May 2008 18:49:07 +0100 Subject: softpipe: fix warning --- src/gallium/drivers/softpipe/sp_state_fs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 2921066ce36..9e77b7e91bc 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -82,10 +82,9 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) void softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { - struct softpipe_context *softpipe = softpipe_context(pipe); struct sp_fragment_shader *state = fs; - assert(fs != softpipe->fs); + assert(fs != softpipe_context(pipe)->fs); state->delete( state ); } -- cgit v1.2.3 From 26bcef898af4e6dfd578783ed33818a2bd38b06d Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 1 May 2008 18:49:52 +0100 Subject: i915: fix warning --- src/gallium/drivers/i915simple/i915_debug_fp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c index 37a3508fe14..c024a051a58 100644 --- a/src/gallium/drivers/i915simple/i915_debug_fp.c +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -333,12 +333,11 @@ void i915_disassemble_program(struct debug_stream *stream, const unsigned * program, unsigned sz) { - unsigned size = program[0] & 0x1ff; unsigned i; PRINTF(stream, "\t\tBEGIN\n"); - assert(size + 2 == sz); + assert((program[0] & 0x1ff) + 2 == sz); program++; for (i = 1; i < sz; i += 3, program += 3) { -- cgit v1.2.3 From b8936ca1c22de7b0cb695ee3b392e4473fd17aa0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 1 May 2008 18:50:33 +0100 Subject: i915: avoid crashing on bad parameter --- src/gallium/drivers/i915simple/i915_screen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 631642e1b6b..646cfd921d1 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -182,6 +182,7 @@ i915_is_format_supported( struct pipe_screen *screen, break; default: assert(0); + return FALSE; } for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { -- cgit v1.2.3 From c1abd758c51247ebaf3d4808a77513d7814205cd Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 1 May 2008 15:19:00 -0600 Subject: gallium: remove the unused softpipe_winsys code The struct is still there though until all winsys layers are updated --- src/gallium/drivers/softpipe/sp_context.c | 4 +- src/gallium/drivers/softpipe/sp_context.h | 2 - src/gallium/drivers/softpipe/sp_winsys.h | 2 +- src/gallium/winsys/xlib/xm_winsys.c | 81 +++++++++---------------------- 4 files changed, 25 insertions(+), 64 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index edf91ecafa3..fe9cd8375e3 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -122,7 +122,7 @@ static void softpipe_destroy( struct pipe_context *pipe ) struct pipe_context * softpipe_create( struct pipe_screen *screen, struct pipe_winsys *pipe_winsys, - struct softpipe_winsys *softpipe_winsys ) + void *unused ) { struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); uint i; @@ -212,8 +212,6 @@ softpipe_create( struct pipe_screen *screen, softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); softpipe->quad.output = sp_quad_output_stage(softpipe); - softpipe->winsys = softpipe_winsys; - /* * Create drawing context and plug our rendering stage into it. */ diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index b3e2b2e4355..62eabfb30e8 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -57,8 +57,6 @@ struct sp_vertex_shader; struct softpipe_context { struct pipe_context pipe; /**< base class */ - struct softpipe_winsys *winsys; /**< window system interface */ - /* The most recent drawing state as set by the driver: */ diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h index 291825dfe22..4ab666486c4 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -59,7 +59,7 @@ struct pipe_context; struct pipe_context *softpipe_create( struct pipe_screen *, struct pipe_winsys *, - struct softpipe_winsys * ); + void *unused ); struct pipe_screen * diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 8a89278cde6..71ba076bffd 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -37,6 +37,7 @@ #include "xmesaP.h" #undef ASSERT +#undef Elements #include "pipe/p_winsys.h" #include "pipe/p_format.h" @@ -88,18 +89,6 @@ struct xmesa_surface }; -/** - * Derived from softpipe_winsys. - * We just need one extra field which indicates the pixel format to use for - * drawing surfaces so that we're compatible with the XVisual/window format. - */ -struct xmesa_softpipe_winsys -{ - struct softpipe_winsys spws; - enum pipe_format pixelformat; -}; - - struct xmesa_pipe_winsys { struct pipe_winsys base; @@ -119,12 +108,7 @@ xmesa_surface(struct pipe_surface *ps) return (struct xmesa_surface *) ps; } -/** cast wrapper */ -static INLINE struct xmesa_softpipe_winsys * -xmesa_softpipe_winsys(struct softpipe_winsys *spws) -{ - return (struct xmesa_softpipe_winsys *) spws; -} + /** * Turn the softpipe opaque buffer pointer into a dri_bufmgr opaque @@ -525,8 +509,7 @@ xm_surface_alloc_storage(struct pipe_winsys *winsys, /** - * Called via pipe->surface_alloc() to create new surfaces (textures, - * renderbuffers, etc. + * Called via winsys->surface_alloc() to create new surfaces. */ static struct pipe_surface * xm_surface_alloc(struct pipe_winsys *ws) @@ -610,10 +593,19 @@ xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis) { static struct xmesa_pipe_winsys *ws = NULL; - if (!ws && getenv("XM_AUB")) { + if (!ws) { ws = (struct xmesa_pipe_winsys *) xmesa_create_pipe_winsys_aub(); } - else if (!ws) { + return &ws->base; +} + + +struct pipe_winsys * +xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis) +{ + static struct xmesa_pipe_winsys *ws = NULL; + + if (!ws) { ws = CALLOC_STRUCT(xmesa_pipe_winsys); ws->xm_visual = xm_vis; @@ -644,45 +636,19 @@ xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis) } -/** - * Called via softpipe_winsys->is_format_supported(). - * This function is only called to test formats for front/back color surfaces. - * The winsys being queried will have been created at glXCreateContext - * time, with a pixel format corresponding to the context's visual. - */ -static boolean -xmesa_is_format_supported(struct softpipe_winsys *sws, - enum pipe_format format) -{ - struct xmesa_softpipe_winsys *xmws = xmesa_softpipe_winsys(sws); - return (format == xmws->pixelformat); -} - - -/** - * Return pointer to a softpipe_winsys object. - */ -static struct softpipe_winsys * -xmesa_get_softpipe_winsys(uint pixelformat) -{ - struct xmesa_softpipe_winsys *xmws - = CALLOC_STRUCT(xmesa_softpipe_winsys); - if (!xmws) - return NULL; - - xmws->spws.is_format_supported = xmesa_is_format_supported; - xmws->pixelformat = pixelformat; - - return &xmws->spws; -} - - struct pipe_context * xmesa_create_pipe_context(XMesaContext xmesa, uint pixelformat) { - struct pipe_winsys *pws = xmesa_get_pipe_winsys_aub(xmesa->xm_visual); + struct pipe_winsys *pws; struct pipe_context *pipe; + if (getenv("XM_AUB")) { + pws = xmesa_get_pipe_winsys_aub(xmesa->xm_visual); + } + else { + pws = xmesa_get_pipe_winsys(xmesa->xm_visual); + } + #ifdef GALLIUM_CELL if (!getenv("GALLIUM_NOCELL")) { struct cell_winsys *cws = cell_get_winsys(pixelformat); @@ -693,10 +659,9 @@ xmesa_create_pipe_context(XMesaContext xmesa, uint pixelformat) else #endif { - struct softpipe_winsys *spws = xmesa_get_softpipe_winsys(pixelformat); struct pipe_screen *screen = softpipe_create_screen(pws); - pipe = softpipe_create(screen, pws, spws); + pipe = softpipe_create(screen, pws, NULL); } if (pipe) -- cgit v1.2.3 From 731e7b961cd081ac6a64b636937716ce3a623c2c Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 1 May 2008 18:13:46 +0100 Subject: re-add pipe_surface map/unmap inlines --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- src/gallium/drivers/softpipe/sp_surface.c | 2 +- src/gallium/include/pipe/p_inlines.h | 33 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 90926aec850..07f85bc448f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -47,7 +47,7 @@ #include "tgsi/util/tgsi_parse.h" #define SSE_MAX_VERTICES 4 -#define SSE_SWIZZLES 0 +#define SSE_SWIZZLES 1 #if SSE_SWIZZLES typedef void (XSTDCALL *codegen_function) ( diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index b82b1a8f37d..29a1e92416e 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -46,7 +46,6 @@ sp_surface_copy(struct pipe_context *pipe, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - assert( dst->cpp == src->cpp ); void *dst_map = pipe->screen->surface_map( pipe->screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); @@ -55,6 +54,7 @@ sp_surface_copy(struct pipe_context *pipe, src, PIPE_BUFFER_USAGE_CPU_READ ); + assert( dst->cpp == src->cpp ); assert(src_map && dst_map); pipe_copy_rect(dst_map, diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 592c3c87c26..1e4b98edb48 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -39,6 +39,39 @@ extern "C" { #endif +/* XXX: these are a kludge. will fix when all surfaces are views into + * textures, and free-floating winsys surfaces go away. + */ +static INLINE void * +pipe_surface_map( struct pipe_surface *surf, unsigned flags ) +{ + if (surf->texture) { + struct pipe_screen *screen = surf->texture->screen; + return surf->texture->screen->surface_map( screen, surf, flags ); + } + else { + struct pipe_winsys *winsys = surf->winsys; + char *map = (char *)winsys->buffer_map( winsys, surf->buffer, flags ); + if (map == NULL) + return NULL; + return (void *)(map + surf->offset); + } +} + +static INLINE void +pipe_surface_unmap( struct pipe_surface *surf ) +{ + if (surf->texture) { + struct pipe_screen *screen = surf->texture->screen; + surf->texture->screen->surface_unmap( screen, surf ); + } + else { + struct pipe_winsys *winsys = surf->winsys; + winsys->buffer_unmap( winsys, surf->buffer ); + } +} + + /** * Set 'ptr' to point to 'surf' and update reference counting. -- cgit v1.2.3 From cc2af38f2afa0e6003c8338d51c4f5fbabde40e1 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 2 May 2008 09:26:17 -0600 Subject: gallium: fix typos, comments, whitespace --- src/gallium/drivers/softpipe/sp_tile_cache.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index a88aad5d097..1117c0ad4c7 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -131,7 +131,7 @@ sp_destroy_tile_cache(struct softpipe_tile_cache *tc) uint pos; for (pos = 0; pos < NUM_ENTRIES; pos++) { - //assert(tc->entries[pos].x < 0); + /*assert(tc->entries[pos].x < 0);*/ } if (tc->surface) { pipe_surface_reference(&tc->surface, NULL); @@ -332,8 +332,8 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe, for (x = 0; x < w; x += TILE_SIZE) { if (is_clear_flag_set(tc->clear_flags, x, y)) { pipe_put_tile_raw(pipe, ps, - x, y, TILE_SIZE, TILE_SIZE, - tc->tile.data.color32, 0/*STRIDE*/); + x, y, TILE_SIZE, TILE_SIZE, + tc->tile.data.color32, 0/*STRIDE*/); /* do this? */ clear_clear_flag(tc->clear_flags, x, y); @@ -367,8 +367,8 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, if (tile->x >= 0) { if (tc->depth_stencil) { pipe_put_tile_raw(pipe, ps, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, - tile->data.depth32, 0/*STRIDE*/); + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); } else { pipe_put_tile_rgba(pipe, ps, @@ -385,7 +385,7 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, #endif } else if (tc->texture) { - /* caching a texture, mark all entries as embpy */ + /* caching a texture, mark all entries as empty */ for (pos = 0; pos < NUM_ENTRIES; pos++) { tc->entries[pos].x = -1; } -- cgit v1.2.3 From a73ae3d5eb8419feab5aea26573aa41b72f941eb Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Fri, 2 May 2008 16:46:31 +0100 Subject: gallium: Add texture usage flags, special-case allocation of display targets For many envirionments it's necessary to allocate display targets in a window-system friendly manner. Add facilities so that a driver can tell if a texture is likely to be used to generate a display surface and if use special allocation paths if necessary. Hook up softpipe to call into the winsys->surface_alloc_storage() routine in this case, though we probably want to change that interface slightly also. --- src/gallium/drivers/softpipe/sp_texture.c | 101 ++++++++++++++++--------- src/gallium/drivers/softpipe/sp_texture.h | 2 +- src/gallium/include/pipe/p_state.h | 6 +- src/mesa/state_tracker/st_atom_pixeltransfer.c | 3 +- src/mesa/state_tracker/st_cb_bitmap.c | 6 +- src/mesa/state_tracker/st_cb_drawpixels.c | 6 +- src/mesa/state_tracker/st_cb_fbo.c | 23 ++++-- src/mesa/state_tracker/st_cb_texture.c | 10 ++- src/mesa/state_tracker/st_texture.c | 4 +- src/mesa/state_tracker/st_texture.h | 3 +- 10 files changed, 111 insertions(+), 53 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 2b31cd4f251..599ff2ac458 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -52,40 +52,87 @@ static unsigned minify( unsigned d ) } -static void -softpipe_texture_layout(struct softpipe_texture * spt) +/* Conventional allocation path for non-display textures: + */ +static boolean +softpipe_texture_layout(struct pipe_screen *screen, + struct softpipe_texture * spt) { + struct pipe_winsys *ws = screen->winsys; struct pipe_texture *pt = &spt->base; unsigned level; unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; - spt->buffer_size = 0; + unsigned buffer_size = 0; for (level = 0; level <= pt->last_level; level++) { pt->width[level] = width; pt->height[level] = height; pt->depth[level] = depth; + spt->pitch[level] = width; - spt->level_offset[level] = spt->buffer_size; + spt->level_offset[level] = buffer_size; - spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - width * pt->cpp; + buffer_size += (((pt->compressed) ? MAX2(1, height/4) : height) * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + width * pt->cpp); width = minify(width); height = minify(height); depth = minify(depth); } + + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + buffer_size); + + return spt->buffer != NULL; } + +/* Hack it up to use the old winsys->surface_alloc_storage() + * method for now: + */ +static boolean +softpipe_displaytarget_layout(struct pipe_screen *screen, + struct softpipe_texture * spt) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface surf; + unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + + + memset(&surf, 0, sizeof(surf)); + + ws->surface_alloc_storage( ws, + &surf, + spt->base.width[0], + spt->base.height[0], + spt->base.format, + flags); + + /* Now extract the goodies: + */ + spt->buffer = surf.buffer; + spt->pitch[0] = surf.pitch; + + return spt->buffer != NULL; +} + + + + + static struct pipe_texture * softpipe_texture_create(struct pipe_screen *screen, const struct pipe_texture *templat) { - struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); if (!spt) return NULL; @@ -94,19 +141,21 @@ softpipe_texture_create(struct pipe_screen *screen, spt->base.refcount = 1; spt->base.screen = screen; - softpipe_texture_layout(spt); - - spt->buffer = ws->buffer_create(ws, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); - if (!spt->buffer) { - FREE(spt); - return NULL; + if (spt->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if (!softpipe_displaytarget_layout(screen, spt)) + goto fail; } - + else { + if (!softpipe_texture_layout(screen, spt)) + goto fail; + } + assert(spt->base.refcount == 1); - return &spt->base; + + fail: + FREE(spt); + return NULL; } @@ -178,22 +227,6 @@ softpipe_get_tex_surface(struct pipe_screen *screen, assert(face == 0); assert(zslice == 0); } - - if (usage & (PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_GPU_WRITE)) { - /* XXX if writing to the texture, invalidate the texcache entries!!! - * - * Actually, no. Flushing dependent contexts is still done - * explicitly and separately. Hardware drivers won't insert - * FLUSH commands into a command stream at this point, - * neither should softpipe try to flush caches. - * - * Those contexts could be living in separate threads & doing - * all sorts of unrelated stuff... Context<->texture - * dependency tracking needs to happen elsewhere. - */ - /* assert(0); */ - } } return ps; } diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 2ba093320dc..779a9d8fc97 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -42,11 +42,11 @@ struct softpipe_texture struct pipe_texture base; unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; /* The data is held here: */ struct pipe_buffer *buffer; - unsigned long buffer_size; }; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 277ee4b3190..d7565dff960 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -284,6 +284,10 @@ struct pipe_surface }; +#define PIPE_TEXTURE_USAGE_RENDER_TARGET 0x1 +#define PIPE_TEXTURE_USAGE_DISPLAY_TARGET 0x2 /* ie a backbuffer */ +#define PIPE_TEXTURE_USAGE_SAMPLER 0x4 + /** * Texture object. */ @@ -300,7 +304,7 @@ struct pipe_texture unsigned last_level:8; /**< Index of last mipmap level present/defined */ unsigned compressed:1; - unsigned usage; + unsigned tex_usage; /* PIPE_TEXTURE_USAGE_* */ /* These are also refcounted: */ diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 0c32d53c4ab..e500ac8684d 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -126,7 +126,8 @@ create_color_map_texture(GLcontext *ctx) /* create texture for color map/table */ pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, - texSize, texSize, 1, 0); + texSize, texSize, 1, 0, + PIPE_TEXTURE_USAGE_SAMPLER); return pt; } diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 873b765c2c2..f816e591042 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -321,7 +321,8 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, * Create texture to hold bitmap pattern. */ pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, ctx->st->bitmap.tex_format, - 0, width, height, 1, 0); + 0, width, height, 1, 0, + PIPE_TEXTURE_USAGE_SAMPLER); if (!pt) { _mesa_unmap_bitmap_pbo(ctx, unpack); return NULL; @@ -539,7 +540,8 @@ reset_cache(struct st_context *st) cache->texture = st_texture_create(st, PIPE_TEXTURE_2D, st->bitmap.tex_format, 0, BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT, - 1, 0); + 1, 0, + PIPE_TEXTURE_USAGE_SAMPLER); /* Map the texture surface. * Subsequent glBitmap calls will write into the texture image. diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 9ae53c95f86..8c775ad8864 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -346,7 +346,8 @@ make_texture(struct st_context *st, return NULL; pt = st_texture_create(st, PIPE_TEXTURE_2D, pipeFormat, 0, width, height, - 1, 0); + 1, 0, + PIPE_TEXTURE_USAGE_SAMPLER); if (!pt) { _mesa_unmap_drawpix_pbo(ctx, unpack); return NULL; @@ -994,7 +995,8 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, } pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, texFormat, 0, - width, height, 1, 0); + width, height, 1, 0, + PIPE_TEXTURE_USAGE_SAMPLER); if (!pt) return; diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index b1747141710..21d61e21638 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -90,8 +90,8 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, { struct pipe_context *pipe = ctx->st->pipe; struct st_renderbuffer *strb = st_renderbuffer(rb); - struct pipe_texture template, *texture; + unsigned surface_usage; /* Free the old surface (and texture if we hold the last * reference): @@ -117,10 +117,15 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, template.height[0] = height; template.depth[0] = 1; template.last_level = 0; - template.usage = (PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE | - PIPE_BUFFER_USAGE_GPU_READ); + template.tex_usage = (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_RENDER_TARGET); + + /* Probably need dedicated flags for surface usage too: + */ + surface_usage = (PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); texture = pipe->screen->texture_create( pipe->screen, &template ); @@ -137,11 +142,13 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, * to tell the driver to go ahead and allocate the buffer, even * if HW doesn't support the format. */ - template.usage = (PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_CPU_WRITE); + template.tex_usage = 0; + surface_usage = (PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); texture = pipe->screen->texture_create( pipe->screen, &template ); + } if (!texture) @@ -150,7 +157,7 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, strb->surface = pipe->screen->get_tex_surface( pipe->screen, texture, 0, 0, 0, - template.usage ); + surface_usage ); pipe_texture_reference( &texture, NULL ); diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 4cca3364c15..06caa06e776 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -333,7 +333,9 @@ guess_and_alloc_texture(struct st_context *st, width, height, depth, - comp_byte); + comp_byte, + ( PIPE_TEXTURE_USAGE_RENDER_TARGET | + PIPE_TEXTURE_USAGE_SAMPLER )); DBG("%s - success\n", __FUNCTION__); } @@ -1501,7 +1503,11 @@ st_finalize_texture(GLcontext *ctx, firstImage->base.Width2, firstImage->base.Height2, firstImage->base.Depth2, - comp_byte); + comp_byte, + + ( PIPE_TEXTURE_USAGE_RENDER_TARGET | + PIPE_TEXTURE_USAGE_SAMPLER )); + if (!stObj->pt) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); return GL_FALSE; diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index d6268fc80ca..2b3742d4e5b 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -75,7 +75,8 @@ st_texture_create(struct st_context *st, GLuint width0, GLuint height0, GLuint depth0, - GLuint compress_byte) + GLuint compress_byte, + GLuint usage ) { struct pipe_texture pt, *newtex; struct pipe_screen *screen = st->pipe->screen; @@ -98,6 +99,7 @@ st_texture_create(struct st_context *st, pt.depth[0] = depth0; pt.compressed = compress_byte ? 1 : 0; pt.cpp = pt.compressed ? compress_byte : st_sizeof_format(format); + pt.tex_usage = usage; newtex = screen->texture_create(screen, &pt); diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index f6d5733e210..6a9f08ec6b7 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -105,7 +105,8 @@ st_texture_create(struct st_context *st, GLuint width0, GLuint height0, GLuint depth0, - GLuint compress_byte); + GLuint compress_byte, + GLuint tex_usage ); /* Check if an image fits into an existing texture object. -- cgit v1.2.3 From 5cb29dae06a4d97dc40ac7573e7ae7211e329b3c Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Fri, 2 May 2008 16:56:06 +0100 Subject: i915: update to new display target allocation --- src/gallium/drivers/i915simple/i915_texture.c | 91 ++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 7b9359a0fea..f668e2e7d7a 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -105,6 +105,50 @@ i915_miptree_set_image_offset(struct i915_texture *tex, } +/* Hack it up to use the old winsys->surface_alloc_storage() + * method for now: + */ +static boolean +i915_displaytarget_layout(struct pipe_screen *screen, + struct i915_texture *tex) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface surf; + unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + + + memset(&surf, 0, sizeof(surf)); + + ws->surface_alloc_storage( ws, + &surf, + tex->base.width[0], + tex->base.height[0], + tex->base.format, + flags); + + /* Now extract the goodies: + */ + i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); + i915_miptree_set_level_info( tex, 0, 0, 0, 0, + tex->base.width[0], + tex->base.height[0], + 1 ); + + tex->buffer = surf.buffer; + tex->pitch = surf.pitch; + tex->total_height = 0; + + + return tex->buffer != NULL; +} + + + + + static void i945_miptree_layout_2d( struct i915_texture *tex ) { @@ -483,30 +527,45 @@ static struct pipe_texture * i915_texture_create_screen(struct pipe_screen *screen, const struct pipe_texture *templat) { + struct i915_screen *i915screen = i915_screen(screen); + struct pipe_winsys *ws = screen->winsys; struct i915_texture *tex = CALLOC_STRUCT(i915_texture); - if (tex) { - struct i915_screen *i915screen = i915_screen(screen); - struct pipe_winsys *ws = screen->winsys; - - tex->base = *templat; - tex->base.refcount = 1; - tex->base.screen = screen; + if (!tex) + return NULL; - if (i915screen->is_i945 ? i945_miptree_layout(tex) : - i915_miptree_layout(tex)) - tex->buffer = ws->buffer_create(ws, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); + tex->base = *templat; + tex->base.refcount = 1; + tex->base.screen = screen; - if (!tex->buffer) { - FREE(tex); - return NULL; + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if (!i915_displaytarget_layout(screen, tex)) + goto fail; + } + else { + if (i915screen->is_i945) { + if (!i945_miptree_layout(tex)) + goto fail; } + else { + if (!i915_miptree_layout(tex)) + goto fail; + } + + tex->buffer = ws->buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); + + if (!tex->buffer) + goto fail; } return &tex->base; + + fail: + FREE(tex); + return NULL; } -- cgit v1.2.3 From 3d53d38d5e35386de4793162b9dd32e171927059 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 2 May 2008 10:37:20 -0600 Subject: gallium: new debug code (disabled) --- src/gallium/drivers/softpipe/sp_tex_sample.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 5b63f979977..be0b57d9fa6 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1051,5 +1051,19 @@ sp_get_samples(struct tgsi_sampler *sampler, default: assert(0); } + +#if 0 /* DEBUG */ + { + int i; + printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]); + for (i = 0; i < 4; i++) { + printf("Frag %d: %f %f %f %f\n", i, + rgba[0][i], + rgba[1][i], + rgba[2][i], + rgba[3][i]); + } + } +#endif } -- cgit v1.2.3 From 7849ccb2a7dba3b9d751acaac9dd9aec3abe3b59 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Fri, 2 May 2008 17:55:22 +0100 Subject: brw: remove dead code --- src/gallium/drivers/i965simple/brw_tex_layout.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index ba4c4a7bcf5..78ae0b1223e 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -357,14 +357,6 @@ brw_texture_release_screen(struct pipe_screen *screen, } -static void -brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, - uint face, uint levelsMask) -{ - /* no-op? */ -} - - static struct pipe_surface * brw_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, -- cgit v1.2.3 From 612f44266cba78c4e5677a2f992581fdaa17f4e4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 5 May 2008 19:45:21 +1000 Subject: nouveau: bitmap texcoord bias has been removed --- src/gallium/drivers/nv10/nv10_screen.c | 2 -- src/gallium/drivers/nv30/nv30_screen.c | 2 -- src/gallium/drivers/nv40/nv40_screen.c | 2 -- src/gallium/drivers/nv50/nv50_screen.c | 2 -- 4 files changed, 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 45fbde62ea2..cf6b2fac84e 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -73,8 +73,6 @@ nv10_screen_get_paramf(struct pipe_screen *screen, int param) return 2.0; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 4.0; - case PIPE_CAP_BITMAP_TEXCOORD_BIAS: - return 0.0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0.0; diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index ce6c9ec523a..1de45079043 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -77,8 +77,6 @@ nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) return 16.0; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 4.0; - case PIPE_CAP_BITMAP_TEXCOORD_BIAS: - return 0.0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0.0; diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index c64c3b1c39d..7f68539a85c 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -82,8 +82,6 @@ nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) return 16.0; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 16.0; - case PIPE_CAP_BITMAP_TEXCOORD_BIAS: - return 0.0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0.0; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index adb724b9b7a..4902f16de3a 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -86,8 +86,6 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, int param) return 16.0; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 4.0; - case PIPE_CAP_BITMAP_TEXCOORD_BIAS: - return 0.0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0.0; -- cgit v1.2.3 From c7ad942c54c3892a98d248a15af817f256260e75 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Wed, 7 May 2008 14:17:59 +0900 Subject: gallium: Propagate tex_usage flags down to winsys. --- src/gallium/drivers/i915simple/i915_texture.c | 3 ++- src/gallium/drivers/softpipe/sp_texture.c | 3 ++- src/gallium/include/pipe/p_winsys.h | 3 ++- src/gallium/winsys/dri/intel/intel_winsys_pipe.c | 3 ++- src/gallium/winsys/xlib/xm_winsys.c | 3 ++- src/gallium/winsys/xlib/xm_winsys_aub.c | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index f668e2e7d7a..3e23e540f9d 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -127,7 +127,8 @@ i915_displaytarget_layout(struct pipe_screen *screen, tex->base.width[0], tex->base.height[0], tex->base.format, - flags); + flags, + tex->base.tex_usage); /* Now extract the goodies: */ diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 599ff2ac458..1d7a1fffe4d 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -115,7 +115,8 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, spt->base.width[0], spt->base.height[0], spt->base.format, - flags); + flags, + spt->base.tex_usage); /* Now extract the goodies: */ diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h index 87a66b66d75..7ebc2851928 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/p_winsys.h @@ -86,7 +86,8 @@ struct pipe_winsys struct pipe_surface *surf, unsigned width, unsigned height, enum pipe_format format, - unsigned flags); + unsigned flags, + unsigned tex_usage); void (*surface_release)(struct pipe_winsys *ws, struct pipe_surface **s); diff --git a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c index 77dec9488df..d15143acfd4 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c @@ -206,7 +206,8 @@ intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, struct pipe_surface *surf, unsigned width, unsigned height, enum pipe_format format, - unsigned flags) + unsigned flags, + unsigned tex_usage) { const unsigned alignment = 64; int ret; diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index a70752428af..b14758f3336 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -489,7 +489,8 @@ xm_surface_alloc_storage(struct pipe_winsys *winsys, struct pipe_surface *surf, unsigned width, unsigned height, enum pipe_format format, - unsigned flags) + unsigned flags, + unsigned tex_usage) { const unsigned alignment = 64; diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index f42f7fcc5f1..77376099f0d 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -276,7 +276,8 @@ aub_i915_surface_alloc_storage(struct pipe_winsys *winsys, struct pipe_surface *surf, unsigned width, unsigned height, enum pipe_format format, - unsigned flags) + unsigned flags, + unsigned tex_usage) { const unsigned alignment = 64; -- cgit v1.2.3 From 9002cdb48e65c063ea00e1cb4917d432b22ae0ad Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 8 May 2008 22:07:52 +0100 Subject: softpipe: don't calc det if NO_RAST set --- src/gallium/drivers/softpipe/sp_setup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index df7be01fcd5..5370d85275f 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -717,7 +717,7 @@ void setup_tri( struct setup_context *setup, const float (*v1)[4], const float (*v2)[4] ) { - float det = calc_det(v0, v1, v2); + float det; #if DEBUG_VERTS debug_printf("Setup triangle:\n"); @@ -728,7 +728,8 @@ void setup_tri( struct setup_context *setup, if (setup->softpipe->no_rast) return; - + + det = calc_det(v0, v1, v2); /* debug_printf("%s\n", __FUNCTION__ ); */ -- cgit v1.2.3 From 140b3f7f9cc682809170d7c311f89e0477dba5aa Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Sat, 10 May 2008 12:16:19 -0600 Subject: gallium: remove unused code --- src/gallium/drivers/softpipe/sp_quad_fs.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 625d0f9b489..8c88c192f8f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -53,7 +53,6 @@ struct quad_shade_stage struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; struct tgsi_exec_machine machine; struct tgsi_exec_vector *inputs, *outputs; - int colorOutSlot, depthOutSlot; }; @@ -156,20 +155,6 @@ static void shade_begin(struct quad_stage *qs) qss->samplers[i].texture = softpipe->texture[i]; } - /* find output slots for depth, color */ - qss->colorOutSlot = -1; - qss->depthOutSlot = -1; - for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { - switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - qss->depthOutSlot = i; - break; - case TGSI_SEMANTIC_COLOR: - qss->colorOutSlot = i; - break; - } - } - softpipe->fs->prepare( softpipe->fs, &qss->machine, qss->samplers ); -- cgit v1.2.3 From 6807b4f6b1fa6ef0412714622ff16fe9d1487a8e Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Sat, 10 May 2008 12:46:00 -0600 Subject: gallium: optimize the flush_spans() function --- src/gallium/drivers/softpipe/sp_setup.c | 81 ++++++++++++++++----------------- 1 file changed, 40 insertions(+), 41 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 5370d85275f..543d86a5cb9 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -208,78 +208,77 @@ static INLINE int block( int x ) } -/** - * Compute mask which indicates which pixels in the 2x2 quad are actually inside - * the triangle's bounds. - * - * this is pretty nasty... may need to rework flush_spans again to - * fix it, if possible. - */ -static unsigned calculate_mask( struct setup_context *setup, int x ) -{ - unsigned mask = 0x0; - - if (x >= setup->span.left[0] && x < setup->span.right[0]) - mask |= MASK_TOP_LEFT; - - if (x >= setup->span.left[1] && x < setup->span.right[1]) - mask |= MASK_BOTTOM_LEFT; - - if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0]) - mask |= MASK_TOP_RIGHT; - - if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1]) - mask |= MASK_BOTTOM_RIGHT; - - return mask; -} - - /** * Render a horizontal span of quads */ static void flush_spans( struct setup_context *setup ) { + const int xleft0 = setup->span.left[0]; + const int xleft1 = setup->span.left[1]; + const int xright0 = setup->span.right[0]; + const int xright1 = setup->span.right[1]; int minleft, maxright; int x; switch (setup->span.y_flags) { case 0x3: /* both odd and even lines written (both quad rows) */ - minleft = MIN2(setup->span.left[0], setup->span.left[1]); - maxright = MAX2(setup->span.right[0], setup->span.right[1]); + minleft = block(MIN2(xleft0, xleft1)); + maxright = block(MAX2(xright0, xright1)); + for (x = minleft; x <= maxright; x += 2) { + /* determine which of the four pixels is inside the span bounds */ + uint mask = 0x0; + if (x >= xleft0 && x < xright0) + mask |= MASK_TOP_LEFT; + if (x >= xleft1 && x < xright1) + mask |= MASK_BOTTOM_LEFT; + if (x+1 >= xleft0 && x+1 < xright0) + mask |= MASK_TOP_RIGHT; + if (x+1 >= xleft1 && x+1 < xright1) + mask |= MASK_BOTTOM_RIGHT; + emit_quad( setup, x, setup->span.y, mask ); + } break; case 0x1: /* only even line written (quad top row) */ - minleft = setup->span.left[0]; - maxright = setup->span.right[0]; + minleft = block(xleft0); + maxright = block(xright0); + for (x = minleft; x <= maxright; x += 2) { + uint mask = 0x0; + if (x >= xleft0 && x < xright0) + mask |= MASK_TOP_LEFT; + if (x+1 >= xleft0 && x+1 < xright0) + mask |= MASK_TOP_RIGHT; + emit_quad( setup, x, setup->span.y, mask ); + } break; case 0x2: /* only odd line written (quad bottom row) */ - minleft = setup->span.left[1]; - maxright = setup->span.right[1]; + minleft = block(xleft1); + maxright = block(xright1); + for (x = minleft; x <= maxright; x += 2) { + uint mask = 0x0; + if (x >= xleft1 && x < xright1) + mask |= MASK_BOTTOM_LEFT; + if (x+1 >= xleft1 && x+1 < xright1) + mask |= MASK_BOTTOM_RIGHT; + emit_quad( setup, x, setup->span.y, mask ); + } break; default: return; } - /* XXX this loop could be moved into the above switch cases and - * calculate_mask() could be simplified a bit... - */ - for (x = block(minleft); x <= block(maxright); x += 2) { - emit_quad( setup, x, setup->span.y, - calculate_mask( setup, x ) ); - } - setup->span.y = 0; setup->span.y_flags = 0; setup->span.right[0] = 0; setup->span.right[1] = 0; } + #if DEBUG_VERTS static void print_vertex(const struct setup_context *setup, const float (*v)[4]) -- cgit v1.2.3 From de818835de70961602bb9ceca86b98e9bbc63fc1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Mon, 12 May 2008 14:10:03 +0100 Subject: softpipe: add failure paths for context creation --- src/gallium/drivers/softpipe/sp_context.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index fe9cd8375e3..a48e5461391 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -88,7 +88,8 @@ static void softpipe_destroy( struct pipe_context *pipe ) struct pipe_winsys *ws = pipe->winsys; uint i; - draw_destroy( softpipe->draw ); + if (softpipe->draw) + draw_destroy( softpipe->draw ); softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); @@ -216,8 +217,12 @@ softpipe_create( struct pipe_screen *screen, * Create drawing context and plug our rendering stage into it. */ softpipe->draw = draw_create(); - assert(softpipe->draw); + if (!softpipe->draw) + goto fail; + softpipe->setup = sp_draw_render_stage(softpipe); + if (!softpipe->setup) + goto fail; if (GETENV( "SP_NO_RAST" ) != NULL) softpipe->no_rast = TRUE; @@ -241,4 +246,8 @@ softpipe_create( struct pipe_screen *screen, sp_init_surface_functions(softpipe); return &softpipe->pipe; + + fail: + softpipe_destroy(&softpipe->pipe); + return NULL; } -- cgit v1.2.3 From 90e86363de7dbcfda3490b5c31d701350a0fa2ef Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Mon, 12 May 2008 16:16:04 +0100 Subject: softpipe: make vbuf handle all primitive types --- src/gallium/drivers/softpipe/sp_prim_setup.c | 15 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 255 +++++++++++++++------------ 2 files changed, 155 insertions(+), 115 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 1cf9ffa632f..941ab62e00c 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -64,16 +64,17 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) } +typedef const float (*cptrf4)[4]; static void do_tri(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); - + setup_tri( setup->setup, - prim->v[0]->data, - prim->v[1]->data, - prim->v[2]->data ); + (cptrf4)prim->v[0]->data, + (cptrf4)prim->v[1]->data, + (cptrf4)prim->v[2]->data ); } static void @@ -82,8 +83,8 @@ do_line(struct draw_stage *stage, struct prim_header *prim) struct setup_stage *setup = setup_stage( stage ); setup_line( setup->setup, - prim->v[0]->data, - prim->v[1]->data ); + (cptrf4)prim->v[0]->data, + (cptrf4)prim->v[1]->data ); } static void @@ -92,7 +93,7 @@ do_point(struct draw_stage *stage, struct prim_header *prim) struct setup_stage *setup = setup_stage( stage ); setup_point( setup->setup, - prim->v[0]->data ); + (cptrf4)prim->v[0]->data ); } diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index e063fe82efc..1399776ff04 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -129,17 +129,22 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) } +static INLINE cptrf4 get_vert( const void *vertex_buffer, + int index, + int stride ) +{ + return (cptrf4)((char *)vertex_buffer + index * stride); +} static void -sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) +sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; - unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); - unsigned i, j; - void *vertex_buffer = cvbr->vertex_buffer; - cptrf4 v[3]; + unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); + unsigned i; + const void *vertex_buffer = cvbr->vertex_buffer; /* XXX: break this dependency - make setup_context live under * softpipe, rename the old "setup" draw stage to something else. @@ -149,40 +154,98 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) switch (cvbr->prim) { - case PIPE_PRIM_TRIANGLES: - for (i = 0; i < nr_indices; i += 3) { - for (j = 0; j < 3; j++) - v[j] = (cptrf4)((char *)vertex_buffer + - indices[i+j] * vertex_size); - - setup_tri( setup_ctx, - v[0], - v[1], - v[2]); + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup_point( setup_ctx, + get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_LINES: - for (i = 0; i < nr_indices; i += 2) { - for (j = 0; j < 2; j++) - v[j] = (cptrf4)((char *)vertex_buffer + - indices[i+j] * vertex_size); + for (i = 1; i < nr; i += 2) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { setup_line( setup_ctx, - v[0], - v[1] ); + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); } break; - case PIPE_PRIM_POINTS: - for (i = 0; i < nr_indices; i++) { - v[0] = (cptrf4)((char *)vertex_buffer + - indices[i] * vertex_size); + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + if (nr) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[nr-1], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + break; - setup_point( setup_ctx, - v[0] ); + + case PIPE_PRIM_TRIANGLES: + for (i = 2; i < nr; i += 3) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + for (i = 2; i < nr; i += 3) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i+(i&1)-2], stride), + get_vert(vertex_buffer, indices[i-(i&1)-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); } break; + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + for (i = 2; i < nr; i += 3) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + case PIPE_PRIM_QUADS: + for (i = 3; i < nr; i += 4) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 3; i < nr; i += 2) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-2], stride)); + } + break; + default: + assert(0); } /* XXX: why are we calling this??? If we had to call something, it @@ -203,130 +266,106 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) struct softpipe_context *softpipe = cvbr->softpipe; struct draw_stage *setup = softpipe->setup; const void *vertex_buffer = cvbr->vertex_buffer; - const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); + const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i; struct setup_context *setup_ctx = sp_draw_setup_context(setup); - cptrf4 v[3]; - -#define VERTEX(I) \ - (cptrf4) ((char *) vertex_buffer + (I) * vertex_size) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - v[0] = VERTEX(i); - setup_point( setup_ctx, v[0] ); + setup_point( setup_ctx, + get_vert(vertex_buffer, i-0, stride) ); } break; + case PIPE_PRIM_LINES: - assert(nr % 2 == 0); - for (i = 0; i < nr; i += 2) { - v[0] = VERTEX(i); - v[1] = VERTEX(i + 1); - setup_line( setup_ctx, v[0], v[1] ); + for (i = 1; i < nr; i += 2) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; + case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < nr; i++) { - v[0] = VERTEX(i - 1); - v[1] = VERTEX(i); - setup_line( setup_ctx, v[0], v[1] ); + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + if (nr) { + setup_line( setup_ctx, + get_vert(vertex_buffer, nr-1, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + case PIPE_PRIM_TRIANGLES: - assert(nr % 3 == 0); - for (i = 0; i < nr; i += 3) { - v[0] = VERTEX(i + 0); - v[1] = VERTEX(i + 1); - v[2] = VERTEX(i + 2); + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); } break; + case PIPE_PRIM_TRIANGLE_STRIP: - assert(nr >= 3); - for (i = 2; i < nr; i++) { - v[0] = VERTEX(i - 2); - v[1] = VERTEX(i - 1); - v[2] = VERTEX(i); + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i+(i&1)-2, stride), + get_vert(vertex_buffer, i-(i&1)-1, stride), + get_vert(vertex_buffer, i-0, stride)); } break; + case PIPE_PRIM_TRIANGLE_FAN: - assert(nr >= 3); - for (i = 2; i < nr; i++) { - v[0] = VERTEX(0); - v[1] = VERTEX(i - 1); - v[2] = VERTEX(i); + case PIPE_PRIM_POLYGON: + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); } break; case PIPE_PRIM_QUADS: assert(nr % 4 == 0); - for (i = 0; i < nr; i += 4) { - v[0] = VERTEX(i + 0); - v[1] = VERTEX(i + 1); - v[2] = VERTEX(i + 2); + for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride)); - v[0] = VERTEX(i + 0); - v[1] = VERTEX(i + 2); - v[2] = VERTEX(i + 3); setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); } break; case PIPE_PRIM_QUAD_STRIP: assert(nr >= 4); - for (i = 2; i < nr; i += 2) { - v[0] = VERTEX(i - 2); - v[1] = VERTEX(i); - v[2] = VERTEX(i + 1); + for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); - v[0] = VERTEX(i - 2); - v[1] = VERTEX(i + 1); - v[2] = VERTEX(i - 1); - setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); - } - break; - case PIPE_PRIM_POLYGON: - /* draw as tri fan */ - for (i = 2; i < nr; i++) { - v[0] = VERTEX(0); - v[1] = VERTEX(i - 1); - v[2] = VERTEX(i); setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-2, stride)); } break; default: - /* XXX finish remaining prim types */ assert(0); } - -#undef VERTEX } -- cgit v1.2.3 From f116a149160d50d43a23b02a3416725d6f895d51 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Mon, 12 May 2008 17:30:05 +0100 Subject: softpipe: more work to get non-reduced primitives working in vbuf --- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 45 ++++++++++++----------------- 1 file changed, 19 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 1399776ff04..e9fae951e0b 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -116,15 +116,8 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) - if (prim == PIPE_PRIM_TRIANGLES || - prim == PIPE_PRIM_LINES || - prim == PIPE_PRIM_POINTS) { - cvbr->prim = prim; - return TRUE; - } - else { - return FALSE; - } + cvbr->prim = prim; + return TRUE; } @@ -201,7 +194,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_TRIANGLE_STRIP: - for (i = 2; i < nr; i += 3) { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-2], stride), get_vert(vertex_buffer, indices[i-(i&1)-1], stride), @@ -211,7 +204,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_FAN: case PIPE_PRIM_POLYGON: - for (i = 2; i < nr; i += 3) { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride), @@ -223,10 +216,10 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride)); + get_vert(vertex_buffer, indices[i-0], stride)); setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride)); } @@ -234,14 +227,14 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUAD_STRIP: for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride)); setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-2], stride)); + get_vert(vertex_buffer, indices[i-0], stride)); } break; default: @@ -265,11 +258,13 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; struct draw_stage *setup = softpipe->setup; - const void *vertex_buffer = cvbr->vertex_buffer; + const void *vertex_buffer = NULL; const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i; struct setup_context *setup_ctx = sp_draw_setup_context(setup); + vertex_buffer = (void *)get_vert(cvbr->vertex_buffer, start, stride); + switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { @@ -318,7 +313,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_TRIANGLE_STRIP: - for (i = 2; i < nr; i += 3) { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-2, stride), get_vert(vertex_buffer, i-(i&1)-1, stride), @@ -328,7 +323,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_FAN: case PIPE_PRIM_POLYGON: - for (i = 2; i < nr; i += 3) { + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride), @@ -336,31 +331,29 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } break; case PIPE_PRIM_QUADS: - assert(nr % 4 == 0); for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride)); + get_vert(vertex_buffer, i-0, stride)); setup_tri( setup_ctx, - get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride)); } break; case PIPE_PRIM_QUAD_STRIP: - assert(nr >= 4); for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, + get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride)); setup_tri( setup_ctx, + get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-2, stride)); + get_vert(vertex_buffer, i-0, stride)); } break; default: -- cgit v1.2.3 From 32ed02bcfbe7a2132929658b1a73708ab16af006 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 5 May 2008 19:49:06 +1000 Subject: nv50: report some supported formats to keep the state tracker from asserting. --- src/gallium/drivers/nv50/nv50_screen.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 4902f16de3a..e4ca72c7171 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -14,6 +14,30 @@ static boolean nv50_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, uint type) { + switch (type) { + case PIPE_SURFACE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + break; + case PIPE_TEXTURE: + switch (format) { + case PIPE_FORMAT_I8_UNORM: + return TRUE; + default: + break; + } + break; + default: + assert(0); + } + return FALSE; } -- cgit v1.2.3 From 9b0054c7f87e3cc89fc0e60408af41f3e86dfdff Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 5 May 2008 19:52:19 +1000 Subject: nv50: slightly less skeletal texture funcs, prevents fun segfaults --- src/gallium/drivers/nv50/nv50_miptree.c | 62 ++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 7474c657650..58584934b18 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -1,29 +1,83 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "pipe/p_screen.h" +#include "pipe/p_inlines.h" #include "nv50_context.h" +struct nv50_miptree { + struct pipe_texture base; + struct pipe_buffer *buffer; +}; + +static INLINE struct nv50_miptree * +nv50_miptree(struct pipe_texture *pt) +{ + return (struct nv50_miptree *)pt; +} + static struct pipe_texture * nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { + struct pipe_winsys *ws = pscreen->winsys; + struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); + NOUVEAU_ERR("unimplemented\n"); - return NULL; + + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + 512*32*4); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; } static void -nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) +nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) { + struct pipe_winsys *ws = pscreen->winsys; + struct pipe_texture *pt = *ppt; + NOUVEAU_ERR("unimplemented\n"); + + *ppt = NULL; + if (--pt->refcount <= 0) { + struct nv50_miptree *mt = nv50_miptree(pt); + + pipe_buffer_reference(ws, &mt->buffer, NULL); + FREE(mt); + } } static struct pipe_surface * nv50_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) { + struct pipe_winsys *ws = pscreen->winsys; + struct nv50_miptree *mt = nv50_miptree(pt); + struct pipe_surface *ps; + NOUVEAU_ERR("unimplemented\n"); - return NULL; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + + pipe_buffer_reference(ws, &ps->buffer, mt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = ps->width; + ps->offset = 0; + + return ps; } void -- cgit v1.2.3 From c962ad7cd5dbea12d13997b421a44b16af3c6662 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 13 May 2008 12:06:32 +1000 Subject: nouveau: create objnull during channel creation --- src/gallium/drivers/nouveau/nouveau_channel.h | 1 + src/gallium/winsys/dri/nouveau/nouveau_channel.c | 8 ++++++++ src/gallium/winsys/dri/nouveau/nouveau_context.c | 7 ------- src/gallium/winsys/dri/nouveau/nouveau_context.h | 1 - src/gallium/winsys/dri/nouveau/nouveau_grobj.c | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_channel.h b/src/gallium/drivers/nouveau/nouveau_channel.h index b99de9add86..cd99a676bdc 100644 --- a/src/gallium/drivers/nouveau/nouveau_channel.h +++ b/src/gallium/drivers/nouveau/nouveau_channel.h @@ -29,6 +29,7 @@ struct nouveau_channel { struct nouveau_pushbuf *pushbuf; + struct nouveau_grobj *nullobj; struct nouveau_grobj *vram; struct nouveau_grobj *gart; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_channel.c b/src/gallium/winsys/dri/nouveau/nouveau_channel.c index df80d04add5..3b4dcd1ecf2 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_channel.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_channel.c @@ -85,6 +85,13 @@ nouveau_channel_alloc(struct nouveau_device *dev, uint32_t fb_ctxdma, return ret; } + ret = nouveau_grobj_alloc(&nvchan->base, 0x00000000, 0x0030, + &nvchan->base.nullobj); + if (ret) { + nouveau_channel_free((void *)&nvchan); + return ret; + } + nouveau_dma_channel_init(&nvchan->base); nouveau_pushbuf_init(&nvchan->base); @@ -109,6 +116,7 @@ nouveau_channel_free(struct nouveau_channel **chan) nouveau_grobj_free(&nvchan->base.vram); nouveau_grobj_free(&nvchan->base.gart); + nouveau_grobj_free(&nvchan->base.nullobj); cf.channel = nvchan->drm.channel; drmCommandWrite(nvdev->fd, DRM_NOUVEAU_CHANNEL_FREE, &cf, sizeof(cf)); diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c index e65b057335c..d9fc3f6ce1f 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.c @@ -26,7 +26,6 @@ int __nouveau_debug = 0; static void nouveau_channel_context_destroy(struct nouveau_channel_context *nvc) { - nouveau_grobj_free(&nvc->NvNull); nouveau_grobj_free(&nvc->NvCtxSurf2D); nouveau_grobj_free(&nvc->NvImageBlit); nouveau_grobj_free(&nvc->NvGdiRect); @@ -59,12 +58,6 @@ nouveau_channel_context_create(struct nouveau_device *dev) return NULL; } - if ((ret = nouveau_grobj_alloc(nvc->channel, 0x00000000, 0x30, - &nvc->NvNull))) { - NOUVEAU_ERR("Error creating NULL object: %d\n", ret); - nouveau_channel_context_destroy(nvc); - return NULL; - } nvc->next_handle = 0x80000000; if ((ret = nouveau_notifier_alloc(nvc->channel, nvc->next_handle++, 1, diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.h b/src/gallium/winsys/dri/nouveau/nouveau_context.h index 9872d6b6914..b20107a94c6 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.h @@ -26,7 +26,6 @@ struct nouveau_channel_context { struct nouveau_notifier *sync_notifier; /* Common */ - struct nouveau_grobj *NvNull; struct nouveau_grobj *NvM2MF; /* NV04-NV40 */ struct nouveau_grobj *NvCtxSurf2D; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_grobj.c b/src/gallium/winsys/dri/nouveau/nouveau_grobj.c index 55dfeb99aa7..51523897d58 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_grobj.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_grobj.c @@ -50,7 +50,7 @@ nouveau_grobj_alloc(struct nouveau_channel *chan, uint32_t handle, ret = drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GROBJ_ALLOC, &g, sizeof(g)); if (ret) { - nouveau_grobj_free((void *)&grobj); + nouveau_grobj_free((void *)&nvgrobj); return ret; } -- cgit v1.2.3 From 2f80d4d2a705835b272cf2274ea578fe5bbe1919 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 13 May 2008 12:09:04 +1000 Subject: nouveau: make nouveau_device.h part of public API --- src/gallium/drivers/nouveau/nouveau_device.h | 30 ++++++++++++++++++++++ src/gallium/winsys/dri/nouveau/nouveau_context.h | 1 - src/gallium/winsys/dri/nouveau/nouveau_device.h | 30 ---------------------- src/gallium/winsys/dri/nouveau/nouveau_drmif.h | 2 +- src/gallium/winsys/dri/nouveau/nouveau_screen.c | 1 - src/gallium/winsys/dri/nouveau/nouveau_screen.h | 1 - .../winsys/dri/nouveau/nouveau_winsys_pipe.c | 1 - 7 files changed, 31 insertions(+), 35 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nouveau_device.h delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_device.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_device.h b/src/gallium/drivers/nouveau/nouveau_device.h new file mode 100644 index 00000000000..e25e89fedda --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_device.h @@ -0,0 +1,30 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_DEVICE_H__ +#define __NOUVEAU_DEVICE_H__ + +struct nouveau_device { + unsigned chipset; +}; + +#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.h b/src/gallium/winsys/dri/nouveau/nouveau_context.h index b20107a94c6..77e2147a2c7 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.h @@ -5,7 +5,6 @@ #include "xmlconfig.h" #include "nouveau/nouveau_winsys.h" -#include "nouveau_device.h" #include "nouveau_drmif.h" #include "nouveau_dma.h" diff --git a/src/gallium/winsys/dri/nouveau/nouveau_device.h b/src/gallium/winsys/dri/nouveau/nouveau_device.h deleted file mode 100644 index e25e89fedda..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_device.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_DEVICE_H__ -#define __NOUVEAU_DEVICE_H__ - -struct nouveau_device { - unsigned chipset; -}; - -#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_drmif.h b/src/gallium/winsys/dri/nouveau/nouveau_drmif.h index d5a0e25b573..a31c9a514b1 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_drmif.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_drmif.h @@ -27,7 +27,7 @@ #include <xf86drm.h> #include <nouveau_drm.h> -#include "nouveau_device.h" +#include "nouveau/nouveau_device.h" #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_grobj.h" #include "nouveau/nouveau_notifier.h" diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.c b/src/gallium/winsys/dri/nouveau/nouveau_screen.c index f06e1784831..9041275a88f 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_screen.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_screen.c @@ -7,7 +7,6 @@ #include "state_tracker/st_cb_fbo.h" #include "nouveau_context.h" -#include "nouveau_device.h" #include "nouveau_drm.h" #include "nouveau_dri.h" #include "nouveau_local.h" diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.h b/src/gallium/winsys/dri/nouveau/nouveau_screen.h index e9da2026904..388d6be9bbc 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_screen.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_screen.h @@ -2,7 +2,6 @@ #define __NOUVEAU_SCREEN_H__ #include "xmlconfig.h" -#include "nouveau_device.h" struct nouveau_screen { __DRIscreenPrivate *driScrnPriv; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c index 849e38d22b6..755c8b6997a 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c @@ -4,7 +4,6 @@ #include "pipe/p_inlines.h" #include "nouveau_context.h" -#include "nouveau_device.h" #include "nouveau_local.h" #include "nouveau_screen.h" #include "nouveau_swapbuffers.h" -- cgit v1.2.3 From 1ef08564d2a201a422db772a6bb23d1129888304 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 13 May 2008 12:16:35 +1000 Subject: nouveau: remove chipset fields in all nv pipe driver context/screen structs. --- src/gallium/drivers/nouveau/nouveau_winsys.h | 13 +++++-------- src/gallium/drivers/nv10/nv10_context.c | 2 -- src/gallium/drivers/nv10/nv10_context.h | 2 -- src/gallium/drivers/nv10/nv10_screen.c | 8 ++++---- src/gallium/drivers/nv10/nv10_screen.h | 1 - src/gallium/drivers/nv30/nv30_context.c | 2 -- src/gallium/drivers/nv30/nv30_context.h | 2 -- src/gallium/drivers/nv30/nv30_screen.c | 8 ++++---- src/gallium/drivers/nv30/nv30_screen.h | 1 - src/gallium/drivers/nv40/nv40_context.c | 2 -- src/gallium/drivers/nv40/nv40_context.h | 2 -- src/gallium/drivers/nv40/nv40_screen.c | 8 ++++---- src/gallium/drivers/nv40/nv40_screen.h | 1 - src/gallium/drivers/nv50/nv50_screen.c | 8 ++++---- src/gallium/drivers/nv50/nv50_screen.h | 1 - src/gallium/winsys/dri/nouveau/nouveau_winsys.c | 5 ++--- 16 files changed, 23 insertions(+), 43 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index fbde7adc6ad..07a41dcf4a1 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -8,6 +8,7 @@ #include "nouveau/nouveau_bo.h" #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_class.h" +#include "nouveau/nouveau_device.h" #include "nouveau/nouveau_grobj.h" #include "nouveau/nouveau_notifier.h" #include "nouveau/nouveau_resource.h" @@ -56,29 +57,25 @@ struct nouveau_winsys { }; extern struct pipe_screen * -nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, - unsigned chipset); +nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); extern struct pipe_context * nv10_create(struct pipe_screen *, unsigned pctx_id); extern struct pipe_screen * -nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, - unsigned chipset); +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); extern struct pipe_context * nv30_create(struct pipe_screen *, unsigned pctx_id); extern struct pipe_screen * -nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, - unsigned chipset); +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); extern struct pipe_context * nv40_create(struct pipe_screen *, unsigned pctx_id); extern struct pipe_screen * -nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *, - unsigned chipset); +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); extern struct pipe_context * nv50_create(struct pipe_screen *, unsigned pctx_id); diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index a25c082a5d1..79253f8a2b3 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -264,7 +264,6 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) struct nv10_screen *screen = nv10_screen(pscreen); struct pipe_winsys *ws = pscreen->winsys; struct nv10_context *nv10; - unsigned chipset = screen->chipset; struct nouveau_winsys *nvws = screen->nvws; nv10 = CALLOC(1, sizeof(struct nv10_context)); @@ -273,7 +272,6 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) nv10->screen = screen; nv10->pctx_id = pctx_id; - nv10->chipset = chipset; nv10->nvws = nvws; nv10->pipe.winsys = ws; diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 1b794c18724..433d04dc2a8 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -43,8 +43,6 @@ struct nv10_context { struct draw_context *draw; - int chipset; - uint32_t dirty; struct nv10_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index cf6b2fac84e..fad96058b8e 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -8,9 +8,10 @@ static const char * nv10_screen_get_name(struct pipe_screen *screen) { struct nv10_screen *nv10screen = nv10_screen(screen); + struct nouveau_device *dev = nv10screen->nvws->channel->device; static char buffer[128]; - snprintf(buffer, sizeof(buffer), "NV%02X", nv10screen->chipset); + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); return buffer; } @@ -129,16 +130,15 @@ nv10_screen_destroy(struct pipe_screen *pscreen) } struct pipe_screen * -nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, - unsigned chipset) +nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) { struct nv10_screen *screen = CALLOC_STRUCT(nv10_screen); unsigned celsius_class; + unsigned chipset = nvws->channel->device->chipset; int ret; if (!screen) return NULL; - screen->chipset = chipset; screen->nvws = nvws; /* 3D object */ diff --git a/src/gallium/drivers/nv10/nv10_screen.h b/src/gallium/drivers/nv10/nv10_screen.h index 4192fe11ef4..3f8750a13f7 100644 --- a/src/gallium/drivers/nv10/nv10_screen.h +++ b/src/gallium/drivers/nv10/nv10_screen.h @@ -7,7 +7,6 @@ struct nv10_screen { struct pipe_screen pipe; struct nouveau_winsys *nvws; - unsigned chipset; /* HW graphics objects */ struct nouveau_grobj *celsius; diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 4b0892971ec..7a2fee78751 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -44,7 +44,6 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) struct nv30_screen *screen = nv30_screen(pscreen); struct pipe_winsys *ws = pscreen->winsys; struct nv30_context *nv30; - unsigned chipset = screen->chipset; struct nouveau_winsys *nvws = screen->nvws; nv30 = CALLOC(1, sizeof(struct nv30_context)); @@ -53,7 +52,6 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) nv30->screen = screen; nv30->pctx_id = pctx_id; - nv30->chipset = chipset; nv30->nvws = nvws; nv30->pipe.winsys = ws; diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 1a016405e62..f49450f82d9 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -85,8 +85,6 @@ struct nv30_context { struct draw_context *draw; - int chipset; - uint32_t dirty; struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 1de45079043..9b0ac55ff88 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -12,9 +12,10 @@ static const char * nv30_screen_get_name(struct pipe_screen *pscreen) { struct nv30_screen *screen = nv30_screen(pscreen); + struct nouveau_device *dev = screen->nvws->channel->device; static char buffer[128]; - snprintf(buffer, sizeof(buffer), "NV%02X", screen->chipset); + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); return buffer; } @@ -140,17 +141,16 @@ nv30_screen_destroy(struct pipe_screen *pscreen) } struct pipe_screen * -nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, - unsigned chipset) +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) { struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); struct nouveau_stateobj *so; unsigned rankine_class = 0; + unsigned chipset = nvws->channel->device->chipset; int ret, i; if (!screen) return NULL; - screen->chipset = chipset; screen->nvws = nvws; /* 3D object */ diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h index 56f8776a173..816ece94c4a 100644 --- a/src/gallium/drivers/nv30/nv30_screen.h +++ b/src/gallium/drivers/nv30/nv30_screen.h @@ -7,7 +7,6 @@ struct nv30_screen { struct pipe_screen pipe; struct nouveau_winsys *nvws; - unsigned chipset; /* HW graphics objects */ struct nouveau_grobj *rankine; diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index f9c93f7a2d7..d9d9accea8d 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -38,7 +38,6 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) struct nv40_screen *screen = nv40_screen(pscreen); struct pipe_winsys *ws = pscreen->winsys; struct nv40_context *nv40; - unsigned chipset = screen->chipset; struct nouveau_winsys *nvws = screen->nvws; nv40 = CALLOC(1, sizeof(struct nv40_context)); @@ -47,7 +46,6 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) nv40->screen = screen; nv40->pctx_id = pctx_id; - nv40->chipset = chipset; nv40->nvws = nvws; nv40->pipe.winsys = ws; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 24e8cd23379..77b3da0ab91 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -111,8 +111,6 @@ struct nv40_context { struct draw_context *draw; - int chipset; - /* HW state derived from pipe states */ struct nv40_state state; struct { diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 7f68539a85c..1bf0931e4d9 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -12,9 +12,10 @@ static const char * nv40_screen_get_name(struct pipe_screen *pscreen) { struct nv40_screen *screen = nv40_screen(pscreen); + struct nouveau_device *dev = screen->nvws->channel->device; static char buffer[128]; - snprintf(buffer, sizeof(buffer), "NV%02X", screen->chipset); + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); return buffer; } @@ -149,17 +150,16 @@ nv40_screen_destroy(struct pipe_screen *pscreen) } struct pipe_screen * -nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, - unsigned chipset) +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) { struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); struct nouveau_stateobj *so; unsigned curie_class; + unsigned chipset = nvws->channel->device; int ret; if (!screen) return NULL; - screen->chipset = chipset; screen->nvws = nvws; /* 3D object */ diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h index 3ea78aadfd9..c04a1275a00 100644 --- a/src/gallium/drivers/nv40/nv40_screen.h +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -7,7 +7,6 @@ struct nv40_screen { struct pipe_screen pipe; struct nouveau_winsys *nvws; - unsigned chipset; unsigned cur_pctx; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index e4ca72c7171..d069639dc48 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -45,9 +45,10 @@ static const char * nv50_screen_get_name(struct pipe_screen *pscreen) { struct nv50_screen *screen = nv50_screen(pscreen); + struct nouveau_device *dev = screen->nvws->channel->device; static char buffer[128]; - snprintf(buffer, sizeof(buffer), "NV%02X", screen->chipset); + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); return buffer; } @@ -123,17 +124,16 @@ nv50_screen_destroy(struct pipe_screen *pscreen) } struct pipe_screen * -nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws, - unsigned chipset) +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) { struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); struct nouveau_stateobj *so; unsigned tesla_class = 0, ret; + unsigned chipset = nvws->channel->device->chipset; int i; if (!screen) return NULL; - screen->chipset = chipset; screen->nvws = nvws; /* 3D object */ diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 6e4120d6693..5a2732e37f4 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -7,7 +7,6 @@ struct nv50_screen { struct pipe_screen pipe; struct nouveau_winsys *nvws; - unsigned chipset; unsigned cur_pctx; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c index 635fd478a94..5eabbc88939 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c @@ -91,8 +91,7 @@ nouveau_pipe_create(struct nouveau_context *nv) struct nouveau_channel_context *nvc = nv->nvc; struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys); struct pipe_screen *(*hws_create)(struct pipe_winsys *, - struct nouveau_winsys *, - unsigned chipset); + struct nouveau_winsys *); struct pipe_context *(*hw_create)(struct pipe_screen *, unsigned); struct pipe_winsys *ws; unsigned chipset = nv->nv_screen->device->chipset; @@ -152,7 +151,7 @@ nouveau_pipe_create(struct nouveau_context *nv) ws = nouveau_create_pipe_winsys(nv); if (!nvc->pscreen) - nvc->pscreen = hws_create(ws, nvws, chipset); + nvc->pscreen = hws_create(ws, nvws); nvc->pctx[nv->pctx_id] = hw_create(nvc->pscreen, nv->pctx_id); return nvc->pctx[nv->pctx_id]; } -- cgit v1.2.3 From 9a01ee4424718e0c3015c1f0477cae63ee63d96b Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 14 May 2008 18:35:37 +1000 Subject: nv40: fix typo in one of the previous commits --- src/gallium/drivers/nv40/nv40_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 1bf0931e4d9..0336b84e5f2 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -155,7 +155,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); struct nouveau_stateobj *so; unsigned curie_class; - unsigned chipset = nvws->channel->device; + unsigned chipset = nvws->channel->device->chipset; int ret; if (!screen) -- cgit v1.2.3 From 9671f7ae476cadb46f9f8f9d0363f24aabaf9f87 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Sat, 17 May 2008 10:30:21 -0600 Subject: gallium: in drivers, make copy of tokens passed to pipe->create_vs/fs_state() The caller can then free the token array immediately. --- src/gallium/auxiliary/draw/draw_vs_exec.c | 11 ++-- src/gallium/auxiliary/draw/draw_vs_llvm.c | 7 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 5 +- src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 15 +++++ src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 31 ++++++++- src/gallium/drivers/cell/ppu/cell_state_shader.c | 84 +++++++++++++++--------- src/gallium/drivers/i915simple/i915_state.c | 5 +- src/gallium/drivers/i965simple/brw_state.c | 19 +++--- src/gallium/drivers/softpipe/sp_fs_exec.c | 5 +- src/gallium/drivers/softpipe/sp_state.h | 7 +- src/gallium/drivers/softpipe/sp_state_fs.c | 5 +- 11 files changed, 137 insertions(+), 57 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 54a2b2ab040..7a02f6334b1 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -39,6 +39,7 @@ #include "draw_vs.h" #include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_scan.h" struct exec_vertex_shader { @@ -165,21 +166,23 @@ draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *state) { struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader ); - uint nt = tgsi_num_tokens(state->tokens); if (vs == NULL) return NULL; /* we make a private copy of the tokens */ - vs->base.state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0])); - tgsi_scan_shader(state->tokens, &vs->base.info); + vs->base.state.tokens = tgsi_dup_tokens(state->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } + tgsi_scan_shader(state->tokens, &vs->base.info); vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; vs->machine = &draw->machine; - return &vs->base; } diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index dcada665143..be6907ed042 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -138,14 +138,17 @@ draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ) { struct draw_llvm_vertex_shader *vs; - uint nt = tgsi_num_tokens(templ->tokens); vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); if (vs == NULL) return NULL; /* we make a private copy of the tokens */ - vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index a57c938fbf1..5929ea76b23 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -188,7 +188,6 @@ draw_create_vs_sse(struct draw_context *draw, const struct pipe_shader_state *templ) { struct draw_sse_vertex_shader *vs; - uint nt = tgsi_num_tokens(templ->tokens); if (!rtasm_cpu_has_sse2()) return NULL; @@ -198,7 +197,9 @@ draw_create_vs_sse(struct draw_context *draw, return NULL; /* we make a private copy of the tokens */ - vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) + goto fail; tgsi_scan_shader(templ->tokens, &vs->base.info); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c index 5bea7738401..5c0b0bfd61b 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c @@ -330,3 +330,18 @@ tgsi_num_tokens(const struct tgsi_token *tokens) } return 0; } + + +/** + * Make a new copy of a token array. + */ +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens) +{ + unsigned n = tgsi_num_tokens(tokens); + unsigned bytes = n * sizeof(struct tgsi_token); + struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes); + if (new_tokens) + memcpy(new_tokens, tokens, bytes); + return new_tokens; +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h index 15e76feb7ca..41021010936 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -1,4 +1,31 @@ -#if !defined TGSI_PARSE_H +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_PARSE_H #define TGSI_PARSE_H #include "pipe/p_shader_tokens.h" @@ -118,6 +145,8 @@ tgsi_parse_token( unsigned tgsi_num_tokens(const struct tgsi_token *tokens); +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens); #if defined __cplusplus } diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index fb2e940348a..c3a3fbd066d 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -30,33 +30,50 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "draw/draw_context.h" -#if 0 -#include "pipe/p_shader_tokens.h" -#include "gallivm/gallivm.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/exec/tgsi_sse2.h" -#endif +#include "tgsi/util/tgsi_parse.h" #include "cell_context.h" #include "cell_state.h" + +/** cast wrapper */ +static INLINE struct cell_fragment_shader_state * +cell_fragment_shader_state(void *shader) +{ + return (struct pipe_shader_state *) shader; +} + + +/** cast wrapper */ +static INLINE struct cell_vertex_shader_state * +cell_vertex_shader_state(void *shader) +{ + return (struct pipe_shader_state *) shader; +} + + + static void * cell_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { /*struct cell_context *cell = cell_context(pipe);*/ - struct cell_fragment_shader_state *state; + struct cell_fragment_shader_state *cfs; - state = CALLOC_STRUCT(cell_fragment_shader_state); - if (!state) + cfs = CALLOC_STRUCT(cell_fragment_shader_state); + if (!cfs) return NULL; - state->shader = *templ; + cfs->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (!cfs->shader.tokens) { + FREE(cfs); + return NULL; + } - tgsi_scan_shader(templ->tokens, &state->info); + tgsi_scan_shader(templ->tokens, &cfs->info); - return state; + return cfs; } @@ -65,7 +82,7 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs) { struct cell_context *cell = cell_context(pipe); - cell->fs = (struct cell_fragment_shader_state *) fs; + cell->fs = cell_fragment_shader_state(fs); cell->dirty |= CELL_NEW_FS; } @@ -74,10 +91,10 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs) static void cell_delete_fs_state(struct pipe_context *pipe, void *fs) { - struct cell_fragment_shader_state *state = - (struct cell_fragment_shader_state *) fs; + struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); - FREE( state ); + FREE((void *) cfs->shader.tokens); + FREE(cfs); } @@ -86,22 +103,28 @@ cell_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { struct cell_context *cell = cell_context(pipe); - struct cell_vertex_shader_state *state; + struct cell_vertex_shader_state *cvs; - state = CALLOC_STRUCT(cell_vertex_shader_state); - if (!state) + cvs = CALLOC_STRUCT(cell_vertex_shader_state); + if (!cvs) return NULL; - state->shader = *templ; - tgsi_scan_shader(templ->tokens, &state->info); + cvs->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (!cvs->shader.tokens) { + FREE(cvs); + return NULL; + } - state->draw_data = draw_create_vertex_shader(cell->draw, &state->shader); - if (state->draw_data == NULL) { - FREE( state ); + tgsi_scan_shader(templ->tokens, &cvs->info); + + cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader); + if (cvs->draw_data == NULL) { + FREE( (void *) cvs->shader.tokens ); + FREE( cvs ); return NULL; } - return state; + return cvs; } @@ -110,7 +133,7 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs) { struct cell_context *cell = cell_context(pipe); - cell->vs = (const struct cell_vertex_shader_state *) vs; + cell->vs = cell_vertex_shader_state(vs); draw_bind_vertex_shader(cell->draw, (cell->vs ? cell->vs->draw_data : NULL)); @@ -123,12 +146,11 @@ static void cell_delete_vs_state(struct pipe_context *pipe, void *vs) { struct cell_context *cell = cell_context(pipe); + struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs); - struct cell_vertex_shader_state *state = - (struct cell_vertex_shader_state *) vs; - - draw_delete_vertex_shader(cell->draw, state->draw_data); - FREE( state ); + draw_delete_vertex_shader(cell->draw, cvs->draw_data); + FREE( (void *) cvs->shader.tokens ); + FREE( cvs ); } diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 3d94b523660..4adeb37e860 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -33,6 +33,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "tgsi/util/tgsi_parse.h" #include "i915_context.h" #include "i915_reg.h" @@ -436,7 +437,7 @@ i915_create_fs_state(struct pipe_context *pipe, if (!ifs) return NULL; - ifs->state = *templ; + ifs->state.tokens = tgsi_dup_tokens(templ->tokens); tgsi_scan_shader(templ->tokens, &ifs->info); @@ -465,6 +466,8 @@ void i915_delete_fs_state(struct pipe_context *pipe, void *shader) FREE(ifs->program); ifs->program_len = 0; + FREE(ifs->state.tokens); + FREE(ifs); } diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 376f1487b29..ac243b7e4f9 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -35,6 +35,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" #include "brw_context.h" #include "brw_defines.h" @@ -182,9 +183,7 @@ static void * brw_create_fs_state(struct pipe_context *pipe, { struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); - /* XXX: Do I have to duplicate the tokens as well?? - */ - brw_fp->program = *shader; + brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); brw_fp->id = brw_context(pipe)->program_id++; tgsi_scan_shader(shader->tokens, &brw_fp->info); @@ -210,7 +209,10 @@ static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) { - FREE(shader); + struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; + + FREE((void *) brw_fp->program.tokens); + FREE(brw_fp); } @@ -223,9 +225,7 @@ static void *brw_create_vs_state(struct pipe_context *pipe, { struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); - /* XXX: Do I have to duplicate the tokens as well?? - */ - brw_vp->program = *shader; + brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); brw_vp->id = brw_context(pipe)->program_id++; tgsi_scan_shader(shader->tokens, &brw_vp->info); @@ -251,7 +251,10 @@ static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) { - FREE(shader); + struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; + + FREE((void *) brw_vp->program.tokens); + FREE(brw_vp); } diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index d5bd7a702f1..0b199a2193e 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -37,6 +37,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_parse.h" struct sp_exec_fragment_shader { struct sp_fragment_shader base; @@ -116,6 +117,7 @@ exec_run( const struct sp_fragment_shader *base, static void exec_delete( struct sp_fragment_shader *base ) { + FREE((void *) base->shader.tokens); FREE(base); } @@ -137,7 +139,8 @@ softpipe_create_fs_exec(struct softpipe_context *softpipe, if (!shader) return NULL; - shader->base.shader = *templ; + /* we need to keep a local copy of the tokens */ + shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens); shader->base.prepare = exec_prepare; shader->base.run = exec_run; shader->base.delete = exec_delete; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 45056502b8e..452e51fa791 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -54,9 +54,11 @@ struct tgsi_sampler; struct tgsi_exec_machine; +struct vertex_info; -/** Subclass of pipe_shader_state (though it doesn't really need to be). +/** + * Subclass of pipe_shader_state (though it doesn't really need to be). * * This is starting to look an awful lot like a quad pipeline stage... */ @@ -80,11 +82,10 @@ struct sp_fragment_shader { void (*delete)( struct sp_fragment_shader * ); }; -struct vertex_info; /** Subclass of pipe_shader_state */ struct sp_vertex_shader { - struct pipe_shader_state shader; + struct pipe_shader_state shader; /* Note: this field not actually used */ struct draw_vertex_shader *draw_data; }; diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 9e77b7e91bc..24b91fbc794 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -102,10 +102,7 @@ softpipe_create_vs_state(struct pipe_context *pipe, return NULL; } - state->shader = *templ; - - state->draw_data = draw_create_vertex_shader(softpipe->draw, - &state->shader); + state->draw_data = draw_create_vertex_shader(softpipe->draw, templ); if (state->draw_data == NULL) { FREE( state ); return NULL; -- cgit v1.2.3 From aafe3ddee276230243ed6767803ce4b6ce2aeec6 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 19 May 2008 15:18:40 +0900 Subject: i915simple: Fix win9x build. --- src/gallium/drivers/i915simple/i915_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 4adeb37e860..e09c2259005 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -466,7 +466,7 @@ void i915_delete_fs_state(struct pipe_context *pipe, void *shader) FREE(ifs->program); ifs->program_len = 0; - FREE(ifs->state.tokens); + FREE((struct tgsi_token *)ifs->state.tokens); FREE(ifs); } -- cgit v1.2.3 From 59007a811de2d76ea00164e8f1cacb4a375d1458 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 19 May 2008 09:34:28 -0600 Subject: if x86_get_func() returns NULL, handle it properly instead of aborting --- src/gallium/auxiliary/draw/draw_vs_sse.c | 3 +++ src/gallium/drivers/softpipe/sp_fs_sse.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 5929ea76b23..e3f4e67472a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -215,6 +215,9 @@ draw_create_vs_sse(struct draw_context *draw, goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); + if (!vs->func) { + goto fail; + } return &vs->base; diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 25fdfea4915..55741cc1df8 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -139,7 +139,11 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, } shader->func = (codegen_function) x86_get_func( &shader->sse2_program ); - assert(shader->func); + if (!shader->func) { + x86_release_func( &shader->sse2_program ); + FREE(shader); + return NULL; + } shader->base.shader = *templ; shader->base.prepare = fs_sse_prepare; -- cgit v1.2.3 From 845db16dbe66c0f741063333a70af79bca5b6544 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Thu, 22 May 2008 15:02:54 +0200 Subject: i915: Fix for edgeflags --- src/gallium/drivers/i915simple/i915_state.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 4adeb37e860..964bd7c871c 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -717,10 +717,16 @@ static void i915_set_vertex_elements(struct pipe_context *pipe, } +static void i915_set_edgeflags(struct pipe_context *pipe, + const unsigned *bitfield) +{ + /* TODO do something here */ +} void i915_init_state_functions( struct i915_context *i915 ) { + i915->pipe.set_edgeflags = i915_set_edgeflags; i915->pipe.create_blend_state = i915_create_blend_state; i915->pipe.bind_blend_state = i915_bind_blend_state; i915->pipe.delete_blend_state = i915_delete_blend_state; -- cgit v1.2.3 From f5599a7a3c4a6335ce79fdbd82e18f08bb0ac8e7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 25 May 2008 14:25:15 +1000 Subject: nouveau: remove final PIPE_FORMAT_U_* usage --- src/gallium/drivers/nv10/nv10_fragtex.c | 6 +++--- src/gallium/drivers/nv10/nv10_screen.c | 6 +++--- src/gallium/drivers/nv30/nv30_fragtex.c | 8 ++++---- src/gallium/drivers/nv30/nv30_screen.c | 8 ++++---- src/gallium/drivers/nv40/nv40_fragtex.c | 8 ++++---- src/gallium/drivers/nv40/nv40_screen.c | 8 ++++---- 6 files changed, 22 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c index 67e0b4bd45d..238634d0bb4 100644 --- a/src/gallium/drivers/nv10/nv10_fragtex.c +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -44,9 +44,9 @@ nv10_texture_formats[] = { _(A8R8G8B8_UNORM, A8R8G8B8), _(A1R5G5B5_UNORM, A1R5G5B5), _(A4R4G4B4_UNORM, A4R4G4B4), - _(U_L8 , L8 ), - _(U_A8 , A8 ), - _(U_A8_L8 , A8L8 ), + _(L8_UNORM , L8 ), + _(A8_UNORM , A8 ), + _(A8L8_UNORM , A8L8 ), // _(RGB_DXT1 , DXT1, ), // _(RGBA_DXT1 , DXT1, ), // _(RGBA_DXT3 , DXT3, ), diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index fad96058b8e..5fe3a030811 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -102,9 +102,9 @@ nv10_screen_is_format_supported(struct pipe_screen *screen, case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: return TRUE; default: break; diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 45ee6db8d6a..47f3ff047db 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -50,10 +50,10 @@ nv30_texture_formats[] = { _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), // _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), - _(U_L8 , L8 , S1, S1, S1, ONE, X, X, X, X), - _(U_A8 , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), - _(U_I8 , L8 , S1, S1, S1, S1, X, X, X, X), - _(U_A8_L8 , A8L8 , S1, S1, S1, S1, X, X, X, Y), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), + _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), // _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), // _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), // _(RGB_DXT1 , 0x86, S1, S1, S1, ONE, X, Y, Z, W, 0x00, 0x00), diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 9b0ac55ff88..bb77776ff15 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -106,10 +106,10 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_Z24S8_UNORM: return TRUE; diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 980705f94ad..c79ea8becb3 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -24,10 +24,10 @@ nv40_texture_formats[] = { _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), - _(U_L8 , L8 , S1, S1, S1, ONE, X, X, X, X), - _(U_A8 , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), - _(U_I8 , L8 , S1, S1, S1, S1, X, X, X, X), - _(U_A8_L8 , A8L8 , S1, S1, S1, S1, X, X, X, Y), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), + _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 0336b84e5f2..51640533938 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -111,10 +111,10 @@ nv40_screen_surface_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_DXT1_RGB: -- cgit v1.2.3 From e0fd3449f824ed8eaec49e83a0f90b7fe47e09a6 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Sun, 25 May 2008 17:26:07 +0200 Subject: i915: Fix for tex-surface merge --- src/gallium/drivers/i915simple/i915_texture.c | 29 +++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 3e23e540f9d..b08385e7db2 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -132,11 +132,11 @@ i915_displaytarget_layout(struct pipe_screen *screen, /* Now extract the goodies: */ - i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); - i915_miptree_set_level_info( tex, 0, 0, 0, 0, + i915_miptree_set_level_info( tex, 0, 1, 0, 0, tex->base.width[0], tex->base.height[0], 1 ); + i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); tex->buffer = surf.buffer; tex->pitch = surf.pitch; @@ -633,6 +633,7 @@ i915_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { assert(ps->refcount); assert(ps->winsys); + pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; ps->cpp = pt->cpp; @@ -652,6 +653,29 @@ i915_init_texture_functions(struct i915_context *i915) // i915->pipe.texture_update = i915_texture_update; } +static void +i915_tex_surface_release_screen(struct pipe_screen *screen, + struct pipe_surface **surface) +{ + struct pipe_surface *surf = *surface; + + if (--surf->refcount == 0) { + + /* This really should not be possible, but it's actually + * happening quite a bit... Will fix. + */ + if (surf->status == PIPE_SURFACE_STATUS_CLEAR) { + debug_printf("XXX destroying a surface with pending clears...\n"); + assert(0); + } + + pipe_texture_reference(&surf->texture, NULL); + pipe_buffer_reference(screen->winsys, &surf->buffer, NULL); + FREE(surf); + } + + *surface = NULL; +} void i915_init_screen_texture_functions(struct pipe_screen *screen) @@ -659,4 +683,5 @@ i915_init_screen_texture_functions(struct pipe_screen *screen) screen->texture_create = i915_texture_create_screen; screen->texture_release = i915_texture_release_screen; screen->get_tex_surface = i915_get_tex_surface_screen; + screen->tex_surface_release = i915_tex_surface_release_screen; } -- cgit v1.2.3 From 791eee64e03c7323c8a8907f54b09a015c046e2f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Mon, 26 May 2008 11:20:51 +0200 Subject: i915: Removed screen sufixes on texture functions --- src/gallium/drivers/i915simple/i915_texture.c | 28 +++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index b08385e7db2..df11ba0544c 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -525,8 +525,8 @@ i945_miptree_layout(struct i915_texture * tex) static struct pipe_texture * -i915_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) +i915_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) { struct i915_screen *i915screen = i915_screen(screen); struct pipe_winsys *ws = screen->winsys; @@ -571,8 +571,8 @@ i915_texture_create_screen(struct pipe_screen *screen, static void -i915_texture_release_screen(struct pipe_screen *screen, - struct pipe_texture **pt) +i915_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -606,10 +606,10 @@ i915_texture_release_screen(struct pipe_screen *screen, * XXX note: same as code in sp_surface.c */ static struct pipe_surface * -i915_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) +i915_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) { struct i915_texture *tex = (struct i915_texture *)pt; struct pipe_winsys *ws = screen->winsys; @@ -654,8 +654,8 @@ i915_init_texture_functions(struct i915_context *i915) } static void -i915_tex_surface_release_screen(struct pipe_screen *screen, - struct pipe_surface **surface) +i915_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **surface) { struct pipe_surface *surf = *surface; @@ -680,8 +680,8 @@ i915_tex_surface_release_screen(struct pipe_screen *screen, void i915_init_screen_texture_functions(struct pipe_screen *screen) { - screen->texture_create = i915_texture_create_screen; - screen->texture_release = i915_texture_release_screen; - screen->get_tex_surface = i915_get_tex_surface_screen; - screen->tex_surface_release = i915_tex_surface_release_screen; + screen->texture_create = i915_texture_create; + screen->texture_release = i915_texture_release; + screen->get_tex_surface = i915_get_tex_surface; + screen->tex_surface_release = i915_tex_surface_release; } -- cgit v1.2.3 From 253066d716e3039522eeb7b072811cccd89b4a82 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 26 May 2008 23:29:38 +0900 Subject: Revert DOS line endings. --- src/gallium/drivers/i915simple/i915_context.h | 682 ++++++------ src/gallium/drivers/i965simple/brw_context.h | 1368 ++++++++++++------------- src/gallium/drivers/i965simple/brw_state.c | 938 ++++++++--------- 3 files changed, 1494 insertions(+), 1494 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 38e6a348845..53fc5ed0795 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -1,341 +1,341 @@ - /************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_CONTEXT_H -#define I915_CONTEXT_H - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "draw/draw_vertex.h" - -#include "tgsi/util/tgsi_scan.h" - - -#define I915_TEX_UNITS 8 - -#define I915_DYNAMIC_MODES4 0 -#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ -#define I915_DYNAMIC_DEPTHSCALE_1 2 -#define I915_DYNAMIC_IAB 3 -#define I915_DYNAMIC_BC_0 4 /* just the header */ -#define I915_DYNAMIC_BC_1 5 -#define I915_DYNAMIC_BFO_0 6 -#define I915_DYNAMIC_BFO_1 7 -#define I915_DYNAMIC_STP_0 8 -#define I915_DYNAMIC_STP_1 9 -#define I915_DYNAMIC_SC_ENA_0 10 -#define I915_DYNAMIC_SC_RECT_0 11 -#define I915_DYNAMIC_SC_RECT_1 12 -#define I915_DYNAMIC_SC_RECT_2 13 -#define I915_MAX_DYNAMIC 14 - - -#define I915_IMMEDIATE_S0 0 -#define I915_IMMEDIATE_S1 1 -#define I915_IMMEDIATE_S2 2 -#define I915_IMMEDIATE_S3 3 -#define I915_IMMEDIATE_S4 4 -#define I915_IMMEDIATE_S5 5 -#define I915_IMMEDIATE_S6 6 -#define I915_IMMEDIATE_S7 7 -#define I915_MAX_IMMEDIATE 8 - -/* These must mach the order of LI0_STATE_* bits, as they will be used - * to generate hardware packets: - */ -#define I915_CACHE_STATIC 0 -#define I915_CACHE_DYNAMIC 1 /* handled specially */ -#define I915_CACHE_SAMPLER 2 -#define I915_CACHE_MAP 3 -#define I915_CACHE_PROGRAM 4 -#define I915_CACHE_CONSTANTS 5 -#define I915_MAX_CACHE 6 - -#define I915_MAX_CONSTANT 32 - - -/** See constant_flags[] below */ -#define I915_CONSTFLAG_USER 0x1f - - -/** - * Subclass of pipe_shader_state - */ -struct i915_fragment_shader -{ - struct pipe_shader_state state; - - struct tgsi_shader_info info; - - uint *program; - uint program_len; - - /** - * constants introduced during translation. - * These are placed at the end of the constant buffer and grow toward - * the beginning (eg: slot 31, 30 29, ...) - * User-provided constants start at 0. - * This allows both types of constants to co-exist (until there's too many) - * and doesn't require regenerating/changing the fragment program to - * shuffle constants around. - */ - uint num_constants; - float constants[I915_MAX_CONSTANT][4]; - - /** - * Status of each constant - * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding - * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) - * Else, the bitmask indicates which components are occupied by immediates. - */ - ubyte constant_flags[I915_MAX_CONSTANT]; -}; - - -struct i915_cache_context; - -/* Use to calculate differences between state emitted to hardware and - * current driver-calculated state. - */ -struct i915_state -{ - unsigned immediate[I915_MAX_IMMEDIATE]; - unsigned dynamic[I915_MAX_DYNAMIC]; - - float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; - /** number of constants passed in through a constant buffer */ - uint num_user_constants[PIPE_SHADER_TYPES]; - - /* texture sampler state */ - unsigned sampler[I915_TEX_UNITS][3]; - unsigned sampler_enable_flags; - unsigned sampler_enable_nr; - - /* texture image buffers */ - unsigned texbuffer[I915_TEX_UNITS][2]; - - /** Describes the current hardware vertex layout */ - struct vertex_info vertex_info; - - unsigned id; /* track lost context events */ -}; - -struct i915_blend_state { - unsigned iab; - unsigned modes4; - unsigned LIS5; - unsigned LIS6; -}; - -struct i915_depth_stencil_state { - unsigned stencil_modes4; - unsigned bfo[2]; - unsigned stencil_LIS5; - unsigned depth_LIS6; -}; - -struct i915_rasterizer_state { - int light_twoside : 1; - unsigned st; - enum interp_mode color_interp; - - unsigned LIS4; - unsigned LIS7; - unsigned sc[1]; - - const struct pipe_rasterizer_state *templ; - - union { float f; unsigned u; } ds[2]; -}; - -struct i915_sampler_state { - unsigned state[3]; - const struct pipe_sampler_state *templ; -}; - - -struct i915_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -struct i915_context -{ - struct pipe_context pipe; - struct i915_winsys *winsys; - struct draw_context *draw; - - /* The most recent drawing state as set by the driver: - */ - const struct i915_blend_state *blend; - const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - const struct i915_depth_stencil_state *depth_stencil; - const struct i915_rasterizer_state *rasterizer; - - struct i915_fragment_shader *fs; - - struct pipe_blend_color blend_color; - struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; - struct pipe_framebuffer_state framebuffer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct i915_texture *texture[PIPE_MAX_SAMPLERS]; - struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - - unsigned dirty; - - unsigned num_samplers; - unsigned num_textures; - unsigned num_vertex_elements; - unsigned num_vertex_buffers; - - unsigned *batch_start; - - /** Vertex buffer */ - struct pipe_buffer *vbo; - - struct i915_state current; - unsigned hardware_dirty; - - unsigned debug; -}; - -/* A flag for each state_tracker state object: - */ -#define I915_NEW_VIEWPORT 0x1 -#define I915_NEW_RASTERIZER 0x2 -#define I915_NEW_FS 0x4 -#define I915_NEW_BLEND 0x8 -#define I915_NEW_CLIP 0x10 -#define I915_NEW_SCISSOR 0x20 -#define I915_NEW_STIPPLE 0x40 -#define I915_NEW_FRAMEBUFFER 0x80 -#define I915_NEW_ALPHA_TEST 0x100 -#define I915_NEW_DEPTH_STENCIL 0x200 -#define I915_NEW_SAMPLER 0x400 -#define I915_NEW_TEXTURE 0x800 -#define I915_NEW_CONSTANTS 0x1000 -#define I915_NEW_VBO 0x2000 -#define I915_NEW_VS 0x4000 - - -/* Driver's internally generated state flags: - */ -#define I915_NEW_VERTEX_FORMAT 0x10000 - - -/* Dirty flags for hardware emit - */ -#define I915_HW_STATIC (1<<I915_CACHE_STATIC) -#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC) -#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER) -#define I915_HW_MAP (1<<I915_CACHE_MAP) -#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) -#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) -#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) -#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) - - -/*********************************************************************** - * i915_prim_emit.c: - */ -struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); - - -/*********************************************************************** - * i915_prim_vbuf.c: - */ -struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); - - -/*********************************************************************** - * i915_state_emit.c: - */ -void i915_emit_hardware_state(struct i915_context *i915 ); - - - -/*********************************************************************** - * i915_clear.c: - */ -void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue); - - -/*********************************************************************** - * i915_surface.c: - */ -void i915_init_surface_functions( struct i915_context *i915 ); - -void i915_init_state_functions( struct i915_context *i915 ); -void i915_init_flush_functions( struct i915_context *i915 ); -void i915_init_string_functions( struct i915_context *i915 ); - - - - -/*********************************************************************** - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static INLINE struct i915_context * -i915_context( struct pipe_context *pipe ) -{ - return (struct i915_context *)pipe; -} - - - -#endif + /************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_CONTEXT_H +#define I915_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "tgsi/util/tgsi_scan.h" + + +#define I915_TEX_UNITS 8 + +#define I915_DYNAMIC_MODES4 0 +#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ +#define I915_DYNAMIC_DEPTHSCALE_1 2 +#define I915_DYNAMIC_IAB 3 +#define I915_DYNAMIC_BC_0 4 /* just the header */ +#define I915_DYNAMIC_BC_1 5 +#define I915_DYNAMIC_BFO_0 6 +#define I915_DYNAMIC_BFO_1 7 +#define I915_DYNAMIC_STP_0 8 +#define I915_DYNAMIC_STP_1 9 +#define I915_DYNAMIC_SC_ENA_0 10 +#define I915_DYNAMIC_SC_RECT_0 11 +#define I915_DYNAMIC_SC_RECT_1 12 +#define I915_DYNAMIC_SC_RECT_2 13 +#define I915_MAX_DYNAMIC 14 + + +#define I915_IMMEDIATE_S0 0 +#define I915_IMMEDIATE_S1 1 +#define I915_IMMEDIATE_S2 2 +#define I915_IMMEDIATE_S3 3 +#define I915_IMMEDIATE_S4 4 +#define I915_IMMEDIATE_S5 5 +#define I915_IMMEDIATE_S6 6 +#define I915_IMMEDIATE_S7 7 +#define I915_MAX_IMMEDIATE 8 + +/* These must mach the order of LI0_STATE_* bits, as they will be used + * to generate hardware packets: + */ +#define I915_CACHE_STATIC 0 +#define I915_CACHE_DYNAMIC 1 /* handled specially */ +#define I915_CACHE_SAMPLER 2 +#define I915_CACHE_MAP 3 +#define I915_CACHE_PROGRAM 4 +#define I915_CACHE_CONSTANTS 5 +#define I915_MAX_CACHE 6 + +#define I915_MAX_CONSTANT 32 + + +/** See constant_flags[] below */ +#define I915_CONSTFLAG_USER 0x1f + + +/** + * Subclass of pipe_shader_state + */ +struct i915_fragment_shader +{ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + + uint *program; + uint program_len; + + /** + * constants introduced during translation. + * These are placed at the end of the constant buffer and grow toward + * the beginning (eg: slot 31, 30 29, ...) + * User-provided constants start at 0. + * This allows both types of constants to co-exist (until there's too many) + * and doesn't require regenerating/changing the fragment program to + * shuffle constants around. + */ + uint num_constants; + float constants[I915_MAX_CONSTANT][4]; + + /** + * Status of each constant + * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding + * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) + * Else, the bitmask indicates which components are occupied by immediates. + */ + ubyte constant_flags[I915_MAX_CONSTANT]; +}; + + +struct i915_cache_context; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. + */ +struct i915_state +{ + unsigned immediate[I915_MAX_IMMEDIATE]; + unsigned dynamic[I915_MAX_DYNAMIC]; + + float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; + /** number of constants passed in through a constant buffer */ + uint num_user_constants[PIPE_SHADER_TYPES]; + + /* texture sampler state */ + unsigned sampler[I915_TEX_UNITS][3]; + unsigned sampler_enable_flags; + unsigned sampler_enable_nr; + + /* texture image buffers */ + unsigned texbuffer[I915_TEX_UNITS][2]; + + /** Describes the current hardware vertex layout */ + struct vertex_info vertex_info; + + unsigned id; /* track lost context events */ +}; + +struct i915_blend_state { + unsigned iab; + unsigned modes4; + unsigned LIS5; + unsigned LIS6; +}; + +struct i915_depth_stencil_state { + unsigned stencil_modes4; + unsigned bfo[2]; + unsigned stencil_LIS5; + unsigned depth_LIS6; +}; + +struct i915_rasterizer_state { + int light_twoside : 1; + unsigned st; + enum interp_mode color_interp; + + unsigned LIS4; + unsigned LIS7; + unsigned sc[1]; + + const struct pipe_rasterizer_state *templ; + + union { float f; unsigned u; } ds[2]; +}; + +struct i915_sampler_state { + unsigned state[3]; + const struct pipe_sampler_state *templ; +}; + + +struct i915_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +struct i915_context +{ + struct pipe_context pipe; + struct i915_winsys *winsys; + struct draw_context *draw; + + /* The most recent drawing state as set by the driver: + */ + const struct i915_blend_state *blend; + const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct i915_depth_stencil_state *depth_stencil; + const struct i915_rasterizer_state *rasterizer; + + struct i915_fragment_shader *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct i915_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; + + unsigned *batch_start; + + /** Vertex buffer */ + struct pipe_buffer *vbo; + + struct i915_state current; + unsigned hardware_dirty; + + unsigned debug; +}; + +/* A flag for each state_tracker state object: + */ +#define I915_NEW_VIEWPORT 0x1 +#define I915_NEW_RASTERIZER 0x2 +#define I915_NEW_FS 0x4 +#define I915_NEW_BLEND 0x8 +#define I915_NEW_CLIP 0x10 +#define I915_NEW_SCISSOR 0x20 +#define I915_NEW_STIPPLE 0x40 +#define I915_NEW_FRAMEBUFFER 0x80 +#define I915_NEW_ALPHA_TEST 0x100 +#define I915_NEW_DEPTH_STENCIL 0x200 +#define I915_NEW_SAMPLER 0x400 +#define I915_NEW_TEXTURE 0x800 +#define I915_NEW_CONSTANTS 0x1000 +#define I915_NEW_VBO 0x2000 +#define I915_NEW_VS 0x4000 + + +/* Driver's internally generated state flags: + */ +#define I915_NEW_VERTEX_FORMAT 0x10000 + + +/* Dirty flags for hardware emit + */ +#define I915_HW_STATIC (1<<I915_CACHE_STATIC) +#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC) +#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER) +#define I915_HW_MAP (1<<I915_CACHE_MAP) +#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) +#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) +#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) +#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) + + +/*********************************************************************** + * i915_prim_emit.c: + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_prim_vbuf.c: + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_state_emit.c: + */ +void i915_emit_hardware_state(struct i915_context *i915 ); + + + +/*********************************************************************** + * i915_clear.c: + */ +void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + +/*********************************************************************** + * i915_surface.c: + */ +void i915_init_surface_functions( struct i915_context *i915 ); + +void i915_init_state_functions( struct i915_context *i915 ); +void i915_init_flush_functions( struct i915_context *i915 ); +void i915_init_string_functions( struct i915_context *i915 ); + + + + +/*********************************************************************** + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct i915_context * +i915_context( struct pipe_context *pipe ) +{ + return (struct i915_context *)pipe; +} + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index eeccf367856..8ac6b4e6897 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -1,684 +1,684 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRWCONTEXT_INC -#define BRWCONTEXT_INC - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "tgsi/util/tgsi_scan.h" - -#include "brw_structs.h" -#include "brw_winsys.h" - - -/* Glossary: - * - * URB - uniform resource buffer. A mid-sized buffer which is - * partitioned between the fixed function units and used for passing - * values (vertices, primitives, constants) between them. - * - * CURBE - constant URB entry. An urb region (entry) used to hold - * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. - * - * VUE - vertex URB entry. An urb entry holding a vertex and usually - * a vertex header. The header contains control information and - * things like primitive type, Begin/end flags and clip codes. - * - * PUE - primitive URB entry. An urb entry produced by the setup (SF) - * unit holding rasterization and interpolation parameters. - * - * GRF - general register file. One of several register files - * addressable by programmed threads. The inputs (r0, payload, curbe, - * urb) of the thread are preloaded to this area before the thread is - * spawned. The registers are individually 8 dwords wide and suitable - * for general usage. Registers holding thread input values are not - * special and may be overwritten. - * - * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous - * MRF registers. All program output is via these messages. URB - * entries are populated by sending a message to the shared URB - * function containing the new data, together with a control word, - * often an unmodified copy of R0. - * - * R0 - GRF register 0. Typically holds control information used when - * sending messages to other threads. - * - * EU or GEN4 EU: The name of the programmable subsystem of the - * i965 hardware. Threads are executed by the EU, the registers - * described above are part of the EU architecture. - * - * Fixed function units: - * - * CS - Command streamer. Notional first unit, little software - * interaction. Holds the URB entries used for constant data, ie the - * CURBEs. - * - * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of - * this unit is responsible for pulling vertices out of vertex buffers - * in vram and injecting them into the processing pipe as VUEs. If - * enabled, it first passes them to a VS thread which is a good place - * for the driver to implement any active vertex shader. - * - * GS - Geometry Shader. This corresponds to a new DX10 concept. If - * enabled, incoming strips etc are passed to GS threads in individual - * line/triangle/point units. The GS thread may perform arbitary - * computation and emit whatever primtives with whatever vertices it - * chooses. This makes GS an excellent place to implement GL's - * unfilled polygon modes, though of course it is capable of much - * more. Additionally, GS is used to translate away primitives not - * handled by latter units, including Quads and Lineloops. - * - * CS - Clipper. Mesa's clipping algorithms are imported to run on - * this unit. The fixed function part performs cliptesting against - * the 6 fixed clipplanes and makes descisions on whether or not the - * incoming primitive needs to be passed to a thread for clipping. - * User clip planes are handled via cooperation with the VS thread. - * - * SF - Strips Fans or Setup: Triangles are prepared for - * rasterization. Interpolation coefficients are calculated. - * Flatshading and two-side lighting usually performed here. - * - * WM - Windower. Interpolation of vertex attributes performed here. - * Fragment shader implemented here. SIMD aspects of EU taken full - * advantage of, as pixels are processed in blocks of 16. - * - * CC - Color Calculator. No EU threads associated with this unit. - * Handles blending and (presumably) depth and stencil testing. - */ - -#define BRW_MAX_CURBE (32*16) - -struct brw_context; -struct brw_winsys; - - -/* Raised when we receive new state across the pipe interface: - */ -#define BRW_NEW_VIEWPORT 0x1 -#define BRW_NEW_RASTERIZER 0x2 -#define BRW_NEW_FS 0x4 -#define BRW_NEW_BLEND 0x8 -#define BRW_NEW_CLIP 0x10 -#define BRW_NEW_SCISSOR 0x20 -#define BRW_NEW_STIPPLE 0x40 -#define BRW_NEW_FRAMEBUFFER 0x80 -#define BRW_NEW_ALPHA_TEST 0x100 -#define BRW_NEW_DEPTH_STENCIL 0x200 -#define BRW_NEW_SAMPLER 0x400 -#define BRW_NEW_TEXTURE 0x800 -#define BRW_NEW_CONSTANTS 0x1000 -#define BRW_NEW_VBO 0x2000 -#define BRW_NEW_VS 0x4000 - -/* Raised for other internal events: - */ -#define BRW_NEW_URB_FENCE 0x10000 -#define BRW_NEW_PSP 0x20000 -#define BRW_NEW_CURBE_OFFSETS 0x40000 -#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 -#define BRW_NEW_PRIMITIVE 0x100000 -#define BRW_NEW_SCENE 0x200000 -#define BRW_NEW_SF_LINKAGE 0x400000 - -extern int BRW_DEBUG; - -#define DEBUG_TEXTURE 0x1 -#define DEBUG_STATE 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_PRIMS 0x8 -#define DEBUG_VERTS 0x10 -#define DEBUG_FALLBACKS 0x20 -#define DEBUG_VERBOSE 0x40 -#define DEBUG_DRI 0x80 -#define DEBUG_DMA 0x100 -#define DEBUG_SANITY 0x200 -#define DEBUG_SYNC 0x400 -#define DEBUG_SLEEP 0x800 -#define DEBUG_PIXEL 0x1000 -#define DEBUG_STATS 0x2000 -#define DEBUG_TILE 0x4000 -#define DEBUG_SINGLE_THREAD 0x8000 -#define DEBUG_WM 0x10000 -#define DEBUG_URB 0x20000 -#define DEBUG_VS 0x40000 -#define DEBUG_BATCH 0x80000 -#define DEBUG_BUFMGR 0x100000 -#define DEBUG_BLIT 0x200000 -#define DEBUG_REGION 0x400000 -#define DEBUG_MIPTREE 0x800000 - -#define DBG(...) do { \ - if (BRW_DEBUG & FILE_DEBUG_FLAG) \ - debug_printf(__VA_ARGS__); \ -} while(0) - -#define PRINT(...) do { \ - debug_printf(brw->pipe.winsys, __VA_ARGS__); \ -} while(0) - -struct brw_state_flags { - unsigned cache; - unsigned brw; -}; - - -struct brw_vertex_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - int id; -}; - - -struct brw_fragment_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - - boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ - int id; -}; - - -struct pipe_setup_linkage { - struct { - unsigned vp_output:5; - unsigned interp_mode:4; - unsigned bf_vp_output:5; - } fp_input[PIPE_MAX_SHADER_INPUTS]; - - unsigned fp_input_count:5; - unsigned max_vp_output:5; -}; - - - -struct brw_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ - -struct brw_wm_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - - unsigned first_curbe_grf; - unsigned total_grf; - unsigned total_scratch; - - /* Internally generated constants for the CURBE. These are loaded - * ahead of the data from the constant buffer. - */ - const float internal_const[8]; - unsigned nr_internal_consts; - unsigned max_const; - - boolean error; -}; - -struct brw_sf_prog_data { - unsigned urb_read_length; - unsigned total_grf; - - /* Each vertex may have upto 12 attributes, 4 components each, - * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 - * rows. - * - * Actually we use 4 for each, so call it 12 rows. - */ - unsigned urb_entry_size; -}; - -struct brw_clip_prog_data { - unsigned curb_read_length; /* user planes? */ - unsigned clip_mode; - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_gs_prog_data { - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_vs_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - unsigned total_grf; - unsigned outputs_written; - - unsigned inputs_read; - - unsigned max_const; - - float imm_buf[PIPE_MAX_CONSTANT][4]; - unsigned num_imm; - unsigned num_consts; - - /* Used for calculating urb partitions: - */ - unsigned urb_entry_size; -}; - - -#define BRW_MAX_TEX_UNIT 8 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 - -/* Create a fixed sized struct for caching binding tables: - */ -struct brw_surface_binding_table { - unsigned surf_ss_offset[BRW_WM_MAX_SURF]; -}; - - -struct brw_cache; - -struct brw_mem_pool { - struct pipe_buffer *buffer; - - unsigned size; - unsigned offset; /* offset of first free byte */ - - struct brw_context *brw; -}; - -struct brw_cache_item { - unsigned hash; - unsigned key_size; /* for variable-sized keys */ - const void *key; - - unsigned offset; /* offset within pool's buffer */ - unsigned data_size; - - struct brw_cache_item *next; -}; - - - -struct brw_cache { - unsigned id; - - const char *name; - - struct brw_context *brw; - struct brw_mem_pool *pool; - - struct brw_cache_item **items; - unsigned size, n_items; - - unsigned key_size; /* for fixed-size keys */ - unsigned aux_size; - - unsigned last_addr; /* offset of active item */ -}; - - - - -/* Considered adding a member to this struct to document which flags - * an update might raise so that ordering of the state atoms can be - * checked or derived at runtime. Dropped the idea in favor of having - * a debug mode where the state is monitored for flags which are - * raised that have already been tested against. - */ -struct brw_tracked_state { - struct brw_state_flags dirty; - void (*update)( struct brw_context *brw ); -}; - - -/* Flags for brw->state.cache. - */ -#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) -#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) -#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) -#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) -#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) -#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) -#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) -#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) -#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) -#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) -#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) -#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) -#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) -#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) -#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) -#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) -#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) -#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) - - - - -enum brw_mempool_id { - BRW_GS_POOL, - BRW_SS_POOL, - BRW_MAX_POOL -}; - - -struct brw_cached_batch_item { - struct header *header; - unsigned sz; - struct brw_cached_batch_item *next; -}; - - - -/* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life - * be easier if C allowed arrays of packed elements? - */ -#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) - - - - -struct brw_vertex_info { - unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ - unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ -}; - - - - - -struct brw_context -{ - struct pipe_context pipe; - struct brw_winsys *winsys; - - unsigned primitive; - unsigned reduced_primitive; - - boolean emit_state_always; - - struct { - struct brw_state_flags dirty; - } state; - - - struct { - const struct pipe_blend_state *Blend; - const struct pipe_depth_stencil_alpha_state *DepthStencil; - const struct pipe_poly_stipple *PolygonStipple; - const struct pipe_rasterizer_state *Raster; - const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; - const struct brw_vertex_program *VertexProgram; - const struct brw_fragment_program *FragmentProgram; - - struct pipe_clip_state Clip; - struct pipe_blend_color BlendColor; - struct pipe_scissor_state Scissor; - struct pipe_viewport_state Viewport; - struct pipe_framebuffer_state FrameBuffer; - - const struct pipe_constant_buffer *Constants[2]; - const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; - } attribs; - - unsigned num_samplers; - unsigned num_textures; - - struct brw_mem_pool pool[BRW_MAX_POOL]; - struct brw_cache cache[BRW_MAX_CACHE]; - struct brw_cached_batch_item *cached_batch_items; - - struct { - - /* Arrays with buffer objects to copy non-bufferobj arrays into - * for upload: - */ - const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; - - struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; - -#define BRW_NR_UPLOAD_BUFS 17 -#define BRW_UPLOAD_INIT_SIZE (128*1024) - - /* Summary of size and varying of active arrays, so we can check - * for changes to this state: - */ - struct brw_vertex_info info; - } vb; - - - unsigned hardware_dirty; - unsigned dirty; - unsigned pci_id; - /* BRW_NEW_URB_ALLOCATIONS: - */ - struct { - unsigned vsize; /* vertex size plus header in urb registers */ - unsigned csize; /* constant buffer size in urb registers */ - unsigned sfsize; /* setup data size in urb registers */ - - boolean constrained; - - unsigned nr_vs_entries; - unsigned nr_gs_entries; - unsigned nr_clip_entries; - unsigned nr_sf_entries; - unsigned nr_cs_entries; - -/* unsigned vs_size; */ -/* unsigned gs_size; */ -/* unsigned clip_size; */ -/* unsigned sf_size; */ -/* unsigned cs_size; */ - - unsigned vs_start; - unsigned gs_start; - unsigned clip_start; - unsigned sf_start; - unsigned cs_start; - } urb; - - - /* BRW_NEW_CURBE_OFFSETS: - */ - struct { - unsigned wm_start; - unsigned wm_size; - unsigned clip_start; - unsigned clip_size; - unsigned vs_start; - unsigned vs_size; - unsigned total_size; - - unsigned gs_offset; - - float *last_buf; - unsigned last_bufsz; - } curbe; - - struct { - struct brw_vs_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } vs; - - struct { - struct brw_gs_prog_data *prog_data; - - boolean prog_active; - unsigned prog_gs_offset; - unsigned state_gs_offset; - } gs; - - struct { - struct brw_clip_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } clip; - - - struct { - struct brw_sf_prog_data *prog_data; - - struct pipe_setup_linkage linkage; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } sf; - - struct { - struct brw_wm_prog_data *prog_data; - -// struct brw_wm_compiler *compile_data; - - - /** - * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER - * cache - */ - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; - - unsigned render_surf; - unsigned nr_surfaces; - - unsigned max_threads; - struct pipe_buffer *scratch_buffer; - unsigned scratch_buffer_size; - - unsigned sampler_count; - unsigned sampler_gs_offset; - - struct brw_surface_binding_table bind; - unsigned bind_ss_offset; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } wm; - - - struct { - unsigned vp_gs_offset; - unsigned state_gs_offset; - } cc; - - - /* Used to give every program string a unique id - */ - unsigned program_id; -}; - - -#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) - - -/*====================================================================== - * brw_vtbl.c - */ -void brw_do_flush( struct brw_context *brw, - unsigned flags ); - - -/*====================================================================== - * brw_state.c - */ -void brw_validate_state(struct brw_context *brw); -void brw_init_state(struct brw_context *brw); -void brw_destroy_state(struct brw_context *brw); - - -/*====================================================================== - * brw_tex.c - */ -void brwUpdateTextureState( struct brw_context *brw ); - - -/* brw_urb.c - */ -void brw_upload_urb_fence(struct brw_context *brw); - -void brw_upload_constant_buffer_state(struct brw_context *brw); - -void brw_init_surface_functions(struct brw_context *brw); -void brw_init_state_functions(struct brw_context *brw); -void brw_init_flush_functions(struct brw_context *brw); -void brw_init_string_functions(struct brw_context *brw); - -/*====================================================================== - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static inline struct brw_context * -brw_context( struct pipe_context *ctx ) -{ - return (struct brw_context *)ctx; -} - -#endif - +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "tgsi/util/tgsi_scan.h" + +#include "brw_structs.h" +#include "brw_winsys.h" + + +/* Glossary: + * + * URB - uniform resource buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawining a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contigous + * MRF registers. All program output is via these messages. URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitary + * computation and emit whatever primtives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by latter units, including Quads and Lineloops. + * + * CS - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes descisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_MAX_CURBE (32*16) + +struct brw_context; +struct brw_winsys; + + +/* Raised when we receive new state across the pipe interface: + */ +#define BRW_NEW_VIEWPORT 0x1 +#define BRW_NEW_RASTERIZER 0x2 +#define BRW_NEW_FS 0x4 +#define BRW_NEW_BLEND 0x8 +#define BRW_NEW_CLIP 0x10 +#define BRW_NEW_SCISSOR 0x20 +#define BRW_NEW_STIPPLE 0x40 +#define BRW_NEW_FRAMEBUFFER 0x80 +#define BRW_NEW_ALPHA_TEST 0x100 +#define BRW_NEW_DEPTH_STENCIL 0x200 +#define BRW_NEW_SAMPLER 0x400 +#define BRW_NEW_TEXTURE 0x800 +#define BRW_NEW_CONSTANTS 0x1000 +#define BRW_NEW_VBO 0x2000 +#define BRW_NEW_VS 0x4000 + +/* Raised for other internal events: + */ +#define BRW_NEW_URB_FENCE 0x10000 +#define BRW_NEW_PSP 0x20000 +#define BRW_NEW_CURBE_OFFSETS 0x40000 +#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 +#define BRW_NEW_PRIMITIVE 0x100000 +#define BRW_NEW_SCENE 0x200000 +#define BRW_NEW_SF_LINKAGE 0x400000 + +extern int BRW_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_PRIMS 0x8 +#define DEBUG_VERTS 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_DRI 0x80 +#define DEBUG_DMA 0x100 +#define DEBUG_SANITY 0x200 +#define DEBUG_SYNC 0x400 +#define DEBUG_SLEEP 0x800 +#define DEBUG_PIXEL 0x1000 +#define DEBUG_STATS 0x2000 +#define DEBUG_TILE 0x4000 +#define DEBUG_SINGLE_THREAD 0x8000 +#define DEBUG_WM 0x10000 +#define DEBUG_URB 0x20000 +#define DEBUG_VS 0x40000 +#define DEBUG_BATCH 0x80000 +#define DEBUG_BUFMGR 0x100000 +#define DEBUG_BLIT 0x200000 +#define DEBUG_REGION 0x400000 +#define DEBUG_MIPTREE 0x800000 + +#define DBG(...) do { \ + if (BRW_DEBUG & FILE_DEBUG_FLAG) \ + debug_printf(__VA_ARGS__); \ +} while(0) + +#define PRINT(...) do { \ + debug_printf(brw->pipe.winsys, __VA_ARGS__); \ +} while(0) + +struct brw_state_flags { + unsigned cache; + unsigned brw; +}; + + +struct brw_vertex_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + int id; +}; + + +struct brw_fragment_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + + boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ + int id; +}; + + +struct pipe_setup_linkage { + struct { + unsigned vp_output:5; + unsigned interp_mode:4; + unsigned bf_vp_output:5; + } fp_input[PIPE_MAX_SHADER_INPUTS]; + + unsigned fp_input_count:5; + unsigned max_vp_output:5; +}; + + + +struct brw_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + +struct brw_wm_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + + unsigned first_curbe_grf; + unsigned total_grf; + unsigned total_scratch; + + /* Internally generated constants for the CURBE. These are loaded + * ahead of the data from the constant buffer. + */ + const float internal_const[8]; + unsigned nr_internal_consts; + unsigned max_const; + + boolean error; +}; + +struct brw_sf_prog_data { + unsigned urb_read_length; + unsigned total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. + */ + unsigned urb_entry_size; +}; + +struct brw_clip_prog_data { + unsigned curb_read_length; /* user planes? */ + unsigned clip_mode; + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_gs_prog_data { + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_vs_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + unsigned total_grf; + unsigned outputs_written; + + unsigned inputs_read; + + unsigned max_const; + + float imm_buf[PIPE_MAX_CONSTANT][4]; + unsigned num_imm; + unsigned num_consts; + + /* Used for calculating urb partitions: + */ + unsigned urb_entry_size; +}; + + +#define BRW_MAX_TEX_UNIT 8 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 + +/* Create a fixed sized struct for caching binding tables: + */ +struct brw_surface_binding_table { + unsigned surf_ss_offset[BRW_WM_MAX_SURF]; +}; + + +struct brw_cache; + +struct brw_mem_pool { + struct pipe_buffer *buffer; + + unsigned size; + unsigned offset; /* offset of first free byte */ + + struct brw_context *brw; +}; + +struct brw_cache_item { + unsigned hash; + unsigned key_size; /* for variable-sized keys */ + const void *key; + + unsigned offset; /* offset within pool's buffer */ + unsigned data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + unsigned id; + + const char *name; + + struct brw_context *brw; + struct brw_mem_pool *pool; + + struct brw_cache_item **items; + unsigned size, n_items; + + unsigned key_size; /* for fixed-size keys */ + unsigned aux_size; + + unsigned last_addr; /* offset of active item */ +}; + + + + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime. Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. + */ +struct brw_tracked_state { + struct brw_state_flags dirty; + void (*update)( struct brw_context *brw ); +}; + + +/* Flags for brw->state.cache. + */ +#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) +#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) +#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) +#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) +#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) +#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) +#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) +#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) +#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) +#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) +#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) +#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) +#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) +#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) +#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) +#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) +#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) +#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) + + + + +enum brw_mempool_id { + BRW_GS_POOL, + BRW_SS_POOL, + BRW_MAX_POOL +}; + + +struct brw_cached_batch_item { + struct header *header; + unsigned sz; + struct brw_cached_batch_item *next; +}; + + + +/* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life + * be easier if C allowed arrays of packed elements? + */ +#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) + + + + +struct brw_vertex_info { + unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ + unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ +}; + + + + + +struct brw_context +{ + struct pipe_context pipe; + struct brw_winsys *winsys; + + unsigned primitive; + unsigned reduced_primitive; + + boolean emit_state_always; + + struct { + struct brw_state_flags dirty; + } state; + + + struct { + const struct pipe_blend_state *Blend; + const struct pipe_depth_stencil_alpha_state *DepthStencil; + const struct pipe_poly_stipple *PolygonStipple; + const struct pipe_rasterizer_state *Raster; + const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; + const struct brw_vertex_program *VertexProgram; + const struct brw_fragment_program *FragmentProgram; + + struct pipe_clip_state Clip; + struct pipe_blend_color BlendColor; + struct pipe_scissor_state Scissor; + struct pipe_viewport_state Viewport; + struct pipe_framebuffer_state FrameBuffer; + + const struct pipe_constant_buffer *Constants[2]; + const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; + } attribs; + + unsigned num_samplers; + unsigned num_textures; + + struct brw_mem_pool pool[BRW_MAX_POOL]; + struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cached_batch_item *cached_batch_items; + + struct { + + /* Arrays with buffer objects to copy non-bufferobj arrays into + * for upload: + */ + const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; + + struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + + /* Summary of size and varying of active arrays, so we can check + * for changes to this state: + */ + struct brw_vertex_info info; + } vb; + + + unsigned hardware_dirty; + unsigned dirty; + unsigned pci_id; + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + unsigned vsize; /* vertex size plus header in urb registers */ + unsigned csize; /* constant buffer size in urb registers */ + unsigned sfsize; /* setup data size in urb registers */ + + boolean constrained; + + unsigned nr_vs_entries; + unsigned nr_gs_entries; + unsigned nr_clip_entries; + unsigned nr_sf_entries; + unsigned nr_cs_entries; + +/* unsigned vs_size; */ +/* unsigned gs_size; */ +/* unsigned clip_size; */ +/* unsigned sf_size; */ +/* unsigned cs_size; */ + + unsigned vs_start; + unsigned gs_start; + unsigned clip_start; + unsigned sf_start; + unsigned cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + unsigned wm_start; + unsigned wm_size; + unsigned clip_start; + unsigned clip_size; + unsigned vs_start; + unsigned vs_size; + unsigned total_size; + + unsigned gs_offset; + + float *last_buf; + unsigned last_bufsz; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + boolean prog_active; + unsigned prog_gs_offset; + unsigned state_gs_offset; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + struct pipe_setup_linkage linkage; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + +// struct brw_wm_compiler *compile_data; + + + /** + * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER + * cache + */ + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + unsigned render_surf; + unsigned nr_surfaces; + + unsigned max_threads; + struct pipe_buffer *scratch_buffer; + unsigned scratch_buffer_size; + + unsigned sampler_count; + unsigned sampler_gs_offset; + + struct brw_surface_binding_table bind; + unsigned bind_ss_offset; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } wm; + + + struct { + unsigned vp_gs_offset; + unsigned state_gs_offset; + } cc; + + + /* Used to give every program string a unique id + */ + unsigned program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) + + +/*====================================================================== + * brw_vtbl.c + */ +void brw_do_flush( struct brw_context *brw, + unsigned flags ); + + +/*====================================================================== + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + + +/*====================================================================== + * brw_tex.c + */ +void brwUpdateTextureState( struct brw_context *brw ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +void brw_upload_constant_buffer_state(struct brw_context *brw); + +void brw_init_surface_functions(struct brw_context *brw); +void brw_init_state_functions(struct brw_context *brw); +void brw_init_flush_functions(struct brw_context *brw); +void brw_init_string_functions(struct brw_context *brw); + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static inline struct brw_context * +brw_context( struct pipe_context *ctx ) +{ + return (struct brw_context *)ctx; +} + +#endif + diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index ac243b7e4f9..caeeba46300 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -1,469 +1,469 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Zack Rusin <zack@tungstengraphics.com> - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "pipe/p_winsys.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/util/tgsi_parse.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_draw.h" - - -#define DUP( TYPE, VAL ) \ -do { \ - struct TYPE *x = malloc(sizeof(*x)); \ - memcpy(x, VAL, sizeof(*x) ); \ - return x; \ -} while (0) - -/************************************************************************ - * Blend - */ -static void * -brw_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - DUP( pipe_blend_state, blend ); -} - -static void brw_bind_blend_state(struct pipe_context *pipe, - void *blend) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Blend = (struct pipe_blend_state*)blend; - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - - -static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - free(blend); -} - -static void brw_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.BlendColor = *blend_color; - - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - -/************************************************************************ - * Sampler - */ - -static void * -brw_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - DUP( pipe_sampler_state, sampler ); -} - -static void brw_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) -{ - struct brw_context *brw = brw_context(pipe); - - assert(num <= PIPE_MAX_SAMPLERS); - - /* Check for no-op */ - if (num == brw->num_samplers && - !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) - return; - - memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); - memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * - sizeof(void *)); - - brw->num_samplers = num; - - brw->state.dirty.brw |= BRW_NEW_SAMPLER; -} - -static void brw_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - free(sampler); -} - - -/************************************************************************ - * Depth stencil - */ - -static void * -brw_create_depth_stencil_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) -{ - DUP( pipe_depth_stencil_alpha_state, depth_stencil ); -} - -static void brw_bind_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; - - brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; -} - -static void brw_delete_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - free(depth_stencil); -} - -/************************************************************************ - * Scissor - */ -static void brw_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct brw_context *brw = brw_context(pipe); - - memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); - brw->state.dirty.brw |= BRW_NEW_SCISSOR; -} - - -/************************************************************************ - * Stipple - */ - -static void brw_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ -} - - -/************************************************************************ - * Fragment shader - */ - -static void * brw_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); - - brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); - brw_fp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_fp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_fp->info2); -#endif - - tgsi_dump(shader->tokens, 0); - - - return (void *)brw_fp; -} - -static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; - brw->state.dirty.brw |= BRW_NEW_FS; -} - -static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; - - FREE((void *) brw_fp->program.tokens); - FREE(brw_fp); -} - - -/************************************************************************ - * Vertex shader and other TNL state - */ - -static void *brw_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); - - brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); - brw_vp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_vp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_vp->info2); -#endif - tgsi_dump(shader->tokens, 0); - - return (void *)brw_vp; -} - -static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; - brw->state.dirty.brw |= BRW_NEW_VS; - - debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); -} - -static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; - - FREE((void *) brw_vp->program.tokens); - FREE(brw_vp); -} - - -static void brw_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Clip = *clip; -} - - -static void brw_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Viewport = *viewport; /* struct copy */ - brw->state.dirty.brw |= BRW_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - //draw_set_viewport_state(brw->draw, viewport); -} - - -static void brw_set_vertex_buffers(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct brw_context *brw = brw_context(pipe); - memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); -} - -static void brw_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *elements) -{ - /* flush ? */ - struct brw_context *brw = brw_context(pipe); - uint i; - - assert(count <= PIPE_MAX_ATTRIBS); - - for (i = 0; i < count; i++) { - struct brw_vertex_element_state el; - memset(&el, 0, sizeof(el)); - - el.ve0.src_offset = elements[i].src_offset; - el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); - el.ve0.valid = 1; - el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; - - el.ve1.dst_offset = i * 4; - - el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; - - switch (elements[i].nr_components) { - case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } - - brw->vb.inputs[i] = el; - } -} - - - -/************************************************************************ - * Constant buffers - */ - -static void brw_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf) -{ - struct brw_context *brw = brw_context(pipe); - - assert(buf == 0 || index == 0); - - brw->attribs.Constants[shader] = buf; - brw->state.dirty.brw |= BRW_NEW_CONSTANTS; -} - - -/************************************************************************ - * Texture surfaces - */ - - -static void brw_set_sampler_textures(struct pipe_context *pipe, - unsigned num, - struct pipe_texture **texture) -{ - struct brw_context *brw = brw_context(pipe); - uint i; - - assert(num <= PIPE_MAX_SAMPLERS); - - /* Check for no-op */ - if (num == brw->num_textures && - !memcmp(brw->attribs.Texture, texture, num * - sizeof(struct pipe_texture *))) - return; - - for (i = 0; i < num; i++) - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], - texture[i]); - - for (i = num; i < brw->num_textures; i++) - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], - NULL); - - brw->num_textures = num; - - brw->state.dirty.brw |= BRW_NEW_TEXTURE; -} - - -/************************************************************************ - * Render targets, etc - */ - -static void brw_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FrameBuffer = *fb; /* struct copy */ - - brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; -} - - - -/************************************************************************ - * Rasterizer state - */ - -static void * -brw_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - DUP(pipe_rasterizer_state, rasterizer); -} - -static void brw_bind_rasterizer_state( struct pipe_context *pipe, - void *setup ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; - - /* Also pass-through to draw module: - */ - //draw_set_rasterizer_state(brw->draw, setup); - - brw->state.dirty.brw |= BRW_NEW_RASTERIZER; -} - -static void brw_delete_rasterizer_state(struct pipe_context *pipe, - void *setup) -{ - free(setup); -} - - - -void -brw_init_state_functions( struct brw_context *brw ) -{ - brw->pipe.create_blend_state = brw_create_blend_state; - brw->pipe.bind_blend_state = brw_bind_blend_state; - brw->pipe.delete_blend_state = brw_delete_blend_state; - - brw->pipe.create_sampler_state = brw_create_sampler_state; - brw->pipe.bind_sampler_states = brw_bind_sampler_states; - brw->pipe.delete_sampler_state = brw_delete_sampler_state; - - brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; - brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; - brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; - - brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; - brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; - brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; - brw->pipe.create_fs_state = brw_create_fs_state; - brw->pipe.bind_fs_state = brw_bind_fs_state; - brw->pipe.delete_fs_state = brw_delete_fs_state; - brw->pipe.create_vs_state = brw_create_vs_state; - brw->pipe.bind_vs_state = brw_bind_vs_state; - brw->pipe.delete_vs_state = brw_delete_vs_state; - - brw->pipe.set_blend_color = brw_set_blend_color; - brw->pipe.set_clip_state = brw_set_clip_state; - brw->pipe.set_constant_buffer = brw_set_constant_buffer; - brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; - -// brw->pipe.set_feedback_state = brw_set_feedback_state; -// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; - - brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; - brw->pipe.set_scissor_state = brw_set_scissor_state; - brw->pipe.set_sampler_textures = brw_set_sampler_textures; - brw->pipe.set_viewport_state = brw_set_viewport_state; - brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; - brw->pipe.set_vertex_elements = brw_set_vertex_elements; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Zack Rusin <zack@tungstengraphics.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_draw.h" + + +#define DUP( TYPE, VAL ) \ +do { \ + struct TYPE *x = malloc(sizeof(*x)); \ + memcpy(x, VAL, sizeof(*x) ); \ + return x; \ +} while (0) + +/************************************************************************ + * Blend + */ +static void * +brw_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + DUP( pipe_blend_state, blend ); +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Blend = (struct pipe_blend_state*)blend; + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + + +static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + free(blend); +} + +static void brw_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.BlendColor = *blend_color; + + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + +/************************************************************************ + * Sampler + */ + +static void * +brw_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + DUP( pipe_sampler_state, sampler ); +} + +static void brw_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct brw_context *brw = brw_context(pipe); + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_samplers && + !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) + return; + + memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); + memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * + sizeof(void *)); + + brw->num_samplers = num; + + brw->state.dirty.brw |= BRW_NEW_SAMPLER; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + free(sampler); +} + + +/************************************************************************ + * Depth stencil + */ + +static void * +brw_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + DUP( pipe_depth_stencil_alpha_state, depth_stencil ); +} + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + free(depth_stencil); +} + +/************************************************************************ + * Scissor + */ +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); + brw->state.dirty.brw |= BRW_NEW_SCISSOR; +} + + +/************************************************************************ + * Stipple + */ + +static void brw_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + +/************************************************************************ + * Fragment shader + */ + +static void * brw_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); + + brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); + brw_fp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_fp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_fp->info2); +#endif + + tgsi_dump(shader->tokens, 0); + + + return (void *)brw_fp; +} + +static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; + brw->state.dirty.brw |= BRW_NEW_FS; +} + +static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; + + FREE((void *) brw_fp->program.tokens); + FREE(brw_fp); +} + + +/************************************************************************ + * Vertex shader and other TNL state + */ + +static void *brw_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); + + brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); + brw_vp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_vp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_vp->info2); +#endif + tgsi_dump(shader->tokens, 0); + + return (void *)brw_vp; +} + +static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; + brw->state.dirty.brw |= BRW_NEW_VS; + + debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); +} + +static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; + + FREE((void *) brw_vp->program.tokens); + FREE(brw_vp); +} + + +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Clip = *clip; +} + + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Viewport = *viewport; /* struct copy */ + brw->state.dirty.brw |= BRW_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + //draw_set_viewport_state(brw->draw, viewport); +} + + +static void brw_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct brw_context *brw = brw_context(pipe); + memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); +} + +static void brw_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + /* flush ? */ + struct brw_context *brw = brw_context(pipe); + uint i; + + assert(count <= PIPE_MAX_ATTRIBS); + + for (i = 0; i < count; i++) { + struct brw_vertex_element_state el; + memset(&el, 0, sizeof(el)); + + el.ve0.src_offset = elements[i].src_offset; + el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); + el.ve0.valid = 1; + el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; + + el.ve1.dst_offset = i * 4; + + el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + + switch (elements[i].nr_components) { + case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; + case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; + case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + brw->vb.inputs[i] = el; + } +} + + + +/************************************************************************ + * Constant buffers + */ + +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + + assert(buf == 0 || index == 0); + + brw->attribs.Constants[shader] = buf; + brw->state.dirty.brw |= BRW_NEW_CONSTANTS; +} + + +/************************************************************************ + * Texture surfaces + */ + + +static void brw_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct brw_context *brw = brw_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_textures && + !memcmp(brw->attribs.Texture, texture, num * + sizeof(struct pipe_texture *))) + return; + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + texture[i]); + + for (i = num; i < brw->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + NULL); + + brw->num_textures = num; + + brw->state.dirty.brw |= BRW_NEW_TEXTURE; +} + + +/************************************************************************ + * Render targets, etc + */ + +static void brw_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FrameBuffer = *fb; /* struct copy */ + + brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; +} + + + +/************************************************************************ + * Rasterizer state + */ + +static void * +brw_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + DUP(pipe_rasterizer_state, rasterizer); +} + +static void brw_bind_rasterizer_state( struct pipe_context *pipe, + void *setup ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; + + /* Also pass-through to draw module: + */ + //draw_set_rasterizer_state(brw->draw, setup); + + brw->state.dirty.brw |= BRW_NEW_RASTERIZER; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + free(setup); +} + + + +void +brw_init_state_functions( struct brw_context *brw ) +{ + brw->pipe.create_blend_state = brw_create_blend_state; + brw->pipe.bind_blend_state = brw_bind_blend_state; + brw->pipe.delete_blend_state = brw_delete_blend_state; + + brw->pipe.create_sampler_state = brw_create_sampler_state; + brw->pipe.bind_sampler_states = brw_bind_sampler_states; + brw->pipe.delete_sampler_state = brw_delete_sampler_state; + + brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; + + brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; + brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; + brw->pipe.create_fs_state = brw_create_fs_state; + brw->pipe.bind_fs_state = brw_bind_fs_state; + brw->pipe.delete_fs_state = brw_delete_fs_state; + brw->pipe.create_vs_state = brw_create_vs_state; + brw->pipe.bind_vs_state = brw_bind_vs_state; + brw->pipe.delete_vs_state = brw_delete_vs_state; + + brw->pipe.set_blend_color = brw_set_blend_color; + brw->pipe.set_clip_state = brw_set_clip_state; + brw->pipe.set_constant_buffer = brw_set_constant_buffer; + brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; + +// brw->pipe.set_feedback_state = brw_set_feedback_state; +// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; + + brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; + brw->pipe.set_scissor_state = brw_set_scissor_state; + brw->pipe.set_sampler_textures = brw_set_sampler_textures; + brw->pipe.set_viewport_state = brw_set_viewport_state; + brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; + brw->pipe.set_vertex_elements = brw_set_vertex_elements; +} -- cgit v1.2.3 From c428997a521475c46ccd1df4e8ed8ccc6c4f8d61 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 26 May 2008 23:29:38 +0900 Subject: Revert DOS line endings. --- src/gallium/drivers/i915simple/i915_context.h | 682 ++++++------ src/gallium/drivers/i965simple/brw_context.h | 1368 ++++++++++++------------- src/gallium/drivers/i965simple/brw_state.c | 938 ++++++++--------- 3 files changed, 1494 insertions(+), 1494 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 38e6a348845..53fc5ed0795 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -1,341 +1,341 @@ - /************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_CONTEXT_H -#define I915_CONTEXT_H - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "draw/draw_vertex.h" - -#include "tgsi/util/tgsi_scan.h" - - -#define I915_TEX_UNITS 8 - -#define I915_DYNAMIC_MODES4 0 -#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ -#define I915_DYNAMIC_DEPTHSCALE_1 2 -#define I915_DYNAMIC_IAB 3 -#define I915_DYNAMIC_BC_0 4 /* just the header */ -#define I915_DYNAMIC_BC_1 5 -#define I915_DYNAMIC_BFO_0 6 -#define I915_DYNAMIC_BFO_1 7 -#define I915_DYNAMIC_STP_0 8 -#define I915_DYNAMIC_STP_1 9 -#define I915_DYNAMIC_SC_ENA_0 10 -#define I915_DYNAMIC_SC_RECT_0 11 -#define I915_DYNAMIC_SC_RECT_1 12 -#define I915_DYNAMIC_SC_RECT_2 13 -#define I915_MAX_DYNAMIC 14 - - -#define I915_IMMEDIATE_S0 0 -#define I915_IMMEDIATE_S1 1 -#define I915_IMMEDIATE_S2 2 -#define I915_IMMEDIATE_S3 3 -#define I915_IMMEDIATE_S4 4 -#define I915_IMMEDIATE_S5 5 -#define I915_IMMEDIATE_S6 6 -#define I915_IMMEDIATE_S7 7 -#define I915_MAX_IMMEDIATE 8 - -/* These must mach the order of LI0_STATE_* bits, as they will be used - * to generate hardware packets: - */ -#define I915_CACHE_STATIC 0 -#define I915_CACHE_DYNAMIC 1 /* handled specially */ -#define I915_CACHE_SAMPLER 2 -#define I915_CACHE_MAP 3 -#define I915_CACHE_PROGRAM 4 -#define I915_CACHE_CONSTANTS 5 -#define I915_MAX_CACHE 6 - -#define I915_MAX_CONSTANT 32 - - -/** See constant_flags[] below */ -#define I915_CONSTFLAG_USER 0x1f - - -/** - * Subclass of pipe_shader_state - */ -struct i915_fragment_shader -{ - struct pipe_shader_state state; - - struct tgsi_shader_info info; - - uint *program; - uint program_len; - - /** - * constants introduced during translation. - * These are placed at the end of the constant buffer and grow toward - * the beginning (eg: slot 31, 30 29, ...) - * User-provided constants start at 0. - * This allows both types of constants to co-exist (until there's too many) - * and doesn't require regenerating/changing the fragment program to - * shuffle constants around. - */ - uint num_constants; - float constants[I915_MAX_CONSTANT][4]; - - /** - * Status of each constant - * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding - * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) - * Else, the bitmask indicates which components are occupied by immediates. - */ - ubyte constant_flags[I915_MAX_CONSTANT]; -}; - - -struct i915_cache_context; - -/* Use to calculate differences between state emitted to hardware and - * current driver-calculated state. - */ -struct i915_state -{ - unsigned immediate[I915_MAX_IMMEDIATE]; - unsigned dynamic[I915_MAX_DYNAMIC]; - - float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; - /** number of constants passed in through a constant buffer */ - uint num_user_constants[PIPE_SHADER_TYPES]; - - /* texture sampler state */ - unsigned sampler[I915_TEX_UNITS][3]; - unsigned sampler_enable_flags; - unsigned sampler_enable_nr; - - /* texture image buffers */ - unsigned texbuffer[I915_TEX_UNITS][2]; - - /** Describes the current hardware vertex layout */ - struct vertex_info vertex_info; - - unsigned id; /* track lost context events */ -}; - -struct i915_blend_state { - unsigned iab; - unsigned modes4; - unsigned LIS5; - unsigned LIS6; -}; - -struct i915_depth_stencil_state { - unsigned stencil_modes4; - unsigned bfo[2]; - unsigned stencil_LIS5; - unsigned depth_LIS6; -}; - -struct i915_rasterizer_state { - int light_twoside : 1; - unsigned st; - enum interp_mode color_interp; - - unsigned LIS4; - unsigned LIS7; - unsigned sc[1]; - - const struct pipe_rasterizer_state *templ; - - union { float f; unsigned u; } ds[2]; -}; - -struct i915_sampler_state { - unsigned state[3]; - const struct pipe_sampler_state *templ; -}; - - -struct i915_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -struct i915_context -{ - struct pipe_context pipe; - struct i915_winsys *winsys; - struct draw_context *draw; - - /* The most recent drawing state as set by the driver: - */ - const struct i915_blend_state *blend; - const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - const struct i915_depth_stencil_state *depth_stencil; - const struct i915_rasterizer_state *rasterizer; - - struct i915_fragment_shader *fs; - - struct pipe_blend_color blend_color; - struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; - struct pipe_framebuffer_state framebuffer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct i915_texture *texture[PIPE_MAX_SAMPLERS]; - struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - - unsigned dirty; - - unsigned num_samplers; - unsigned num_textures; - unsigned num_vertex_elements; - unsigned num_vertex_buffers; - - unsigned *batch_start; - - /** Vertex buffer */ - struct pipe_buffer *vbo; - - struct i915_state current; - unsigned hardware_dirty; - - unsigned debug; -}; - -/* A flag for each state_tracker state object: - */ -#define I915_NEW_VIEWPORT 0x1 -#define I915_NEW_RASTERIZER 0x2 -#define I915_NEW_FS 0x4 -#define I915_NEW_BLEND 0x8 -#define I915_NEW_CLIP 0x10 -#define I915_NEW_SCISSOR 0x20 -#define I915_NEW_STIPPLE 0x40 -#define I915_NEW_FRAMEBUFFER 0x80 -#define I915_NEW_ALPHA_TEST 0x100 -#define I915_NEW_DEPTH_STENCIL 0x200 -#define I915_NEW_SAMPLER 0x400 -#define I915_NEW_TEXTURE 0x800 -#define I915_NEW_CONSTANTS 0x1000 -#define I915_NEW_VBO 0x2000 -#define I915_NEW_VS 0x4000 - - -/* Driver's internally generated state flags: - */ -#define I915_NEW_VERTEX_FORMAT 0x10000 - - -/* Dirty flags for hardware emit - */ -#define I915_HW_STATIC (1<<I915_CACHE_STATIC) -#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC) -#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER) -#define I915_HW_MAP (1<<I915_CACHE_MAP) -#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) -#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) -#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) -#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) - - -/*********************************************************************** - * i915_prim_emit.c: - */ -struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); - - -/*********************************************************************** - * i915_prim_vbuf.c: - */ -struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); - - -/*********************************************************************** - * i915_state_emit.c: - */ -void i915_emit_hardware_state(struct i915_context *i915 ); - - - -/*********************************************************************** - * i915_clear.c: - */ -void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue); - - -/*********************************************************************** - * i915_surface.c: - */ -void i915_init_surface_functions( struct i915_context *i915 ); - -void i915_init_state_functions( struct i915_context *i915 ); -void i915_init_flush_functions( struct i915_context *i915 ); -void i915_init_string_functions( struct i915_context *i915 ); - - - - -/*********************************************************************** - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static INLINE struct i915_context * -i915_context( struct pipe_context *pipe ) -{ - return (struct i915_context *)pipe; -} - - - -#endif + /************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_CONTEXT_H +#define I915_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "tgsi/util/tgsi_scan.h" + + +#define I915_TEX_UNITS 8 + +#define I915_DYNAMIC_MODES4 0 +#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ +#define I915_DYNAMIC_DEPTHSCALE_1 2 +#define I915_DYNAMIC_IAB 3 +#define I915_DYNAMIC_BC_0 4 /* just the header */ +#define I915_DYNAMIC_BC_1 5 +#define I915_DYNAMIC_BFO_0 6 +#define I915_DYNAMIC_BFO_1 7 +#define I915_DYNAMIC_STP_0 8 +#define I915_DYNAMIC_STP_1 9 +#define I915_DYNAMIC_SC_ENA_0 10 +#define I915_DYNAMIC_SC_RECT_0 11 +#define I915_DYNAMIC_SC_RECT_1 12 +#define I915_DYNAMIC_SC_RECT_2 13 +#define I915_MAX_DYNAMIC 14 + + +#define I915_IMMEDIATE_S0 0 +#define I915_IMMEDIATE_S1 1 +#define I915_IMMEDIATE_S2 2 +#define I915_IMMEDIATE_S3 3 +#define I915_IMMEDIATE_S4 4 +#define I915_IMMEDIATE_S5 5 +#define I915_IMMEDIATE_S6 6 +#define I915_IMMEDIATE_S7 7 +#define I915_MAX_IMMEDIATE 8 + +/* These must mach the order of LI0_STATE_* bits, as they will be used + * to generate hardware packets: + */ +#define I915_CACHE_STATIC 0 +#define I915_CACHE_DYNAMIC 1 /* handled specially */ +#define I915_CACHE_SAMPLER 2 +#define I915_CACHE_MAP 3 +#define I915_CACHE_PROGRAM 4 +#define I915_CACHE_CONSTANTS 5 +#define I915_MAX_CACHE 6 + +#define I915_MAX_CONSTANT 32 + + +/** See constant_flags[] below */ +#define I915_CONSTFLAG_USER 0x1f + + +/** + * Subclass of pipe_shader_state + */ +struct i915_fragment_shader +{ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + + uint *program; + uint program_len; + + /** + * constants introduced during translation. + * These are placed at the end of the constant buffer and grow toward + * the beginning (eg: slot 31, 30 29, ...) + * User-provided constants start at 0. + * This allows both types of constants to co-exist (until there's too many) + * and doesn't require regenerating/changing the fragment program to + * shuffle constants around. + */ + uint num_constants; + float constants[I915_MAX_CONSTANT][4]; + + /** + * Status of each constant + * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding + * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) + * Else, the bitmask indicates which components are occupied by immediates. + */ + ubyte constant_flags[I915_MAX_CONSTANT]; +}; + + +struct i915_cache_context; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. + */ +struct i915_state +{ + unsigned immediate[I915_MAX_IMMEDIATE]; + unsigned dynamic[I915_MAX_DYNAMIC]; + + float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; + /** number of constants passed in through a constant buffer */ + uint num_user_constants[PIPE_SHADER_TYPES]; + + /* texture sampler state */ + unsigned sampler[I915_TEX_UNITS][3]; + unsigned sampler_enable_flags; + unsigned sampler_enable_nr; + + /* texture image buffers */ + unsigned texbuffer[I915_TEX_UNITS][2]; + + /** Describes the current hardware vertex layout */ + struct vertex_info vertex_info; + + unsigned id; /* track lost context events */ +}; + +struct i915_blend_state { + unsigned iab; + unsigned modes4; + unsigned LIS5; + unsigned LIS6; +}; + +struct i915_depth_stencil_state { + unsigned stencil_modes4; + unsigned bfo[2]; + unsigned stencil_LIS5; + unsigned depth_LIS6; +}; + +struct i915_rasterizer_state { + int light_twoside : 1; + unsigned st; + enum interp_mode color_interp; + + unsigned LIS4; + unsigned LIS7; + unsigned sc[1]; + + const struct pipe_rasterizer_state *templ; + + union { float f; unsigned u; } ds[2]; +}; + +struct i915_sampler_state { + unsigned state[3]; + const struct pipe_sampler_state *templ; +}; + + +struct i915_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +struct i915_context +{ + struct pipe_context pipe; + struct i915_winsys *winsys; + struct draw_context *draw; + + /* The most recent drawing state as set by the driver: + */ + const struct i915_blend_state *blend; + const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct i915_depth_stencil_state *depth_stencil; + const struct i915_rasterizer_state *rasterizer; + + struct i915_fragment_shader *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct i915_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; + + unsigned *batch_start; + + /** Vertex buffer */ + struct pipe_buffer *vbo; + + struct i915_state current; + unsigned hardware_dirty; + + unsigned debug; +}; + +/* A flag for each state_tracker state object: + */ +#define I915_NEW_VIEWPORT 0x1 +#define I915_NEW_RASTERIZER 0x2 +#define I915_NEW_FS 0x4 +#define I915_NEW_BLEND 0x8 +#define I915_NEW_CLIP 0x10 +#define I915_NEW_SCISSOR 0x20 +#define I915_NEW_STIPPLE 0x40 +#define I915_NEW_FRAMEBUFFER 0x80 +#define I915_NEW_ALPHA_TEST 0x100 +#define I915_NEW_DEPTH_STENCIL 0x200 +#define I915_NEW_SAMPLER 0x400 +#define I915_NEW_TEXTURE 0x800 +#define I915_NEW_CONSTANTS 0x1000 +#define I915_NEW_VBO 0x2000 +#define I915_NEW_VS 0x4000 + + +/* Driver's internally generated state flags: + */ +#define I915_NEW_VERTEX_FORMAT 0x10000 + + +/* Dirty flags for hardware emit + */ +#define I915_HW_STATIC (1<<I915_CACHE_STATIC) +#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC) +#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER) +#define I915_HW_MAP (1<<I915_CACHE_MAP) +#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) +#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) +#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) +#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) + + +/*********************************************************************** + * i915_prim_emit.c: + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_prim_vbuf.c: + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_state_emit.c: + */ +void i915_emit_hardware_state(struct i915_context *i915 ); + + + +/*********************************************************************** + * i915_clear.c: + */ +void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + +/*********************************************************************** + * i915_surface.c: + */ +void i915_init_surface_functions( struct i915_context *i915 ); + +void i915_init_state_functions( struct i915_context *i915 ); +void i915_init_flush_functions( struct i915_context *i915 ); +void i915_init_string_functions( struct i915_context *i915 ); + + + + +/*********************************************************************** + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct i915_context * +i915_context( struct pipe_context *pipe ) +{ + return (struct i915_context *)pipe; +} + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index eeccf367856..8ac6b4e6897 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -1,684 +1,684 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRWCONTEXT_INC -#define BRWCONTEXT_INC - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "tgsi/util/tgsi_scan.h" - -#include "brw_structs.h" -#include "brw_winsys.h" - - -/* Glossary: - * - * URB - uniform resource buffer. A mid-sized buffer which is - * partitioned between the fixed function units and used for passing - * values (vertices, primitives, constants) between them. - * - * CURBE - constant URB entry. An urb region (entry) used to hold - * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. - * - * VUE - vertex URB entry. An urb entry holding a vertex and usually - * a vertex header. The header contains control information and - * things like primitive type, Begin/end flags and clip codes. - * - * PUE - primitive URB entry. An urb entry produced by the setup (SF) - * unit holding rasterization and interpolation parameters. - * - * GRF - general register file. One of several register files - * addressable by programmed threads. The inputs (r0, payload, curbe, - * urb) of the thread are preloaded to this area before the thread is - * spawned. The registers are individually 8 dwords wide and suitable - * for general usage. Registers holding thread input values are not - * special and may be overwritten. - * - * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous - * MRF registers. All program output is via these messages. URB - * entries are populated by sending a message to the shared URB - * function containing the new data, together with a control word, - * often an unmodified copy of R0. - * - * R0 - GRF register 0. Typically holds control information used when - * sending messages to other threads. - * - * EU or GEN4 EU: The name of the programmable subsystem of the - * i965 hardware. Threads are executed by the EU, the registers - * described above are part of the EU architecture. - * - * Fixed function units: - * - * CS - Command streamer. Notional first unit, little software - * interaction. Holds the URB entries used for constant data, ie the - * CURBEs. - * - * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of - * this unit is responsible for pulling vertices out of vertex buffers - * in vram and injecting them into the processing pipe as VUEs. If - * enabled, it first passes them to a VS thread which is a good place - * for the driver to implement any active vertex shader. - * - * GS - Geometry Shader. This corresponds to a new DX10 concept. If - * enabled, incoming strips etc are passed to GS threads in individual - * line/triangle/point units. The GS thread may perform arbitary - * computation and emit whatever primtives with whatever vertices it - * chooses. This makes GS an excellent place to implement GL's - * unfilled polygon modes, though of course it is capable of much - * more. Additionally, GS is used to translate away primitives not - * handled by latter units, including Quads and Lineloops. - * - * CS - Clipper. Mesa's clipping algorithms are imported to run on - * this unit. The fixed function part performs cliptesting against - * the 6 fixed clipplanes and makes descisions on whether or not the - * incoming primitive needs to be passed to a thread for clipping. - * User clip planes are handled via cooperation with the VS thread. - * - * SF - Strips Fans or Setup: Triangles are prepared for - * rasterization. Interpolation coefficients are calculated. - * Flatshading and two-side lighting usually performed here. - * - * WM - Windower. Interpolation of vertex attributes performed here. - * Fragment shader implemented here. SIMD aspects of EU taken full - * advantage of, as pixels are processed in blocks of 16. - * - * CC - Color Calculator. No EU threads associated with this unit. - * Handles blending and (presumably) depth and stencil testing. - */ - -#define BRW_MAX_CURBE (32*16) - -struct brw_context; -struct brw_winsys; - - -/* Raised when we receive new state across the pipe interface: - */ -#define BRW_NEW_VIEWPORT 0x1 -#define BRW_NEW_RASTERIZER 0x2 -#define BRW_NEW_FS 0x4 -#define BRW_NEW_BLEND 0x8 -#define BRW_NEW_CLIP 0x10 -#define BRW_NEW_SCISSOR 0x20 -#define BRW_NEW_STIPPLE 0x40 -#define BRW_NEW_FRAMEBUFFER 0x80 -#define BRW_NEW_ALPHA_TEST 0x100 -#define BRW_NEW_DEPTH_STENCIL 0x200 -#define BRW_NEW_SAMPLER 0x400 -#define BRW_NEW_TEXTURE 0x800 -#define BRW_NEW_CONSTANTS 0x1000 -#define BRW_NEW_VBO 0x2000 -#define BRW_NEW_VS 0x4000 - -/* Raised for other internal events: - */ -#define BRW_NEW_URB_FENCE 0x10000 -#define BRW_NEW_PSP 0x20000 -#define BRW_NEW_CURBE_OFFSETS 0x40000 -#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 -#define BRW_NEW_PRIMITIVE 0x100000 -#define BRW_NEW_SCENE 0x200000 -#define BRW_NEW_SF_LINKAGE 0x400000 - -extern int BRW_DEBUG; - -#define DEBUG_TEXTURE 0x1 -#define DEBUG_STATE 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_PRIMS 0x8 -#define DEBUG_VERTS 0x10 -#define DEBUG_FALLBACKS 0x20 -#define DEBUG_VERBOSE 0x40 -#define DEBUG_DRI 0x80 -#define DEBUG_DMA 0x100 -#define DEBUG_SANITY 0x200 -#define DEBUG_SYNC 0x400 -#define DEBUG_SLEEP 0x800 -#define DEBUG_PIXEL 0x1000 -#define DEBUG_STATS 0x2000 -#define DEBUG_TILE 0x4000 -#define DEBUG_SINGLE_THREAD 0x8000 -#define DEBUG_WM 0x10000 -#define DEBUG_URB 0x20000 -#define DEBUG_VS 0x40000 -#define DEBUG_BATCH 0x80000 -#define DEBUG_BUFMGR 0x100000 -#define DEBUG_BLIT 0x200000 -#define DEBUG_REGION 0x400000 -#define DEBUG_MIPTREE 0x800000 - -#define DBG(...) do { \ - if (BRW_DEBUG & FILE_DEBUG_FLAG) \ - debug_printf(__VA_ARGS__); \ -} while(0) - -#define PRINT(...) do { \ - debug_printf(brw->pipe.winsys, __VA_ARGS__); \ -} while(0) - -struct brw_state_flags { - unsigned cache; - unsigned brw; -}; - - -struct brw_vertex_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - int id; -}; - - -struct brw_fragment_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - - boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ - int id; -}; - - -struct pipe_setup_linkage { - struct { - unsigned vp_output:5; - unsigned interp_mode:4; - unsigned bf_vp_output:5; - } fp_input[PIPE_MAX_SHADER_INPUTS]; - - unsigned fp_input_count:5; - unsigned max_vp_output:5; -}; - - - -struct brw_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ - -struct brw_wm_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - - unsigned first_curbe_grf; - unsigned total_grf; - unsigned total_scratch; - - /* Internally generated constants for the CURBE. These are loaded - * ahead of the data from the constant buffer. - */ - const float internal_const[8]; - unsigned nr_internal_consts; - unsigned max_const; - - boolean error; -}; - -struct brw_sf_prog_data { - unsigned urb_read_length; - unsigned total_grf; - - /* Each vertex may have upto 12 attributes, 4 components each, - * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 - * rows. - * - * Actually we use 4 for each, so call it 12 rows. - */ - unsigned urb_entry_size; -}; - -struct brw_clip_prog_data { - unsigned curb_read_length; /* user planes? */ - unsigned clip_mode; - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_gs_prog_data { - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_vs_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - unsigned total_grf; - unsigned outputs_written; - - unsigned inputs_read; - - unsigned max_const; - - float imm_buf[PIPE_MAX_CONSTANT][4]; - unsigned num_imm; - unsigned num_consts; - - /* Used for calculating urb partitions: - */ - unsigned urb_entry_size; -}; - - -#define BRW_MAX_TEX_UNIT 8 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 - -/* Create a fixed sized struct for caching binding tables: - */ -struct brw_surface_binding_table { - unsigned surf_ss_offset[BRW_WM_MAX_SURF]; -}; - - -struct brw_cache; - -struct brw_mem_pool { - struct pipe_buffer *buffer; - - unsigned size; - unsigned offset; /* offset of first free byte */ - - struct brw_context *brw; -}; - -struct brw_cache_item { - unsigned hash; - unsigned key_size; /* for variable-sized keys */ - const void *key; - - unsigned offset; /* offset within pool's buffer */ - unsigned data_size; - - struct brw_cache_item *next; -}; - - - -struct brw_cache { - unsigned id; - - const char *name; - - struct brw_context *brw; - struct brw_mem_pool *pool; - - struct brw_cache_item **items; - unsigned size, n_items; - - unsigned key_size; /* for fixed-size keys */ - unsigned aux_size; - - unsigned last_addr; /* offset of active item */ -}; - - - - -/* Considered adding a member to this struct to document which flags - * an update might raise so that ordering of the state atoms can be - * checked or derived at runtime. Dropped the idea in favor of having - * a debug mode where the state is monitored for flags which are - * raised that have already been tested against. - */ -struct brw_tracked_state { - struct brw_state_flags dirty; - void (*update)( struct brw_context *brw ); -}; - - -/* Flags for brw->state.cache. - */ -#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) -#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) -#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) -#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) -#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) -#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) -#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) -#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) -#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) -#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) -#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) -#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) -#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) -#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) -#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) -#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) -#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) -#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) - - - - -enum brw_mempool_id { - BRW_GS_POOL, - BRW_SS_POOL, - BRW_MAX_POOL -}; - - -struct brw_cached_batch_item { - struct header *header; - unsigned sz; - struct brw_cached_batch_item *next; -}; - - - -/* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life - * be easier if C allowed arrays of packed elements? - */ -#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) - - - - -struct brw_vertex_info { - unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ - unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ -}; - - - - - -struct brw_context -{ - struct pipe_context pipe; - struct brw_winsys *winsys; - - unsigned primitive; - unsigned reduced_primitive; - - boolean emit_state_always; - - struct { - struct brw_state_flags dirty; - } state; - - - struct { - const struct pipe_blend_state *Blend; - const struct pipe_depth_stencil_alpha_state *DepthStencil; - const struct pipe_poly_stipple *PolygonStipple; - const struct pipe_rasterizer_state *Raster; - const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; - const struct brw_vertex_program *VertexProgram; - const struct brw_fragment_program *FragmentProgram; - - struct pipe_clip_state Clip; - struct pipe_blend_color BlendColor; - struct pipe_scissor_state Scissor; - struct pipe_viewport_state Viewport; - struct pipe_framebuffer_state FrameBuffer; - - const struct pipe_constant_buffer *Constants[2]; - const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; - } attribs; - - unsigned num_samplers; - unsigned num_textures; - - struct brw_mem_pool pool[BRW_MAX_POOL]; - struct brw_cache cache[BRW_MAX_CACHE]; - struct brw_cached_batch_item *cached_batch_items; - - struct { - - /* Arrays with buffer objects to copy non-bufferobj arrays into - * for upload: - */ - const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; - - struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; - -#define BRW_NR_UPLOAD_BUFS 17 -#define BRW_UPLOAD_INIT_SIZE (128*1024) - - /* Summary of size and varying of active arrays, so we can check - * for changes to this state: - */ - struct brw_vertex_info info; - } vb; - - - unsigned hardware_dirty; - unsigned dirty; - unsigned pci_id; - /* BRW_NEW_URB_ALLOCATIONS: - */ - struct { - unsigned vsize; /* vertex size plus header in urb registers */ - unsigned csize; /* constant buffer size in urb registers */ - unsigned sfsize; /* setup data size in urb registers */ - - boolean constrained; - - unsigned nr_vs_entries; - unsigned nr_gs_entries; - unsigned nr_clip_entries; - unsigned nr_sf_entries; - unsigned nr_cs_entries; - -/* unsigned vs_size; */ -/* unsigned gs_size; */ -/* unsigned clip_size; */ -/* unsigned sf_size; */ -/* unsigned cs_size; */ - - unsigned vs_start; - unsigned gs_start; - unsigned clip_start; - unsigned sf_start; - unsigned cs_start; - } urb; - - - /* BRW_NEW_CURBE_OFFSETS: - */ - struct { - unsigned wm_start; - unsigned wm_size; - unsigned clip_start; - unsigned clip_size; - unsigned vs_start; - unsigned vs_size; - unsigned total_size; - - unsigned gs_offset; - - float *last_buf; - unsigned last_bufsz; - } curbe; - - struct { - struct brw_vs_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } vs; - - struct { - struct brw_gs_prog_data *prog_data; - - boolean prog_active; - unsigned prog_gs_offset; - unsigned state_gs_offset; - } gs; - - struct { - struct brw_clip_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } clip; - - - struct { - struct brw_sf_prog_data *prog_data; - - struct pipe_setup_linkage linkage; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } sf; - - struct { - struct brw_wm_prog_data *prog_data; - -// struct brw_wm_compiler *compile_data; - - - /** - * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER - * cache - */ - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; - - unsigned render_surf; - unsigned nr_surfaces; - - unsigned max_threads; - struct pipe_buffer *scratch_buffer; - unsigned scratch_buffer_size; - - unsigned sampler_count; - unsigned sampler_gs_offset; - - struct brw_surface_binding_table bind; - unsigned bind_ss_offset; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } wm; - - - struct { - unsigned vp_gs_offset; - unsigned state_gs_offset; - } cc; - - - /* Used to give every program string a unique id - */ - unsigned program_id; -}; - - -#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) - - -/*====================================================================== - * brw_vtbl.c - */ -void brw_do_flush( struct brw_context *brw, - unsigned flags ); - - -/*====================================================================== - * brw_state.c - */ -void brw_validate_state(struct brw_context *brw); -void brw_init_state(struct brw_context *brw); -void brw_destroy_state(struct brw_context *brw); - - -/*====================================================================== - * brw_tex.c - */ -void brwUpdateTextureState( struct brw_context *brw ); - - -/* brw_urb.c - */ -void brw_upload_urb_fence(struct brw_context *brw); - -void brw_upload_constant_buffer_state(struct brw_context *brw); - -void brw_init_surface_functions(struct brw_context *brw); -void brw_init_state_functions(struct brw_context *brw); -void brw_init_flush_functions(struct brw_context *brw); -void brw_init_string_functions(struct brw_context *brw); - -/*====================================================================== - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static inline struct brw_context * -brw_context( struct pipe_context *ctx ) -{ - return (struct brw_context *)ctx; -} - -#endif - +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "tgsi/util/tgsi_scan.h" + +#include "brw_structs.h" +#include "brw_winsys.h" + + +/* Glossary: + * + * URB - uniform resource buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawining a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contigous + * MRF registers. All program output is via these messages. URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitary + * computation and emit whatever primtives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by latter units, including Quads and Lineloops. + * + * CS - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes descisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_MAX_CURBE (32*16) + +struct brw_context; +struct brw_winsys; + + +/* Raised when we receive new state across the pipe interface: + */ +#define BRW_NEW_VIEWPORT 0x1 +#define BRW_NEW_RASTERIZER 0x2 +#define BRW_NEW_FS 0x4 +#define BRW_NEW_BLEND 0x8 +#define BRW_NEW_CLIP 0x10 +#define BRW_NEW_SCISSOR 0x20 +#define BRW_NEW_STIPPLE 0x40 +#define BRW_NEW_FRAMEBUFFER 0x80 +#define BRW_NEW_ALPHA_TEST 0x100 +#define BRW_NEW_DEPTH_STENCIL 0x200 +#define BRW_NEW_SAMPLER 0x400 +#define BRW_NEW_TEXTURE 0x800 +#define BRW_NEW_CONSTANTS 0x1000 +#define BRW_NEW_VBO 0x2000 +#define BRW_NEW_VS 0x4000 + +/* Raised for other internal events: + */ +#define BRW_NEW_URB_FENCE 0x10000 +#define BRW_NEW_PSP 0x20000 +#define BRW_NEW_CURBE_OFFSETS 0x40000 +#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 +#define BRW_NEW_PRIMITIVE 0x100000 +#define BRW_NEW_SCENE 0x200000 +#define BRW_NEW_SF_LINKAGE 0x400000 + +extern int BRW_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_PRIMS 0x8 +#define DEBUG_VERTS 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_DRI 0x80 +#define DEBUG_DMA 0x100 +#define DEBUG_SANITY 0x200 +#define DEBUG_SYNC 0x400 +#define DEBUG_SLEEP 0x800 +#define DEBUG_PIXEL 0x1000 +#define DEBUG_STATS 0x2000 +#define DEBUG_TILE 0x4000 +#define DEBUG_SINGLE_THREAD 0x8000 +#define DEBUG_WM 0x10000 +#define DEBUG_URB 0x20000 +#define DEBUG_VS 0x40000 +#define DEBUG_BATCH 0x80000 +#define DEBUG_BUFMGR 0x100000 +#define DEBUG_BLIT 0x200000 +#define DEBUG_REGION 0x400000 +#define DEBUG_MIPTREE 0x800000 + +#define DBG(...) do { \ + if (BRW_DEBUG & FILE_DEBUG_FLAG) \ + debug_printf(__VA_ARGS__); \ +} while(0) + +#define PRINT(...) do { \ + debug_printf(brw->pipe.winsys, __VA_ARGS__); \ +} while(0) + +struct brw_state_flags { + unsigned cache; + unsigned brw; +}; + + +struct brw_vertex_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + int id; +}; + + +struct brw_fragment_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + + boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ + int id; +}; + + +struct pipe_setup_linkage { + struct { + unsigned vp_output:5; + unsigned interp_mode:4; + unsigned bf_vp_output:5; + } fp_input[PIPE_MAX_SHADER_INPUTS]; + + unsigned fp_input_count:5; + unsigned max_vp_output:5; +}; + + + +struct brw_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + +struct brw_wm_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + + unsigned first_curbe_grf; + unsigned total_grf; + unsigned total_scratch; + + /* Internally generated constants for the CURBE. These are loaded + * ahead of the data from the constant buffer. + */ + const float internal_const[8]; + unsigned nr_internal_consts; + unsigned max_const; + + boolean error; +}; + +struct brw_sf_prog_data { + unsigned urb_read_length; + unsigned total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. + */ + unsigned urb_entry_size; +}; + +struct brw_clip_prog_data { + unsigned curb_read_length; /* user planes? */ + unsigned clip_mode; + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_gs_prog_data { + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_vs_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + unsigned total_grf; + unsigned outputs_written; + + unsigned inputs_read; + + unsigned max_const; + + float imm_buf[PIPE_MAX_CONSTANT][4]; + unsigned num_imm; + unsigned num_consts; + + /* Used for calculating urb partitions: + */ + unsigned urb_entry_size; +}; + + +#define BRW_MAX_TEX_UNIT 8 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 + +/* Create a fixed sized struct for caching binding tables: + */ +struct brw_surface_binding_table { + unsigned surf_ss_offset[BRW_WM_MAX_SURF]; +}; + + +struct brw_cache; + +struct brw_mem_pool { + struct pipe_buffer *buffer; + + unsigned size; + unsigned offset; /* offset of first free byte */ + + struct brw_context *brw; +}; + +struct brw_cache_item { + unsigned hash; + unsigned key_size; /* for variable-sized keys */ + const void *key; + + unsigned offset; /* offset within pool's buffer */ + unsigned data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + unsigned id; + + const char *name; + + struct brw_context *brw; + struct brw_mem_pool *pool; + + struct brw_cache_item **items; + unsigned size, n_items; + + unsigned key_size; /* for fixed-size keys */ + unsigned aux_size; + + unsigned last_addr; /* offset of active item */ +}; + + + + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime. Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. + */ +struct brw_tracked_state { + struct brw_state_flags dirty; + void (*update)( struct brw_context *brw ); +}; + + +/* Flags for brw->state.cache. + */ +#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) +#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) +#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) +#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) +#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) +#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) +#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) +#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) +#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) +#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) +#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) +#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) +#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) +#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) +#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) +#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) +#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) +#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) + + + + +enum brw_mempool_id { + BRW_GS_POOL, + BRW_SS_POOL, + BRW_MAX_POOL +}; + + +struct brw_cached_batch_item { + struct header *header; + unsigned sz; + struct brw_cached_batch_item *next; +}; + + + +/* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life + * be easier if C allowed arrays of packed elements? + */ +#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) + + + + +struct brw_vertex_info { + unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ + unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ +}; + + + + + +struct brw_context +{ + struct pipe_context pipe; + struct brw_winsys *winsys; + + unsigned primitive; + unsigned reduced_primitive; + + boolean emit_state_always; + + struct { + struct brw_state_flags dirty; + } state; + + + struct { + const struct pipe_blend_state *Blend; + const struct pipe_depth_stencil_alpha_state *DepthStencil; + const struct pipe_poly_stipple *PolygonStipple; + const struct pipe_rasterizer_state *Raster; + const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; + const struct brw_vertex_program *VertexProgram; + const struct brw_fragment_program *FragmentProgram; + + struct pipe_clip_state Clip; + struct pipe_blend_color BlendColor; + struct pipe_scissor_state Scissor; + struct pipe_viewport_state Viewport; + struct pipe_framebuffer_state FrameBuffer; + + const struct pipe_constant_buffer *Constants[2]; + const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; + } attribs; + + unsigned num_samplers; + unsigned num_textures; + + struct brw_mem_pool pool[BRW_MAX_POOL]; + struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cached_batch_item *cached_batch_items; + + struct { + + /* Arrays with buffer objects to copy non-bufferobj arrays into + * for upload: + */ + const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; + + struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + + /* Summary of size and varying of active arrays, so we can check + * for changes to this state: + */ + struct brw_vertex_info info; + } vb; + + + unsigned hardware_dirty; + unsigned dirty; + unsigned pci_id; + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + unsigned vsize; /* vertex size plus header in urb registers */ + unsigned csize; /* constant buffer size in urb registers */ + unsigned sfsize; /* setup data size in urb registers */ + + boolean constrained; + + unsigned nr_vs_entries; + unsigned nr_gs_entries; + unsigned nr_clip_entries; + unsigned nr_sf_entries; + unsigned nr_cs_entries; + +/* unsigned vs_size; */ +/* unsigned gs_size; */ +/* unsigned clip_size; */ +/* unsigned sf_size; */ +/* unsigned cs_size; */ + + unsigned vs_start; + unsigned gs_start; + unsigned clip_start; + unsigned sf_start; + unsigned cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + unsigned wm_start; + unsigned wm_size; + unsigned clip_start; + unsigned clip_size; + unsigned vs_start; + unsigned vs_size; + unsigned total_size; + + unsigned gs_offset; + + float *last_buf; + unsigned last_bufsz; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + boolean prog_active; + unsigned prog_gs_offset; + unsigned state_gs_offset; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + struct pipe_setup_linkage linkage; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + +// struct brw_wm_compiler *compile_data; + + + /** + * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER + * cache + */ + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + unsigned render_surf; + unsigned nr_surfaces; + + unsigned max_threads; + struct pipe_buffer *scratch_buffer; + unsigned scratch_buffer_size; + + unsigned sampler_count; + unsigned sampler_gs_offset; + + struct brw_surface_binding_table bind; + unsigned bind_ss_offset; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } wm; + + + struct { + unsigned vp_gs_offset; + unsigned state_gs_offset; + } cc; + + + /* Used to give every program string a unique id + */ + unsigned program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) + + +/*====================================================================== + * brw_vtbl.c + */ +void brw_do_flush( struct brw_context *brw, + unsigned flags ); + + +/*====================================================================== + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + + +/*====================================================================== + * brw_tex.c + */ +void brwUpdateTextureState( struct brw_context *brw ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +void brw_upload_constant_buffer_state(struct brw_context *brw); + +void brw_init_surface_functions(struct brw_context *brw); +void brw_init_state_functions(struct brw_context *brw); +void brw_init_flush_functions(struct brw_context *brw); +void brw_init_string_functions(struct brw_context *brw); + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static inline struct brw_context * +brw_context( struct pipe_context *ctx ) +{ + return (struct brw_context *)ctx; +} + +#endif + diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index ac243b7e4f9..caeeba46300 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -1,469 +1,469 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Zack Rusin <zack@tungstengraphics.com> - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "pipe/p_winsys.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/util/tgsi_parse.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_draw.h" - - -#define DUP( TYPE, VAL ) \ -do { \ - struct TYPE *x = malloc(sizeof(*x)); \ - memcpy(x, VAL, sizeof(*x) ); \ - return x; \ -} while (0) - -/************************************************************************ - * Blend - */ -static void * -brw_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - DUP( pipe_blend_state, blend ); -} - -static void brw_bind_blend_state(struct pipe_context *pipe, - void *blend) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Blend = (struct pipe_blend_state*)blend; - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - - -static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - free(blend); -} - -static void brw_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.BlendColor = *blend_color; - - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - -/************************************************************************ - * Sampler - */ - -static void * -brw_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - DUP( pipe_sampler_state, sampler ); -} - -static void brw_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) -{ - struct brw_context *brw = brw_context(pipe); - - assert(num <= PIPE_MAX_SAMPLERS); - - /* Check for no-op */ - if (num == brw->num_samplers && - !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) - return; - - memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); - memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * - sizeof(void *)); - - brw->num_samplers = num; - - brw->state.dirty.brw |= BRW_NEW_SAMPLER; -} - -static void brw_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - free(sampler); -} - - -/************************************************************************ - * Depth stencil - */ - -static void * -brw_create_depth_stencil_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) -{ - DUP( pipe_depth_stencil_alpha_state, depth_stencil ); -} - -static void brw_bind_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; - - brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; -} - -static void brw_delete_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - free(depth_stencil); -} - -/************************************************************************ - * Scissor - */ -static void brw_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct brw_context *brw = brw_context(pipe); - - memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); - brw->state.dirty.brw |= BRW_NEW_SCISSOR; -} - - -/************************************************************************ - * Stipple - */ - -static void brw_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ -} - - -/************************************************************************ - * Fragment shader - */ - -static void * brw_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); - - brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); - brw_fp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_fp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_fp->info2); -#endif - - tgsi_dump(shader->tokens, 0); - - - return (void *)brw_fp; -} - -static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; - brw->state.dirty.brw |= BRW_NEW_FS; -} - -static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; - - FREE((void *) brw_fp->program.tokens); - FREE(brw_fp); -} - - -/************************************************************************ - * Vertex shader and other TNL state - */ - -static void *brw_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); - - brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); - brw_vp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_vp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_vp->info2); -#endif - tgsi_dump(shader->tokens, 0); - - return (void *)brw_vp; -} - -static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; - brw->state.dirty.brw |= BRW_NEW_VS; - - debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); -} - -static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; - - FREE((void *) brw_vp->program.tokens); - FREE(brw_vp); -} - - -static void brw_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Clip = *clip; -} - - -static void brw_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Viewport = *viewport; /* struct copy */ - brw->state.dirty.brw |= BRW_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - //draw_set_viewport_state(brw->draw, viewport); -} - - -static void brw_set_vertex_buffers(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct brw_context *brw = brw_context(pipe); - memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); -} - -static void brw_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *elements) -{ - /* flush ? */ - struct brw_context *brw = brw_context(pipe); - uint i; - - assert(count <= PIPE_MAX_ATTRIBS); - - for (i = 0; i < count; i++) { - struct brw_vertex_element_state el; - memset(&el, 0, sizeof(el)); - - el.ve0.src_offset = elements[i].src_offset; - el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); - el.ve0.valid = 1; - el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; - - el.ve1.dst_offset = i * 4; - - el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; - - switch (elements[i].nr_components) { - case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } - - brw->vb.inputs[i] = el; - } -} - - - -/************************************************************************ - * Constant buffers - */ - -static void brw_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf) -{ - struct brw_context *brw = brw_context(pipe); - - assert(buf == 0 || index == 0); - - brw->attribs.Constants[shader] = buf; - brw->state.dirty.brw |= BRW_NEW_CONSTANTS; -} - - -/************************************************************************ - * Texture surfaces - */ - - -static void brw_set_sampler_textures(struct pipe_context *pipe, - unsigned num, - struct pipe_texture **texture) -{ - struct brw_context *brw = brw_context(pipe); - uint i; - - assert(num <= PIPE_MAX_SAMPLERS); - - /* Check for no-op */ - if (num == brw->num_textures && - !memcmp(brw->attribs.Texture, texture, num * - sizeof(struct pipe_texture *))) - return; - - for (i = 0; i < num; i++) - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], - texture[i]); - - for (i = num; i < brw->num_textures; i++) - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], - NULL); - - brw->num_textures = num; - - brw->state.dirty.brw |= BRW_NEW_TEXTURE; -} - - -/************************************************************************ - * Render targets, etc - */ - -static void brw_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FrameBuffer = *fb; /* struct copy */ - - brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; -} - - - -/************************************************************************ - * Rasterizer state - */ - -static void * -brw_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - DUP(pipe_rasterizer_state, rasterizer); -} - -static void brw_bind_rasterizer_state( struct pipe_context *pipe, - void *setup ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; - - /* Also pass-through to draw module: - */ - //draw_set_rasterizer_state(brw->draw, setup); - - brw->state.dirty.brw |= BRW_NEW_RASTERIZER; -} - -static void brw_delete_rasterizer_state(struct pipe_context *pipe, - void *setup) -{ - free(setup); -} - - - -void -brw_init_state_functions( struct brw_context *brw ) -{ - brw->pipe.create_blend_state = brw_create_blend_state; - brw->pipe.bind_blend_state = brw_bind_blend_state; - brw->pipe.delete_blend_state = brw_delete_blend_state; - - brw->pipe.create_sampler_state = brw_create_sampler_state; - brw->pipe.bind_sampler_states = brw_bind_sampler_states; - brw->pipe.delete_sampler_state = brw_delete_sampler_state; - - brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; - brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; - brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; - - brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; - brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; - brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; - brw->pipe.create_fs_state = brw_create_fs_state; - brw->pipe.bind_fs_state = brw_bind_fs_state; - brw->pipe.delete_fs_state = brw_delete_fs_state; - brw->pipe.create_vs_state = brw_create_vs_state; - brw->pipe.bind_vs_state = brw_bind_vs_state; - brw->pipe.delete_vs_state = brw_delete_vs_state; - - brw->pipe.set_blend_color = brw_set_blend_color; - brw->pipe.set_clip_state = brw_set_clip_state; - brw->pipe.set_constant_buffer = brw_set_constant_buffer; - brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; - -// brw->pipe.set_feedback_state = brw_set_feedback_state; -// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; - - brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; - brw->pipe.set_scissor_state = brw_set_scissor_state; - brw->pipe.set_sampler_textures = brw_set_sampler_textures; - brw->pipe.set_viewport_state = brw_set_viewport_state; - brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; - brw->pipe.set_vertex_elements = brw_set_vertex_elements; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Zack Rusin <zack@tungstengraphics.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_draw.h" + + +#define DUP( TYPE, VAL ) \ +do { \ + struct TYPE *x = malloc(sizeof(*x)); \ + memcpy(x, VAL, sizeof(*x) ); \ + return x; \ +} while (0) + +/************************************************************************ + * Blend + */ +static void * +brw_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + DUP( pipe_blend_state, blend ); +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Blend = (struct pipe_blend_state*)blend; + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + + +static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + free(blend); +} + +static void brw_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.BlendColor = *blend_color; + + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + +/************************************************************************ + * Sampler + */ + +static void * +brw_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + DUP( pipe_sampler_state, sampler ); +} + +static void brw_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct brw_context *brw = brw_context(pipe); + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_samplers && + !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) + return; + + memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); + memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * + sizeof(void *)); + + brw->num_samplers = num; + + brw->state.dirty.brw |= BRW_NEW_SAMPLER; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + free(sampler); +} + + +/************************************************************************ + * Depth stencil + */ + +static void * +brw_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + DUP( pipe_depth_stencil_alpha_state, depth_stencil ); +} + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + free(depth_stencil); +} + +/************************************************************************ + * Scissor + */ +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); + brw->state.dirty.brw |= BRW_NEW_SCISSOR; +} + + +/************************************************************************ + * Stipple + */ + +static void brw_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + +/************************************************************************ + * Fragment shader + */ + +static void * brw_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); + + brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); + brw_fp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_fp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_fp->info2); +#endif + + tgsi_dump(shader->tokens, 0); + + + return (void *)brw_fp; +} + +static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; + brw->state.dirty.brw |= BRW_NEW_FS; +} + +static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; + + FREE((void *) brw_fp->program.tokens); + FREE(brw_fp); +} + + +/************************************************************************ + * Vertex shader and other TNL state + */ + +static void *brw_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); + + brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); + brw_vp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_vp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_vp->info2); +#endif + tgsi_dump(shader->tokens, 0); + + return (void *)brw_vp; +} + +static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; + brw->state.dirty.brw |= BRW_NEW_VS; + + debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); +} + +static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; + + FREE((void *) brw_vp->program.tokens); + FREE(brw_vp); +} + + +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Clip = *clip; +} + + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Viewport = *viewport; /* struct copy */ + brw->state.dirty.brw |= BRW_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + //draw_set_viewport_state(brw->draw, viewport); +} + + +static void brw_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct brw_context *brw = brw_context(pipe); + memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); +} + +static void brw_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + /* flush ? */ + struct brw_context *brw = brw_context(pipe); + uint i; + + assert(count <= PIPE_MAX_ATTRIBS); + + for (i = 0; i < count; i++) { + struct brw_vertex_element_state el; + memset(&el, 0, sizeof(el)); + + el.ve0.src_offset = elements[i].src_offset; + el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); + el.ve0.valid = 1; + el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; + + el.ve1.dst_offset = i * 4; + + el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + + switch (elements[i].nr_components) { + case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; + case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; + case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + brw->vb.inputs[i] = el; + } +} + + + +/************************************************************************ + * Constant buffers + */ + +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + + assert(buf == 0 || index == 0); + + brw->attribs.Constants[shader] = buf; + brw->state.dirty.brw |= BRW_NEW_CONSTANTS; +} + + +/************************************************************************ + * Texture surfaces + */ + + +static void brw_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct brw_context *brw = brw_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_textures && + !memcmp(brw->attribs.Texture, texture, num * + sizeof(struct pipe_texture *))) + return; + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + texture[i]); + + for (i = num; i < brw->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + NULL); + + brw->num_textures = num; + + brw->state.dirty.brw |= BRW_NEW_TEXTURE; +} + + +/************************************************************************ + * Render targets, etc + */ + +static void brw_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FrameBuffer = *fb; /* struct copy */ + + brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; +} + + + +/************************************************************************ + * Rasterizer state + */ + +static void * +brw_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + DUP(pipe_rasterizer_state, rasterizer); +} + +static void brw_bind_rasterizer_state( struct pipe_context *pipe, + void *setup ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; + + /* Also pass-through to draw module: + */ + //draw_set_rasterizer_state(brw->draw, setup); + + brw->state.dirty.brw |= BRW_NEW_RASTERIZER; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + free(setup); +} + + + +void +brw_init_state_functions( struct brw_context *brw ) +{ + brw->pipe.create_blend_state = brw_create_blend_state; + brw->pipe.bind_blend_state = brw_bind_blend_state; + brw->pipe.delete_blend_state = brw_delete_blend_state; + + brw->pipe.create_sampler_state = brw_create_sampler_state; + brw->pipe.bind_sampler_states = brw_bind_sampler_states; + brw->pipe.delete_sampler_state = brw_delete_sampler_state; + + brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; + + brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; + brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; + brw->pipe.create_fs_state = brw_create_fs_state; + brw->pipe.bind_fs_state = brw_bind_fs_state; + brw->pipe.delete_fs_state = brw_delete_fs_state; + brw->pipe.create_vs_state = brw_create_vs_state; + brw->pipe.bind_vs_state = brw_bind_vs_state; + brw->pipe.delete_vs_state = brw_delete_vs_state; + + brw->pipe.set_blend_color = brw_set_blend_color; + brw->pipe.set_clip_state = brw_set_clip_state; + brw->pipe.set_constant_buffer = brw_set_constant_buffer; + brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; + +// brw->pipe.set_feedback_state = brw_set_feedback_state; +// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; + + brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; + brw->pipe.set_scissor_state = brw_set_scissor_state; + brw->pipe.set_sampler_textures = brw_set_sampler_textures; + brw->pipe.set_viewport_state = brw_set_viewport_state; + brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; + brw->pipe.set_vertex_elements = brw_set_vertex_elements; +} -- cgit v1.2.3 From fa4b2439d4f240a5e573d4ea198b829791d614f4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 27 May 2008 01:22:22 +1000 Subject: nouveau: very quick port to tex-surface changes. probably the last match-gallium-upstream merge for a bit, some cleanup+nv50 work coming RSN... --- src/gallium/drivers/nv10/nv10_context.c | 1 - src/gallium/drivers/nv10/nv10_context.h | 1 - src/gallium/drivers/nv10/nv10_miptree.c | 11 +++++++---- src/gallium/drivers/nv10/nv10_screen.c | 25 +++++++++++++++++++++++++ src/gallium/drivers/nv30/nv30_context.c | 1 - src/gallium/drivers/nv30/nv30_context.h | 1 - src/gallium/drivers/nv30/nv30_miptree.c | 20 ++++++++------------ src/gallium/drivers/nv30/nv30_screen.c | 25 +++++++++++++++++++++++++ src/gallium/drivers/nv40/nv40_context.c | 1 - src/gallium/drivers/nv40/nv40_context.h | 1 - src/gallium/drivers/nv40/nv40_miptree.c | 20 ++++++++------------ src/gallium/drivers/nv40/nv40_screen.c | 25 +++++++++++++++++++++++++ src/gallium/drivers/nv50/nv50_context.c | 1 - src/gallium/drivers/nv50/nv50_context.h | 1 - src/gallium/drivers/nv50/nv50_miptree.c | 25 +++++++++++-------------- src/gallium/drivers/nv50/nv50_screen.c | 25 +++++++++++++++++++++++++ 16 files changed, 134 insertions(+), 50 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 79253f8a2b3..9fcd0b0fc3a 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -285,7 +285,6 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) nv10_init_surface_functions(nv10); nv10_init_state_functions(nv10); - nv10_init_miptree_functions(nv10); nv10->draw = draw_create(); assert(nv10->draw); diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 433d04dc2a8..5636dfc9d22 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -109,7 +109,6 @@ nv10_context(struct pipe_context *pipe) extern void nv10_init_state_functions(struct nv10_context *nv10); extern void nv10_init_surface_functions(struct nv10_context *nv10); -extern void nv10_init_miptree_functions(struct nv10_context *nv10); extern void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen); diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index 4dfc675a6b9..1b9947354d6 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -52,7 +52,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) } static struct pipe_texture * -nv10_miptree_create(struct pipe_screen *screen, struct pipe_texture *pt) +nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) { struct pipe_winsys *ws = screen->winsys; struct nv10_miptree *mt; @@ -105,7 +105,8 @@ nv10_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, static struct pipe_surface * nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) + unsigned face, unsigned level, unsigned zslice, + unsigned flags) { struct pipe_winsys *ws = screen->winsys; struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt; @@ -130,9 +131,10 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, return ps; } -void nv10_init_miptree_functions(struct nv10_context *nv10) +static void +nv10_miptree_surface_release(struct pipe_screen *screen, + struct pipe_surface **surface) { - nv10->pipe.texture_update = nv10_miptree_update; } void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen) @@ -140,5 +142,6 @@ void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen) pscreen->texture_create = nv10_miptree_create; pscreen->texture_release = nv10_miptree_release; pscreen->get_tex_surface = nv10_miptree_surface_get; + pscreen->tex_surface_release = nv10_miptree_surface_release; } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 5fe3a030811..67787d8e9ce 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -117,6 +117,28 @@ nv10_screen_is_format_supported(struct pipe_screen *screen, return FALSE; } +static void * +nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + + map = ws->buffer_map(ws, surface->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + + ws->buffer_unmap(ws, surface->buffer); +} + static void nv10_screen_destroy(struct pipe_screen *pscreen) { @@ -180,6 +202,9 @@ nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) screen->pipe.is_format_supported = nv10_screen_is_format_supported; + screen->pipe.surface_map = nv10_surface_map; + screen->pipe.surface_unmap = nv10_surface_unmap; + nv10_screen_init_miptree_functions(&screen->pipe); return &screen->pipe; diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 7a2fee78751..b2d9d3f1814 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -66,7 +66,6 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) nv30_init_query_functions(nv30); nv30_init_surface_functions(nv30); nv30_init_state_functions(nv30); - nv30_init_miptree_functions(nv30); /* Create, configure, and install fallback swtnl path */ nv30->draw = draw_create(); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index f49450f82d9..333bd4875c6 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -134,7 +134,6 @@ nv30_context(struct pipe_context *pipe) extern void nv30_init_state_functions(struct nv30_context *nv30); extern void nv30_init_surface_functions(struct nv30_context *nv30); -extern void nv30_init_miptree_functions(struct nv30_context *nv30); extern void nv30_init_query_functions(struct nv30_context *nv30); extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 10ab46e19a5..6078b1865e2 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -100,15 +100,10 @@ nv30_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) } } -static void -nv30_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, - uint face, uint levels) -{ -} - static struct pipe_surface * -nv30_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) +nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) { struct pipe_winsys *ws = pscreen->winsys; struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; @@ -136,10 +131,10 @@ nv30_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, return ps; } -void -nv30_init_miptree_functions(struct nv30_context *nv30) +static void +nv30_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) { - nv30->pipe.texture_update = nv30_miptree_update; } void @@ -147,6 +142,7 @@ nv30_screen_init_miptree_functions(struct pipe_screen *pscreen) { pscreen->texture_create = nv30_miptree_create; pscreen->texture_release = nv30_miptree_release; - pscreen->get_tex_surface = nv30_miptree_surface; + pscreen->get_tex_surface = nv30_miptree_surface_new; + pscreen->tex_surface_release = nv30_miptree_surface_del; } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index bb77776ff15..9c576369891 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -124,6 +124,28 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, return FALSE; } +static void * +nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + + map = ws->buffer_map(ws, surface->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + + ws->buffer_unmap(ws, surface->buffer); +} + static void nv30_screen_destroy(struct pipe_screen *pscreen) { @@ -300,6 +322,9 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) screen->pipe.is_format_supported = nv30_screen_surface_format_supported; + screen->pipe.surface_map = nv30_surface_map; + screen->pipe.surface_unmap = nv30_surface_unmap; + nv30_screen_init_miptree_functions(&screen->pipe); return &screen->pipe; diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index d9d9accea8d..a40f14895f4 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -59,7 +59,6 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) nv40_init_query_functions(nv40); nv40_init_surface_functions(nv40); nv40_init_state_functions(nv40); - nv40_init_miptree_functions(nv40); /* Create, configure, and install fallback swtnl path */ nv40->draw = draw_create(); diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 77b3da0ab91..d8d1891dff0 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -172,7 +172,6 @@ struct nv40_state_entry { extern void nv40_init_state_functions(struct nv40_context *nv40); extern void nv40_init_surface_functions(struct nv40_context *nv40); -extern void nv40_init_miptree_functions(struct nv40_context *nv40); extern void nv40_init_query_functions(struct nv40_context *nv40); extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 1b192172232..23da6e36a3e 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -100,15 +100,10 @@ nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) } } -static void -nv40_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, - uint face, uint levels) -{ -} - static struct pipe_surface * -nv40_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) +nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) { struct pipe_winsys *ws = pscreen->winsys; struct nv40_miptree *nv40mt = (struct nv40_miptree *)pt; @@ -136,10 +131,10 @@ nv40_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, return ps; } -void -nv40_init_miptree_functions(struct nv40_context *nv40) +static void +nv40_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) { - nv40->pipe.texture_update = nv40_miptree_update; } void @@ -147,6 +142,7 @@ nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) { pscreen->texture_create = nv40_miptree_create; pscreen->texture_release = nv40_miptree_release; - pscreen->get_tex_surface = nv40_miptree_surface; + pscreen->get_tex_surface = nv40_miptree_surface_new; + pscreen->tex_surface_release = nv40_miptree_surface_del; } diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 51640533938..ed0215b4862 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -133,6 +133,28 @@ nv40_screen_surface_format_supported(struct pipe_screen *pscreen, return FALSE; } +static void * +nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + + map = ws->buffer_map(ws, surface->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + + ws->buffer_unmap(ws, surface->buffer); +} + static void nv40_screen_destroy(struct pipe_screen *pscreen) { @@ -282,6 +304,9 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) screen->pipe.is_format_supported = nv40_screen_surface_format_supported; + screen->pipe.surface_map = nv40_surface_map; + screen->pipe.surface_unmap = nv40_surface_unmap; + nv40_screen_init_miptree_functions(&screen->pipe); return &screen->pipe; diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 6eb1878b847..a225c4bf728 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -55,7 +55,6 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) nv50->pipe.flush = nv50_flush; - nv50_init_miptree_functions(nv50); nv50_init_surface_functions(nv50); nv50_init_state_functions(nv50); nv50_init_query_functions(nv50); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index f532fa6bfb3..e68c702deae 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -71,7 +71,6 @@ nv50_context(struct pipe_context *pipe) return (struct nv50_context *)pipe; } -extern void nv50_init_miptree_functions(struct nv50_context *nv50); extern void nv50_init_surface_functions(struct nv50_context *nv50); extern void nv50_init_state_functions(struct nv50_context *nv50); extern void nv50_init_query_functions(struct nv50_context *nv50); diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 58584934b18..ccb916d6acc 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -56,8 +56,9 @@ nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) } static struct pipe_surface * -nv50_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) +nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) { struct pipe_winsys *ws = pscreen->winsys; struct nv50_miptree *mt = nv50_miptree(pt); @@ -80,22 +81,18 @@ nv50_miptree_surface(struct pipe_screen *pscreen, struct pipe_texture *pt, return ps; } -void -nv50_screen_init_miptree_functions(struct pipe_screen *pscreen) -{ - pscreen->texture_create = nv50_miptree_create; - pscreen->texture_release = nv50_miptree_release; - pscreen->get_tex_surface = nv50_miptree_surface; -} - static void -nv50_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, - uint face, uint levels) +nv50_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) { } void -nv50_init_miptree_functions(struct nv50_context *nv50) +nv50_screen_init_miptree_functions(struct pipe_screen *pscreen) { - nv50->pipe.texture_update = nv50_miptree_update; + pscreen->texture_create = nv50_miptree_create; + pscreen->texture_release = nv50_miptree_release; + pscreen->get_tex_surface = nv50_miptree_surface_new; + pscreen->tex_surface_release = nv50_miptree_surface_del; } + diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index d069639dc48..29c057a1453 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -117,6 +117,28 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, int param) } } +static void * +nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + + map = ws->buffer_map(ws, surface->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv50_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + + ws->buffer_unmap(ws, surface->buffer); +} + static void nv50_screen_destroy(struct pipe_screen *pscreen) { @@ -209,6 +231,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) screen->pipe.is_format_supported = nv50_screen_is_format_supported; + screen->pipe.surface_map = nv50_surface_map; + screen->pipe.surface_unmap = nv50_surface_unmap; + nv50_screen_init_miptree_functions(&screen->pipe); return &screen->pipe; -- cgit v1.2.3 From b7b9ce0f8677993c3cd5376add72a684a5653341 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Tue, 27 May 2008 23:23:37 +0100 Subject: softpipe: enable vbuf by default The non-vbuf option is going away... --- src/gallium/drivers/softpipe/sp_context.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index a48e5461391..ed9322a1098 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -227,11 +227,13 @@ softpipe_create( struct pipe_screen *screen, if (GETENV( "SP_NO_RAST" ) != NULL) softpipe->no_rast = TRUE; - if (GETENV( "SP_VBUF" ) != NULL) { - sp_init_vbuf(softpipe); + if (GETENV( "SP_NO_VBUF" ) != NULL) { + /* Deprecated path -- vbuf is the intended interface to the draw module: + */ + draw_set_rasterize_stage(softpipe->draw, softpipe->setup); } else { - draw_set_rasterize_stage(softpipe->draw, softpipe->setup); + sp_init_vbuf(softpipe); } /* plug in AA line/point stages */ -- cgit v1.2.3 From 08130512b9961da76a6385403d56387125df5e8c Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Tue, 27 May 2008 13:18:25 +0200 Subject: i915: Made vbuf work --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 248 +++++++++++++++++++++-- src/gallium/winsys/dri/intel/intel_winsys_pipe.c | 4 +- 2 files changed, 228 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 7fb2adbb53b..81293d0d1f8 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -62,8 +62,14 @@ struct i915_vbuf_render { /** Vertex size in bytes */ unsigned vertex_size; + /** Software primitive */ + unsigned prim; + /** Hardware primitive */ unsigned hwprim; + + /** Genereate a vertex list */ + unsigned fallback; }; @@ -95,8 +101,8 @@ i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) static void * i915_vbuf_render_allocate_vertices( struct vbuf_render *render, - ushort vertex_size, - ushort nr_vertices ) + ushort vertex_size, + ushort nr_vertices ) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; @@ -107,9 +113,9 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, assert(!i915->vbo); i915->vbo = winsys->buffer_create(winsys, 64, I915_BUFFER_USAGE_LIT_VERTEX, size); - + i915->dirty |= I915_NEW_VBO; - + return winsys->buffer_map(winsys, i915->vbo, PIPE_BUFFER_USAGE_CPU_WRITE); @@ -121,16 +127,36 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, unsigned prim ) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - + i915_render->prim = prim; + switch(prim) { case PIPE_PRIM_POINTS: i915_render->hwprim = PRIM3D_POINTLIST; + i915_render->fallback = 0; return TRUE; case PIPE_PRIM_LINES: i915_render->hwprim = PRIM3D_LINELIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_LINE_STRIP: + i915_render->hwprim = PRIM3D_LINESTRIP; + i915_render->fallback = 0; return TRUE; case PIPE_PRIM_TRIANGLES: i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + i915_render->hwprim = PRIM3D_TRISTRIP; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_QUADS: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = PIPE_PRIM_QUADS; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = PIPE_PRIM_QUAD_STRIP; return TRUE; default: /* Actually, can handle a lot more just fine... Fixme. @@ -140,6 +166,179 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, } + +/** + * Used for fallbacks in draw_arrays + */ +static void +draw_arrays_generate_indices( struct vbuf_render *render, + unsigned start, uint nr, + unsigned type ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + unsigned end = start + nr; + switch(type) { + case 0: + for (i = start; i+1 < end; i += 2) + OUT_BATCH( (i+0) | (i+1) << 16 ); + if (i < end) + OUT_BATCH( i ); + break; + case PIPE_PRIM_QUADS: + for (i = start; i + 3 < end; i += 4) { + OUT_BATCH( (i+0) | (i+1) << 16 ); + OUT_BATCH( (i+3) | (i+1) << 16 ); + OUT_BATCH( (i+2) | (i+3) << 16 ); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = start; i + 3 < end; i += 2) { + OUT_BATCH( (i+0) | (i+1) << 16 ); + OUT_BATCH( (i+3) | (i+2) << 16 ); + OUT_BATCH( (i+0) | (i+3) << 16 ); + } + break; + default: + assert(0); + } +} + +static unsigned +draw_arrays_calc_nr_indices( uint nr, unsigned type ) +{ + switch (type) { + case 0: + return nr; + case PIPE_PRIM_QUADS: + return (nr / 4) * 6; + case PIPE_PRIM_QUAD_STRIP: + return ((nr - 2) / 2) * 6; + default: + assert(0); + return 0; + } +} + +static void +draw_arrays_fallback( struct vbuf_render *render, + unsigned start, + uint nr ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct pipe_winsys *winsys = i915->pipe.winsys; + unsigned nr_indices; + + winsys->buffer_unmap( winsys, i915->vbo ); + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + nr_indices = draw_arrays_calc_nr_indices( nr, i915_render->fallback ); + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + assert(0); + goto out; + } + } + OUT_BATCH( _3DPRIMITIVE | + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices ); + + draw_arrays_generate_indices( render, start, nr, i915_render->fallback ); +out: + winsys->buffer_map( winsys, i915->vbo, PIPE_BUFFER_USAGE_CPU_WRITE ); +} + +static void +i915_vbuf_render_draw_arrays( struct vbuf_render *render, + unsigned start, + uint nr ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + + if (i915_render->fallback) { + draw_arrays_fallback( render, start, nr ); + return; + } + + /* JB: TODO submit direct cmds */ + draw_arrays_fallback( render, start, nr ); +} + +/** + * Used for normal and fallback emitting of indices + * If type is zero normal operation assumed. + */ +static void +draw_generate_indices( struct vbuf_render *render, + const ushort *indices, + uint nr_indices, + unsigned type ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + + switch(type) { + case 0: + for (i = 0; i + 1 < nr_indices; i += 2) { + OUT_BATCH( (indices[i] & 0x0FFF) | ((indices[i+1] & 0x0FFF) << 16) ); + } + if (i < nr_indices) { + OUT_BATCH( indices[i] & 0x0FFF ); + } + break; + case PIPE_PRIM_QUADS: + for (i = 0; i + 3 < nr_indices; i += 4) { + OUT_BATCH( (indices[i+0] & 0x0FFF) | (indices[i+1] & 0x0FFF) << 16 ); + OUT_BATCH( (indices[i+3] & 0x0FFF) | (indices[i+1] & 0x0FFF) << 16 ); + OUT_BATCH( (indices[i+2] & 0x0FFF) | (indices[i+3] & 0x0FFF) << 16 ); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i + 3 < nr_indices; i += 2) { + OUT_BATCH( (indices[i+0] & 0x0FFF) | (indices[i+1] & 0x0FFF) << 16 ); + OUT_BATCH( (indices[i+3] & 0x0FFF) | (indices[i+2] & 0x0FFF) << 16 ); + OUT_BATCH( (indices[i+0] & 0x0FFF) | (indices[i+3] & 0x0FFF) << 16 ); + } + break; + default: + assert(0); + break; + } +} + +static unsigned +draw_calc_nr_indices( uint nr_indices, unsigned type ) +{ + switch (type) { + case 0: + return nr_indices; + case PIPE_PRIM_QUADS: + return (nr_indices / 4) * 6; + case PIPE_PRIM_QUAD_STRIP: + return ((nr_indices - 2) / 2) * 6; + default: + assert(0); + return 0; + } +} + static void i915_vbuf_render_draw( struct vbuf_render *render, const ushort *indices, @@ -147,13 +346,15 @@ i915_vbuf_render_draw( struct vbuf_render *render, { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - unsigned i; + struct pipe_winsys *winsys = i915->pipe.winsys; + unsigned save_nr_indices; + + save_nr_indices = nr_indices; + nr_indices = draw_calc_nr_indices( nr_indices, i915_render->fallback ); assert(nr_indices); + winsys->buffer_unmap( winsys, i915->vbo ); - /* this seems to be bogus, since we validate state right after this */ - /*assert((i915->dirty & ~I915_NEW_VBO) == 0);*/ - if (i915->dirty) i915_update_derived( i915 ); @@ -170,22 +371,23 @@ i915_vbuf_render_draw( struct vbuf_render *render, if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { assert(0); - return; + goto out; } } OUT_BATCH( _3DPRIMITIVE | - PRIM_INDIRECT | - i915_render->hwprim | - PRIM_INDIRECT_ELTS | - nr_indices ); - for (i = 0; i + 1 < nr_indices; i += 2) { - OUT_BATCH( indices[i] | - (indices[i + 1] << 16) ); - } - if (i < nr_indices) { - OUT_BATCH( indices[i] ); - } + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices ); + draw_generate_indices( render, + indices, + save_nr_indices, + i915_render->fallback ); + +out: + winsys->buffer_map( winsys, i915->vbo, PIPE_BUFFER_USAGE_CPU_WRITE ); + return; } @@ -234,6 +436,7 @@ i915_vbuf_render_create( struct i915_context *i915 ) i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; i915_render->base.set_primitive = i915_vbuf_render_set_primitive; i915_render->base.draw = i915_vbuf_render_draw; + i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays; i915_render->base.release_vertices = i915_vbuf_render_release_vertices; i915_render->base.destroy = i915_vbuf_render_destroy; @@ -258,6 +461,7 @@ struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ) render->destroy(render); return NULL; } - + draw_set_render(i915->draw, render); + return stage; } diff --git a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c index 10e1a84cc12..059b16be3b1 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c @@ -328,8 +328,8 @@ intel_create_pipe_winsys( int fd, struct _DriFreeSlabManager *fMan ) DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | DRM_BO_FLAG_MEM_TT, - 32 * 4096, - 1, 40, 32 * 4096 * 2, 0, + 128, + 6, 120, 32 * 4096, 0, fMan); } -- cgit v1.2.3 From cb5e05d99c40d4f7ab1ecbb42a6390caf3966ba4 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Tue, 27 May 2008 19:00:16 +0200 Subject: i915: Made vertex submission eaven faster --- src/gallium/drivers/i915simple/i915_context.h | 2 + src/gallium/drivers/i915simple/i915_flush.c | 1 + src/gallium/drivers/i915simple/i915_prim_vbuf.c | 66 +++++++++++++++++----- .../drivers/i915simple/i915_state_immediate.c | 2 +- src/gallium/winsys/dri/intel/intel_winsys_pipe.c | 8 +-- 5 files changed, 61 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 53fc5ed0795..2da90ae49d9 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -245,6 +245,8 @@ struct i915_context /** Vertex buffer */ struct pipe_buffer *vbo; + size_t vbo_offset; + unsigned vbo_flushed; struct i915_state current; unsigned hardware_dirty; diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c index 7d23e6b6b90..4c4718d68e1 100644 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -68,6 +68,7 @@ static void i915_flush( struct pipe_context *pipe, /* If there are no flags, just flush pending commands to hardware: */ FLUSH_BATCH(fence); + i915->vbo_flushed = 1; } diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 81293d0d1f8..4f36c2a22af 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -70,6 +70,13 @@ struct i915_vbuf_render { /** Genereate a vertex list */ unsigned fallback; + + /* Stuff for the vbo */ + struct pipe_buffer *vbo; + size_t vbo_size; + size_t vbo_offset; + void *vbo_ptr; + size_t vbo_alloc_size; }; @@ -111,14 +118,31 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, /* FIXME: handle failure */ assert(!i915->vbo); - i915->vbo = winsys->buffer_create(winsys, 64, I915_BUFFER_USAGE_LIT_VERTEX, - size); + if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { + } else { + i915->vbo_flushed = 0; + pipe_buffer_reference(winsys, &i915_render->vbo, NULL); + } + + if (!i915_render->vbo) { + i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); + i915_render->vbo_offset = 0; + i915_render->vbo = winsys->buffer_create(winsys, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = winsys->buffer_map(winsys, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + winsys->buffer_unmap(winsys, i915_render->vbo); + } + + i915->vbo = i915_render->vbo; + i915->vbo_offset = i915_render->vbo_offset; i915->dirty |= I915_NEW_VBO; - return winsys->buffer_map(winsys, - i915->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); + return i915_render->vbo_ptr + i915->vbo_offset; } @@ -231,7 +255,6 @@ draw_arrays_fallback( struct vbuf_render *render, struct pipe_winsys *winsys = i915->pipe.winsys; unsigned nr_indices; - winsys->buffer_unmap( winsys, i915->vbo ); if (i915->dirty) i915_update_derived( i915 ); @@ -247,6 +270,7 @@ draw_arrays_fallback( struct vbuf_render *render, */ i915_update_derived( i915 ); i915_emit_hardware_state( i915 ); + i915->vbo_flushed = 1; if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { assert(0); @@ -260,8 +284,9 @@ draw_arrays_fallback( struct vbuf_render *render, nr_indices ); draw_arrays_generate_indices( render, start, nr, i915_render->fallback ); + out: - winsys->buffer_map( winsys, i915->vbo, PIPE_BUFFER_USAGE_CPU_WRITE ); + return; } static void @@ -353,7 +378,6 @@ i915_vbuf_render_draw( struct vbuf_render *render, nr_indices = draw_calc_nr_indices( nr_indices, i915_render->fallback ); assert(nr_indices); - winsys->buffer_unmap( winsys, i915->vbo ); if (i915->dirty) i915_update_derived( i915 ); @@ -368,6 +392,7 @@ i915_vbuf_render_draw( struct vbuf_render *render, */ i915_update_derived( i915 ); i915_emit_hardware_state( i915 ); + i915->vbo_flushed = 1; if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { assert(0); @@ -386,7 +411,6 @@ i915_vbuf_render_draw( struct vbuf_render *render, i915_render->fallback ); out: - winsys->buffer_map( winsys, i915->vbo, PIPE_BUFFER_USAGE_CPU_WRITE ); return; } @@ -400,10 +424,13 @@ i915_vbuf_render_release_vertices( struct vbuf_render *render, struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; struct pipe_winsys *winsys = i915->pipe.winsys; + size_t size = (size_t)vertex_size * (size_t)vertices_used; assert(i915->vbo); - winsys->buffer_unmap(winsys, i915->vbo); - pipe_buffer_reference(winsys, &i915->vbo, NULL); + + i915_render->vbo_offset += size; + i915->vbo = NULL; + i915->dirty |= I915_NEW_VBO; } @@ -422,6 +449,7 @@ static struct vbuf_render * i915_vbuf_render_create( struct i915_context *i915 ) { struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); + struct pipe_winsys *winsys = i915->pipe.winsys; i915_render->i915 = i915; @@ -431,7 +459,7 @@ i915_vbuf_render_create( struct i915_context *i915 ) * batch buffer. */ i915_render->base.max_indices = 16*1024; - + i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info; i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; i915_render->base.set_primitive = i915_vbuf_render_set_primitive; @@ -439,7 +467,19 @@ i915_vbuf_render_create( struct i915_context *i915 ) i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays; i915_render->base.release_vertices = i915_vbuf_render_release_vertices; i915_render->base.destroy = i915_vbuf_render_destroy; - + + i915_render->vbo_alloc_size = 128 * 4096; + i915_render->vbo_size = i915_render->vbo_alloc_size; + i915_render->vbo_offset = 0; + i915_render->vbo = winsys->buffer_create(winsys, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = winsys->buffer_map(winsys, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + winsys->buffer_unmap(winsys, i915_render->vbo); + return &i915_render->base; } diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c index dfbbcab624a..704ea4d838c 100644 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -54,7 +54,7 @@ static void upload_S0S1(struct i915_context *i915) /* INTEL_NEW_VBO */ /* TODO: re-use vertex buffers here? */ - LIS0 = 0; + LIS0 = i915->vbo_offset; /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! */ diff --git a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c index 059b16be3b1..fb8f44c8457 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c @@ -224,7 +224,6 @@ intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, unsigned tex_usage) { const unsigned alignment = 64; - //int ret; surf->width = width; surf->height = height; @@ -235,7 +234,8 @@ intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, assert(!surf->buffer); surf->buffer = winsys->buffer_create(winsys, alignment, PIPE_BUFFER_USAGE_PIXEL, - surf->pitch * surf->cpp * height); + surf->pitch * surf->cpp * surf->height); + if(!surf->buffer) return -1; @@ -328,8 +328,8 @@ intel_create_pipe_winsys( int fd, struct _DriFreeSlabManager *fMan ) DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | DRM_BO_FLAG_MEM_TT, - 128, - 6, 120, 32 * 4096, 0, + 128 * 4096, + 1, 120, 128 * 4096 * 4, 0, fMan); } -- cgit v1.2.3 From ce56bcb640072496809d2aefede5a32fe9256ace Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Tue, 27 May 2008 21:15:36 +0200 Subject: i915: Prepare for tiled private front and back buffers --- src/gallium/drivers/i915simple/i915_state_emit.c | 1 + src/gallium/winsys/dri/intel/intel_winsys_pipe.c | 26 ++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 6f947d43468..8bcc26ad356 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -217,6 +217,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(pitch) | /* pitch in bytes */ +// BUF_3D_TILED_SURFACE); /* JB: Used to force tileing */ BUF_3D_USE_FENCE); OUT_RELOC(cbuf_surface->buffer, diff --git a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c index fb8f44c8457..2e6c452cd1f 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c @@ -212,6 +212,15 @@ round_up(unsigned n, unsigned multiple) return (n + multiple - 1) & ~(multiple - 1); } +static unsigned +power_of_two(unsigned x) +{ + int value = 1; + while (value <= x) + value = value << 1; + return value; +} + /*pipe_buffer_reference(winsys, &i915_render->vbo, NULL);*/ /** * Copied from xm_winsys.c */ @@ -224,18 +233,31 @@ intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, unsigned tex_usage) { const unsigned alignment = 64; + assert(!surf->buffer); +#if 0 + surf->width = width; + surf->height = round_up(height, 8); + surf->format = format; + surf->cpp = pf_get_size(format); + surf->pitch = power_of_two(MAX2(width * surf->cpp, 512)); + surf->buffer = winsys->buffer_create(winsys, 2*4096, + PIPE_BUFFER_USAGE_PIXEL, + surf->pitch * surf->height); + + surf->pitch = surf->pitch / surf->cpp; + surf->height = height; +#else surf->width = width; surf->height = height; surf->format = format; surf->cpp = pf_get_size(format); surf->pitch = round_up(width, alignment / surf->cpp); - assert(!surf->buffer); surf->buffer = winsys->buffer_create(winsys, alignment, PIPE_BUFFER_USAGE_PIXEL, surf->pitch * surf->cpp * surf->height); - +#endif if(!surf->buffer) return -1; -- cgit v1.2.3 From 4b1f382e13ba2d8ad08bdf8dac1738ad1d22acf3 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Wed, 28 May 2008 14:22:07 +0200 Subject: i915: Remove workaround for buggy draw module --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 4f36c2a22af..faf5971abbb 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -322,24 +322,24 @@ draw_generate_indices( struct vbuf_render *render, switch(type) { case 0: for (i = 0; i + 1 < nr_indices; i += 2) { - OUT_BATCH( (indices[i] & 0x0FFF) | ((indices[i+1] & 0x0FFF) << 16) ); + OUT_BATCH( indices[i] | indices[i+1] << 16 ); } if (i < nr_indices) { - OUT_BATCH( indices[i] & 0x0FFF ); + OUT_BATCH( indices[i] ); } break; case PIPE_PRIM_QUADS: for (i = 0; i + 3 < nr_indices; i += 4) { - OUT_BATCH( (indices[i+0] & 0x0FFF) | (indices[i+1] & 0x0FFF) << 16 ); - OUT_BATCH( (indices[i+3] & 0x0FFF) | (indices[i+1] & 0x0FFF) << 16 ); - OUT_BATCH( (indices[i+2] & 0x0FFF) | (indices[i+3] & 0x0FFF) << 16 ); + OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+3] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+2] | indices[i+3] << 16 ); } break; case PIPE_PRIM_QUAD_STRIP: for (i = 0; i + 3 < nr_indices; i += 2) { - OUT_BATCH( (indices[i+0] & 0x0FFF) | (indices[i+1] & 0x0FFF) << 16 ); - OUT_BATCH( (indices[i+3] & 0x0FFF) | (indices[i+2] & 0x0FFF) << 16 ); - OUT_BATCH( (indices[i+0] & 0x0FFF) | (indices[i+3] & 0x0FFF) << 16 ); + OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+3] | indices[i+2] << 16 ); + OUT_BATCH( indices[i+0] | indices[i+3] << 16 ); } break; default: -- cgit v1.2.3 From 276552c0dd8d9d68b8324c42b05c768c45a9db76 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Wed, 28 May 2008 14:52:10 +0200 Subject: i915: Add draw_flush to state changes --- src/gallium/drivers/i915simple/i915_state.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index e6c4671700e..39d5d5e9d65 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -41,7 +41,6 @@ #include "i915_state_inlines.h" #include "i915_fpc.h" - /* The i915 (and related graphics cores) do not support GL_CLAMP. The * Intel drivers for "other operating systems" implement GL_CLAMP as * GL_CLAMP_TO_EDGE, so the same is done here. @@ -178,6 +177,7 @@ static void i915_bind_blend_state(struct pipe_context *pipe, void *blend) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); i915->blend = (struct i915_blend_state*)blend; @@ -194,6 +194,7 @@ static void i915_set_blend_color( struct pipe_context *pipe, const struct pipe_blend_color *blend_color ) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); i915->blend_color = *blend_color; @@ -280,6 +281,8 @@ static void i915_bind_sampler_states(struct pipe_context *pipe, !memcmp(i915->sampler, sampler, num * sizeof(void *))) return; + draw_flush(i915->draw); + for (i = 0; i < num; ++i) i915->sampler[i] = sampler[i]; for (i = num; i < PIPE_MAX_SAMPLERS; ++i) @@ -398,6 +401,7 @@ static void i915_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); i915->depth_stencil = (const struct i915_depth_stencil_state *)depth_stencil; @@ -415,6 +419,7 @@ static void i915_set_scissor_state( struct pipe_context *pipe, const struct pipe_scissor_state *scissor ) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); memcpy( &i915->scissor, scissor, sizeof(*scissor) ); i915->dirty |= I915_NEW_SCISSOR; @@ -451,6 +456,7 @@ static void i915_bind_fs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); i915->fs = (struct i915_fragment_shader*) shader; @@ -506,6 +512,7 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); struct pipe_winsys *ws = pipe->winsys; + draw_flush(i915->draw); assert(shader < PIPE_SHADER_TYPES); assert(index == 0); @@ -574,6 +581,7 @@ static void i915_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); i915->framebuffer = *fb; /* struct copy */ @@ -586,6 +594,7 @@ static void i915_set_clip_state( struct pipe_context *pipe, const struct pipe_clip_state *clip ) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); draw_set_clip_state(i915->draw, clip); @@ -698,6 +707,10 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffers) { struct i915_context *i915 = i915_context(pipe); + /* Because we change state before the draw_set_vertex_buffers call + * we need a flush here, just to be sure. + */ + draw_flush(i915->draw); memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); i915->num_vertex_buffers = count; @@ -711,6 +724,11 @@ static void i915_set_vertex_elements(struct pipe_context *pipe, const struct pipe_vertex_element *elements) { struct i915_context *i915 = i915_context(pipe); + /* Because we change state before the draw_set_vertex_buffers call + * we need a flush here, just to be sure. + */ + draw_flush(i915->draw); + i915->num_vertex_elements = count; /* pass-through to draw module */ draw_set_vertex_elements(i915->draw, count, elements); -- cgit v1.2.3 From 82605d7bcd533d7c96cc619c45970efd7229dc3b Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 29 May 2008 11:46:43 +0100 Subject: draw: draw_range_elements trial --- src/gallium/auxiliary/draw/draw_context.c | 20 +++- src/gallium/auxiliary/draw/draw_context.h | 10 +- src/gallium/auxiliary/draw/draw_private.h | 2 + src/gallium/auxiliary/draw/draw_pt.h | 9 ++ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 48 ++++++++ .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 49 ++++++++ .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 80 +++++++++++++- src/gallium/auxiliary/draw/draw_pt_vcache.c | 123 ++++++++++++++++++++- src/gallium/drivers/softpipe/sp_context.c | 1 + src/gallium/drivers/softpipe/sp_draw_arrays.c | 31 +++++- src/gallium/drivers/softpipe/sp_state.h | 7 ++ src/gallium/include/pipe/p_context.h | 14 +++ src/mesa/state_tracker/st_draw.c | 27 ++++- 13 files changed, 404 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 8509baf8654..bcec85c2ef8 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -348,14 +348,30 @@ void draw_set_edgeflags( struct draw_context *draw, * \param elements the element buffer ptr */ void -draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, void *elements ) +draw_set_mapped_element_buffer_range( struct draw_context *draw, + unsigned eltSize, + unsigned min_index, + unsigned max_index, + void *elements ) { draw->pt.user.elts = elements; draw->pt.user.eltSize = eltSize; + draw->pt.user.min_index = min_index; + draw->pt.user.max_index = max_index; } +void +draw_set_mapped_element_buffer( struct draw_context *draw, + unsigned eltSize, + void *elements ) +{ + draw->pt.user.elts = elements; + draw->pt.user.eltSize = eltSize; + draw->pt.user.min_index = 0; + draw->pt.user.max_index = 0xffffffff; +} + /* Revamp me please: */ diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index c5c3d3b09e0..8dd03cb79ef 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -118,8 +118,16 @@ void draw_set_vertex_elements(struct draw_context *draw, unsigned count, const struct pipe_vertex_element *elements); +void +draw_set_mapped_element_buffer_range( struct draw_context *draw, + unsigned eltSize, + unsigned min_index, + unsigned max_index, + void *elements ); + void draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, void *elements ); + unsigned eltSize, + void *elements ); void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4cbccc8b5bb..40f1d978f21 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -147,6 +147,8 @@ struct draw_context const void *elts; /** bytes per index (0, 1, 2 or 4) */ unsigned eltSize; + unsigned min_index; + unsigned max_index; /** vertex arrays */ const void *vbuffer[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index e03816ebbc7..6b8ba1d171b 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -96,6 +96,15 @@ struct draw_pt_middle_end { unsigned start, unsigned count); + /* Transform all vertices in a linear range and then draw them with + * the supplied element list. + */ + void (*run_linear_elts)( struct draw_pt_middle_end *, + unsigned fetch_start, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ); + void (*finish)( struct draw_pt_middle_end * ); void (*destroy)( struct draw_pt_middle_end * ); }; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index a1d041a74f5..09bdc5fb5e1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -311,6 +311,53 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, } +static void fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + struct draw_context *draw = feme->draw; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)count ); + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices and emit HW verts. + */ + feme->translate->run( feme->translate, + start, + count, + hw_verts ); + + /* XXX: Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + /* Done -- that was easy, wasn't it: + */ + draw->render->release_vertices( draw->render, + hw_verts, + feme->translate->key.output_stride, + count ); + +} + + + static void fetch_emit_finish( struct draw_pt_middle_end *middle ) { @@ -343,6 +390,7 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) fetch_emit->base.prepare = fetch_emit_prepare; fetch_emit->base.run = fetch_emit_run; fetch_emit->base.run_linear = fetch_emit_run_linear; + fetch_emit->base.run_linear_elts = fetch_emit_run_linear_elts; fetch_emit->base.finish = fetch_emit_finish; fetch_emit->base.destroy = fetch_emit_destroy; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 5265a131605..efa6dddbda8 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -310,6 +310,54 @@ fse_run(struct draw_pt_middle_end *middle, } + +static void fse_run_linear_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned alloc_count = align(count, 4); + char *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)alloc_count ); + + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping is done elsewhere -- either by the API or on hardware, + * or for some other reason not required... + */ + fse->active->run_linear( fse->active, + start, count, + hw_verts ); + + + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + count ); +} + + + static void fse_finish( struct draw_pt_middle_end *middle ) { } @@ -330,6 +378,7 @@ struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ) fse->base.prepare = fse_prepare; fse->base.run = fse_run; fse->base.run_linear = fse_run_linear; + fse->base.run_linear_elts = fse_run_linear_elts; fse->base.finish = fse_finish; fse->base.destroy = fse_destroy; fse->draw = draw; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 06718779a5b..c58a9008679 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -98,7 +98,6 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - static void fetch_pipeline_run( struct draw_pt_middle_end *middle, const unsigned *fetch_elts, unsigned fetch_count, @@ -251,6 +250,84 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, +static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align_int( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run_linear( fpme->fetch, + start, + count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie a bypass shader, + * then the inputs == outputs, and are already in the correct + * place. + */ + if (opt & PT_SHADE) + { + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.constants, + count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + FREE(pipeline_verts); +} + + + static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) { /* nothing to do */ @@ -282,6 +359,7 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context * fpme->base.prepare = fetch_pipeline_prepare; fpme->base.run = fetch_pipeline_run; fpme->base.run_linear = fetch_pipeline_linear_run; + fpme->base.run_linear_elts = fetch_pipeline_linear_run_elts; fpme->base.finish = fetch_pipeline_finish; fpme->base.destroy = fetch_pipeline_destroy; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 96e02fbf3a9..720b91b8e64 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -36,8 +36,8 @@ #include "draw/draw_pt.h" -#define CACHE_MAX 32 -#define FETCH_MAX 128 +#define CACHE_MAX 1024 +#define FETCH_MAX 4096 #define DRAW_MAX (16*1024) struct vcache_frontend { @@ -201,7 +201,124 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, #define FUNC vcache_run #include "draw_pt_vcache_tmp.h" +static void translate_uint_elts( const unsigned *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} + +static void translate_ushort_elts( const ushort *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} +static void translate_ubyte_elts( const ubyte *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} + +#if 0 +static enum pipe_format format_from_get_elt( pt_elt_func get_elt ) +{ + switch (draw->pt.user.eltSize) { + case 1: return PIPE_FORMAT_R8_UNORM; + case 2: return PIPE_FORMAT_R16_UNORM; + case 4: return PIPE_FORMAT_R32_UNORM; + default: return PIPE_FORMAT_NONE; + } +} +#endif + +static void vcache_check_run( struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned draw_count ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + struct draw_context *draw = vcache->draw; + unsigned min_index = draw->pt.user.min_index; + unsigned max_index = draw->pt.user.max_index; + unsigned index_size = draw->pt.user.eltSize; + unsigned fetch_count = MAX2(max_index, max_index + 1 - min_index); + const ushort *transformed_elts; + ushort *storage = NULL; + + printf("fetch_count %x\n", fetch_count); + + if (fetch_count >= FETCH_MAX || + fetch_count > draw_count) + goto fail; + + + if (min_index == 0 && + index_size == 2) + { + transformed_elts = (const ushort *)elts; + } + else + { + storage = MALLOC( draw_count * sizeof(ushort) ); + if (!storage) + goto fail; + + switch(index_size) { + case 1: + translate_ubyte_elts( (const ubyte *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + case 2: + translate_ushort_elts( (const ushort *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + case 4: + translate_uint_elts( (const uint *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + default: + assert(0); + return; + } + transformed_elts = storage; + } + + vcache->middle->run_linear_elts( vcache->middle, + min_index, /* start */ + fetch_count, + transformed_elts, + draw_count ); + + FREE(storage); + return; + + fail: + vcache_run( frontend, get_elt, elts, draw_count ); +} @@ -219,7 +336,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, } else { - vcache->base.run = vcache_run; + vcache->base.run = vcache_check_run; } vcache->input_prim = prim; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 045a1f74a95..1e0106b86c7 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -179,6 +179,7 @@ softpipe_create( struct pipe_screen *screen, softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; + softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; softpipe->pipe.set_edgeflags = softpipe_set_edgeflags; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 6c58f9909da..dbecf6865f4 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -108,11 +108,14 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * * XXX should the element buffer be specified/bound with a separate function? */ + boolean -softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count) +softpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) { struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; @@ -141,11 +144,14 @@ softpipe_draw_elements(struct pipe_context *pipe, void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + draw_set_mapped_element_buffer_range(draw, indexSize, + min_index, + max_index, + mapped_indexes); } else { /* no index/element buffer */ - draw_set_mapped_element_buffer(draw, 0, NULL); + draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } @@ -171,6 +177,19 @@ softpipe_draw_elements(struct pipe_context *pipe, return TRUE; } +boolean +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + return softpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); +} + + void softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 452e51fa791..701e02b295e 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -171,6 +171,13 @@ boolean softpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); +boolean +softpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count); void softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 0f68f592f77..faf112c6d61 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -76,6 +76,20 @@ struct pipe_context { struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); + + /* XXX: this is (probably) a temporary entrypoint, as the range + * information should be available from the vertex_buffer state. + * Using this to quickly evaluate a specialized path in the draw + * module. + */ + boolean (*draw_range_elements)( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); /*@}*/ diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index a3bffbfc95b..551860452ae 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -365,14 +365,33 @@ st_draw_vbo(GLcontext *ctx, } /* draw */ - for (i = 0; i < nr_prims; i++) { + if (nr_prims == 1 && pipe->draw_range_elements != NULL) { + i = 0; + + /* XXX: exercise temporary path to pass min/max directly + * through to driver & draw module. These interfaces still + * need a bit of work... + */ setup_edgeflags(ctx, prims[i].mode, prims[i].start + indexOffset, prims[i].count, arrays[VERT_ATTRIB_EDGEFLAG]); - pipe->draw_elements(pipe, indexBuf, indexSize, - prims[i].mode, - prims[i].start + indexOffset, prims[i].count); + pipe->draw_range_elements(pipe, indexBuf, indexSize, + min_index, + max_index, + prims[i].mode, + prims[i].start + indexOffset, prims[i].count); + } + else { + for (i = 0; i < nr_prims; i++) { + setup_edgeflags(ctx, prims[i].mode, + prims[i].start + indexOffset, prims[i].count, + arrays[VERT_ATTRIB_EDGEFLAG]); + + pipe->draw_elements(pipe, indexBuf, indexSize, + prims[i].mode, + prims[i].start + indexOffset, prims[i].count); + } } pipe_reference_buffer(pipe, &indexBuf, NULL); -- cgit v1.2.3 From 79b67d8408d1ba5b2232791ae35e731a5953b52d Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Thu, 29 May 2008 12:44:53 +0200 Subject: i915: Remove last usage of surface_alloc_storage --- src/gallium/drivers/i915simple/i915_texture.c | 42 +++++++++--------------- src/gallium/winsys/dri/intel/intel_winsys_pipe.c | 31 +---------------- 2 files changed, 17 insertions(+), 56 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index df11ba0544c..122f88fe3a0 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -104,7 +104,7 @@ i915_miptree_set_image_offset(struct i915_texture *tex, */ } - +#if 0 /* Hack it up to use the old winsys->surface_alloc_storage() * method for now: */ @@ -145,10 +145,7 @@ i915_displaytarget_layout(struct pipe_screen *screen, return tex->buffer != NULL; } - - - - +#endif static void i945_miptree_layout_2d( struct i915_texture *tex ) @@ -539,32 +536,25 @@ i915_texture_create(struct pipe_screen *screen, tex->base.refcount = 1; tex->base.screen = screen; - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { - if (!i915_displaytarget_layout(screen, tex)) - goto fail; + if (i915screen->is_i945) { + if (!i945_miptree_layout(tex)) + goto fail; + } else { + if (!i915_miptree_layout(tex)) + goto fail; } - else { - if (i915screen->is_i945) { - if (!i945_miptree_layout(tex)) - goto fail; - } - else { - if (!i915_miptree_layout(tex)) - goto fail; - } - - tex->buffer = ws->buffer_create(ws, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); - if (!tex->buffer) - goto fail; - } + tex->buffer = ws->buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); + + if (!tex->buffer) + goto fail; return &tex->base; - fail: +fail: FREE(tex); return NULL; } diff --git a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c index b3e5f29c3e5..a7f3047352c 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c @@ -232,36 +232,7 @@ intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, unsigned flags, unsigned tex_usage) { - const unsigned alignment = 64; - assert(!surf->buffer); -#if 0 - surf->width = width; - surf->height = round_up(height, 8); - surf->format = format; - surf->cpp = pf_get_size(format); - surf->pitch = power_of_two(MAX2(width * surf->cpp, 512)); - - surf->buffer = winsys->buffer_create(winsys, 2*4096, - PIPE_BUFFER_USAGE_PIXEL, - surf->pitch * surf->height); - - surf->pitch = surf->pitch / surf->cpp; - surf->height = height; -#else - surf->width = width; - surf->height = height; - surf->format = format; - surf->cpp = pf_get_size(format); - surf->pitch = round_up(width, alignment / surf->cpp); - - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->pitch * surf->cpp * surf->height); -#endif - if(!surf->buffer) - return -1; - - return 0; + return -1; } -- cgit v1.2.3 From bb2e13b9e82b68ec3b9fc56a4c35e7ead8fd138f Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Thu, 29 May 2008 12:38:49 +0100 Subject: draw: make sure constant buffer data is aligned before passing to aos.c --- src/gallium/auxiliary/draw/draw_context.c | 5 +++-- src/gallium/auxiliary/draw/draw_context.h | 3 ++- src/gallium/auxiliary/draw/draw_private.h | 9 ++++++++- src/gallium/auxiliary/draw/draw_vs.c | 19 ++++++++++++++++++- src/gallium/auxiliary/draw/draw_vs_aos.c | 4 ++-- src/gallium/drivers/i915simple/i915_context.c | 4 +++- src/gallium/drivers/softpipe/sp_draw_arrays.c | 5 +++-- src/mesa/state_tracker/st_draw.c | 7 ++++--- 8 files changed, 43 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index bcec85c2ef8..2f263cf06a9 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -217,10 +217,11 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw, void draw_set_mapped_constant_buffer(struct draw_context *draw, - const void *buffer) + const void *buffer, + unsigned size ) { draw->pt.user.constants = buffer; - draw_vs_set_constants( draw, (const float (*)[4])buffer ); + draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 8dd03cb79ef..b8f2bfa3327 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -133,7 +133,8 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); void draw_set_mapped_constant_buffer(struct draw_context *draw, - const void *buffer); + const void *buffer, + unsigned size ); void draw_set_edgeflags( struct draw_context *draw, const unsigned *edgeflag ); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 40f1d978f21..88a7224b623 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -190,6 +190,12 @@ struct draw_context struct aos_machine *aos_machine; + const float (*aligned_constants)[4]; + + const float (*aligned_constant_storage)[4]; + unsigned const_storage_size; + + struct translate *fetch; struct translate_cache *fetch_cache; struct translate *emit; @@ -225,7 +231,8 @@ void draw_vs_set_viewport( struct draw_context *, const struct pipe_viewport_state * ); void draw_vs_set_constants( struct draw_context *, - const float (*constants)[4] ); + const float (*constants)[4], + unsigned size ); diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index a8b6d0c90d2..ce35112fc10 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -43,8 +43,22 @@ void draw_vs_set_constants( struct draw_context *draw, - const float (*constants)[4] ) + const float (*constants)[4], + unsigned size ) { + if (((unsigned)constants) & 0xf) { + if (size > draw->vs.const_storage_size) { + if (draw->vs.aligned_constant_storage) + align_free(draw->vs.aligned_constant_storage); + draw->vs.aligned_constant_storage = align_malloc( size, 16 ); + } + memcpy( draw->vs.aligned_constant_storage, + constants, + size ); + constants = draw->vs.aligned_constant_storage; + } + + draw->vs.aligned_constants = constants; draw_vs_aos_machine_constants( draw->vs.aos_machine, constants ); } @@ -159,6 +173,9 @@ draw_vs_destroy( struct draw_context *draw ) if (draw->vs.aos_machine) draw_vs_aos_machine_destroy(draw->vs.aos_machine); + if (draw->vs.aligned_constant_storage) + align_free(draw->vs.aligned_constant_storage); + tgsi_exec_machine_free_data(&draw->vs.machine); } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index b5e4e1e7b1a..55cabb6df9a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1957,7 +1957,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, struct aos_machine *machine = vaos->draw->vs.aos_machine; machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - machine->constants = (const float (*)[4])vaos->draw->pt.user.constants; + machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; machine->attrib = vaos->attrib; @@ -1976,7 +1976,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, struct aos_machine *machine = vaos->draw->vs.aos_machine; machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - machine->constants = (const float (*)[4])vaos->draw->pt.user.constants; + machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; machine->attrib = vaos->attrib; diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 4bef21619c2..c609d16a5af 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -86,7 +86,9 @@ i915_draw_elements( struct pipe_context *pipe, draw_set_mapped_constant_buffer(draw, - i915->current.constants[PIPE_SHADER_VERTEX]); + i915->current.constants[PIPE_SHADER_VERTEX], + ( i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + 4 * sizeof(float) )); /* draw! */ draw_arrays(i915->draw, prim, start, count); diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index dbecf6865f4..d4d5fa744fb 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -54,7 +54,8 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) } draw_set_mapped_constant_buffer(sp->draw, - sp->mapped_constants[PIPE_SHADER_VERTEX]); + sp->mapped_constants[PIPE_SHADER_VERTEX], + sp->constants[i].size); } static void @@ -68,7 +69,7 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) */ draw_flush(sp->draw); - draw_set_mapped_constant_buffer(sp->draw, NULL); + draw_set_mapped_constant_buffer(sp->draw, NULL, 0); for (i = 0; i < 2; i++) { if (sp->constants[i].size) diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 551860452ae..5300848ef6a 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -596,9 +596,10 @@ st_feedback_draw_vbo(GLcontext *ctx, /* map constant buffers */ mapped_constants = pipe_buffer_map(pipe, - st->state.constants[PIPE_SHADER_VERTEX].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(st->draw, mapped_constants); + st->state.constants[PIPE_SHADER_VERTEX].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(st->draw, mapped_constants, + st->state.constants[PIPE_SHADER_VERTEX].buffer->size); /* draw here */ -- cgit v1.2.3 From 056bea86b013858832a6ab390fe6efe310a7bc8f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Thu, 29 May 2008 15:49:23 +0200 Subject: i915: Deprecated pipe_winsys buffer functions --- src/gallium/drivers/i915simple/i915_texture.c | 2 +- src/gallium/winsys/dri/intel/intel_winsys_pipe.c | 45 ++++-------------------- 2 files changed, 8 insertions(+), 39 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 122f88fe3a0..935bc742b11 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -619,7 +619,7 @@ i915_get_tex_surface(struct pipe_screen *screen, assert(zslice == 0); } - ps = ws->surface_alloc(ws); + ps = CALLOC_STRUCT(pipe_surface);//ws->surface_alloc(ws); if (ps) { assert(ps->refcount); assert(ps->winsys); diff --git a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c index a7f3047352c..af439a796f9 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_pipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_pipe.c @@ -190,40 +190,16 @@ intel_flush_frontbuffer( struct pipe_winsys *winsys, intelDisplaySurface(dPriv, surf, NULL); } - +/* + * Deprecated surface functions + */ static struct pipe_surface * intel_i915_surface_alloc(struct pipe_winsys *winsys) { - struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface); - if (surf) { - surf->refcount = 1; - surf->winsys = winsys; - } - return surf; + assert("intel_i915_surface_alloc is deprecated" & 0); + return NULL; } - -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - -static unsigned -power_of_two(unsigned x) -{ - int value = 1; - while (value <= x) - value = value << 1; - return value; -} - -/** - * Copied from xm_winsys.c - */ static int intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, struct pipe_surface *surf, @@ -232,21 +208,14 @@ intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, unsigned flags, unsigned tex_usage) { + assert("intel_i915_surface_alloc_storage is deprecated" & 0); return -1; } - static void intel_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) { - struct pipe_surface *surf = *s; - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + assert("intel_i915_surface_release is deprecated" & 0); } -- cgit v1.2.3 From 529b3f4cc0ce3a3219daf5e1e8d4248cd1afe286 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Thu, 29 May 2008 19:07:40 +0200 Subject: i915: Fix GPU lockup on resize --- src/gallium/drivers/i915simple/i915_texture.c | 66 ++++++++++++++------------- 1 file changed, 35 insertions(+), 31 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 935bc742b11..b216e3bfb95 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -104,48 +104,49 @@ i915_miptree_set_image_offset(struct i915_texture *tex, */ } -#if 0 -/* Hack it up to use the old winsys->surface_alloc_storage() - * method for now: +static unsigned +power_of_two(unsigned x) +{ + int value = 1; + while (value <= x) + value = value << 1; + return value; +} + +static unsigned +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + +/** + * Special case to deal with display targets. */ static boolean i915_displaytarget_layout(struct pipe_screen *screen, struct i915_texture *tex) { - struct pipe_winsys *ws = screen->winsys; - struct pipe_surface surf; - unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE); - - - memset(&surf, 0, sizeof(surf)); - - ws->surface_alloc_storage( ws, - &surf, - tex->base.width[0], - tex->base.height[0], - tex->base.format, - flags, - tex->base.tex_usage); - - /* Now extract the goodies: - */ + struct pipe_texture *pt = &tex->base; + + if (pt->last_level > 1 || pt->cpp != 4) + return 0; + i915_miptree_set_level_info( tex, 0, 1, 0, 0, tex->base.width[0], tex->base.height[0], 1 ); i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); - tex->buffer = surf.buffer; - tex->pitch = surf.pitch; - tex->total_height = 0; - +#if 0 + tex->pitch = MAX2(512, power_of_two(tex->base.width[0] * pt->cpp)) / pt->cpp; + tex->total_height = round_up(tex->base.height[0], 8); +#else + tex->pitch = round_up(tex->base.width[0], 64 / pt->cpp); + tex->total_height = tex->base.height[0]; +#endif - return tex->buffer != NULL; + return 1; } -#endif static void i945_miptree_layout_2d( struct i915_texture *tex ) @@ -529,14 +530,17 @@ i915_texture_create(struct pipe_screen *screen, struct pipe_winsys *ws = screen->winsys; struct i915_texture *tex = CALLOC_STRUCT(i915_texture); - if (!tex) + if (!tex) return NULL; tex->base = *templat; tex->base.refcount = 1; tex->base.screen = screen; - if (i915screen->is_i945) { + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if (!i915_displaytarget_layout(screen, tex)) + goto fail; + } else if (i915screen->is_i945) { if (!i945_miptree_layout(tex)) goto fail; } else { -- cgit v1.2.3 From 996b549fdbfe772ee56a51858e81e93bccaae5c5 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 30 May 2008 11:04:55 +1000 Subject: nv40: a couple of memory leaks --- src/gallium/drivers/nv40/nv40_state_viewport.c | 3 ++- src/gallium/drivers/nv40/nv40_vertprog.c | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 1b6248e5b8e..869a55b4053 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -3,8 +3,8 @@ static boolean nv40_state_viewport_validate(struct nv40_context *nv40) { - struct nouveau_stateobj *so = so_new(11, 0); struct pipe_viewport_state *vpt = &nv40->viewport; + struct nouveau_stateobj *so; unsigned bypass; if (nv40->render_mode == HW && !nv40->rasterizer->pipe.bypass_clipping) @@ -18,6 +18,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40) return FALSE; nv40->state.viewport_bypass = bypass; + so = so_new(11, 0); if (!bypass) { so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index e10250528e2..eb14869bfe0 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -843,6 +843,12 @@ nv40_vertprog_translate(struct nv40_context *nv40, vp->translated = TRUE; out_err: tgsi_parse_free(&parse); + if (vpc->r_temp) + FREE(vpc->r_temp); + if (vpc->r_address) + FREE(vpc->r_address); + if (vpc->imm) + FREE(vpc->imm); FREE(vpc); } -- cgit v1.2.3 From 2ade5268dca67a73d3f5f8cc41ea86d1e48de9f0 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Fri, 30 May 2008 13:31:42 +0200 Subject: i915: Remade texture allocation code again --- src/gallium/drivers/i915simple/i915_texture.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index b216e3bfb95..4c0c7ce3f3e 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -128,7 +128,7 @@ i915_displaytarget_layout(struct pipe_screen *screen, { struct pipe_texture *pt = &tex->base; - if (pt->last_level > 1 || pt->cpp != 4) + if (pt->last_level > 0 || pt->cpp != 4) return 0; i915_miptree_set_level_info( tex, 0, 1, 0, 0, @@ -177,7 +177,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) /* Pitch must be a whole number of dwords, even though we * express it in texels. */ - tex->pitch = align_int(tex->pitch * pt->cpp, 4) / pt->cpp; + tex->pitch = align_int(tex->pitch * pt->cpp, 64) / pt->cpp; tex->total_height = 0; for (level = 0; level <= pt->last_level; level++) { @@ -537,10 +537,7 @@ i915_texture_create(struct pipe_screen *screen, tex->base.refcount = 1; tex->base.screen = screen; - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { - if (!i915_displaytarget_layout(screen, tex)) - goto fail; - } else if (i915screen->is_i945) { + if (i915screen->is_i945) { if (!i945_miptree_layout(tex)) goto fail; } else { -- cgit v1.2.3 From b12a31f59d80a1d5703181b00b42df3fa712497b Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Thu, 29 May 2008 16:57:34 +0200 Subject: i915: Now using draw_elements_range --- src/gallium/drivers/i915simple/i915_context.c | 29 +++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index c609d16a5af..81cab757381 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -50,10 +50,12 @@ static void i915_destroy( struct pipe_context *pipe ) static boolean -i915_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) +i915_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) { struct i915_context *i915 = i915_context( pipe ); struct draw_context *draw = i915->draw; @@ -77,7 +79,10 @@ i915_draw_elements( struct pipe_context *pipe, void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + draw_set_mapped_element_buffer_range(draw, indexSize, + min_index, + max_index, + mapped_indexes); } else { /* no index/element buffer */ @@ -102,12 +107,23 @@ i915_draw_elements( struct pipe_context *pipe, } if (indexBuffer) { pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); - draw_set_mapped_element_buffer(draw, 0, NULL); + draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } return TRUE; } +static boolean +i915_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + return i915_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + prim, start, count ); +} static boolean i915_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) @@ -138,6 +154,7 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, i915->pipe.draw_arrays = i915_draw_arrays; i915->pipe.draw_elements = i915_draw_elements; + i915->pipe.draw_range_elements = i915_draw_range_elements; /* * Create drawing context and plug our rendering stage into it. -- cgit v1.2.3 From 12bb9075221da6bf8121f3d705888f29ca11cc0f Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 30 May 2008 15:07:58 -0600 Subject: gallium: fix some PIPE_FORMAT_Z24S8_UNORM bugs in softpipe driver --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 2 +- src/gallium/drivers/softpipe/sp_tile_cache.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index a1859f98832..33888abcc5e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -230,7 +230,7 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) int x = quad->x0 % TILE_SIZE + (j & 1); int y = quad->y0 % TILE_SIZE + (j >> 1); uint z24s8 = tile->data.depth32[y][x]; - z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 24); + z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8); tile->data.depth32[y][x] = z24s8; } break; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 28c29da87c9..0e4c8c41eea 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -169,6 +169,7 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, PIPE_BUFFER_USAGE_CPU_WRITE); tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || + ps->format == PIPE_FORMAT_Z24S8_UNORM || ps->format == PIPE_FORMAT_Z16_UNORM || ps->format == PIPE_FORMAT_Z32_UNORM || ps->format == PIPE_FORMAT_S8_UNORM); -- cgit v1.2.3 From 45b2c23d7a2f1bd723d0719b13470125de09c243 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 31 May 2008 18:08:24 +0900 Subject: i915: Eliminate void pointer arithmetic. --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index faf5971abbb..cdbb5feb232 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -142,7 +142,7 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, i915->vbo_offset = i915_render->vbo_offset; i915->dirty |= I915_NEW_VBO; - return i915_render->vbo_ptr + i915->vbo_offset; + return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset; } -- cgit v1.2.3 From 9b3c1582befd0c64d9b48f7ed39566f9ae2e24d9 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Sat, 31 May 2008 18:42:17 +0200 Subject: i915: Add mising include. --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index cdbb5feb232..f9297e29fcf 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -38,6 +38,7 @@ */ +#include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "pipe/p_debug.h" #include "pipe/p_util.h" -- cgit v1.2.3 From 140e0e071890f24653e333e309778f6242c8d8f9 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Sat, 31 May 2008 18:44:47 +0200 Subject: i915: Comment out dead & problematic code. --- src/gallium/drivers/i915simple/i915_texture.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 4c0c7ce3f3e..f0d00280c23 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -104,6 +104,7 @@ i915_miptree_set_image_offset(struct i915_texture *tex, */ } +#if 0 static unsigned power_of_two(unsigned x) { @@ -112,6 +113,7 @@ power_of_two(unsigned x) value = value << 1; return value; } +#endif static unsigned round_up(unsigned n, unsigned multiple) -- cgit v1.2.3 From 347d28fd20645674c3509b9fb8ebf8c31a24c239 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Sat, 31 May 2008 19:51:50 +0200 Subject: cell: Fix build after TGSI declaration interface changes. --- src/gallium/drivers/cell/spu/spu_exec.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 48edc62f49b..69b05261209 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -830,13 +830,11 @@ exec_declaration(struct spu_exec_machine *mach, unsigned first, last, mask; interpolation_func interp; - assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); - - first = decl->u.DeclarationRange.First; - last = decl->u.DeclarationRange.Last; + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; - switch( decl->Interpolation.Interpolate ) { + switch( decl->Declaration.Interpolate ) { case TGSI_INTERPOLATE_CONSTANT: interp = constant_interpolation; break; -- cgit v1.2.3 From 99b46555499005bd9454fb4a91d28d4e7d93dba4 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Sat, 31 May 2008 19:52:41 +0200 Subject: i915: Fix build after TGSI declaration interface changes. --- src/gallium/drivers/i915simple/i915_fpc_translate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 3ccf74c72c4..48f3ceb2e88 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -943,8 +943,8 @@ i915_translate_instructions(struct i915_fp_compile *p, if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) { uint i; - for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; i++) { assert(ifs->constant_flags[i] == 0x0); ifs->constant_flags[i] = I915_CONSTFLAG_USER; @@ -954,8 +954,8 @@ i915_translate_instructions(struct i915_fp_compile *p, else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; i++) { assert(i < I915_MAX_TEMPORARY); /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ -- cgit v1.2.3 From 01122116144619a93c7ebb852eaffb3a6c96fe67 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Sat, 31 May 2008 19:54:20 +0200 Subject: brw: Fix build after TGSI declaration interface changes. --- src/gallium/drivers/i965simple/brw_sf.c | 10 +++++----- src/gallium/drivers/i965simple/brw_shader_info.c | 4 +--- src/gallium/drivers/i965simple/brw_vs_emit.c | 6 ++---- src/gallium/drivers/i965simple/brw_wm_decl.c | 8 +++----- 4 files changed, 11 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c index c3b815a82be..96f8fb87a3d 100644 --- a/src/gallium/drivers/i965simple/brw_sf.c +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -169,9 +169,9 @@ static void upload_sf_prog( struct brw_context *brw ) case TGSI_TOKEN_TYPE_DECLARATION: if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) { - int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; - int interp_mode = parse.FullToken.FullDeclaration.Interpolation.Interpolate; + int first = parse.FullToken.FullDeclaration.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; + int interp_mode = parse.FullToken.FullDeclaration.Declaration.Interpolate; //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName; //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; @@ -291,8 +291,8 @@ static void update_sf_linkage( struct brw_context *brw ) case TGSI_TOKEN_TYPE_DECLARATION: if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) { - int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + int first = parse.FullToken.FullDeclaration.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; for (i = first; i < last; i++) { vp_semantic[i].semantic = diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index f4694a4433b..fb3da92421e 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -26,9 +26,7 @@ void brw_shader_info(const struct tgsi_token *tokens, case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - unsigned last = decl->u.DeclarationRange.Last; - - assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + unsigned last = decl->DeclarationRange.Last; // Broken by crazy wpos init: //assert( info->nr_regs[decl->Declaration.File] <= last); diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 9020fcc0012..a1432fece13 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -988,10 +988,8 @@ post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) static void process_declaration(const struct tgsi_full_declaration *decl, struct brw_prog_info *info) { - int first = decl->u.DeclarationRange.First; - int last = decl->u.DeclarationRange.Last; - - assert (decl->Declaration.Declare != TGSI_DECLARE_MASK); + int first = decl->DeclarationRange.First; + int last = decl->DeclarationRange.Last; switch(decl->Declaration.File) { case TGSI_FILE_CONSTANT: diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index 74ccfd494a4..bf1b4d961a7 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -351,18 +351,16 @@ void brw_wm_emit_decls(struct brw_wm_compile *c) case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - unsigned first = decl->u.DeclarationRange.First; - unsigned last = decl->u.DeclarationRange.Last; + unsigned first = decl->DeclarationRange.First; + unsigned last = decl->DeclarationRange.Last; unsigned mask = decl->Declaration.UsageMask; /* ? */ unsigned i; if (decl->Declaration.File != TGSI_FILE_INPUT) break; - assert(decl->Declaration.Interpolate); - for( i = first; i <= last; i++ ) { - switch (decl->Interpolation.Interpolate) { + switch (decl->Declaration.Interpolate) { case TGSI_INTERPOLATE_CONSTANT: emit_cinterp(c, i, mask); break; -- cgit v1.2.3 From dc6068a8bcd66e2cbcf76962c70ba202e0078a49 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Mon, 2 Jun 2008 11:40:44 +0200 Subject: cell: SWZ no longer aliases MOV. --- src/gallium/drivers/cell/spu/spu_exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 69b05261209..3a80df427da 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -896,7 +896,7 @@ exec_instruction( break; case TGSI_OPCODE_MOV: - /* TGSI_OPCODE_SWZ */ + case TGSI_OPCODE_SWZ: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); STORE( &r[0], 0, chan_index ); -- cgit v1.2.3 From dfd30b878680dd6dca96928a06a301b837b7a650 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Mon, 2 Jun 2008 11:41:17 +0200 Subject: i915: SWZ no longer aliases MOV. --- src/gallium/drivers/i915simple/i915_fpc_translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 48f3ceb2e88..23cd909337e 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -676,7 +676,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MOV: - /* aka TGSI_OPCODE_SWZ */ + case TGSI_OPCODE_SWZ: emit_simple_arith(p, inst, A0_MOV, 1); break; -- cgit v1.2.3 From 49ed85d6b1cdb74a7985e2d743635c73151bbfdb Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Mon, 2 Jun 2008 11:42:16 +0200 Subject: brw: SWZ no longer aliases MOV. --- src/gallium/drivers/i965simple/brw_vs_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index a1432fece13..81423e2d7d5 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -1135,8 +1135,8 @@ static void process_instruction(struct brw_vs_compile *c, emit_min(p, dst, args[0], args[1]); break; case TGSI_OPCODE_MOV: -#if 0 case TGSI_OPCODE_SWZ: +#if 0 /* The args[0] value can't be used here as it won't have * correctly encoded the full swizzle: */ -- cgit v1.2.3 From f4364cd1a6f9e825ea183c1fa6d1050b7e113695 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Fri, 30 May 2008 18:46:40 +0200 Subject: i915: Fixed initialization of surface --- src/gallium/drivers/i915simple/i915_texture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index f0d00280c23..16354dce509 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -624,8 +624,8 @@ i915_get_tex_surface(struct pipe_screen *screen, ps = CALLOC_STRUCT(pipe_surface);//ws->surface_alloc(ws); if (ps) { - assert(ps->refcount); - assert(ps->winsys); + ps->refcount = 1; + ps->winsys = ws; pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; -- cgit v1.2.3 From c1949e2bd3acc45c23cc434eef2b0d6aae9092ca Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@aurora.(none)> Date: Mon, 2 Jun 2008 12:55:35 +0200 Subject: i915: Fixed some warnings --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 4 +--- src/gallium/drivers/i915simple/i915_state_emit.c | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index f9297e29fcf..adfb16fbc5f 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -253,7 +253,6 @@ draw_arrays_fallback( struct vbuf_render *render, { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct pipe_winsys *winsys = i915->pipe.winsys; unsigned nr_indices; if (i915->dirty) @@ -372,7 +371,6 @@ i915_vbuf_render_draw( struct vbuf_render *render, { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct pipe_winsys *winsys = i915->pipe.winsys; unsigned save_nr_indices; save_nr_indices = nr_indices; @@ -424,7 +422,6 @@ i915_vbuf_render_release_vertices( struct vbuf_render *render, { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct pipe_winsys *winsys = i915->pipe.winsys; size_t size = (size_t)vertex_size * (size_t)vertices_used; assert(i915->vbo); @@ -502,6 +499,7 @@ struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ) render->destroy(render); return NULL; } + /** TODO JB: this shouldn't be here */ draw_set_render(i915->draw, render); return stage; diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 8bcc26ad356..de0c1a787ab 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -377,6 +377,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) { uint w, h; boolean k = framebuffer_size(&i915->framebuffer, &w, &h); + (void)k; assert(k); OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); -- cgit v1.2.3 From e0860518dfb5a5c6ba6584e3c1b5d7b203277dac Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 2 Jun 2008 22:31:02 +0900 Subject: gallium: Replace XSTDCALL by PIPE_CDECL. --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 38 +++++++---------------------- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/gallium/include/pipe/p_compiler.h | 16 +----------- 4 files changed, 12 insertions(+), 46 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index f638208bf58..0aa0c9a76b8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -50,7 +50,7 @@ #define SSE_MAX_VERTICES 4 -typedef void (XSTDCALL *codegen_function) ( +typedef void (PIPE_CDECL *codegen_function) ( const struct tgsi_exec_vector *input, /* 1 */ struct tgsi_exec_vector *output, /* 2 */ float (*constant)[4], /* 3 */ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index cdce5ea9c52..cdbdf5c8827 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -194,22 +194,12 @@ get_coef( } -#ifdef WIN32 -static void -emit_retw( - struct x86_function *func, - unsigned short size ) -{ - x86_retw( func, size ); -} -#else static void emit_ret( struct x86_function *func ) { x86_ret( func ); } -#endif /** @@ -475,7 +465,7 @@ static void emit_func_call_dst( struct x86_function *func, unsigned xmm_dst, - void (*code)() ) + void (PIPE_CDECL *code)() ) { sse_movaps( func, @@ -496,9 +486,7 @@ emit_func_call_dst( x86_push( func, ecx ); x86_mov_reg_imm( func, ecx, (unsigned long) code ); x86_call( func, ecx ); -#ifndef WIN32 x86_pop(func, ecx ); -#endif } @@ -516,7 +504,7 @@ emit_func_call_dst_src( struct x86_function *func, unsigned xmm_dst, unsigned xmm_src, - void (*code)() ) + void (PIPE_CDECL *code)() ) { sse_movaps( func, @@ -558,7 +546,7 @@ emit_add( make_xmm( xmm_src ) ); } -static void XSTDCALL +static void PIPE_CDECL cos4f( float *store ) { @@ -581,7 +569,7 @@ emit_cos( cos4f ); } -static void XSTDCALL +static void PIPE_CDECL ex24f( float *store ) { @@ -615,7 +603,7 @@ emit_f2it( make_xmm( xmm ) ); } -static void XSTDCALL +static void PIPE_CDECL flr4f( float *store ) { @@ -638,7 +626,7 @@ emit_flr( flr4f ); } -static void XSTDCALL +static void PIPE_CDECL frc4f( float *store ) { @@ -661,7 +649,7 @@ emit_frc( frc4f ); } -static void XSTDCALL +static void PIPE_CDECL lg24f( float *store ) { @@ -720,7 +708,7 @@ emit_neg( TGSI_EXEC_TEMP_80000000_C ) ); } -static void XSTDCALL +static void PIPE_CDECL pow4f( float *store ) { @@ -820,7 +808,7 @@ emit_setsign( TGSI_EXEC_TEMP_80000000_C ) ); } -static void XSTDCALL +static void PIPE_CDECL sin4f( float *store ) { @@ -1736,11 +1724,7 @@ emit_instruction( break; case TGSI_OPCODE_RET: -#ifdef WIN32 - emit_retw( func, 16 ); -#else emit_ret( func ); -#endif break; case TGSI_OPCODE_END: @@ -2281,11 +2265,7 @@ tgsi_emit_sse2( func, get_immediate_base() ); -#ifdef WIN32 - emit_retw( func, 16 ); -#else emit_ret( func ); -#endif tgsi_parse_free( &parse ); diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 55741cc1df8..69f7f960aa1 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -46,7 +46,7 @@ /* Surely this should be defined somewhere in a tgsi header: */ -typedef void (XSTDCALL *codegen_function)( +typedef void (PIPE_CDECL *codegen_function)( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, const float (*constant)[4], diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index f6707385cd8..b14260dd901 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -128,7 +128,7 @@ typedef unsigned char boolean; /* This should match linux gcc cdecl semantics everywhere, so that we * just codegen one calling convention on all platforms. */ -#ifdef WIN32 +#ifdef _MSC_VER #define PIPE_CDECL __cdecl #else #define PIPE_CDECL @@ -148,18 +148,4 @@ typedef unsigned char boolean; -/** - * For calling code-gen'd functions, phase out in favor of - * PIPE_CDECL, above, which really means cdecl on all platforms, not - * like the below... - */ -#if !defined(XSTDCALL) -#if defined(WIN32) -#define XSTDCALL __stdcall /* phase this out */ -#else -#define XSTDCALL /* XXX: NOTE! not STDCALL! */ -#endif -#endif - - #endif /* P_COMPILER_H */ -- cgit v1.2.3 From 4ee14279f3a466093869f1f40819e6c6d5af378d Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@aurora.(none)> Date: Mon, 2 Jun 2008 14:55:06 +0200 Subject: i915: Rework of batchbuffer code --- src/gallium/drivers/i915simple/i915_batch.h | 92 +++++++++++++++++++++++++-- src/gallium/drivers/i915simple/i915_context.c | 3 +- src/gallium/drivers/i915simple/i915_context.h | 4 +- src/gallium/drivers/i915simple/i915_debug.c | 2 +- 4 files changed, 94 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h index 4ea06ce02bf..6c62e84bc38 100644 --- a/src/gallium/drivers/i915simple/i915_batch.h +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -31,23 +31,107 @@ #include "i915_winsys.h" #include "i915_debug.h" +struct i915_batchbuffer +{ + struct pipe_buffer *buffer; + struct i915_winsys *winsys; + + unsigned char *map; + unsigned char *ptr; + + size_t size; + + size_t relocs; + size_t max_relocs; +}; + +static INLINE boolean +i915_batchbuffer_check( struct i915_batchbuffer *batch, + size_t dwords, + size_t relocs ) +{ +#if 0 /* To be used */ + /** TODO JB: Check relocs */ + return dwords * 4 <= batch->size - (batch->ptr - batch->map); +#else + if (batch->winsys->batch_start( batch->winsys, dwords, relocs )) + return 1; + return 0; +#endif +} + +static INLINE size_t +i915_batchbuffer_space( struct i915_batchbuffer *batch ) +{ +#if 0 /* To be used */ + return batch->size - (batch->ptr - batch->map); +#else + return 0; +#endif +} + +static INLINE void +i915_batchbuffer_dword( struct i915_batchbuffer *batch, + unsigned dword ) +{ +#if 0 /* To be used */ + if (i915_batchbuffer_space(batch) < 4) + return; + + *(unsigned *)batch->ptr = dword; + batch->ptr += 4; +#else + batch->winsys->batch_dword( batch->winsys, dword ); +#endif +} + +static INLINE void +i915_batchbuffer_write( struct i915_batchbuffer *batch, + void *data, + size_t size ) +{ +#if 0 /* To be used */ + if (i915_batchbuffer_space(batch) < size) + return; + + memcpy(data, batch->ptr, size); + batch->ptr += size; +#else +#endif +} + +static INLINE void +i915_batchbuffer_reloc( struct i915_batchbuffer *batch, + struct pipe_buffer *buffer, + size_t flags, + size_t offset ) +{ + batch->winsys->batch_reloc( batch->winsys, buffer, flags, offset ); +} + +static INLINE void +i915_batchbuffer_flush( struct i915_batchbuffer *batch, + struct pipe_fence_handle **fence ) +{ + batch->winsys->batch_flush( batch->winsys, fence ); +} + #define BATCH_LOCALS #define BEGIN_BATCH( dwords, relocs ) \ - (i915->batch_start = i915->winsys->batch_start( i915->winsys, dwords, relocs )) + (i915_batchbuffer_check( i915->batch, dwords, relocs )) #define OUT_BATCH( dword ) \ - i915->winsys->batch_dword( i915->winsys, dword ) + i915_batchbuffer_dword( i915->batch, dword ) #define OUT_RELOC( buf, flags, delta ) \ - i915->winsys->batch_reloc( i915->winsys, buf, flags, delta ) + i915_batchbuffer_reloc( i915->batch, buf, flags, delta ) #define ADVANCE_BATCH() #define FLUSH_BATCH(fence) do { \ if (0) i915_dump_batchbuffer( i915 ); \ i915->winsys->batch_flush( i915->winsys, fence ); \ - i915->batch_start = NULL; \ i915->hardware_dirty = ~0; \ } while (0) diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 81cab757381..378e4a5d4db 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -181,7 +181,8 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, /* Batch stream debugging is a bit hacked up at the moment: */ - i915->batch_start = NULL; + i915->batch = CALLOC_STRUCT(i915_batchbuffer); + i915->batch->winsys = i915_winsys; return &i915->pipe; } diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 2da90ae49d9..2ee0381648d 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -209,6 +209,8 @@ struct i915_texture { struct pipe_buffer *buffer; }; +struct i915_batchbuffer; + struct i915_context { struct pipe_context pipe; @@ -241,7 +243,7 @@ struct i915_context unsigned num_vertex_elements; unsigned num_vertex_buffers; - unsigned *batch_start; + struct i915_batchbuffer *batch; /** Vertex buffer */ struct pipe_buffer *vbo; diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c index 9b9111167f8..a121dc0af48 100644 --- a/src/gallium/drivers/i915simple/i915_debug.c +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -861,7 +861,7 @@ void i915_dump_batchbuffer( struct i915_context *i915 ) { struct debug_stream stream; - unsigned *start = i915->batch_start; + unsigned *start = 0;/*i915->batch_start;*/ unsigned *end = i915->winsys->batch_start( i915->winsys, 0, 0 ); unsigned long bytes = (unsigned long) (end - start) * 4; boolean done = FALSE; -- cgit v1.2.3 From 7cc23a9eaebc788ae34f6e06c6227524d08a7693 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Mon, 2 Jun 2008 17:22:45 +0200 Subject: i915: Implement and use the reworked batchbuffer code --- src/gallium/drivers/i915simple/i915_batch.h | 20 ++--------- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_debug.c | 3 +- src/gallium/drivers/i915simple/i915_winsys.h | 22 +++++------- src/gallium/winsys/dri/intel/intel_batchbuffer.c | 46 ++++++++++++------------ src/gallium/winsys/dri/intel/intel_batchbuffer.h | 19 +++++----- src/gallium/winsys/dri/intel/intel_winsys_i915.c | 35 +++--------------- 7 files changed, 52 insertions(+), 95 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h index 6c62e84bc38..9379d1fb5de 100644 --- a/src/gallium/drivers/i915simple/i915_batch.h +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -40,6 +40,7 @@ struct i915_batchbuffer unsigned char *ptr; size_t size; + size_t actual_size; size_t relocs; size_t max_relocs; @@ -50,39 +51,25 @@ i915_batchbuffer_check( struct i915_batchbuffer *batch, size_t dwords, size_t relocs ) { -#if 0 /* To be used */ /** TODO JB: Check relocs */ return dwords * 4 <= batch->size - (batch->ptr - batch->map); -#else - if (batch->winsys->batch_start( batch->winsys, dwords, relocs )) - return 1; - return 0; -#endif } static INLINE size_t i915_batchbuffer_space( struct i915_batchbuffer *batch ) { -#if 0 /* To be used */ return batch->size - (batch->ptr - batch->map); -#else - return 0; -#endif } static INLINE void i915_batchbuffer_dword( struct i915_batchbuffer *batch, unsigned dword ) { -#if 0 /* To be used */ if (i915_batchbuffer_space(batch) < 4) return; *(unsigned *)batch->ptr = dword; batch->ptr += 4; -#else - batch->winsys->batch_dword( batch->winsys, dword ); -#endif } static INLINE void @@ -90,14 +77,11 @@ i915_batchbuffer_write( struct i915_batchbuffer *batch, void *data, size_t size ) { -#if 0 /* To be used */ if (i915_batchbuffer_space(batch) < size) return; memcpy(data, batch->ptr, size); batch->ptr += size; -#else -#endif } static INLINE void @@ -135,4 +119,4 @@ i915_batchbuffer_flush( struct i915_batchbuffer *batch, i915->hardware_dirty = ~0; \ } while (0) -#endif +#endif diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 378e4a5d4db..243b4a68522 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -181,7 +181,7 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, /* Batch stream debugging is a bit hacked up at the moment: */ - i915->batch = CALLOC_STRUCT(i915_batchbuffer); + i915->batch = i915_winsys->batch_get(i915_winsys); i915->batch->winsys = i915_winsys; return &i915->pipe; diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c index a121dc0af48..5e26d1b9057 100644 --- a/src/gallium/drivers/i915simple/i915_debug.c +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -861,8 +861,9 @@ void i915_dump_batchbuffer( struct i915_context *i915 ) { struct debug_stream stream; + /* TODO fix me */ unsigned *start = 0;/*i915->batch_start;*/ - unsigned *end = i915->winsys->batch_start( i915->winsys, 0, 0 ); + unsigned *end = 0;/*i915->winsys->batch_start( i915->winsys, 0, 0 );*/ unsigned long bytes = (unsigned long) (end - start) * 4; boolean done = FALSE; diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h index 5e16543f4ec..9afaa16a62c 100644 --- a/src/gallium/drivers/i915simple/i915_winsys.h +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -55,6 +55,7 @@ extern "C" { * etc. */ +struct i915_batchbuffer; struct pipe_buffer; struct pipe_fence_handle; struct pipe_winsys; @@ -75,20 +76,10 @@ struct pipe_screen; struct i915_winsys { /** - * Reserve space on batch buffer. - * - * Returns a null pointer if there is insufficient space in the batch buffer - * to hold the requested number of dwords and relocations. - * - * The number of dwords should also include the number of relocations. + * Get the current batch buffer from the winsys. */ - unsigned *(*batch_start)( struct i915_winsys *sws, - unsigned dwords, - unsigned relocs ); - - void (*batch_dword)( struct i915_winsys *sws, - unsigned dword ); - + struct i915_batchbuffer *(*batch_get)( struct i915_winsys *sws ); + /** * Emit a relocation to a buffer. * @@ -103,7 +94,10 @@ struct i915_winsys { struct pipe_buffer *buf, unsigned access_flags, unsigned delta ); - + + /** + * Flush the batch. + */ void (*batch_flush)( struct i915_winsys *sws, struct pipe_fence_handle **fence ); }; diff --git a/src/gallium/winsys/dri/intel/intel_batchbuffer.c b/src/gallium/winsys/dri/intel/intel_batchbuffer.c index 09d4eef4844..aa33c120453 100644 --- a/src/gallium/winsys/dri/intel/intel_batchbuffer.c +++ b/src/gallium/winsys/dri/intel/intel_batchbuffer.c @@ -65,8 +65,10 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) driBOUnrefUserList(batch->list); driBOResetList(batch->list); - batch->size = batch->intel->intelScreen->max_batch_size; - driBOData(batch->buffer, batch->size, NULL, NULL, 0); + /* base.size is the size available to the i915simple driver */ + batch->base.size = batch->intel->intelScreen->max_batch_size - BATCH_RESERVED; + batch->base.actual_size = batch->intel->intelScreen->max_batch_size; + driBOData(batch->buffer, batch->base.actual_size, NULL, NULL, 0); /* * Add the batchbuffer to the validate list. @@ -105,9 +107,9 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->reloc[2] = 0; /* Only a single relocation list. */ batch->reloc[3] = 0; /* Only a single relocation list. */ - batch->map = driBOMap(batch->buffer, DRM_BO_FLAG_WRITE, 0); + batch->base.map = driBOMap(batch->buffer, DRM_BO_FLAG_WRITE, 0); batch->poolOffset = driBOPoolOffset(batch->buffer); - batch->ptr = batch->map; + batch->base.ptr = batch->base.map; batch->dirty_state = ~0; batch->nr_relocs = 0; batch->flags = 0; @@ -142,9 +144,9 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch) DRM_FENCE_TYPE_EXE, GL_FALSE); driFenceUnReference(&batch->last_fence); } - if (batch->map) { + if (batch->base.map) { driBOUnmap(batch->buffer); - batch->map = NULL; + batch->base.map = NULL; } driBOUnReference(batch->buffer); driBOFreeList(batch->list); @@ -191,7 +193,7 @@ intel_offset_relocation(struct intel_batchbuffer *batch, reloc = batch->reloc + (I915_RELOC_HEADER + batch->nr_relocs * I915_RELOC0_STRIDE); - reloc[0] = ((uint8_t *)batch->ptr - batch->drmBOVirtual); + reloc[0] = ((uint8_t *)batch->base.ptr - batch->drmBOVirtual); intel_batchbuffer_emit_dword(batch, req->presumed_offset + pre_add); reloc[1] = pre_add; reloc[2] = itemLoc; @@ -382,7 +384,7 @@ struct _DriFenceObject * intel_batchbuffer_flush(struct intel_batchbuffer *batch) { struct intel_context *intel = batch->intel; - GLuint used = batch->ptr - batch->map; + GLuint used = batch->base.ptr - batch->base.map; GLboolean was_locked = intel->locked; struct _DriFenceObject *fence; @@ -396,32 +398,32 @@ intel_batchbuffer_flush(struct intel_batchbuffer *batch) */ #if 0 /* ZZZ JB: what should we do here? */ if (used & 4) { - ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); - ((int *) batch->ptr)[1] = 0; - ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END; + ((int *) batch->base.ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->base.ptr)[1] = 0; + ((int *) batch->base.ptr)[2] = MI_BATCH_BUFFER_END; used += 12; } else { - ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); - ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END; + ((int *) batch->base.ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->base.ptr)[1] = MI_BATCH_BUFFER_END; used += 8; } #else if (used & 4) { - ((int *) batch->ptr)[0] = ((0<<29)|(4<<23)); // MI_FLUSH; - ((int *) batch->ptr)[1] = 0; - ((int *) batch->ptr)[2] = (0xA<<23); // MI_BATCH_BUFFER_END; + ((int *) batch->base.ptr)[0] = ((0<<29)|(4<<23)); // MI_FLUSH; + ((int *) batch->base.ptr)[1] = 0; + ((int *) batch->base.ptr)[2] = (0xA<<23); // MI_BATCH_BUFFER_END; used += 12; } else { - ((int *) batch->ptr)[0] = ((0<<29)|(4<<23)); // MI_FLUSH; - ((int *) batch->ptr)[1] = (0xA<<23); // MI_BATCH_BUFFER_END; + ((int *) batch->base.ptr)[0] = ((0<<29)|(4<<23)); // MI_FLUSH; + ((int *) batch->base.ptr)[1] = (0xA<<23); // MI_BATCH_BUFFER_END; used += 8; } #endif driBOUnmap(batch->buffer); - batch->ptr = NULL; - batch->map = NULL; + batch->base.ptr = NULL; + batch->base.map = NULL; /* TODO: Just pass the relocation list and dma buffer up to the * kernel. @@ -455,6 +457,6 @@ intel_batchbuffer_data(struct intel_batchbuffer *batch, { assert((bytes & 3) == 0); intel_batchbuffer_require_space(batch, bytes, flags); - memcpy(batch->ptr, data, bytes); - batch->ptr += bytes; + memcpy(batch->base.ptr, data, bytes); + batch->base.ptr += bytes; } diff --git a/src/gallium/winsys/dri/intel/intel_batchbuffer.h b/src/gallium/winsys/dri/intel/intel_batchbuffer.h index 9e4b8043bfb..dcb2121ddfd 100644 --- a/src/gallium/winsys/dri/intel/intel_batchbuffer.h +++ b/src/gallium/winsys/dri/intel/intel_batchbuffer.h @@ -3,6 +3,7 @@ #include "mtypes.h" #include "ws_dri_bufmgr.h" +#include "i915simple/i915_batch.h" struct intel_context; @@ -17,7 +18,8 @@ struct intel_context; struct intel_batchbuffer { - struct bufmgr *bm; + struct i915_batchbuffer base; + struct intel_context *intel; struct _DriBufferObject *buffer; @@ -26,15 +28,11 @@ struct intel_batchbuffer struct _DriBufferList *list; GLuint list_count; - GLubyte *map; - GLubyte *ptr; uint32_t *reloc; GLuint reloc_size; GLuint nr_relocs; - GLuint size; - GLuint dirty_state; GLuint id; @@ -83,7 +81,7 @@ intel_offset_relocation(struct intel_batchbuffer *batch, static INLINE GLuint intel_batchbuffer_space(struct intel_batchbuffer *batch) { - return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); + return (batch->base.size - BATCH_RESERVED) - (batch->base.ptr - batch->base.map); } @@ -92,8 +90,8 @@ intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) { assert(batch->map); assert(intel_batchbuffer_space(batch) >= 4); - *(GLuint *) (batch->ptr) = dword; - batch->ptr += 4; + *(GLuint *) (batch->base.ptr) = dword; + batch->base.ptr += 4; } static INLINE void @@ -114,20 +112,25 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, /* Here are the crusty old macros, to be removed: */ +#undef BATCH_LOCALS #define BATCH_LOCALS +#undef BEGIN_BATCH #define BEGIN_BATCH(n, flags) do { \ assert(!intel->prim.flush); \ intel_batchbuffer_require_space(intel->batch, (n)*4, flags); \ } while (0) +#undef OUT_BATCH #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) +#undef OUT_RELOC #define OUT_RELOC(buf,flags,mask,delta) do { \ assert((delta) >= 0); \ intel_offset_relocation(intel->batch, delta, buf, flags, mask); \ } while (0) +#undef ADVANCE_BATCH #define ADVANCE_BATCH() do { } while(0) diff --git a/src/gallium/winsys/dri/intel/intel_winsys_i915.c b/src/gallium/winsys/dri/intel/intel_winsys_i915.c index a35825d36ab..013291364a1 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_i915.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_i915.c @@ -63,28 +63,11 @@ intel_i915_winsys( struct i915_winsys *sws ) /* Simple batchbuffer interface: */ -static unsigned *intel_i915_batch_start( struct i915_winsys *sws, - unsigned dwords, - unsigned relocs ) +static struct i915_batchbuffer* +intel_i915_batch_get( struct i915_winsys *sws ) { struct intel_context *intel = intel_i915_winsys(sws)->intel; - - /* XXX: check relocs. - */ - if (intel_batchbuffer_space( intel->batch ) >= dwords * 4) { - /* XXX: Hmm, the driver can't really do much with this pointer: - */ - return (unsigned *)intel->batch->ptr; - } - else - return NULL; -} - -static void intel_i915_batch_dword( struct i915_winsys *sws, - unsigned dword ) -{ - struct intel_context *intel = intel_i915_winsys(sws)->intel; - intel_batchbuffer_emit_dword( intel->batch, dword ); + return &intel->batch->base; } static void intel_i915_batch_reloc( struct i915_winsys *sws, @@ -106,22 +89,13 @@ static void intel_i915_batch_reloc( struct i915_winsys *sws, mask |= DRM_BO_FLAG_READ; } -#if 0 /* JB old */ - intel_batchbuffer_emit_reloc( intel->batch, - dri_bo( buf ), - flags, mask, - delta ); -#else /* new */ intel_offset_relocation( intel->batch, delta, dri_bo( buf ), flags, mask ); -#endif } - - static void intel_i915_batch_flush( struct i915_winsys *sws, struct pipe_fence_handle **fence ) { @@ -166,8 +140,7 @@ intel_create_i915simple( struct intel_context *intel, /* Fill in this struct with callbacks that i915simple will need to * communicate with the window system, buffer manager, etc. */ - iws->winsys.batch_start = intel_i915_batch_start; - iws->winsys.batch_dword = intel_i915_batch_dword; + iws->winsys.batch_get = intel_i915_batch_get; iws->winsys.batch_reloc = intel_i915_batch_reloc; iws->winsys.batch_flush = intel_i915_batch_flush; iws->pws = winsys; -- cgit v1.2.3 From 6a3aab1983d999a3a9eccbeaab88195820abf467 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Mon, 2 Jun 2008 17:23:30 +0200 Subject: i915: Removed useless defines --- src/gallium/drivers/i915simple/i915_batch.h | 4 ---- src/gallium/drivers/i915simple/i915_blit.c | 4 ---- src/gallium/drivers/i915simple/i915_flush.c | 1 - 3 files changed, 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h index 9379d1fb5de..987d0b4d284 100644 --- a/src/gallium/drivers/i915simple/i915_batch.h +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -100,8 +100,6 @@ i915_batchbuffer_flush( struct i915_batchbuffer *batch, batch->winsys->batch_flush( batch->winsys, fence ); } -#define BATCH_LOCALS - #define BEGIN_BATCH( dwords, relocs ) \ (i915_batchbuffer_check( i915->batch, dwords, relocs )) @@ -111,8 +109,6 @@ i915_batchbuffer_flush( struct i915_batchbuffer *batch, #define OUT_RELOC( buf, flags, delta ) \ i915_batchbuffer_reloc( i915->batch, buf, flags, delta ) -#define ADVANCE_BATCH() - #define FLUSH_BATCH(fence) do { \ if (0) i915_dump_batchbuffer( i915 ); \ i915->winsys->batch_flush( i915->winsys, fence ); \ diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c index 24449e3fb33..b500ff36a8b 100644 --- a/src/gallium/drivers/i915simple/i915_blit.c +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -45,7 +45,6 @@ i915_fill_blit(struct i915_context *i915, unsigned color) { unsigned BR13, CMD; - BATCH_LOCALS; dst_pitch *= (short) cpp; @@ -79,7 +78,6 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(((y + h) << 16) | (x + w)); OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); OUT_BATCH(color); - ADVANCE_BATCH(); } @@ -100,7 +98,6 @@ i915_copy_blit( struct i915_context *i915, unsigned CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; - BATCH_LOCALS; I915_DBG(i915, @@ -156,7 +153,6 @@ i915_copy_blit( struct i915_context *i915, OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); OUT_RELOC(src_buffer, I915_BUFFER_ACCESS_READ, src_offset); - ADVANCE_BATCH(); } diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c index 4c4718d68e1..472e0ab7740 100644 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -62,7 +62,6 @@ static void i915_flush( struct pipe_context *pipe, assert(BEGIN_BATCH(1, 0)); } OUT_BATCH( flush ); - ADVANCE_BATCH(); } /* If there are no flags, just flush pending commands to hardware: -- cgit v1.2.3 From afd635a95056abc0909ebd1503131660d6e78cf6 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Tue, 3 Jun 2008 12:35:01 +0200 Subject: i915: Fix compile on linux-dri-debug --- src/gallium/drivers/i915simple/i915_batch.h | 2 -- src/gallium/drivers/i915simple/i915_blit.c | 1 + src/gallium/winsys/dri/intel/intel_batchbuffer.h | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h index 987d0b4d284..45bf4f4028f 100644 --- a/src/gallium/drivers/i915simple/i915_batch.h +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -29,7 +29,6 @@ #define I915_BATCH_H #include "i915_winsys.h" -#include "i915_debug.h" struct i915_batchbuffer { @@ -110,7 +109,6 @@ i915_batchbuffer_flush( struct i915_batchbuffer *batch, i915_batchbuffer_reloc( i915->batch, buf, flags, delta ) #define FLUSH_BATCH(fence) do { \ - if (0) i915_dump_batchbuffer( i915 ); \ i915->winsys->batch_flush( i915->winsys, fence ); \ i915->hardware_dirty = ~0; \ } while (0) diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c index b500ff36a8b..22f91fab926 100644 --- a/src/gallium/drivers/i915simple/i915_blit.c +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -31,6 +31,7 @@ #include "i915_blit.h" #include "i915_reg.h" #include "i915_batch.h" +#include "i915_debug.h" #define FILE_DEBUG_FLAG DEBUG_BLIT diff --git a/src/gallium/winsys/dri/intel/intel_batchbuffer.h b/src/gallium/winsys/dri/intel/intel_batchbuffer.h index dcb2121ddfd..abb7a624f5c 100644 --- a/src/gallium/winsys/dri/intel/intel_batchbuffer.h +++ b/src/gallium/winsys/dri/intel/intel_batchbuffer.h @@ -88,7 +88,7 @@ intel_batchbuffer_space(struct intel_batchbuffer *batch) static INLINE void intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) { - assert(batch->map); + assert(batch->base.map); assert(intel_batchbuffer_space(batch) >= 4); *(GLuint *) (batch->base.ptr) = dword; batch->base.ptr += 4; @@ -100,7 +100,7 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, { struct _DriFenceObject *fence; - assert(sz < batch->size - 8); + assert(sz < batch->base.size - 8); if (intel_batchbuffer_space(batch) < sz || (batch->flags != 0 && flags != 0 && batch->flags != flags)) { fence = intel_batchbuffer_flush(batch); -- cgit v1.2.3 From cfc23bc54c8cae2615d447bc199ff87ef7e9298e Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Mon, 9 Jun 2008 16:17:35 +0200 Subject: i915: Disable color buffer writes if no color buffer is attached --- src/gallium/drivers/i915simple/i915_state_immediate.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c index 704ea4d838c..2501f2d7cb8 100644 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -156,8 +156,12 @@ const struct i915_tracked_state i915_upload_S5 = { */ static void upload_S6( struct i915_context *i915 ) { - unsigned LIS6 = (S6_COLOR_WRITE_ENABLE | - (2 << S6_TRISTRIP_PV_SHIFT)); + unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT); + + /* I915_NEW_FRAMEBUFFER + */ + if (i915->framebuffer.cbufs[0]) + LIS6 |= S6_COLOR_WRITE_ENABLE; /* I915_NEW_BLEND */ @@ -174,7 +178,7 @@ static void upload_S6( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S6 = { - I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER, upload_S6 }; -- cgit v1.2.3 From a70684bf256c3d5bc3a729bf9e9cf1a64cb2064a Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 10 Jun 2008 08:32:52 +0900 Subject: gallium: Deprecate GETENV. Replace by debug_get_bool_option. debug_get_bool_option will interpret "n", "no", "0", "f", or "false" as FALSE; and everything else as TRUE. The default value (used when the variable is not set) is received as a parameter. --- src/gallium/auxiliary/draw/draw_pt.c | 4 ++-- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/softpipe/sp_context.c | 4 ++-- src/gallium/include/pipe/p_util.h | 2 -- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index f0d7b51ad70..9140faeea9d 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -119,8 +119,8 @@ draw_pt_arrays(struct draw_context *draw, boolean draw_pt_init( struct draw_context *draw ) { - draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; - draw->pt.no_fse = GETENV("DRAW_NO_FSE") != NULL; + draw->pt.test_fse = debug_get_bool_option("DRAW_FSE", FALSE); + draw->pt.no_fse = debug_get_bool_option("DRAW_NO_FSE", FALSE); draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 243b4a68522..4c01b8d5b17 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -161,7 +161,7 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, */ i915->draw = draw_create(); assert(i915->draw); - if (!GETENV("I915_NO_VBUF")) { + if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); } else { diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 1e0106b86c7..6e14f684f12 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -129,12 +129,12 @@ softpipe_create( struct pipe_screen *screen, uint i; #ifdef PIPE_ARCH_X86 - softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; + softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE ); #else softpipe->use_sse = FALSE; #endif - softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; + softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); softpipe->pipe.winsys = pipe_winsys; softpipe->pipe.screen = screen; diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 5547e578333..8b3003bcefb 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -140,8 +140,6 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) -#define GETENV( X ) debug_get_option( X, NULL ) - /** * Return memory on given byte alignment -- cgit v1.2.3 From 7d3bab537c7559df7bdc7a4fae42f8218185b9f7 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 10 Jun 2008 08:52:10 +0900 Subject: softpipe: Replace GETENV by debug_get_bool_option. --- src/gallium/drivers/softpipe/sp_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 6e14f684f12..626c3a9d4e1 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -225,10 +225,10 @@ softpipe_create( struct pipe_screen *screen, if (!softpipe->setup) goto fail; - if (GETENV( "SP_NO_RAST" ) != NULL) + if (debug_get_bool_option( "SP_NO_RAST", FALSE )) softpipe->no_rast = TRUE; - if (GETENV( "SP_NO_VBUF" ) != NULL) { + if (debug_get_bool_option( "SP_NO_VBUF", FALSE )) { /* Deprecated path -- vbuf is the intended interface to the draw module: */ draw_set_rasterize_stage(softpipe->draw, softpipe->setup); -- cgit v1.2.3 From e9b8df69b40c55953e1b3503690b54f993773223 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Fri, 13 Jun 2008 12:21:58 +0200 Subject: i915: Support all primtive types in vbuf path --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 44 +++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index adfb16fbc5f..c7a31fdb70b 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -163,6 +163,10 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, i915_render->hwprim = PRIM3D_LINELIST; i915_render->fallback = 0; return TRUE; + case PIPE_PRIM_LINE_LOOP: + i915_render->hwprim = PRIM3D_LINELIST; + i915_render->fallback = PIPE_PRIM_LINE_LOOP; + return TRUE; case PIPE_PRIM_LINE_STRIP: i915_render->hwprim = PRIM3D_LINESTRIP; i915_render->fallback = 0; @@ -175,6 +179,10 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, i915_render->hwprim = PRIM3D_TRISTRIP; i915_render->fallback = 0; return TRUE; + case PIPE_PRIM_TRIANGLE_FAN: + i915_render->hwprim = PRIM3D_TRIFAN; + i915_render->fallback = 0; + return TRUE; case PIPE_PRIM_QUADS: i915_render->hwprim = PRIM3D_TRILIST; i915_render->fallback = PIPE_PRIM_QUADS; @@ -183,7 +191,12 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, i915_render->hwprim = PRIM3D_TRILIST; i915_render->fallback = PIPE_PRIM_QUAD_STRIP; return TRUE; + case PIPE_PRIM_POLYGON: + i915_render->hwprim = PRIM3D_POLY; + i915_render->fallback = 0; + return TRUE; default: + assert((int)"Error unkown primtive type" & 0); /* Actually, can handle a lot more just fine... Fixme. */ return FALSE; @@ -211,6 +224,13 @@ draw_arrays_generate_indices( struct vbuf_render *render, if (i < end) OUT_BATCH( i ); break; + case PIPE_PRIM_LINE_LOOP: + if (nr >= 2) { + for (i = start + 1; i < end; i++) + OUT_BATCH( (i-0) | (i+0) << 16 ); + OUT_BATCH( (i-0) | ( 0) << 16 ); + } + break; case PIPE_PRIM_QUADS: for (i = start; i + 3 < end; i += 4) { OUT_BATCH( (i+0) | (i+1) << 16 ); @@ -236,6 +256,11 @@ draw_arrays_calc_nr_indices( uint nr, unsigned type ) switch (type) { case 0: return nr; + case PIPE_PRIM_LINE_LOOP: + if (nr >= 2) + return nr * 2; + else + return 0; case PIPE_PRIM_QUADS: return (nr / 4) * 6; case PIPE_PRIM_QUAD_STRIP: @@ -262,6 +287,8 @@ draw_arrays_fallback( struct vbuf_render *render, i915_emit_hardware_state( i915 ); nr_indices = draw_arrays_calc_nr_indices( nr, i915_render->fallback ); + if (!nr_indices) + return; if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { FLUSH_BATCH(NULL); @@ -328,6 +355,13 @@ draw_generate_indices( struct vbuf_render *render, OUT_BATCH( indices[i] ); } break; + case PIPE_PRIM_LINE_LOOP: + if (nr_indices >= 2) { + for (i = 1; i < nr_indices; i++) + OUT_BATCH( indices[i-0] | indices[i+0] << 16 ); + OUT_BATCH( indices[i-0] | indices[0] << 16 ); + } + break; case PIPE_PRIM_QUADS: for (i = 0; i + 3 < nr_indices; i += 4) { OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); @@ -354,6 +388,11 @@ draw_calc_nr_indices( uint nr_indices, unsigned type ) switch (type) { case 0: return nr_indices; + case PIPE_PRIM_LINE_LOOP: + if (nr_indices >= 2) + return nr_indices * 2; + else + return 0; case PIPE_PRIM_QUADS: return (nr_indices / 4) * 6; case PIPE_PRIM_QUAD_STRIP: @@ -374,9 +413,10 @@ i915_vbuf_render_draw( struct vbuf_render *render, unsigned save_nr_indices; save_nr_indices = nr_indices; - nr_indices = draw_calc_nr_indices( nr_indices, i915_render->fallback ); - assert(nr_indices); + nr_indices = draw_calc_nr_indices( nr_indices, i915_render->fallback ); + if (!nr_indices) + return; if (i915->dirty) i915_update_derived( i915 ); -- cgit v1.2.3 From de35bf5bdca83f05dc919bf175588ce749009dfc Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Fri, 13 Jun 2008 14:11:20 +0200 Subject: i915: Messed up lineloop now works --- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index c7a31fdb70b..aef3682bbfc 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -228,7 +228,7 @@ draw_arrays_generate_indices( struct vbuf_render *render, if (nr >= 2) { for (i = start + 1; i < end; i++) OUT_BATCH( (i-0) | (i+0) << 16 ); - OUT_BATCH( (i-0) | ( 0) << 16 ); + OUT_BATCH( (i-0) | ( start) << 16 ); } break; case PIPE_PRIM_QUADS: @@ -358,8 +358,8 @@ draw_generate_indices( struct vbuf_render *render, case PIPE_PRIM_LINE_LOOP: if (nr_indices >= 2) { for (i = 1; i < nr_indices; i++) - OUT_BATCH( indices[i-0] | indices[i+0] << 16 ); - OUT_BATCH( indices[i-0] | indices[0] << 16 ); + OUT_BATCH( indices[i-1] | indices[i] << 16 ); + OUT_BATCH( indices[i-1] | indices[0] << 16 ); } break; case PIPE_PRIM_QUADS: -- cgit v1.2.3 From c341094921da7c4ff30f81706279ead39cb3b812 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Fri, 13 Jun 2008 19:03:51 +0200 Subject: i915: Fix most of the clear problems trivial/clear-undefined still fails --- src/gallium/drivers/i915simple/i915_clear.c | 1 + src/gallium/drivers/i915simple/i915_texture.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_clear.c b/src/gallium/drivers/i915simple/i915_clear.c index cde69daacc0..8a2d3ca43f1 100644 --- a/src/gallium/drivers/i915simple/i915_clear.c +++ b/src/gallium/drivers/i915simple/i915_clear.c @@ -44,4 +44,5 @@ i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_DEFINED; } diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 16354dce509..7a1665d48b2 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -635,6 +635,7 @@ i915_get_tex_surface(struct pipe_screen *screen, ps->pitch = tex->pitch; ps->offset = offset; ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; } return ps; } -- cgit v1.2.3 From d1397fd779ce63416bb59662a3c96f9c0a6ef7f5 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Fri, 13 Jun 2008 19:07:29 +0200 Subject: i915: Fix offsets not being used for surface targets --- src/gallium/drivers/i915simple/i915_state_emit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index de0c1a787ab..480b0b54c4d 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -222,7 +222,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_RELOC(cbuf_surface->buffer, I915_BUFFER_ACCESS_WRITE, - 0); + cbuf_surface->offset); } /* What happens if no zbuf?? @@ -238,7 +238,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_RELOC(depth_surface->buffer, I915_BUFFER_ACCESS_WRITE, - 0); + depth_surface->offset); } { -- cgit v1.2.3 From 3da77b33bb0093ff27c16833ed93a3a114c3e95e Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Fri, 13 Jun 2008 20:46:14 +0200 Subject: i915: Improved the not used tile code --- src/gallium/drivers/i915simple/i915_state_emit.c | 21 ++++++++++----- src/gallium/drivers/i915simple/i915_texture.c | 33 +++++++++++++++--------- 2 files changed, 36 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 480b0b54c4d..bc801a82f03 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -211,14 +211,18 @@ i915_emit_hardware_state(struct i915_context *i915 ) struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; if (cbuf_surface) { - unsigned pitch = (cbuf_surface->pitch * cbuf_surface->cpp); + unsigned cpitch = (cbuf_surface->pitch * cbuf_surface->cpp); + unsigned ctile = BUF_3D_USE_FENCE; +#if 0 + if (!((cpitch - 1) & cpitch) && cpitch >= 512) + ctile = BUF_3D_TILED_SURFACE; +#endif OUT_BATCH(_3DSTATE_BUF_INFO_CMD); OUT_BATCH(BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(pitch) | /* pitch in bytes */ -// BUF_3D_TILED_SURFACE); /* JB: Used to force tileing */ - BUF_3D_USE_FENCE); + BUF_3D_PITCH(cpitch) | /* pitch in bytes */ + ctile); OUT_RELOC(cbuf_surface->buffer, I915_BUFFER_ACCESS_WRITE, @@ -229,12 +233,17 @@ i915_emit_hardware_state(struct i915_context *i915 ) */ if (depth_surface) { unsigned zpitch = (depth_surface->pitch * depth_surface->cpp); - + unsigned ztile = BUF_3D_USE_FENCE; +#if 0 + if (!((zpitch - 1) & zpitch) && zpitch >= 512) + ztile = BUF_3D_TILED_SURFACE; +#endif + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); OUT_BATCH(BUF_3D_ID_DEPTH | BUF_3D_PITCH(zpitch) | /* pitch in bytes */ - BUF_3D_USE_FENCE); + ztile); OUT_RELOC(depth_surface->buffer, I915_BUFFER_ACCESS_WRITE, diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 7a1665d48b2..d591b09e3c8 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -104,16 +104,14 @@ i915_miptree_set_image_offset(struct i915_texture *tex, */ } -#if 0 static unsigned power_of_two(unsigned x) { - int value = 1; + unsigned value = 1; while (value <= x) value = value << 1; return value; } -#endif static unsigned round_up(unsigned n, unsigned multiple) @@ -125,8 +123,7 @@ round_up(unsigned n, unsigned multiple) * Special case to deal with display targets. */ static boolean -i915_displaytarget_layout(struct pipe_screen *screen, - struct i915_texture *tex) +i915_displaytarget_layout(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; @@ -139,13 +136,19 @@ i915_displaytarget_layout(struct pipe_screen *screen, 1 ); i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); -#if 0 - tex->pitch = MAX2(512, power_of_two(tex->base.width[0] * pt->cpp)) / pt->cpp; - tex->total_height = round_up(tex->base.height[0], 8); -#else - tex->pitch = round_up(tex->base.width[0], 64 / pt->cpp); - tex->total_height = tex->base.height[0]; -#endif + if (tex->base.width[0] >= 128) { + tex->pitch = power_of_two(tex->base.width[0] * pt->cpp) / pt->cpp; + tex->total_height = round_up(tex->base.height[0], 8); + } else { + tex->pitch = round_up(tex->base.width[0], 64 / pt->cpp); + tex->total_height = tex->base.height[0]; + } + +/* + printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + tex->base.width[0], tex->base.height[0], pt->cpp, + tex->pitch, tex->total_height, tex->pitch * tex->total_height * 4); +*/ return 1; } @@ -161,6 +164,12 @@ i945_miptree_layout_2d( struct i915_texture *tex ) unsigned width = pt->width[0]; unsigned height = pt->height[0]; +#if 0 /* used for tiled display targets */ + if (pt->last_level == 0 && pt->cpp == 4) + if (i915_displaytarget_layout(tex)) + return; +#endif + tex->pitch = pt->width[0]; /* May need to adjust pitch to accomodate the placement of -- cgit v1.2.3 From d9d1e39d95fef4a8da15147956ff0c3e0a188b5b Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Tue, 17 Jun 2008 15:47:01 +0200 Subject: i915: Max and Min lod now works --- src/gallium/drivers/i915simple/i915_context.h | 2 + src/gallium/drivers/i915simple/i915_state.c | 10 ++- .../drivers/i915simple/i915_state_sampler.c | 84 ++++++++++++++++++---- 3 files changed, 79 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 2ee0381648d..892a88fd2cc 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -178,6 +178,8 @@ struct i915_rasterizer_state { struct i915_sampler_state { unsigned state[3]; const struct pipe_sampler_state *templ; + unsigned minlod; + unsigned maxlod; }; diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 39d5d5e9d65..dbb33f26955 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -251,11 +251,17 @@ i915_create_sampler_state(struct pipe_context *pipe, if (sampler->normalized_coords) cso->state[1] |= SS3_NORMALIZED_COORDS; - if (0) /* XXX not tested yet */ { int minlod = (int) (16.0 * sampler->min_lod); + int maxlod = (int) (16.0 * sampler->max_lod); minlod = CLAMP(minlod, 0, 16 * 11); - cso->state[1] |= (minlod << SS3_MIN_LOD_SHIFT); + maxlod = CLAMP(maxlod, 0, 16 * 11); + + if (minlod > maxlod) + maxlod = minlod; + + cso->minlod = minlod; + cso->maxlod = maxlod; } { diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 982eec4a1b0..fbdc1a3b68c 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -35,6 +35,35 @@ #include "i915_state.h" +/* + * A note about min_lod & max_lod. + * + * There is a circular dependancy between the sampler state + * and the map state to be submitted to hw. + * + * Two condition must be meet: + * min_lod =< max_lod == true + * max_lod =< last_level == true + * + * + * This is all fine and dandy if it where for the fact that max_lod + * is set on the map state instead of the sampler state. That is + * the max_lod we submit on map is: + * max_lod = MIN2(last_level, max_lod); + * + * So we need to update the map state when we change samplers and + * we need to be change the sampler state when map state is changed. + * The first part is done by calling i915_update_texture in + * i915_update_samplers and the second part is done else where in + * code tracking the state changes. + */ + +static void +i915_update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]); /** * Compute i915 texture sampling state. * @@ -50,6 +79,7 @@ static void update_sampler(struct i915_context *i915, unsigned state[3] ) { const struct pipe_texture *pt = &tex->base; + unsigned minlod, lastlod; /* Need to do this after updating the maps, which call the * intel_finalize_mipmap_tree and hence can update firstLevel: @@ -95,6 +125,15 @@ static void update_sampler(struct i915_context *i915, } #endif + /* See note at the top of file */ + minlod = sampler->minlod; + lastlod = pt->last_level << 4; + + if (lastlod < minlod) { + minlod = lastlod; + } + + state[1] |= (sampler->minlod << SS3_MIN_LOD_SHIFT); state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); } @@ -112,18 +151,23 @@ void i915_update_samplers( struct i915_context *i915 ) /* could also examine the fragment program? */ if (i915->texture[unit]) { update_sampler( i915, - unit, - i915->sampler[unit], /* sampler state */ - i915->texture[unit], /* texture */ - i915->current.sampler[unit] /* the result */ - ); - - i915->current.sampler_enable_nr++; - i915->current.sampler_enable_flags |= (1 << unit); + unit, + i915->sampler[unit], /* sampler state */ + i915->texture[unit], /* texture */ + i915->current.sampler[unit] /* the result */ + ); + i915_update_texture( i915, + unit, + i915->texture[unit], /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit] ); + + i915->current.sampler_enable_nr++; + i915->current.sampler_enable_flags |= (1 << unit); } } - i915->hardware_dirty |= I915_HW_SAMPLER; + i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP; } @@ -179,14 +223,17 @@ translate_texture_format(enum pipe_format pipeFormat) static void -i915_update_texture(struct i915_context *i915, uint unit, +i915_update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, uint state[6]) { - const struct i915_texture *tex = i915->texture[unit]; const struct pipe_texture *pt = &tex->base; uint format, pitch; const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; const uint num_levels = pt->last_level; + unsigned max_lod = num_levels * 4; assert(tex); assert(width); @@ -207,16 +254,19 @@ i915_update_texture(struct i915_context *i915, uint unit, | MS3_USE_FENCE_REGS); /* - * XXX sampler->max_lod should be used to program the MAX_LOD field below. - * Also, when min_filter != mag_filter and there's just one mipmap level, + * XXX When min_filter != mag_filter and there's just one mipmap level, * set max_lod = 1 to make sure i915 chooses between min/mag filtering. */ + /* See note at the top of file */ + if (max_lod > (sampler->maxlod >> 2)) + max_lod = sampler->maxlod >> 2; + /* MS4 state */ state[1] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK - | ((num_levels * 4) << MS4_MAX_LOD_SHIFT) + | ((max_lod) << MS4_MAX_LOD_SHIFT) | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); } @@ -231,7 +281,11 @@ i915_update_textures(struct i915_context *i915) /* determine unit enable/disable by looking for a bound texture */ /* could also examine the fragment program? */ if (i915->texture[unit]) { - i915_update_texture(i915, unit, i915->current.texbuffer[unit]); + i915_update_texture( i915, + unit, + i915->texture[unit], /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit] ); } } -- cgit v1.2.3 From 081c05605f1c308c35fcf4168aac09fbf3c0a108 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Wed, 18 Jun 2008 01:45:21 +0200 Subject: i915: Fix for s8_z24 textures not being shown --- src/gallium/drivers/i915simple/i915_state_sampler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index fbdc1a3b68c..24440843f33 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -212,7 +212,7 @@ translate_texture_format(enum pipe_format pipeFormat) return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); #endif case PIPE_FORMAT_S8Z24_UNORM: - return (MAPSURF_32BIT | MT_32BIT_xL824); + return (MAPSURF_32BIT | MT_32BIT_xI824); default: debug_printf("i915: translate_texture_format() bad image format %x\n", pipeFormat); -- cgit v1.2.3 From 6cbc2734d19567afeaa6c5d93149ef87b08cd167 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Thu, 19 Jun 2008 14:42:17 +0200 Subject: i915: Refractored and clean up i915_texture.c --- src/gallium/drivers/i915simple/i915_texture.c | 269 ++++++++++++++------------ 1 file changed, 141 insertions(+), 128 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index d591b09e3c8..09518fafe76 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -42,12 +42,58 @@ #include "i915_debug.h" #include "i915_screen.h" +/* + * Helper function and arrays + */ + +/** + * Initial offset for Cube map. + */ +static const int initial_offsets[6][2] = { + {0, 0}, + {0, 2}, + {1, 0}, + {1, 2}, + {1, 1}, + {1, 3} +}; + +/** + * Step offsets for Cube map. + */ +static const int step_offsets[6][2] = { + {0, 2}, + {0, 2}, + {-1, 2}, + {-1, 2}, + {-1, 1}, + {-1, 1} +}; static unsigned minify( unsigned d ) { return MAX2(1, d>>1); } +static unsigned +power_of_two(unsigned x) +{ + unsigned value = 1; + while (value <= x) + value = value << 1; + return value; +} + +static unsigned +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + + +/* + * More advanced helper funcs + */ static void @@ -86,7 +132,6 @@ i915_miptree_set_level_info(struct i915_texture *tex, tex->image_offset[level][0] = 0; } - static void i915_miptree_set_image_offset(struct i915_texture *tex, unsigned level, unsigned img, unsigned x, unsigned y) @@ -97,27 +142,17 @@ i915_miptree_set_image_offset(struct i915_texture *tex, assert(img < tex->nr_images[level]); tex->image_offset[level][img] = (x + y * tex->pitch); - /* - DBG("%s level %d img %d pos %d,%d image_offset %x\n", + printf("%s level %d img %d pos %d,%d image_offset %x\n", __FUNCTION__, level, img, x, y, tex->image_offset[level][img]); */ } -static unsigned -power_of_two(unsigned x) -{ - unsigned value = 1; - while (value <= x) - value = value << 1; - return value; -} -static unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} +/* + * Layout functions + */ + /** * Special case to deal with display targets. @@ -144,11 +179,11 @@ i915_displaytarget_layout(struct i915_texture *tex) tex->total_height = tex->base.height[0]; } -/* + /* printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, tex->base.width[0], tex->base.height[0], pt->cpp, tex->pitch, tex->total_height, tex->pitch * tex->total_height * 4); -*/ + */ return 1; } @@ -221,25 +256,93 @@ i945_miptree_layout_2d( struct i915_texture *tex ) } } +static void +i945_miptree_layout_cube(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; -static const int initial_offsets[6][2] = { - {0, 0}, - {0, 2}, - {1, 0}, - {1, 2}, - {1, 1}, - {1, 3} -}; + const unsigned dim = pt->width[0]; + unsigned face; + unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; -static const int step_offsets[6][2] = { - {0, 2}, - {0, 2}, - {-1, 2}, - {-1, 2}, - {-1, 1}, - {-1, 1} -}; + assert(lvlWidth == lvlHeight); /* cubemap images are square */ + + /* Depending on the size of the largest images, pitch can be + * determined either by the old-style packing of cubemap faces, + * or the final row of 4x4, 2x2 and 1x1 faces below this. + */ + if (dim > 32) + tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; + else + tex->pitch = 14 * 8; + + tex->total_height = dim * 4 + 4; + + /* Set all the levels to effectively occupy the whole rectangular region. + */ + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, 0, 0, lvlWidth, lvlHeight, 1); + lvlWidth /= 2; + lvlHeight /= 2; + } + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * dim; + unsigned y = initial_offsets[face][1] * dim; + unsigned d = dim; + + if (dim == 4 && face >= 4) { + y = tex->total_height - 4; + x = (face - 4) * 8; + } + else if (dim < 4 && (face > 0)) { + y = tex->total_height - 4; + x = face * 8; + } + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + + d >>= 1; + + switch (d) { + case 4: + switch (face) { + case PIPE_TEX_FACE_POS_X: + case PIPE_TEX_FACE_NEG_X: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + case PIPE_TEX_FACE_POS_Y: + case PIPE_TEX_FACE_NEG_Y: + y += 12; + x -= 8; + break; + case PIPE_TEX_FACE_POS_Z: + case PIPE_TEX_FACE_NEG_Z: + y = tex->total_height - 4; + x = (face - 4) * 8; + break; + } + + case 2: + y = tex->total_height - 4; + x = 16 + face * 8; + break; + + case 1: + x += 48; + break; + + default: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + } + } + } +} static boolean i915_miptree_layout(struct i915_texture * tex) @@ -372,99 +475,15 @@ i945_miptree_layout(struct i915_texture * tex) unsigned level; switch (pt->target) { - case PIPE_TEXTURE_CUBE:{ - const unsigned dim = pt->width[0]; - unsigned face; - unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; - - assert(lvlWidth == lvlHeight); /* cubemap images are square */ - - /* Depending on the size of the largest images, pitch can be - * determined either by the old-style packing of cubemap faces, - * or the final row of 4x4, 2x2 and 1x1 faces below this. - */ - if (dim > 32) - tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; - else - tex->pitch = 14 * 8; - - tex->total_height = dim * 4 + 4; - - /* Set all the levels to effectively occupy the whole rectangular region. - */ - for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_level_info(tex, level, 6, - 0, 0, - lvlWidth, lvlHeight, 1); - lvlWidth /= 2; - lvlHeight /= 2; - } - - - for (face = 0; face < 6; face++) { - unsigned x = initial_offsets[face][0] * dim; - unsigned y = initial_offsets[face][1] * dim; - unsigned d = dim; - - if (dim == 4 && face >= 4) { - y = tex->total_height - 4; - x = (face - 4) * 8; - } - else if (dim < 4 && (face > 0)) { - y = tex->total_height - 4; - x = face * 8; - } - - for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_image_offset(tex, level, face, x, y); - - d >>= 1; - - switch (d) { - case 4: - switch (face) { - case PIPE_TEX_FACE_POS_X: - case PIPE_TEX_FACE_NEG_X: - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; - break; - case PIPE_TEX_FACE_POS_Y: - case PIPE_TEX_FACE_NEG_Y: - y += 12; - x -= 8; - break; - case PIPE_TEX_FACE_POS_Z: - case PIPE_TEX_FACE_NEG_Z: - y = tex->total_height - 4; - x = (face - 4) * 8; - break; - } - - case 2: - y = tex->total_height - 4; - x = 16 + face * 8; - break; - - case 1: - x += 48; - break; - - default: - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; - break; - } - } - } - break; - } + case PIPE_TEXTURE_CUBE: + i945_miptree_layout_cube(tex); + break; case PIPE_TEXTURE_3D:{ unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; unsigned pack_x_pitch, pack_x_nr; unsigned pack_y_pitch; - unsigned level; tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; tex->total_height = 0; @@ -602,11 +621,6 @@ i915_texture_release(struct pipe_screen *screen, *pt = NULL; } - - -/* - * XXX note: same as code in sp_surface.c - */ static struct pipe_surface * i915_get_tex_surface(struct pipe_screen *screen, struct pipe_texture *pt, @@ -631,7 +645,7 @@ i915_get_tex_surface(struct pipe_screen *screen, assert(zslice == 0); } - ps = CALLOC_STRUCT(pipe_surface);//ws->surface_alloc(ws); + ps = CALLOC_STRUCT(pipe_surface); if (ps) { ps->refcount = 1; ps->winsys = ws; @@ -649,7 +663,6 @@ i915_get_tex_surface(struct pipe_screen *screen, return ps; } - void i915_init_texture_functions(struct i915_context *i915) { -- cgit v1.2.3 From 8d0329fb6af657a53cb010a3d7a8f4282e2715b8 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Thu, 19 Jun 2008 17:09:09 +0200 Subject: i915: Fixed cubemap layouts Apprently we shouldn't do all the advanced layout operation for none compressed formats. The compressed code was also broken, its currently disabled, but should be fixed once i915simple starts to support compressed formats. --- src/gallium/drivers/i915simple/i915_texture.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 09518fafe76..9cd32e39191 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -142,6 +142,7 @@ i915_miptree_set_image_offset(struct i915_texture *tex, assert(img < tex->nr_images[level]); tex->image_offset[level][img] = (x + y * tex->pitch); + /* printf("%s level %d img %d pos %d,%d image_offset %x\n", __FUNCTION__, level, img, x, y, tex->image_offset[level][img]); @@ -266,9 +267,17 @@ i945_miptree_layout_cube(struct i915_texture *tex) unsigned face; unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; + /* + printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); + */ + assert(lvlWidth == lvlHeight); /* cubemap images are square */ - /* Depending on the size of the largest images, pitch can be + /* + * XXX Should only be used for compressed formats. But lets + * keep this code active just in case. + * + * Depending on the size of the largest images, pitch can be * determined either by the old-style packing of cubemap faces, * or the final row of 4x4, 2x2 and 1x1 faces below this. */ @@ -277,6 +286,9 @@ i945_miptree_layout_cube(struct i915_texture *tex) else tex->pitch = 14 * 8; + /* + * XXX The 4 is only needed for compressed formats. See above. + */ tex->total_height = dim * 4 + 4; /* Set all the levels to effectively occupy the whole rectangular region. @@ -292,6 +304,7 @@ i945_miptree_layout_cube(struct i915_texture *tex) unsigned y = initial_offsets[face][1] * dim; unsigned d = dim; +#if 0 /* Fix and enable this code for compressed formats */ if (dim == 4 && face >= 4) { y = tex->total_height - 4; x = (face - 4) * 8; @@ -300,12 +313,14 @@ i945_miptree_layout_cube(struct i915_texture *tex) y = tex->total_height - 4; x = face * 8; } +#endif for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_image_offset(tex, level, face, x, y); d >>= 1; +#if 0 /* Fix and enable this code for compressed formats */ switch (d) { case 4: switch (face) { @@ -325,7 +340,6 @@ i945_miptree_layout_cube(struct i915_texture *tex) x = (face - 4) * 8; break; } - case 2: y = tex->total_height - 4; x = 16 + face * 8; @@ -334,12 +348,14 @@ i945_miptree_layout_cube(struct i915_texture *tex) case 1: x += 48; break; - default: +#endif x += step_offsets[face][0] * d; y += step_offsets[face][1] * d; +#if 0 break; } +#endif } } } -- cgit v1.2.3 From 5e1d657d50c247d903b865572bd3e74048e8a8f1 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 20 Jun 2008 22:19:22 +0200 Subject: nv30: Add separate nv30 state stuff for fb, based on nv40 one, need to use it now --- src/gallium/drivers/nv30/Makefile | 1 + src/gallium/drivers/nv30/nv30_context.h | 18 ++++ src/gallium/drivers/nv30/nv30_state_fb.c | 151 +++++++++++++++++++++++++++++++ 3 files changed, 170 insertions(+) create mode 100644 src/gallium/drivers/nv30/nv30_state_fb.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 3f80fb87c9b..ec11de76446 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -14,6 +14,7 @@ DRIVER_SOURCES = \ nv30_screen.c \ nv30_state.c \ nv30_state_emit.c \ + nv30_state_fb.c \ nv30_surface.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 333bd4875c6..9fbe66dc74e 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -76,6 +76,10 @@ enum nv30_state_index { #define NV30_NEW_ARRAYS (1 << 11) #define NV30_NEW_UCP (1 << 12) +struct nv30_state { + struct nouveau_stateobj *hw[NV30_STATE_MAX]; +}; + struct nv30_context { struct pipe_context pipe; @@ -85,6 +89,9 @@ struct nv30_context { struct draw_context *draw; + /* HW state derived from pipe states */ + struct nv30_state state; + uint32_t dirty; struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; @@ -93,6 +100,9 @@ struct nv30_context { unsigned fp_samplers; unsigned vp_samplers; + /* Context state */ + struct pipe_framebuffer_state framebuffer; + uint32_t rt_enable; struct pipe_buffer *rt[2]; struct pipe_buffer *zeta; @@ -132,6 +142,14 @@ nv30_context(struct pipe_context *pipe) return (struct nv30_context *)pipe; } +struct nv30_state_entry { + boolean (*validate)(struct nv30_context *nv30); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + extern void nv30_init_state_functions(struct nv30_context *nv30); extern void nv30_init_surface_functions(struct nv30_context *nv30); extern void nv30_init_query_functions(struct nv30_context *nv30); diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c new file mode 100644 index 00000000000..73c97e298ad --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -0,0 +1,151 @@ +#include "nv30_context.h" + +static boolean +nv30_state_framebuffer_validate(struct nv30_context *nv30) +{ + struct pipe_framebuffer_state *fb = &nv30->framebuffer; + struct pipe_surface *rt[4], *zeta = NULL; + uint32_t rt_enable, rt_format; + int i, colour_format = 0, zeta_format = 0; + struct nouveau_stateobj *so = so_new(64, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; + + rt_enable = 0; + for (i = 0; i < fb->num_cbufs; i++) { + if (colour_format) { + assert(colour_format == fb->cbufs[i]->format); + } else { + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); + rt[i] = fb->cbufs[i]; + } + } + + if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | NV34TCL_RT_ENABLE_COLOR2 | + NV34TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV34TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { + uint32_t pitch = rt[0]->pitch * rt[0]->cpp; + if (zeta) { + pitch |= (zeta->pitch * zeta->cpp)<<16; + } else { + pitch |= pitch<<16; + } + + so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR0, 1); + so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_COLOR0_PITCH, 2); + so_data (so, pitch); + so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { + so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR1, 1); + so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_COLOR1_OFFSET, 2); + so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_data (so, rt[1]->pitch * rt[1]->cpp); + } +/* + if (rt_enable & NV34TCL_RT_ENABLE_COLOR2) { + so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR2, 1); + so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_COLOR2_OFFSET, 1); + so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv30->screen->rankine, NV34TCL_COLOR2_PITCH, 1); + so_data (so, rt[2]->pitch * rt[2]->cpp); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR3) { + so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR3, 1); + so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_COLOR3_OFFSET, 1); + so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv30->screen->rankine, NV34TCL_COLOR3_PITCH, 1); + so_data (so, rt[3]->pitch * rt[3]->cpp); + } +*/ + if (zeta_format) { + so_method(so, nv30->screen->rankine, NV34TCL_DMA_ZETA, 1); + so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_ZETA_OFFSET, 1); + so_reloc (so, zeta->buffer, zeta->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + /*so_method(so, nv30->screen->rankine, NV34TCL_ZETA_PITCH, 1); + so_data (so, zeta->pitch * zeta->cpp);*/ + } + + so_method(so, nv30->screen->rankine, NV34TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, nv30->screen->rankine, NV34TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + so_method(so, nv30->screen->rankine, 0x1d88, 1); + so_data (so, (1 << 12) | h); + + so_ref(so, &nv30->state.hw[NV30_STATE_FB]); + return TRUE; +} + +struct nv30_state_entry nv30_state_framebuffer = { + .validate = nv30_state_framebuffer_validate, + .dirty = { + .pipe = NV30_NEW_FB, + .hw = NV30_STATE_FB + } +}; -- cgit v1.2.3 From f38bb109694f2879036c54c97c1c69ea2fecd6c8 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 20 Jun 2008 15:58:19 -0600 Subject: gallium: fix some surface usage bugs When a surface is created with GPU_WRITE that really means "GPU render" and that can involve reads (blending). Set surface usage to PIPE_BUFFER_USAGE_CPU_READ + WRITE. Fixes progs/demos/lodbias demo. Also, mark texture as 'modified' when mapped for writing so that the tile cache can know when to freshen a cached tile. Fixes glTexSubImage2D(). --- src/gallium/drivers/softpipe/sp_texture.c | 11 +++++++++-- src/gallium/drivers/softpipe/sp_texture.h | 2 ++ src/gallium/drivers/softpipe/sp_tile_cache.c | 10 ++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 1d7a1fffe4d..ef8c5bd6b0c 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -207,12 +207,19 @@ softpipe_get_tex_surface(struct pipe_screen *screen, * done with the CPU. Let's adjust the flags to take that into * account. */ - if (ps->usage & PIPE_BUFFER_USAGE_GPU_WRITE) - ps->usage |= PIPE_BUFFER_USAGE_CPU_WRITE; + if (ps->usage & PIPE_BUFFER_USAGE_GPU_WRITE) { + /* GPU_WRITE means "render" and that can involve reads (blending) */ + ps->usage |= PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_CPU_READ; + } if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ) ps->usage |= PIPE_BUFFER_USAGE_CPU_READ; + if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_WRITE)) { + /* Mark the surface as dirty. The tile cache will look for this. */ + spt->modified = TRUE; + } pipe_texture_reference(&ps->texture, pt); ps->face = face; diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 779a9d8fc97..0e1017632c9 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -47,6 +47,8 @@ struct softpipe_texture /* The data is held here: */ struct pipe_buffer *buffer; + + boolean modified; }; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 0e4c8c41eea..2d5d2b50f51 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -37,6 +37,7 @@ #include "util/p_tile.h" #include "sp_context.h" #include "sp_surface.h" +#include "sp_texture.h" #include "sp_tile_cache.h" #define NUM_ENTRIES 32 @@ -506,6 +507,15 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, face, level); struct softpipe_cached_tile *tile = tc->entries + pos; + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + if (spt->modified) { + /* texture was modified, force a cache reload */ + tile->x = -1; + spt->modified = FALSE; + } + } + if (tile_x != tile->x || tile_y != tile->y || z != tile->z || -- cgit v1.2.3 From be4259b06cbb2b4c1d8a2dacc19313a30fa909d8 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 20 Jun 2008 17:28:42 -0600 Subject: gallium: fix invalid call to draw_set_mapped_constant_buffer() We were indexing sp_constants[i] outside the loop so i was 2. Replace i with PIPE_SHADER_VERTEX. Also, replace magic '2' with PIPE_SHADER_TYPES in a few places. --- src/gallium/drivers/softpipe/sp_context.h | 2 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 62eabfb30e8..078886f93c9 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -69,7 +69,7 @@ struct softpipe_context { struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[2]; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index d4d5fa744fb..12b44a82118 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -47,7 +47,7 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) { struct pipe_winsys *ws = sp->pipe.winsys; uint i; - for (i = 0; i < 2; i++) { + for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i].size) sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); @@ -55,7 +55,7 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) draw_set_mapped_constant_buffer(sp->draw, sp->mapped_constants[PIPE_SHADER_VERTEX], - sp->constants[i].size); + sp->constants[PIPE_SHADER_VERTEX].size); } static void -- cgit v1.2.3 From 582b39ebb9f67e3b67a776be0961fe2e51ee46f7 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 21 Jun 2008 10:11:37 +0200 Subject: nv30: Update nouveau_class.h to get its proper defines instead of using nv40 ones --- src/gallium/drivers/nouveau/nouveau_class.h | 77 ++++++++++++++++++----------- src/gallium/drivers/nv30/nv30_state.c | 4 +- src/gallium/drivers/nv30/nv30_vbo.c | 34 ++++++------- 3 files changed, 66 insertions(+), 49 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 880afe6ce08..58c80ddcd24 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -4,7 +4,7 @@ ************************************************************************** - Copyright (C) 2006-2007 : + Copyright (C) 2006-2008 : Dmitry Baryshkov, Laurent Carlier, Matthieu Castet, @@ -1740,6 +1740,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000 +#define NV10_DX5_TEXTURED_TRIANGLE 0x00000094 + + + #define NV10TCL 0x00000056 #define NV10TCL_NOP 0x00000100 @@ -3871,13 +3875,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 #define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000 #define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 -#define NV34TCL_BLEND_FUNC_COLOR 0x0000031c -#define NV34TCL_BLEND_FUNC_EQUATION 0x00000320 -#define NV34TCL_BLEND_FUNC_EQUATION_FUNC_ADD 0x00008006 -#define NV34TCL_BLEND_FUNC_EQUATION_MIN 0x00008007 -#define NV34TCL_BLEND_FUNC_EQUATION_MAX 0x00008008 -#define NV34TCL_BLEND_FUNC_EQUATION_FUNC_SUBTRACT 0x0000800a -#define NV34TCL_BLEND_FUNC_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV34TCL_BLEND_COLOR 0x0000031c +#define NV34TCL_BLEND_EQUATION 0x00000320 +#define NV34TCL_BLEND_EQUATION_FUNC_ADD 0x00008006 +#define NV34TCL_BLEND_EQUATION_MIN 0x00008007 +#define NV34TCL_BLEND_EQUATION_MAX 0x00008008 +#define NV34TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a +#define NV34TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b #define NV34TCL_COLOR_MASK 0x00000324 #define NV34TCL_COLOR_MASK_B_SHIFT 0 #define NV34TCL_COLOR_MASK_B_MASK 0x000000ff @@ -4217,6 +4221,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008 #define NV34TCL_FRONT_MATERIAL_SHININESS(x) (0x00001400+((x)*4)) #define NV34TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006 +#define NV34TCL_ENABLED_LIGHTS 0x00001420 #define NV34TCL_FP_REG_CONTROL 0x00001450 #define NV34TCL_FP_REG_CONTROL_UNK1_SHIFT 16 #define NV34TCL_FP_REG_CONTROL_UNK1_MASK 0xffff0000 @@ -4234,12 +4239,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_POLYGON_STIPPLE_ENABLE 0x0000147c #define NV34TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) #define NV34TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 -#define NV34TCL_VERTEX_ATTR_3F_X(x) (0x00001500+((x)*16)) -#define NV34TCL_VERTEX_ATTR_3F_X__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_3F_Y(x) (0x00001504+((x)*16)) -#define NV34TCL_VERTEX_ATTR_3F_Y__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_3F_Z(x) (0x00001508+((x)*16)) -#define NV34TCL_VERTEX_ATTR_3F_Z__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_3F_X(x) (0x00001500+((x)*16)) +#define NV34TCL_VTX_ATTR_3F_X__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_3F_Y(x) (0x00001504+((x)*16)) +#define NV34TCL_VTX_ATTR_3F_Y__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_3F_Z(x) (0x00001508+((x)*16)) +#define NV34TCL_VTX_ATTR_3F_Z__SIZE 0x00000010 #define NV34TCL_VP_CLIP_PLANE_A(x) (0x00001600+((x)*16)) #define NV34TCL_VP_CLIP_PLANE_A__SIZE 0x00000006 #define NV34TCL_VP_CLIP_PLANE_B(x) (0x00001604+((x)*16)) @@ -4248,22 +4253,22 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_VP_CLIP_PLANE_C__SIZE 0x00000006 #define NV34TCL_VP_CLIP_PLANE_D(x) (0x0000160c+((x)*16)) #define NV34TCL_VP_CLIP_PLANE_D__SIZE 0x00000006 -#define NV34TCL_VERTEX_BUFFER_ADDRESS(x) (0x00001680+((x)*4)) -#define NV34TCL_VERTEX_BUFFER_ADDRESS__SIZE 0x00000010 -#define NV34TCL_VERTEX_BUFFER_ADDRESS_DMA1 (1 << 31) -#define NV34TCL_VERTEX_BUFFER_ADDRESS_OFFSET_SHIFT 0 -#define NV34TCL_VERTEX_BUFFER_ADDRESS_OFFSET_MASK 0x0fffffff -#define NV34TCL_VERTEX_ARRAY_FORMAT(x) (0x00001740+((x)*4)) -#define NV34TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010 -#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_SHIFT 0 -#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_MASK 0x0000000f -#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT 0x00000002 -#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_UBYTE 0x00000004 -#define NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_USHORT 0x00000005 -#define NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_SHIFT 4 -#define NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_MASK 0x000000f0 -#define NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT 8 -#define NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_MASK 0x0000ff00 +#define NV34TCL_VTXBUF_ADDRESS(x) (0x00001680+((x)*4)) +#define NV34TCL_VTXBUF_ADDRESS__SIZE 0x00000010 +#define NV34TCL_VTXBUF_ADDRESS_DMA1 (1 << 31) +#define NV34TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0 +#define NV34TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff +#define NV34TCL_VTXFMT(x) (0x00001740+((x)*4)) +#define NV34TCL_VTXFMT__SIZE 0x00000010 +#define NV34TCL_VTXFMT_TYPE_SHIFT 0 +#define NV34TCL_VTXFMT_TYPE_MASK 0x0000000f +#define NV34TCL_VTXFMT_TYPE_FLOAT 0x00000002 +#define NV34TCL_VTXFMT_TYPE_UBYTE 0x00000004 +#define NV34TCL_VTXFMT_TYPE_USHORT 0x00000005 +#define NV34TCL_VTXFMT_SIZE_SHIFT 4 +#define NV34TCL_VTXFMT_SIZE_MASK 0x000000f0 +#define NV34TCL_VTXFMT_STRIDE_SHIFT 8 +#define NV34TCL_VTXFMT_STRIDE_MASK 0x0000ff00 #define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0 #define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4 #define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8 @@ -4302,6 +4307,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24 #define NV34TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000 #define NV34TCL_VERTEX_DATA 0x00001818 +#define NV34TCL_IDXBUF_ADDRESS 0x0000181c +#define NV34TCL_IDXBUF_FORMAT 0x00001820 +#define NV34TCL_IDXBUF_FORMAT_TYPE_SHIFT 4 +#define NV34TCL_IDXBUF_FORMAT_TYPE_MASK 0x000000f0 +#define NV34TCL_IDXBUF_FORMAT_TYPE_U32 0x00000000 +#define NV34TCL_IDXBUF_FORMAT_TYPE_U16 0x00000010 +#define NV34TCL_IDXBUF_FORMAT_DMA1 (1 << 0) +#define NV34TCL_VB_INDEX_BATCH 0x00001824 +#define NV34TCL_VB_INDEX_BATCH_COUNT_SHIFT 24 +#define NV34TCL_VB_INDEX_BATCH_COUNT_MASK 0xff000000 +#define NV34TCL_VB_INDEX_BATCH_START_SHIFT 0 +#define NV34TCL_VB_INDEX_BATCH_START_MASK 0x00ffffff #define NV34TCL_POLYGON_MODE_FRONT 0x00001828 #define NV34TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 #define NV34TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 620038fa647..84f016eeadc 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -51,7 +51,7 @@ nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) OUT_RING (cb->b_enable); OUT_RING (cb->b_srcfunc); OUT_RING (cb->b_dstfunc); - BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_EQUATION, 1); + BEGIN_RING(rankine, NV34TCL_BLEND_EQUATION, 1); OUT_RING (cb->b_eqn); BEGIN_RING(rankine, NV34TCL_COLOR_MASK, 1); @@ -531,7 +531,7 @@ nv30_set_blend_color(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); - BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_COLOR, 1); + BEGIN_RING(rankine, NV34TCL_BLEND_COLOR, 1); OUT_RING ((float_to_ubyte(bcol->color[3]) << 24) | (float_to_ubyte(bcol->color[0]) << 16) | (float_to_ubyte(bcol->color[1]) << 8) | diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index b18a407ec5a..ff0ce6ac810 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -26,12 +26,12 @@ nv30_vbo_type(uint format) { switch (pf_type(format)) { case PIPE_FORMAT_TYPE_FLOAT: - return NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT; + return NV34TCL_VTXFMT_TYPE_FLOAT; case PIPE_FORMAT_TYPE_UNORM: - return NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_UBYTE; + return NV34TCL_VTXFMT_TYPE_UBYTE; default: NOUVEAU_ERR("Unknown format 0x%08x\n", format); - return NV40TCL_VTXFMT_TYPE_FLOAT; + return NV34TCL_VTXFMT_TYPE_FLOAT; } } @@ -51,7 +51,7 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, int attrib, map += vb->buffer_offset + ve->src_offset; switch (type) { - case NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT: + case NV34TCL_VTXFMT_TYPE_FLOAT: { float *v = map; @@ -121,7 +121,7 @@ nv30_vbo_arrays_update(struct nv30_context *nv30) struct pipe_vertex_buffer *vb; if (!(inputs & (1 << hw))) { - vtxfmt[hw] = NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT; + vtxfmt[hw] = NV34TCL_VTXFMT_TYPE_FLOAT; continue; } @@ -129,7 +129,7 @@ nv30_vbo_arrays_update(struct nv30_context *nv30) vb = &nv30->vtxbuf[ve->vertex_buffer_index]; if (vb->pitch == 0) { - vtxfmt[hw] = NV34TCL_VERTEX_ARRAY_FORMAT_TYPE_FLOAT; + vtxfmt[hw] = NV34TCL_VTXFMT_TYPE_FLOAT; if (nv30_vbo_static_attrib(nv30, hw, ve, vb) == TRUE) continue; } @@ -138,13 +138,13 @@ nv30_vbo_arrays_update(struct nv30_context *nv30) nv30->vb[hw].delta = vb->buffer_offset + ve->src_offset; nv30->vb[hw].buffer = vb->buffer; - vtxfmt[hw] = ((vb->pitch << NV34TCL_VERTEX_ARRAY_FORMAT_STRIDE_SHIFT) | + vtxfmt[hw] = ((vb->pitch << NV34TCL_VTXFMT_STRIDE_SHIFT) | (nv30_vbo_ncomp(ve->src_format) << - NV34TCL_VERTEX_ARRAY_FORMAT_SIZE_SHIFT) | + NV34TCL_VTXFMT_SIZE_SHIFT) | nv30_vbo_type(ve->src_format)); } - BEGIN_RING(rankine, NV34TCL_VERTEX_ARRAY_FORMAT(0), num_hw); + BEGIN_RING(rankine, NV34TCL_VTXFMT(0), num_hw); OUT_RINGp (vtxfmt, num_hw); } @@ -167,20 +167,20 @@ nv30_vbo_validate_state(struct nv30_context *nv30, inputs &= ~(1 << a); - BEGIN_RING(rankine, NV34TCL_VERTEX_BUFFER_ADDRESS(a), 1); + BEGIN_RING(rankine, NV34TCL_VTXBUF_ADDRESS(a), 1); OUT_RELOC (nv30->vb[a].buffer, nv30->vb[a].delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_LOW | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0, - NV34TCL_VERTEX_BUFFER_ADDRESS_DMA1); + NV34TCL_VTXBUF_ADDRESS_DMA1); } if (ib) { - BEGIN_RING(rankine, NV40TCL_IDXBUF_ADDRESS, 2); + BEGIN_RING(rankine, NV34TCL_IDXBUF_ADDRESS, 2); OUT_RELOCl(ib, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCd(ib, ib_format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, - 0, NV40TCL_IDXBUF_FORMAT_DMA1); + 0, NV34TCL_IDXBUF_FORMAT_DMA1); } BEGIN_RING(rankine, 0x1710, 1); @@ -360,10 +360,10 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, switch (ib_size) { case 2: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; + type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; break; case 4: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; + type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; break; default: NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); @@ -381,7 +381,7 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, nr = (count & 0xff); if (nr) { - BEGIN_RING(rankine, NV40TCL_VB_INDEX_BATCH, 1); + BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); OUT_RING (((nr - 1) << 24) | start); start += nr; } @@ -392,7 +392,7 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV40TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); while (push--) { OUT_RING(((0x100 - 1) << 24) | start); start += 0x100; -- cgit v1.2.3 From 5fea663b5f7abcdca00c5ff5d1b77f200b0d06ec Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 21 Jun 2008 12:03:05 +0200 Subject: nv30: Add state for blend --- src/gallium/drivers/nv30/Makefile | 1 + src/gallium/drivers/nv30/nv30_context.h | 9 +++++++ src/gallium/drivers/nv30/nv30_state_blend.c | 40 +++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 src/gallium/drivers/nv30/nv30_state_blend.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index ec11de76446..04f53ee4565 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -13,6 +13,7 @@ DRIVER_SOURCES = \ nv30_query.c \ nv30_screen.c \ nv30_state.c \ + nv30_state_blend.c \ nv30_state_emit.c \ nv30_state_fb.c \ nv30_surface.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 9fbe66dc74e..1695ba5a017 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -76,6 +76,13 @@ enum nv30_state_index { #define NV30_NEW_ARRAYS (1 << 11) #define NV30_NEW_UCP (1 << 12) +/* TODO: rename when removing the old state emitter */ +struct nv30_blend_state_new { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + + struct nv30_state { struct nouveau_stateobj *hw[NV30_STATE_MAX]; }; @@ -101,6 +108,8 @@ struct nv30_context { unsigned vp_samplers; /* Context state */ + struct nv30_blend_state_new *blend; + struct pipe_blend_color blend_colour; struct pipe_framebuffer_state framebuffer; uint32_t rt_enable; diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c new file mode 100644 index 00000000000..a1b0100472d --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_blend.c @@ -0,0 +1,40 @@ +#include "nv30_context.h" + +static boolean +nv30_state_blend_validate(struct nv30_context *nv30) +{ + so_ref(nv30->blend->so, &nv30->state.hw[NV30_STATE_BLEND]); + return TRUE; +} + +struct nv30_state_entry nv30_state_blend_new = { + .validate = nv30_state_blend_validate, + .dirty = { + .pipe = NV30_NEW_BLEND, + .hw = NV30_STATE_BLEND + } +}; + +static boolean +nv30_state_blend_colour_validate(struct nv30_context *nv30) +{ + struct nouveau_stateobj *so = so_new(2, 0); + struct pipe_blend_color *bcol = &nv30->blend_colour; + + so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nv30->state.hw[NV30_STATE_BCOL]); + return TRUE; +} + +struct nv30_state_entry nv30_state_blend_colour = { + .validate = nv30_state_blend_colour_validate, + .dirty = { + .pipe = NV30_NEW_BCOL, + .hw = NV30_STATE_BCOL + } +}; -- cgit v1.2.3 From 8c26a521ee80f5d8a1d0aabd0910233aad400322 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 21 Jun 2008 22:59:05 +0200 Subject: Update for extra vertex attributes --- src/gallium/drivers/nouveau/nouveau_class.h | 163 +++++++++++++++------------- src/gallium/drivers/nv30/nv30_vbo.c | 2 +- src/gallium/drivers/nv40/nv40_draw.c | 4 +- src/gallium/drivers/nv40/nv40_vbo.c | 2 +- 4 files changed, 93 insertions(+), 78 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 58c80ddcd24..3c29fa0d1be 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -3763,7 +3763,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_RT_ENABLE_COLOR2 (1 << 2) #define NV34TCL_RT_ENABLE_COLOR1 (1 << 1) #define NV34TCL_RT_ENABLE_COLOR0 (1 << 0) -#define NV34TCL_ZETA_PITCH 0x0000022c +#define NV34TCL_LMA_DEPTH_PITCH 0x0000022c #define NV34TCL_LMA_DEPTH_OFFSET 0x00000230 #define NV34TCL_TX_UNITS_ENABLE 0x0000023c #define NV34TCL_TX_UNITS_ENABLE_TX0 (1 << 0) @@ -4145,14 +4145,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_DEPTH_TEST_ENABLE 0x00000a74 #define NV34TCL_POLYGON_OFFSET_FACTOR 0x00000a78 #define NV34TCL_POLYGON_OFFSET_UNITS 0x00000a7c -#define NV34TCL_VERTEX_NOR_3I_XY 0x00000a90 -#define NV34TCL_VERTEX_NOR_3I_XY_X_SHIFT 0 -#define NV34TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff -#define NV34TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16 -#define NV34TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000 -#define NV34TCL_VERTEX_NOR_3I_Z 0x00000a94 -#define NV34TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0 -#define NV34TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff +#define NV34TCL_VTX_ATTR_3I_XY(x) (0x00000a80+((x)*8)) +#define NV34TCL_VTX_ATTR_3I_XY__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_3I_XY_X_SHIFT 0 +#define NV34TCL_VTX_ATTR_3I_XY_X_MASK 0x0000ffff +#define NV34TCL_VTX_ATTR_3I_XY_Y_SHIFT 16 +#define NV34TCL_VTX_ATTR_3I_XY_Y_MASK 0xffff0000 +#define NV34TCL_VTX_ATTR_3I_Z(x) (0x00000a84+((x)*8)) +#define NV34TCL_VTX_ATTR_3I_Z__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_3I_Z_Z_SHIFT 0 +#define NV34TCL_VTX_ATTR_3I_Z_Z_MASK 0x0000ffff #define NV34TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4)) #define NV34TCL_VP_UPLOAD_INST__SIZE 0x00000004 #define NV34TCL_CLIP_PLANE_A(x) (0x00000e00+((x)*16)) @@ -4336,48 +4338,38 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_FRONT_FACE_CCW 0x00000901 #define NV34TCL_POLYGON_SMOOTH_ENABLE 0x00001838 #define NV34TCL_CULL_FACE_ENABLE 0x0000183c -#define NV34TCL_VERTEX_ATTR_2F_X(x) (0x00001880+((x)*8)) -#define NV34TCL_VERTEX_ATTR_2F_X__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_2F_Y(x) (0x00001884+((x)*8)) -#define NV34TCL_VERTEX_ATTR_2F_Y__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_2I(x) (0x00001900+((x)*4)) -#define NV34TCL_VERTEX_ATTR_2I__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_2I_Y_SHIFT 16 -#define NV34TCL_VERTEX_ATTR_2I_Y_MASK 0xffff0000 -#define NV34TCL_VERTEX_ATTR_2I_X_SHIFT 0 -#define NV34TCL_VERTEX_ATTR_2I_X_MASK 0x0000ffff -#define NV34TCL_VERTEX_COL_4I(x) (0x0000194c+((x)*4)) -#define NV34TCL_VERTEX_COL_4I__SIZE 0x00000002 -#define NV34TCL_VERTEX_COL_4I_R_SHIFT 0 -#define NV34TCL_VERTEX_COL_4I_R_MASK 0x000000ff -#define NV34TCL_VERTEX_COL_4I_G_SHIFT 8 -#define NV34TCL_VERTEX_COL_4I_G_MASK 0x0000ff00 -#define NV34TCL_VERTEX_COL_4I_B_SHIFT 16 -#define NV34TCL_VERTEX_COL_4I_B_MASK 0x00ff0000 -#define NV34TCL_VERTEX_COL_4I_A_SHIFT 24 -#define NV34TCL_VERTEX_COL_4I_A_MASK 0xff000000 -#define NV34TCL_VERTEX_POS_4I_XY 0x00001980 -#define NV34TCL_VERTEX_POS_4I_XY_X_SHIFT 0 -#define NV34TCL_VERTEX_POS_4I_XY_X_MASK 0x0000ffff -#define NV34TCL_VERTEX_POS_4I_XY_Y_SHIFT 16 -#define NV34TCL_VERTEX_POS_4I_XY_Y_MASK 0xffff0000 -#define NV34TCL_VERTEX_POS_4I_ZW 0x00001984 -#define NV34TCL_VERTEX_POS_4I_ZW_Z_SHIFT 0 -#define NV34TCL_VERTEX_POS_4I_ZW_Z_MASK 0x0000ffff -#define NV34TCL_VERTEX_POS_4I_ZW_W_SHIFT 16 -#define NV34TCL_VERTEX_POS_4I_ZW_W_MASK 0xffff0000 -#define NV34TCL_VERTEX_TX_4I_ST(x) (0x000019c0+((x)*8)) -#define NV34TCL_VERTEX_TX_4I_ST__SIZE 0x00000004 -#define NV34TCL_VERTEX_TX_4I_ST_S_SHIFT 0 -#define NV34TCL_VERTEX_TX_4I_ST_S_MASK 0x0000ffff -#define NV34TCL_VERTEX_TX_4I_ST_T_SHIFT 16 -#define NV34TCL_VERTEX_TX_4I_ST_T_MASK 0xffff0000 -#define NV34TCL_VERTEX_TX_4I_RQ(x) (0x000019c4+((x)*8)) -#define NV34TCL_VERTEX_TX_4I_RQ__SIZE 0x00000004 -#define NV34TCL_VERTEX_TX_4I_RQ_R_SHIFT 0 -#define NV34TCL_VERTEX_TX_4I_RQ_R_MASK 0x0000ffff -#define NV34TCL_VERTEX_TX_4I_RQ_Q_SHIFT 16 -#define NV34TCL_VERTEX_TX_4I_RQ_Q_MASK 0xffff0000 +#define NV34TCL_VTX_ATTR_2F_X(x) (0x00001880+((x)*8)) +#define NV34TCL_VTX_ATTR_2F_X__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_2F_Y(x) (0x00001884+((x)*8)) +#define NV34TCL_VTX_ATTR_2F_Y__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_2I(x) (0x00001900+((x)*4)) +#define NV34TCL_VTX_ATTR_2I__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_2I_X_SHIFT 0 +#define NV34TCL_VTX_ATTR_2I_X_MASK 0x0000ffff +#define NV34TCL_VTX_ATTR_2I_Y_SHIFT 16 +#define NV34TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 +#define NV34TCL_VTX_ATTR_4UB(x) (0x00001940+((x)*4)) +#define NV34TCL_VTX_ATTR_4UB__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4UB_X_SHIFT 0 +#define NV34TCL_VTX_ATTR_4UB_X_MASK 0x000000ff +#define NV34TCL_VTX_ATTR_4UB_Y_SHIFT 8 +#define NV34TCL_VTX_ATTR_4UB_Y_MASK 0x0000ff00 +#define NV34TCL_VTX_ATTR_4UB_Z_SHIFT 16 +#define NV34TCL_VTX_ATTR_4UB_Z_MASK 0x00ff0000 +#define NV34TCL_VTX_ATTR_4UB_W_SHIFT 24 +#define NV34TCL_VTX_ATTR_4UB_W_MASK 0xff000000 +#define NV34TCL_VTX_ATTR_4I_XY(x) (0x00001980+((x)*8)) +#define NV34TCL_VTX_ATTR_4I_XY__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4I_XY_X_SHIFT 0 +#define NV34TCL_VTX_ATTR_4I_XY_X_MASK 0x0000ffff +#define NV34TCL_VTX_ATTR_4I_XY_Y_SHIFT 16 +#define NV34TCL_VTX_ATTR_4I_XY_Y_MASK 0xffff0000 +#define NV34TCL_VTX_ATTR_4I_ZW(x) (0x00001984+((x)*8)) +#define NV34TCL_VTX_ATTR_4I_ZW__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4I_ZW_Z_SHIFT 0 +#define NV34TCL_VTX_ATTR_4I_ZW_Z_MASK 0x0000ffff +#define NV34TCL_VTX_ATTR_4I_ZW_W_SHIFT 16 +#define NV34TCL_VTX_ATTR_4I_ZW_W_MASK 0xffff0000 #define NV34TCL_TX_OFFSET(x) (0x00001a00+((x)*32)) #define NV34TCL_TX_OFFSET__SIZE 0x00000004 #define NV34TCL_TX_FORMAT(x) (0x00001a04+((x)*32)) @@ -4534,14 +4526,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_TX_BORDER_COLOR_R_MASK 0x00ff0000 #define NV34TCL_TX_BORDER_COLOR_A_SHIFT 24 #define NV34TCL_TX_BORDER_COLOR_A_MASK 0xff000000 -#define NV34TCL_VERTEX_ATTR_4F_X(x) (0x00001c00+((x)*16)) -#define NV34TCL_VERTEX_ATTR_4F_X__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_4F_Y(x) (0x00001c04+((x)*16)) -#define NV34TCL_VERTEX_ATTR_4F_Y__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_4F_Z(x) (0x00001c08+((x)*16)) -#define NV34TCL_VERTEX_ATTR_4F_Z__SIZE 0x00000010 -#define NV34TCL_VERTEX_ATTR_4F_W(x) (0x00001c0c+((x)*16)) -#define NV34TCL_VERTEX_ATTR_4F_W__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4F_X(x) (0x00001c00+((x)*16)) +#define NV34TCL_VTX_ATTR_4F_X__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4F_Y(x) (0x00001c04+((x)*16)) +#define NV34TCL_VTX_ATTR_4F_Y__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4F_Z(x) (0x00001c08+((x)*16)) +#define NV34TCL_VTX_ATTR_4F_Z__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4F_W(x) (0x00001c0c+((x)*16)) +#define NV34TCL_VTX_ATTR_4F_W__SIZE 0x00000010 #define NV34TCL_FP_CONTROL 0x00001d60 #define NV34TCL_FP_CONTROL_USES_KIL (1 << 7) #define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_SHIFT 0 @@ -4573,7 +4565,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000 #define NV34TCL_BACK_MATERIAL_SHININESS(x) (0x00001e20+((x)*4)) #define NV34TCL_BACK_MATERIAL_SHININESS__SIZE 0x00000006 -#define NV34TCL_VERTEX_FOG_1F 0x00001e54 +#define NV34TCL_VTX_ATTR_1F(x) (0x00001e40+((x)*4)) +#define NV34TCL_VTX_ATTR_1F__SIZE 0x00000010 #define NV34TCL_VP_UPLOAD_FROM_ID 0x00001e9c #define NV34TCL_VP_START_FROM_ID 0x00001ea0 #define NV34TCL_POINT_PARAMETERS(x) (0x00001ec0+((x)*4)) @@ -4986,6 +4979,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40TCL_DEPTH_TEST_ENABLE 0x00000a74 #define NV40TCL_POLYGON_OFFSET_FACTOR 0x00000a78 #define NV40TCL_POLYGON_OFFSET_UNITS 0x00000a7c +#define NV40TCL_VTX_ATTR_3I_XY(x) (0x00000a80+((x)*8)) +#define NV40TCL_VTX_ATTR_3I_XY__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_3I_XY_X_SHIFT 0 +#define NV40TCL_VTX_ATTR_3I_XY_X_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_3I_XY_Y_SHIFT 16 +#define NV40TCL_VTX_ATTR_3I_XY_Y_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_3I_Z(x) (0x00000a84+((x)*8)) +#define NV40TCL_VTX_ATTR_3I_Z__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_3I_Z_Z_SHIFT 0 +#define NV40TCL_VTX_ATTR_3I_Z_Z_MASK 0x0000ffff #define NV40TCL_UNK0B40(x) (0x00000b40+((x)*4)) #define NV40TCL_UNK0B40__SIZE 0x00000008 #define NV40TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4)) @@ -5095,22 +5098,32 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40TCL_VTX_ATTR_2F_Y__SIZE 0x00000010 #define NV40TCL_VTX_ATTR_2I(x) (0x00001900+((x)*4)) #define NV40TCL_VTX_ATTR_2I__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_2I_Y_SHIFT 16 -#define NV40TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 #define NV40TCL_VTX_ATTR_2I_X_SHIFT 0 #define NV40TCL_VTX_ATTR_2I_X_MASK 0x0000ffff -#define NV40TCL_VTX_ATTR_4I_0(x) (0x00001900+((x)*8)) -#define NV40TCL_VTX_ATTR_4I_0__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_4I_0_Y_SHIFT 16 -#define NV40TCL_VTX_ATTR_4I_0_Y_MASK 0xffff0000 -#define NV40TCL_VTX_ATTR_4I_0_X_SHIFT 0 -#define NV40TCL_VTX_ATTR_4I_0_X_MASK 0x0000ffff -#define NV40TCL_VTX_ATTR_4I_1(x) (0x00001904+((x)*8)) -#define NV40TCL_VTX_ATTR_4I_1__SIZE 0x00000010 -#define NV40TCL_VTX_ATTR_4I_1_W_SHIFT 16 -#define NV40TCL_VTX_ATTR_4I_1_W_MASK 0xffff0000 -#define NV40TCL_VTX_ATTR_4I_1_Z_SHIFT 0 -#define NV40TCL_VTX_ATTR_4I_1_Z_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_2I_Y_SHIFT 16 +#define NV40TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_4UB(x) (0x00001940+((x)*4)) +#define NV40TCL_VTX_ATTR_4UB__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4UB_X_SHIFT 0 +#define NV40TCL_VTX_ATTR_4UB_X_MASK 0x000000ff +#define NV40TCL_VTX_ATTR_4UB_Y_SHIFT 8 +#define NV40TCL_VTX_ATTR_4UB_Y_MASK 0x0000ff00 +#define NV40TCL_VTX_ATTR_4UB_Z_SHIFT 16 +#define NV40TCL_VTX_ATTR_4UB_Z_MASK 0x00ff0000 +#define NV40TCL_VTX_ATTR_4UB_W_SHIFT 24 +#define NV40TCL_VTX_ATTR_4UB_W_MASK 0xff000000 +#define NV40TCL_VTX_ATTR_4I_XY(x) (0x00001980+((x)*8)) +#define NV40TCL_VTX_ATTR_4I_XY__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4I_XY_X_SHIFT 0 +#define NV40TCL_VTX_ATTR_4I_XY_X_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_4I_XY_Y_SHIFT 16 +#define NV40TCL_VTX_ATTR_4I_XY_Y_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_4I_ZW(x) (0x00001984+((x)*8)) +#define NV40TCL_VTX_ATTR_4I_ZW__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4I_ZW_Z_SHIFT 0 +#define NV40TCL_VTX_ATTR_4I_ZW_Z_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_4I_ZW_W_SHIFT 16 +#define NV40TCL_VTX_ATTR_4I_ZW_W_MASK 0xffff0000 #define NV40TCL_TEX_OFFSET(x) (0x00001a00+((x)*32)) #define NV40TCL_TEX_OFFSET__SIZE 0x00000010 #define NV40TCL_TEX_FORMAT(x) (0x00001a04+((x)*32)) @@ -5307,6 +5320,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff #define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16 #define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_1F(x) (0x00001e40+((x)*4)) +#define NV40TCL_VTX_ATTR_1F__SIZE 0x00000010 #define NV40TCL_VP_UPLOAD_FROM_ID 0x00001e9c #define NV40TCL_VP_START_FROM_ID 0x00001ea0 #define NV40TCL_POINT_SIZE 0x00001ee0 diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index ff0ce6ac810..359e443bb1c 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -55,7 +55,7 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, int attrib, { float *v = map; - BEGIN_RING(rankine, NV34TCL_VERTEX_ATTR_4F_X(attrib), 4); + BEGIN_RING(rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4); switch (ncomp) { case 4: OUT_RINGf(v[0]); diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index a9a939af0c0..1d78324dda8 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -39,7 +39,7 @@ nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) case EMIT_OMIT: break; case EMIT_1F: - BEGIN_RING(curie, 0x1e40 + (hw * 4), 1); + BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); OUT_RING (fui(v->data[idx][0])); break; case EMIT_2F: @@ -61,7 +61,7 @@ nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) OUT_RING (fui(v->data[idx][3])); break; case EMIT_4UB: - BEGIN_RING(curie, 0x1940 + (hw * 4), 1); + BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]), float_to_ubyte(v->data[idx][1]), float_to_ubyte(v->data[idx][2]), diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index e5f9bd5668a..93669e6192f 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -149,7 +149,7 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, so_data (so, fui(v[1])); break; case 1: - so_method(so, curie, 0x1e40 + (attrib * 4), 1); + so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1); so_data (so, fui(v[0])); break; default: -- cgit v1.2.3 From 5a01060eb95cb2cb168cb7224ecc805020584c91 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 23 Jun 2008 00:14:21 +1000 Subject: nouveau: update for interface changes + hack around gallium x86_64 bustage --- src/gallium/auxiliary/draw/draw_context.c | 2 ++ src/gallium/drivers/nv10/nv10_context.h | 1 + src/gallium/drivers/nv10/nv10_state.c | 2 ++ src/gallium/drivers/nv10/nv10_vbo.c | 4 +++- src/gallium/drivers/nv30/nv30_fragprog.c | 6 +++--- src/gallium/drivers/nv30/nv30_vertprog.c | 2 +- src/gallium/drivers/nv40/nv40_context.h | 1 + src/gallium/drivers/nv40/nv40_draw.c | 4 +++- src/gallium/drivers/nv40/nv40_fragprog.c | 8 ++++---- src/gallium/drivers/nv40/nv40_state.c | 5 +++-- src/gallium/drivers/nv40/nv40_vertprog.c | 10 +++++----- 11 files changed, 28 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 2f263cf06a9..0e6f55a928b 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -63,8 +63,10 @@ struct draw_context *draw_create( void ) if (!draw_pt_init( draw )) goto fail; +#ifndef PIPE_ARCH_X86 if (!draw_vs_init( draw )) goto fail; +#endif return draw; diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 5636dfc9d22..2bdba53db8a 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -66,6 +66,7 @@ struct nv10_context { //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; float *constbuf[PIPE_SHADER_TYPES][32][4]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; struct vertex_info vertex_info; diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 4dcb9a31ab1..11664fae2af 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -469,6 +469,8 @@ nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf->size && (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) { memcpy(nv10->constbuf[shader], mapped, buf->size); + nv10->constbuf_nr[shader] = + buf->size / (4 * sizeof(float)); ws->buffer_unmap(ws, buf->buffer); } } diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 2a334e137d9..f024f534209 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -44,7 +44,9 @@ boolean nv10_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, nv10->constbuf[PIPE_SHADER_VERTEX]); + draw_set_mapped_constant_buffer(draw, + nv10->constbuf[PIPE_SHADER_VERTEX], + nv10->constbuf_nr[PIPE_SHADER_VERTEX]); /* draw! */ draw_arrays(nv10->draw, prim, start, count); diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 51000bd6fcd..54d6bea55f7 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -645,7 +645,7 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, return FALSE; } - fpc->attrib_map[fdec->u.DeclarationRange.First] = hw; + fpc->attrib_map[fdec->DeclarationRange.First] = hw; return TRUE; } @@ -655,10 +655,10 @@ nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, { switch (fdec->Semantic.SemanticName) { case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->u.DeclarationRange.First; + fpc->depth_id = fdec->DeclarationRange.First; break; case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->u.DeclarationRange.First; + fpc->colour_id = fdec->DeclarationRange.First; break; default: NOUVEAU_ERR("bad output semantic\n"); diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 71aea3a59cb..1b34d2e4b29 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -530,7 +530,7 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, return FALSE; } - vpc->output_map[fdec->u.DeclarationRange.First] = hw; + vpc->output_map[fdec->DeclarationRange.First] = hw; return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index d8d1891dff0..8e60a81e68e 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -136,6 +136,7 @@ struct nv40_context { struct nv40_vertex_program *vertprog; struct nv40_fragment_program *fragprog; struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; struct nv40_rasterizer_state *rasterizer; struct nv40_zsa_state *zsa; struct nv40_blend_state *blend; diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 1d78324dda8..2cf58e2950a 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -253,9 +253,11 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, } if (nv40->constbuf[PIPE_SHADER_VERTEX]) { + const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX]; + map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(nv40->draw, map); + draw_set_mapped_constant_buffer(nv40->draw, map, nr); } draw_arrays(nv40->draw, mode, start, count); diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 4b7667e0381..428348c3388 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -708,7 +708,7 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, return FALSE; } - fpc->attrib_map[fdec->u.DeclarationRange.First] = hw; + fpc->attrib_map[fdec->DeclarationRange.First] = hw; return TRUE; } @@ -716,7 +716,7 @@ static boolean nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, const struct tgsi_full_declaration *fdec) { - unsigned idx = fdec->u.DeclarationRange.First; + unsigned idx = fdec->DeclarationRange.First; unsigned hw; switch (fdec->Semantic.SemanticName) { @@ -770,9 +770,9 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) goto out_err; break; case TGSI_FILE_TEMPORARY: - if (fdec->u.DeclarationRange.Last > high_temp) { + if (fdec->DeclarationRange.Last > high_temp) { high_temp = - fdec->u.DeclarationRange.Last; + fdec->DeclarationRange.Last; } break; default: diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 2d921d2b8a4..afcf336a65b 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -603,12 +603,13 @@ nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, { struct nv40_context *nv40 = nv40_context(pipe); + nv40->constbuf[shader] = buf->buffer; + nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); + if (shader == PIPE_SHADER_VERTEX) { - nv40->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; nv40->dirty |= NV40_NEW_VERTPROG; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv40->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; nv40->dirty |= NV40_NEW_FRAGPROG; } } diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index eb14869bfe0..1e486a66ef0 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -603,7 +603,7 @@ static boolean nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, const struct tgsi_full_declaration *fdec) { - unsigned idx = fdec->u.DeclarationRange.First; + unsigned idx = fdec->DeclarationRange.First; int hw; switch (fdec->Semantic.SemanticName) { @@ -678,16 +678,16 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { case TGSI_FILE_TEMPORARY: - if (fdec->u.DeclarationRange.Last > high_temp) { + if (fdec->DeclarationRange.Last > high_temp) { high_temp = - fdec->u.DeclarationRange.Last; + fdec->DeclarationRange.Last; } break; #if 0 /* this would be nice.. except gallium doesn't track it */ case TGSI_FILE_ADDRESS: - if (fdec->u.DeclarationRange.Last > high_addr) { + if (fdec->DeclarationRange.Last > high_addr) { high_addr = - fdec->u.DeclarationRange.Last; + fdec->DeclarationRange.Last; } break; #endif -- cgit v1.2.3 From d40ff294510236faff4bb9a58b81282705b98562 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Thu, 19 Jun 2008 20:28:20 +0200 Subject: i915: Removed level_offset from i915_texture All offsets are now on image_offset. --- src/gallium/drivers/i915simple/i915_context.h | 4 ---- src/gallium/drivers/i915simple/i915_texture.c | 27 ++++++++++++--------------- 2 files changed, 12 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 892a88fd2cc..9e02f787147 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -202,10 +202,6 @@ struct i915_texture { */ unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - /* The data is held here: */ struct pipe_buffer *buffer; diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 9cd32e39191..d9b33df1d51 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -100,7 +100,7 @@ static void i915_miptree_set_level_info(struct i915_texture *tex, unsigned level, unsigned nr_images, - unsigned x, unsigned y, unsigned w, unsigned h, unsigned d) + unsigned w, unsigned h, unsigned d) { struct pipe_texture *pt = &tex->base; @@ -110,7 +110,6 @@ i915_miptree_set_level_info(struct i915_texture *tex, pt->height[level] = h; pt->depth[level] = d; - tex->level_offset[level] = (x + y * tex->pitch) * pt->cpp; tex->nr_images[level] = nr_images; /* @@ -166,7 +165,7 @@ i915_displaytarget_layout(struct i915_texture *tex) if (pt->last_level > 0 || pt->cpp != 4) return 0; - i915_miptree_set_level_info( tex, 0, 1, 0, 0, + i915_miptree_set_level_info( tex, 0, 1, tex->base.width[0], tex->base.height[0], 1 ); @@ -230,7 +229,8 @@ i945_miptree_layout_2d( struct i915_texture *tex ) for (level = 0; level <= pt->last_level; level++) { unsigned img_height; - i915_miptree_set_level_info(tex, level, 1, x, y, width, height, 1); + i915_miptree_set_level_info(tex, level, 1, width, height, 1); + i915_miptree_set_image_offset(tex, level, 0, x, y); if (pt->compressed) img_height = MAX2(1, height/4); @@ -294,7 +294,7 @@ i945_miptree_layout_cube(struct i915_texture *tex) /* Set all the levels to effectively occupy the whole rectangular region. */ for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_level_info(tex, level, 6, 0, 0, lvlWidth, lvlHeight, 1); + i915_miptree_set_level_info(tex, level, 6, lvlWidth, lvlHeight, 1); lvlWidth /= 2; lvlHeight /= 2; } @@ -380,8 +380,6 @@ i915_miptree_layout(struct i915_texture * tex) for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 6, - 0, 0, - /*OLD: tex->pitch, tex->total_height,*/ lvlWidth, lvlHeight, 1); lvlWidth /= 2; @@ -416,7 +414,7 @@ i915_miptree_layout(struct i915_texture * tex) */ for (level = 0; level <= MAX2(8, pt->last_level); level++) { - i915_miptree_set_level_info(tex, level, depth, 0, tex->total_height, + i915_miptree_set_level_info(tex, level, depth, width, height, depth); @@ -458,8 +456,9 @@ i915_miptree_layout(struct i915_texture * tex) for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 1, - 0, tex->total_height, width, height, 1); + i915_miptree_set_image_offset(tex, level, 0, + 0, tex->total_height); if (pt->compressed) img_height = MAX2(1, height / 4); @@ -515,12 +514,11 @@ i945_miptree_layout(struct i915_texture * tex) unsigned q, j; i915_miptree_set_level_info(tex, level, nr_images, - 0, tex->total_height, width, height, depth); for (q = 0; q < nr_images;) { for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { - i915_miptree_set_image_offset(tex, level, q, x, y); + i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_height); x += pack_x_pitch; } @@ -648,15 +646,14 @@ i915_get_tex_surface(struct pipe_screen *screen, struct pipe_surface *ps; unsigned offset; /* in bytes */ - offset = tex->level_offset[level]; - if (pt->target == PIPE_TEXTURE_CUBE) { - offset += tex->image_offset[level][face] * pt->cpp; + offset = tex->image_offset[level][face] * pt->cpp; } else if (pt->target == PIPE_TEXTURE_3D) { - offset += tex->image_offset[level][zslice] * pt->cpp; + offset = tex->image_offset[level][zslice] * pt->cpp; } else { + offset = tex->image_offset[level][0] * pt->cpp; assert(face == 0); assert(zslice == 0); } -- cgit v1.2.3 From 92d48a4cb9481adc7abd7cdf9550fbf5a9c9613d Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 23 Jun 2008 08:44:30 -0600 Subject: gallium: fix bad srcy coord if do_flip --- src/gallium/drivers/softpipe/sp_surface.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 29a1e92416e..acedebfcc58 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -35,7 +35,11 @@ -/* Assumes all values are within bounds -- no checking at this level - +/** + * Copy a rectangular region from one surface to another. + * Surfaces must have same bpp. + * + * Assumes all values are within bounds -- no checking at this level - * do it higher up if required. */ static void @@ -54,9 +58,11 @@ sp_surface_copy(struct pipe_context *pipe, src, PIPE_BUFFER_USAGE_CPU_READ ); - assert( dst->cpp == src->cpp ); - assert(src_map && dst_map); + assert(dst->cpp == src->cpp); + assert(src_map); + assert(dst_map); + /* If do_flip, invert src_y position and pass negative src stride */ pipe_copy_rect(dst_map, dst->cpp, dst->pitch, @@ -64,7 +70,7 @@ sp_surface_copy(struct pipe_context *pipe, width, height, src_map, do_flip ? -(int) src->pitch : src->pitch, - srcx, do_flip ? 1 - srcy - height : srcy); + srcx, do_flip ? src->height - 1 - srcy : srcy); pipe->screen->surface_unmap(pipe->screen, src); pipe->screen->surface_unmap(pipe->screen, dst); -- cgit v1.2.3 From f1601c2b75ca4c2223079f676cf796843b284df2 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 23 Jun 2008 08:51:00 -0600 Subject: gallium: fix bad srcy coord if do_flip --- src/gallium/drivers/cell/ppu/cell_surface.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 2 +- src/gallium/drivers/i965simple/brw_surface.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index a35db0ef991..4a098101e19 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -76,7 +76,7 @@ cell_surface_copy(struct pipe_context *pipe, width, height, pipe_surface_map(src), do_flip ? -src->pitch : src->pitch, - srcx, do_flip ? 1 - srcy - height : srcy); + srcx, do_flip ? height - 1 - srcy : srcy); pipe_surface_unmap(src); pipe_surface_unmap(dst); diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 98367ac0739..1729ea26f5d 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -66,7 +66,7 @@ i915_surface_copy(struct pipe_context *pipe, width, height, src_map, do_flip ? -(int) src->pitch : src->pitch, - srcx, do_flip ? 1 - srcy - height : srcy); + srcx, do_flip ? height - 1 - srcy : srcy); pipe->screen->surface_unmap(pipe->screen, src); pipe->screen->surface_unmap(pipe->screen, dst); diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 3e3736b2806..62d75bc251e 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -66,7 +66,7 @@ brw_surface_copy(struct pipe_context *pipe, width, height, src_map, do_flip ? -(int) src->pitch : src->pitch, - srcx, do_flip ? 1 - srcy - height : srcy); + srcx, do_flip ? height - 1 - srcy : srcy); pipe->screen->surface_unmap(pipe->screen, src); pipe->screen->surface_unmap(pipe->screen, dst); -- cgit v1.2.3 From e14126ec811e4f37cf085be27cac4f750d9e011a Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 23 Jun 2008 08:54:16 -0600 Subject: gallium: change surface_copy()'s do_flip to boolean --- src/gallium/drivers/cell/ppu/cell_surface.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 2 +- src/gallium/drivers/i965simple/brw_surface.c | 2 +- src/gallium/drivers/softpipe/sp_surface.c | 2 +- src/gallium/include/pipe/p_context.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 4a098101e19..18f37919241 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -60,7 +60,7 @@ cell_surface_data(struct pipe_context *pipe, static void cell_surface_copy(struct pipe_context *pipe, - unsigned do_flip, + boolean do_flip, struct pipe_surface *dst, unsigned dstx, unsigned dsty, struct pipe_surface *src, diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 1729ea26f5d..cc55a0910ed 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -41,7 +41,7 @@ */ static void i915_surface_copy(struct pipe_context *pipe, - unsigned do_flip, + boolean do_flip, struct pipe_surface *dst, unsigned dstx, unsigned dsty, struct pipe_surface *src, diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 62d75bc251e..3d98a2bf199 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -41,7 +41,7 @@ */ static void brw_surface_copy(struct pipe_context *pipe, - unsigned do_flip, + boolean do_flip, struct pipe_surface *dst, unsigned dstx, unsigned dsty, struct pipe_surface *src, diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index acedebfcc58..9fd48aeccc5 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -44,7 +44,7 @@ */ static void sp_surface_copy(struct pipe_context *pipe, - unsigned do_flip, + boolean do_flip, struct pipe_surface *dst, unsigned dstx, unsigned dsty, struct pipe_surface *src, diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index faf112c6d61..2646706ff23 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -192,7 +192,7 @@ struct pipe_context { */ /*@{*/ void (*surface_copy)(struct pipe_context *pipe, - unsigned do_flip, /*<< flip surface contents vertically */ + boolean do_flip,/**< flip surface contents vertically */ struct pipe_surface *dest, unsigned destx, unsigned desty, struct pipe_surface *src, /* don't make this const - -- cgit v1.2.3 From f52ab4cc22bfb6708724f3c3966ce734d605cddd Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Mon, 23 Jun 2008 17:57:45 +0200 Subject: i915: Add render and texture support for tiled texture and buffers This is step towards tiled textures and buffer support for i915. But the tiled attribute is never set. --- src/gallium/drivers/i915simple/i915_context.h | 2 ++ src/gallium/drivers/i915simple/i915_state_emit.c | 12 ++++++------ src/gallium/drivers/i915simple/i915_state_sampler.c | 8 +++++++- src/gallium/drivers/i915simple/i915_texture.c | 1 + 4 files changed, 16 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 9e02f787147..5d411a66486 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -192,6 +192,8 @@ struct i915_texture { unsigned depth_pitch; /* per-image on i945? */ unsigned total_height; + unsigned tiled; + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; /* Explicitly store the offset of each image for each cube face or diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index bc801a82f03..19d968fd8b6 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -213,10 +213,10 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (cbuf_surface) { unsigned cpitch = (cbuf_surface->pitch * cbuf_surface->cpp); unsigned ctile = BUF_3D_USE_FENCE; -#if 0 - if (!((cpitch - 1) & cpitch) && cpitch >= 512) + if (cbuf_surface->texture && + ((struct i915_texture*)(cbuf_surface->texture))->tiled) { ctile = BUF_3D_TILED_SURFACE; -#endif + } OUT_BATCH(_3DSTATE_BUF_INFO_CMD); @@ -234,10 +234,10 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (depth_surface) { unsigned zpitch = (depth_surface->pitch * depth_surface->cpp); unsigned ztile = BUF_3D_USE_FENCE; -#if 0 - if (!((zpitch - 1) & zpitch) && zpitch >= 512) + if (depth_surface->texture && + ((struct i915_texture*)(depth_surface->texture))->tiled) { ztile = BUF_3D_TILED_SURFACE; -#endif + } OUT_BATCH(_3DSTATE_BUF_INFO_CMD); diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 24440843f33..379aff38460 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -234,6 +234,7 @@ i915_update_texture(struct i915_context *i915, const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; const uint num_levels = pt->last_level; unsigned max_lod = num_levels * 4; + unsigned tiled = MS3_USE_FENCE_REGS; assert(tex); assert(width); @@ -246,12 +247,17 @@ i915_update_texture(struct i915_context *i915, assert(format); assert(pitch); + if (tex->tiled) { + assert(!((pitch - 1) & pitch)); + tiled = MS3_TILED_SURFACE; + } + /* MS3 state */ state[0] = (((height - 1) << MS3_HEIGHT_SHIFT) | ((width - 1) << MS3_WIDTH_SHIFT) | format - | MS3_USE_FENCE_REGS); + | tiled); /* * XXX When min_filter != mag_filter and there's just one mipmap level, diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index d9b33df1d51..ae107c66762 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -174,6 +174,7 @@ i915_displaytarget_layout(struct i915_texture *tex) if (tex->base.width[0] >= 128) { tex->pitch = power_of_two(tex->base.width[0] * pt->cpp) / pt->cpp; tex->total_height = round_up(tex->base.height[0], 8); + tex->tiled = 1; } else { tex->pitch = round_up(tex->base.width[0], 64 / pt->cpp); tex->total_height = tex->base.height[0]; -- cgit v1.2.3 From 25da42a650048cd960c81af56744e5fdadd923ad Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 23 Jun 2008 11:27:44 -0600 Subject: gallium: in softpipe_get_tex_surface() use the pitch specified in the softpipe_texture object. Fixes a pitch/width mix-up. --- src/gallium/drivers/softpipe/sp_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index ef8c5bd6b0c..2ef17a220b4 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -198,7 +198,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->cpp = pt->cpp; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = ps->width; + ps->pitch = spt->pitch[level]; ps->offset = spt->level_offset[level]; ps->usage = usage; -- cgit v1.2.3 From 8b72737a0ba33343673111261265c59546b408c6 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Mon, 23 Jun 2008 20:37:27 +0200 Subject: nv30: add state rasterizer, based on nv40 one --- src/gallium/drivers/nv30/Makefile | 1 + src/gallium/drivers/nv30/nv30_context.h | 7 +++++++ src/gallium/drivers/nv30/nv30_state_rasterizer.c | 17 +++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 src/gallium/drivers/nv30/nv30_state_rasterizer.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 04f53ee4565..c5208fabf5e 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -16,6 +16,7 @@ DRIVER_SOURCES = \ nv30_state_blend.c \ nv30_state_emit.c \ nv30_state_fb.c \ + nv30_state_rasterizer.c \ nv30_surface.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 1695ba5a017..43ee8d13a0c 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -76,6 +76,12 @@ enum nv30_state_index { #define NV30_NEW_ARRAYS (1 << 11) #define NV30_NEW_UCP (1 << 12) +/* TODO: rename when removing the old state emitter */ +struct nv30_rasterizer_state_new { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + /* TODO: rename when removing the old state emitter */ struct nv30_blend_state_new { struct pipe_blend_state pipe; @@ -111,6 +117,7 @@ struct nv30_context { struct nv30_blend_state_new *blend; struct pipe_blend_color blend_colour; struct pipe_framebuffer_state framebuffer; + struct nv30_rasterizer_state_new *rasterizer; uint32_t rt_enable; struct pipe_buffer *rt[2]; diff --git a/src/gallium/drivers/nv30/nv30_state_rasterizer.c b/src/gallium/drivers/nv30/nv30_state_rasterizer.c new file mode 100644 index 00000000000..6d1b60e043d --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nv30_context.h" + +static boolean +nv30_state_rasterizer_validate(struct nv30_context *nv30) +{ + so_ref(nv30->rasterizer->so, + &nv30->state.hw[NV30_STATE_RAST]); + return TRUE; +} + +struct nv30_state_entry nv30_state_rasterizer = { + .validate = nv30_state_rasterizer_validate, + .dirty = { + .pipe = NV30_NEW_RAST, + .hw = NV30_STATE_RAST + } +}; -- cgit v1.2.3 From e5bbb18441f34824dc4f9f857b30c71c4ff6466f Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Mon, 23 Jun 2008 20:43:22 +0200 Subject: nv30: add state scissor, based on nv40 one --- src/gallium/drivers/nv30/Makefile | 1 + src/gallium/drivers/nv30/nv30_context.h | 3 +++ src/gallium/drivers/nv30/nv30_state_scissor.c | 35 +++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 src/gallium/drivers/nv30/nv30_state_scissor.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index c5208fabf5e..134130c0e00 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -17,6 +17,7 @@ DRIVER_SOURCES = \ nv30_state_emit.c \ nv30_state_fb.c \ nv30_state_rasterizer.c \ + nv30_state_scissor.c \ nv30_surface.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 43ee8d13a0c..428f17ae0d6 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -90,6 +90,8 @@ struct nv30_blend_state_new { struct nv30_state { + unsigned scissor_enabled; + struct nouveau_stateobj *hw[NV30_STATE_MAX]; }; @@ -104,6 +106,7 @@ struct nv30_context { /* HW state derived from pipe states */ struct nv30_state state; + struct pipe_scissor_state scissor; uint32_t dirty; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c new file mode 100644 index 00000000000..1db9bc17955 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_scissor.c @@ -0,0 +1,35 @@ +#include "nv30_context.h" + +static boolean +nv30_state_scissor_validate(struct nv30_context *nv30) +{ + struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; + struct pipe_scissor_state *s = &nv30->scissor; + struct nouveau_stateobj *so; + + if (nv30->state.hw[NV30_STATE_SCISSOR] && + (rast->scissor == 0 && nv30->state.scissor_enabled == 0)) + return FALSE; + nv30->state.scissor_enabled = rast->scissor; + + so = so_new(3, 0); + so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); + if (nv30->state.scissor_enabled) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nv30->state.hw[NV30_STATE_SCISSOR]); + return TRUE; +} + +struct nv30_state_entry nv30_state_scissor = { + .validate = nv30_state_scissor_validate, + .dirty = { + .pipe = NV30_NEW_SCISSOR | NV30_NEW_RAST, + .hw = NV30_STATE_SCISSOR + } +}; -- cgit v1.2.3 From d4bc56ca49ef39f9f083a2e5adeb3e89ca3bf538 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Mon, 23 Jun 2008 21:13:27 +0200 Subject: nv30: add state stipple, based on nv40 one --- src/gallium/drivers/nv30/Makefile | 1 + src/gallium/drivers/nv30/nv30_context.h | 2 ++ src/gallium/drivers/nv30/nv30_state_stipple.c | 39 +++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 src/gallium/drivers/nv30/nv30_state_stipple.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 134130c0e00..91601087a34 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -18,6 +18,7 @@ DRIVER_SOURCES = \ nv30_state_fb.c \ nv30_state_rasterizer.c \ nv30_state_scissor.c \ + nv30_state_stipple.c \ nv30_surface.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 428f17ae0d6..446e43a726d 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -91,6 +91,7 @@ struct nv30_blend_state_new { struct nv30_state { unsigned scissor_enabled; + unsigned stipple_enabled; struct nouveau_stateobj *hw[NV30_STATE_MAX]; }; @@ -121,6 +122,7 @@ struct nv30_context { struct pipe_blend_color blend_colour; struct pipe_framebuffer_state framebuffer; struct nv30_rasterizer_state_new *rasterizer; + unsigned stipple[32]; uint32_t rt_enable; struct pipe_buffer *rt[2]; diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c new file mode 100644 index 00000000000..41b42813b49 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_stipple.c @@ -0,0 +1,39 @@ +#include "nv30_context.h" + +static boolean +nv30_state_stipple_validate(struct nv30_context *nv30) +{ + struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nouveau_stateobj *so; + + if (nv30->state.hw[NV30_STATE_STIPPLE] && + (rast->poly_stipple_enable == 0 && nv30->state.stipple_enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(35, 0); + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv30->stipple[i]); + } else { + so = so_new(2, 0); + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nv30->state.hw[NV30_STATE_STIPPLE]); + return TRUE; +} + +struct nv30_state_entry nv30_state_stipple = { + .validate = nv30_state_stipple_validate, + .dirty = { + .pipe = NV30_NEW_STIPPLE | NV30_NEW_RAST, + .hw = NV30_STATE_STIPPLE, + } +}; -- cgit v1.2.3 From 0e1b36b0b257bfba4427a1e6e12c918e744b9977 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Mon, 23 Jun 2008 21:37:41 +0200 Subject: nv30: add state viewport, based on nv40 one --- src/gallium/drivers/nv30/Makefile | 1 + src/gallium/drivers/nv30/nv30_context.h | 2 + src/gallium/drivers/nv30/nv30_state_viewport.c | 70 ++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 src/gallium/drivers/nv30/nv30_state_viewport.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 91601087a34..a6a8cf1df9c 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -19,6 +19,7 @@ DRIVER_SOURCES = \ nv30_state_rasterizer.c \ nv30_state_scissor.c \ nv30_state_stipple.c \ + nv30_state_viewport.c \ nv30_surface.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 446e43a726d..eb7e5a8b124 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -92,6 +92,7 @@ struct nv30_blend_state_new { struct nv30_state { unsigned scissor_enabled; unsigned stipple_enabled; + unsigned viewport_bypass; struct nouveau_stateobj *hw[NV30_STATE_MAX]; }; @@ -120,6 +121,7 @@ struct nv30_context { /* Context state */ struct nv30_blend_state_new *blend; struct pipe_blend_color blend_colour; + struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; struct nv30_rasterizer_state_new *rasterizer; unsigned stipple[32]; diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c new file mode 100644 index 00000000000..951d40ebfdd --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_viewport.c @@ -0,0 +1,70 @@ +#include "nv30_context.h" + +static boolean +nv30_state_viewport_validate(struct nv30_context *nv30) +{ + struct pipe_viewport_state *vpt = &nv30->viewport; + struct nouveau_stateobj *so; + unsigned bypass; + + if (/*nv30->render_mode == HW &&*/ !nv30->rasterizer->pipe.bypass_clipping) + bypass = 0; + else + bypass = 1; + + if (nv30->state.hw[NV30_STATE_VIEWPORT] && + (bypass || !(nv30->dirty & NV30_NEW_VIEWPORT)) && + nv30->state.viewport_bypass == bypass) + return FALSE; + nv30->state.viewport_bypass = bypass; + + so = so_new(11, 0); + if (!bypass) { + so_method(so, nv30->screen->rankine, + NV34TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); +/* so_method(so, nv30->screen->rankine, 0x1d78, 1); + so_data (so, 1); +*/ } else { + so_method(so, nv30->screen->rankine, + NV34TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(0.0)); + /* Not entirely certain what this is yet. The DDX uses this + * value also as it fixes rendering when you pass + * pre-transformed vertices to the GPU. My best gusss is that + * this bypasses some culling/clipping stage. Might be worth + * noting that points/lines are uneffected by whatever this + * value fixes, only filled polygons are effected. + */ +/* so_method(so, nv30->screen->rankine, 0x1d78, 1); + so_data (so, 0x110); +*/ } + /* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */ + so_method(so, nv30->screen->rankine, 0x1d78, 1); + so_data (so, 1); + + so_ref(so, &nv30->state.hw[NV30_STATE_VIEWPORT]); + return TRUE; +} + +struct nv30_state_entry nv30_state_viewport = { + .validate = nv30_state_viewport_validate, + .dirty = { + .pipe = NV30_NEW_VIEWPORT | NV30_NEW_RAST, + .hw = NV30_STATE_VIEWPORT + } +}; -- cgit v1.2.3 From 95fe122f67024f55d555e2816a95409a8b53a49e Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Mon, 23 Jun 2008 21:46:51 +0200 Subject: nv30: add state zsa, based on nv40 one --- src/gallium/drivers/nv30/Makefile | 1 + src/gallium/drivers/nv30/nv30_context.h | 6 ++++++ src/gallium/drivers/nv30/nv30_state_zsa.c | 17 +++++++++++++++++ 3 files changed, 24 insertions(+) create mode 100644 src/gallium/drivers/nv30/nv30_state_zsa.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index a6a8cf1df9c..69f2790dfe2 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -20,6 +20,7 @@ DRIVER_SOURCES = \ nv30_state_scissor.c \ nv30_state_stipple.c \ nv30_state_viewport.c \ + nv30_state_zsa.c \ nv30_surface.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index eb7e5a8b124..dce077b2b4b 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -82,6 +82,11 @@ struct nv30_rasterizer_state_new { struct nouveau_stateobj *so; }; +struct nv30_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + /* TODO: rename when removing the old state emitter */ struct nv30_blend_state_new { struct pipe_blend_state pipe; @@ -124,6 +129,7 @@ struct nv30_context { struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; struct nv30_rasterizer_state_new *rasterizer; + struct nv30_zsa_state *zsa; unsigned stipple[32]; uint32_t rt_enable; diff --git a/src/gallium/drivers/nv30/nv30_state_zsa.c b/src/gallium/drivers/nv30/nv30_state_zsa.c new file mode 100644 index 00000000000..0940b7269b2 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_zsa.c @@ -0,0 +1,17 @@ +#include "nv30_context.h" + +static boolean +nv30_state_zsa_validate(struct nv30_context *nv30) +{ + so_ref(nv30->zsa->so, + &nv30->state.hw[NV30_STATE_ZSA]); + return TRUE; +} + +struct nv30_state_entry nv30_state_zsa = { + .validate = nv30_state_zsa_validate, + .dirty = { + .pipe = NV30_NEW_ZSA, + .hw = NV30_STATE_ZSA + } +}; -- cgit v1.2.3 From 19dad109bb7b271e0bb4b55e1374c299770b107e Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Tue, 24 Jun 2008 14:19:30 +0200 Subject: i915: Added texture_blanket function --- src/gallium/drivers/i915simple/i915_texture.c | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index ae107c66762..9d8a6d6de44 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -677,6 +677,39 @@ i915_get_tex_surface(struct pipe_screen *screen, return ps; } +static struct pipe_texture * +i915_texture_blanket(struct pipe_screen * screen, + const struct pipe_texture *base, + const unsigned *pitch, + struct pipe_buffer *buffer) +{ + struct i915_texture *tex; + assert(screen); + assert(templat); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth[0] != 1) { + return NULL; + } + + tex = CALLOC_STRUCT(i915_texture); + if (!tex) + return NULL; + + tex->base = *base; + + tex->pitch = pitch[0]; + + i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_image_offset(tex, 0, 0, 0, 0); + + pipe_buffer_reference(screen->winsys, &tex->buffer, buffer); + + return &tex->base; +} + void i915_init_texture_functions(struct i915_context *i915) { @@ -713,5 +746,6 @@ i915_init_screen_texture_functions(struct pipe_screen *screen) screen->texture_create = i915_texture_create; screen->texture_release = i915_texture_release; screen->get_tex_surface = i915_get_tex_surface; + screen->texture_blanket = i915_texture_blanket; screen->tex_surface_release = i915_tex_surface_release; } -- cgit v1.2.3 From 2b692335101d248106b4e1c2d116a1f95712fd25 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 24 Jun 2008 08:45:59 -0600 Subject: gallium: remove bad assertion --- src/gallium/drivers/i915simple/i915_texture.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 9d8a6d6de44..b2e490c7db1 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -685,7 +685,6 @@ i915_texture_blanket(struct pipe_screen * screen, { struct i915_texture *tex; assert(screen); - assert(templat); /* Only supports one type */ if (base->target != PIPE_TEXTURE_2D || -- cgit v1.2.3 From b831aa06dc0d099185bcaa180683ad10942feaa0 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Tue, 24 Jun 2008 21:04:37 +0200 Subject: nv30: add context value --- src/gallium/drivers/nv30/nv30_screen.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h index 816ece94c4a..b7ddc2a9594 100644 --- a/src/gallium/drivers/nv30/nv30_screen.h +++ b/src/gallium/drivers/nv30/nv30_screen.h @@ -8,6 +8,8 @@ struct nv30_screen { struct nouveau_winsys *nvws; + unsigned cur_pctx; + /* HW graphics objects */ struct nouveau_grobj *rankine; struct nouveau_notifier *sync; -- cgit v1.2.3 From 6106db4c5da6fc5ae9ef157c939ce0834cdc5b92 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 27 Jun 2008 13:01:37 +0900 Subject: scons: Fix i965/xlib build. --- src/gallium/drivers/i965simple/SConscript | 1 + src/gallium/winsys/xlib/SConscript | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript index c0825c4de37..43fc2a40052 100644 --- a/src/gallium/drivers/i965simple/SConscript +++ b/src/gallium/drivers/i965simple/SConscript @@ -26,6 +26,7 @@ i965simple = env.ConvenienceLibrary( 'brw_gs_emit.c', 'brw_gs_state.c', 'brw_misc_state.c', + 'brw_screen.c', 'brw_sf.c', 'brw_sf_emit.c', 'brw_sf_state.c', diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index 5e98a36abc2..14a85ae0f2b 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -3,7 +3,11 @@ Import('*') -if env['platform'] == 'linux' and 'mesa' in env['statetrackers'] and not env['dri']: +if env['platform'] == 'linux' \ + and 'mesa' in env['statetrackers'] \ + and 'softpipe' in env['drivers'] \ + and 'i965simple' in env['drivers'] \ + and not env['dri']: env = env.Clone() @@ -24,6 +28,7 @@ if env['platform'] == 'linux' and 'mesa' in env['statetrackers'] and not env['dr drivers = [ softpipe, + i965simple ] # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions -- cgit v1.2.3 From 4ddd65967915ca4846f2831bc676c878a29dae4a Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 27 Jun 2008 19:37:56 +0900 Subject: gallium: Drop pipe_texture->cpp and pipe_surface->cpp. The chars-per-pixel concept falls apart with compressed and yuv images, where more than one pixel are coded in a single data block. --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 6 +- src/gallium/auxiliary/util/p_tile.c | 98 ++++------- src/gallium/auxiliary/util/p_util.c | 100 +++++++++-- src/gallium/auxiliary/util/u_blit.c | 2 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 8 +- src/gallium/drivers/i915simple/i915_blit.c | 2 - src/gallium/drivers/i915simple/i915_context.h | 6 +- src/gallium/drivers/i915simple/i915_state_emit.c | 4 +- .../drivers/i915simple/i915_state_sampler.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 68 ++----- src/gallium/drivers/i915simple/i915_texture.c | 196 +++++++++++---------- src/gallium/drivers/i965simple/brw_context.h | 4 +- src/gallium/drivers/i965simple/brw_misc_state.c | 8 +- src/gallium/drivers/i965simple/brw_surface.c | 69 ++------ src/gallium/drivers/i965simple/brw_tex_layout.c | 117 ++++++------ .../drivers/i965simple/brw_wm_surface_state.c | 6 +- src/gallium/drivers/softpipe/sp_surface.c | 45 ++--- src/gallium/drivers/softpipe/sp_texture.c | 20 ++- src/gallium/drivers/softpipe/sp_texture.h | 2 +- src/gallium/include/pipe/p_format.h | 41 +++++ src/gallium/include/pipe/p_state.h | 11 +- src/gallium/include/pipe/p_util.h | 15 +- src/gallium/winsys/xlib/brw_aub.c | 9 +- src/gallium/winsys/xlib/xm_winsys.c | 24 +-- src/gallium/winsys/xlib/xm_winsys_aub.c | 31 ++-- src/mesa/state_tracker/st_cb_accum.c | 24 +-- src/mesa/state_tracker/st_cb_bitmap.c | 4 +- src/mesa/state_tracker/st_cb_drawpixels.c | 11 +- src/mesa/state_tracker/st_cb_fbo.c | 8 +- src/mesa/state_tracker/st_cb_readpixels.c | 6 +- src/mesa/state_tracker/st_cb_texture.c | 17 +- src/mesa/state_tracker/st_gen_mipmap.c | 4 +- src/mesa/state_tracker/st_texture.c | 26 ++- 34 files changed, 513 insertions(+), 485 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index ecdebca5f12..3dd7ee19fd1 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -398,7 +398,7 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; texTemp.depth[0] = 1; - texTemp.cpp = 1; + pf_get_block(texTemp.format, &texTemp.block); aaline->texture = screen->texture_create(screen, &texTemp); if (!aaline->texture) @@ -439,7 +439,7 @@ aaline_create_texture(struct aaline_stage *aaline) else { d = 255; } - data[i * surface->pitch + j] = d; + data[i * surface->stride + j] = d; } } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 4087cf7a491..1f63f943656 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -394,11 +394,11 @@ pstip_update_texture(struct pstip_stage *pstip) for (j = 0; j < 32; j++) { if (stipple[i] & (bit31 >> j)) { /* fragment "on" */ - data[i * surface->pitch + j] = 0; + data[i * surface->stride + j] = 0; } else { /* fragment "off" */ - data[i * surface->pitch + j] = 255; + data[i * surface->stride + j] = 255; } } } @@ -426,7 +426,7 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.width[0] = 32; texTemp.height[0] = 32; texTemp.depth[0] = 1; - texTemp.cpp = 1; + pf_get_block(texTemp.format, &texTemp.block); pstip->texture = screen->texture_create(screen, &texTemp); if (pstip->texture == NULL) diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 5728757d2fb..ab603ff6e4d 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -48,34 +48,23 @@ void pipe_get_tile_raw(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, - void *p, int dst_stride) + void *dst, int dst_stride) { struct pipe_screen *screen = pipe->screen; - const uint cpp = ps->cpp; - const ubyte *pSrc; - const uint src_stride = ps->pitch * cpp; - ubyte *pDest; - uint i; - - if (dst_stride == 0) { - dst_stride = w * cpp; - } + const void *src; if (pipe_clip_tile(x, y, &w, &h, ps)) return; - pSrc = (const ubyte *) screen->surface_map(screen, ps, - PIPE_BUFFER_USAGE_CPU_READ); - assert(pSrc); /* XXX: proper error handling! */ + if (dst_stride == 0) + dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; - pSrc += (y * ps->pitch + x) * cpp; - pDest = (ubyte *) p; + src = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + assert(src); + if(!src) + return; - for (i = 0; i < h; i++) { - memcpy(pDest, pSrc, w * cpp); - pDest += dst_stride; - pSrc += src_stride; - } + pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y); screen->surface_unmap(screen, ps); } @@ -89,34 +78,23 @@ void pipe_put_tile_raw(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, - const void *p, int src_stride) + const void *src, int src_stride) { struct pipe_screen *screen = pipe->screen; - const uint cpp = ps->cpp; - const ubyte *pSrc; - const uint dst_stride = ps->pitch * cpp; - ubyte *pDest; - uint i; - - if (src_stride == 0) { - src_stride = w * cpp; - } + void *dst; if (pipe_clip_tile(x, y, &w, &h, ps)) return; - pSrc = (const ubyte *) p; + if (src_stride == 0) + src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; - pDest = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); - assert(pDest); /* XXX: proper error handling */ - - pDest += (y * ps->pitch + x) * cpp; + dst = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(dst); + if(!dst) + return; - for (i = 0; i < h; i++) { - memcpy(pDest, pSrc, w * cpp); - pDest += dst_stride; - pSrc += src_stride; - } + pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0); screen->surface_unmap(screen, ps); } @@ -692,12 +670,12 @@ pipe_get_tile_rgba(struct pipe_context *pipe, if (pipe_clip_tile(x, y, &w, &h, ps)) return; - packed = MALLOC(h * w * ps->cpp); + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); if (!packed) return; - pipe_get_tile_raw(pipe, ps, x, y, w, h, packed, w * ps->cpp); + pipe_get_tile_raw(pipe, ps, x, y, w, h, packed, 0); switch (ps->format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -774,7 +752,7 @@ pipe_put_tile_rgba(struct pipe_context *pipe, if (pipe_clip_tile(x, y, &w, &h, ps)) return; - packed = MALLOC(h * w * ps->cpp); + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); if (!packed) return; @@ -829,7 +807,7 @@ pipe_put_tile_rgba(struct pipe_context *pipe, assert(0); } - pipe_put_tile_raw(pipe, ps, x, y, w, h, packed, w * ps->cpp); + pipe_put_tile_raw(pipe, ps, x, y, w, h, packed, 0); FREE(packed); } @@ -846,14 +824,14 @@ pipe_get_tile_z(struct pipe_context *pipe, { struct pipe_screen *screen = pipe->screen; const uint dstStride = w; - void *map; + ubyte *map; uint *pDest = z; uint i, j; if (pipe_clip_tile(x, y, &w, &h, ps)) return; - map = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + map = (ubyte *)screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); if (!map) { assert(0); return; @@ -863,11 +841,11 @@ pipe_get_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_Z32_UNORM: { const uint *pSrc - = (const uint *)map + (y * ps->pitch + x); + = (const uint *)(map + y * ps->stride + x*4); for (i = 0; i < h; i++) { memcpy(pDest, pSrc, 4 * w); pDest += dstStride; - pSrc += ps->pitch; + pSrc += ps->stride/4; } } break; @@ -875,28 +853,28 @@ pipe_get_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_X8Z24_UNORM: { const uint *pSrc - = (const uint *)map + (y * ps->pitch + x); + = (const uint *)(map + y * ps->stride + x*4); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 24-bit Z to 32-bit Z */ pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff); } pDest += dstStride; - pSrc += ps->pitch; + pSrc += ps->stride/4; } } break; case PIPE_FORMAT_Z16_UNORM: { const ushort *pSrc - = (const ushort *)map + (y * ps->pitch + x); + = (const ushort *)(map + y * ps->stride + x*2); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 16-bit Z to 32-bit Z */ pDest[j] = (pSrc[j] << 16) | pSrc[j]; } pDest += dstStride; - pSrc += ps->pitch; + pSrc += ps->stride/2; } } break; @@ -917,13 +895,13 @@ pipe_put_tile_z(struct pipe_context *pipe, struct pipe_screen *screen = pipe->screen; const uint srcStride = w; const uint *pSrc = zSrc; - void *map; + ubyte *map; uint i, j; if (pipe_clip_tile(x, y, &w, &h, ps)) return; - map = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + map = (ubyte *)screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); if (!map) { assert(0); return; @@ -932,10 +910,10 @@ pipe_put_tile_z(struct pipe_context *pipe, switch (ps->format) { case PIPE_FORMAT_Z32_UNORM: { - uint *pDest = (uint *) map + (y * ps->pitch + x); + uint *pDest = (uint *) (map + y * ps->stride + x*4); for (i = 0; i < h; i++) { memcpy(pDest, pSrc, 4 * w); - pDest += ps->pitch; + pDest += ps->stride/4; pSrc += srcStride; } } @@ -943,26 +921,26 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: { - uint *pDest = (uint *) map + (y * ps->pitch + x); + uint *pDest = (uint *) (map + y * ps->stride + x*4); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z (0 stencil) */ pDest[j] = pSrc[j] >> 8; } - pDest += ps->pitch; + pDest += ps->stride/4; pSrc += srcStride; } } break; case PIPE_FORMAT_Z16_UNORM: { - ushort *pDest = (ushort *) map + (y * ps->pitch + x); + ushort *pDest = (ushort *) (map + y * ps->stride + x*2); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 16-bit Z */ pDest[j] = pSrc[j] >> 16; } - pDest += ps->pitch; + pDest += ps->stride/2; pSrc += srcStride; } } diff --git a/src/gallium/auxiliary/util/p_util.c b/src/gallium/auxiliary/util/p_util.c index 4e60b1b8418..271be4edf14 100644 --- a/src/gallium/auxiliary/util/p_util.c +++ b/src/gallium/auxiliary/util/p_util.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" +#include "pipe/p_format.h" /** @@ -41,42 +42,109 @@ */ void pipe_copy_rect(ubyte * dst, - unsigned cpp, - unsigned dst_pitch, + const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, const ubyte * src, - int src_pitch, + int src_stride, unsigned src_x, int src_y) { unsigned i; - int src_pitch_pos = src_pitch < 0 ? -src_pitch : src_pitch; + int src_stride_pos = src_stride < 0 ? -src_stride : src_stride; - assert(cpp > 0); + assert(block->size > 0); + assert(block->width > 0); + assert(block->height > 0); assert(src_x >= 0); assert(src_y >= 0); assert(dst_x >= 0); assert(dst_y >= 0); - dst_pitch *= cpp; - src_pitch *= cpp; - src_pitch_pos *= cpp; - dst += dst_x * cpp; - src += src_x * cpp; - dst += dst_y * dst_pitch; - src += src_y * src_pitch_pos; - width *= cpp; + dst_x /= block->width; + dst_y /= block->height; + width = (width + block->width - 1)/block->width; + height = (height + block->height - 1)/block->height; + src_x /= block->width; + src_y /= block->height; + + dst += dst_x * block->size; + src += src_x * block->size; + dst += dst_y * dst_stride; + src += src_y * src_stride_pos; + width *= block->size; - if (width == dst_pitch && width == src_pitch) + if (width == dst_stride && width == src_stride) memcpy(dst, src, height * width); else { for (i = 0; i < height; i++) { memcpy(dst, src, width); - dst += dst_pitch; - src += src_pitch; + dst += dst_stride; + src += src_stride; } } } + +void +pipe_fill_rect(ubyte * dst, + const struct pipe_format_block *block, + unsigned dst_stride, + unsigned dst_x, + unsigned dst_y, + unsigned width, + unsigned height, + uint32_t value) +{ + unsigned i, j; + unsigned width_size; + + assert(block->size > 0); + assert(block->width > 0); + assert(block->height > 0); + assert(dst_x >= 0); + assert(dst_y >= 0); + + dst_x /= block->width; + dst_y /= block->height; + width = (width + block->width - 1)/block->width; + height = (height + block->height - 1)/block->height; + + dst += dst_x * block->size; + dst += dst_y * dst_stride; + width_size = width * block->size; + + switch (block->size) { + case 1: + if(dst_stride == width_size) + memset(dst, (ubyte) value, height * width_size); + else { + for (i = 0; i < height; i++) { + memset(dst, (ubyte) value, width_size); + dst += dst_stride; + } + } + break; + case 2: + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) value; + dst += dst_stride; + } + break; + case 4: + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for (j = 0; j < width; j++) + *row++ = value; + dst += dst_stride; + } + break; + default: + assert(0); + break; + } +} diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 6555dcd5882..ae779335dc0 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -324,7 +324,7 @@ util_blit_pixels(struct blit_state *ctx, texTemp.height[0] = srcH; texTemp.depth[0] = 1; texTemp.compressed = 0; - texTemp.cpp = pf_get_size(src->format); + pf_get_block(src->format, &texTemp.block); tex = screen->texture_create(screen, &texTemp); if (!tex) diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 7d71aefda9d..5313a8008a9 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -625,7 +625,9 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, struct pipe_winsys *winsys = pipe->winsys; const uint zslice = 0; uint dstLevel; - const int bpt = pf_get_size(pt->format); + + assert(pt->block.width == 1); + assert(pt->block.height == 1); for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; @@ -646,9 +648,9 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, reduce_2d(pt->format, srcSurf->width, srcSurf->height, - srcSurf->pitch * bpt, srcMap, + srcSurf->stride, srcMap, dstSurf->width, dstSurf->height, - dstSurf->pitch * bpt, dstMap); + dstSurf->stride, dstMap); winsys->buffer_unmap(winsys, srcSurf->buffer); winsys->buffer_unmap(winsys, dstSurf->buffer); diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c index 22f91fab926..45fae4c9995 100644 --- a/src/gallium/drivers/i915simple/i915_blit.c +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -47,8 +47,6 @@ i915_fill_blit(struct i915_context *i915, { unsigned BR13, CMD; - dst_pitch *= (short) cpp; - switch (cpp) { case 1: case 2: diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 5d411a66486..c8db4f608c5 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -188,9 +188,9 @@ struct i915_texture { /* Derived from the above: */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; + unsigned stride; + unsigned depth_stride; /* per-image on i945? */ + unsigned total_nblocksy; unsigned tiled; diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 19d968fd8b6..9bd6f92323d 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -211,7 +211,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; if (cbuf_surface) { - unsigned cpitch = (cbuf_surface->pitch * cbuf_surface->cpp); + unsigned cpitch = cbuf_surface->stride; unsigned ctile = BUF_3D_USE_FENCE; if (cbuf_surface->texture && ((struct i915_texture*)(cbuf_surface->texture))->tiled) { @@ -232,7 +232,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* What happens if no zbuf?? */ if (depth_surface) { - unsigned zpitch = (depth_surface->pitch * depth_surface->cpp); + unsigned zpitch = depth_surface->stride; unsigned ztile = BUF_3D_USE_FENCE; if (depth_surface->texture && ((struct i915_texture*)(depth_surface->texture))->tiled) { diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 379aff38460..7868f21ca6a 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -242,7 +242,7 @@ i915_update_texture(struct i915_context *i915, assert(depth); format = translate_texture_format(pt->format); - pitch = tex->pitch * pt->cpp; + pitch = tex->stride; assert(format); assert(pitch); diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index cc55a0910ed..0061b22f268 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -48,7 +48,9 @@ i915_surface_copy(struct pipe_context *pipe, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { assert( dst != src ); - assert( dst->cpp == src->cpp ); + assert( dst->block.size == src->block.size ); + assert( dst->block.width == src->block.height ); + assert( dst->block.height == src->block.height ); if (0) { void *dst_map = pipe->screen->surface_map( pipe->screen, @@ -60,38 +62,30 @@ i915_surface_copy(struct pipe_context *pipe, PIPE_BUFFER_USAGE_CPU_READ ); pipe_copy_rect(dst_map, - dst->cpp, - dst->pitch, + &dst->block, + dst->stride, dstx, dsty, width, height, src_map, - do_flip ? -(int) src->pitch : src->pitch, + do_flip ? -(int) src->stride : src->stride, srcx, do_flip ? height - 1 - srcy : srcy); pipe->screen->surface_unmap(pipe->screen, src); pipe->screen->surface_unmap(pipe->screen, dst); } else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); i915_copy_blit( i915_context(pipe), do_flip, - dst->cpp, - (short) src->pitch, src->buffer, src->offset, - (short) dst->pitch, dst->buffer, dst->offset, + dst->block.size, + (short) src->stride/src->block.size, src->buffer, src->offset, + (short) dst->stride/dst->block.size, dst->buffer, dst->offset, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); } } -/* Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -static void * -get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) -{ - return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; -} - - static void i915_surface_fill(struct pipe_context *pipe, struct pipe_surface *dst, @@ -99,50 +93,20 @@ i915_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { if (0) { - unsigned i, j; void *dst_map = pipe->screen->surface_map( pipe->screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); - - switch (dst->cpp) { - case 1: { - ubyte *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - memset(row, value, width); - row += dst->pitch; - } - } - break; - case 2: { - ushort *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = (ushort) value; - row += dst->pitch; - } - } - break; - case 4: { - unsigned *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = value; - row += dst->pitch; - } - } - break; - default: - assert(0); - break; - } + pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); pipe->screen->surface_unmap(pipe->screen, dst); } else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); i915_fill_blit( i915_context(pipe), - dst->cpp, - (short) dst->pitch, + dst->block.size, + (short) dst->stride/dst->block.size, dst->buffer, dst->offset, (short) dstx, (short) dsty, (short) width, (short) height, diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index b2e490c7db1..2815e61345c 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -109,6 +109,9 @@ i915_miptree_set_level_info(struct i915_texture *tex, pt->width[level] = w; pt->height[level] = h; pt->depth[level] = d; + + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); tex->nr_images[level] = nr_images; @@ -140,7 +143,7 @@ i915_miptree_set_image_offset(struct i915_texture *tex, assert(img < tex->nr_images[level]); - tex->image_offset[level][img] = (x + y * tex->pitch); + tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size; /* printf("%s level %d img %d pos %d,%d image_offset %x\n", @@ -162,7 +165,7 @@ i915_displaytarget_layout(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - if (pt->last_level > 0 || pt->cpp != 4) + if (pt->last_level > 0 || pt->block.size != 4) return 0; i915_miptree_set_level_info( tex, 0, 1, @@ -172,18 +175,18 @@ i915_displaytarget_layout(struct i915_texture *tex) i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); if (tex->base.width[0] >= 128) { - tex->pitch = power_of_two(tex->base.width[0] * pt->cpp) / pt->cpp; - tex->total_height = round_up(tex->base.height[0], 8); + tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); + tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); tex->tiled = 1; } else { - tex->pitch = round_up(tex->base.width[0], 64 / pt->cpp); - tex->total_height = tex->base.height[0]; + tex->stride = round_up(tex->base.nblocksx[0] * pt->block.size, 64); + tex->total_nblocksy = tex->base.nblocksy[0]; } /* printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width[0], tex->base.height[0], pt->cpp, - tex->pitch, tex->total_height, tex->pitch * tex->total_height * 4); + tex->base.width[0], tex->base.height[0], pt->block.size, + tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); */ return 1; @@ -193,12 +196,14 @@ static void i945_miptree_layout_2d( struct i915_texture *tex ) { struct pipe_texture *pt = &tex->base; - int align_h = 2, align_w = 4; + const int align_x = 2, align_y = 4; unsigned level; unsigned x = 0; unsigned y = 0; unsigned width = pt->width[0]; unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; #if 0 /* used for tiled display targets */ if (pt->last_level == 0 && pt->cpp == 4) @@ -206,7 +211,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) return; #endif - tex->pitch = pt->width[0]; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); /* May need to adjust pitch to accomodate the placement of * the 2nd mipmap level. This occurs when the alignment @@ -214,47 +219,43 @@ i945_miptree_layout_2d( struct i915_texture *tex ) * 2nd mipmap level out past the width of its parent. */ if (pt->last_level > 0) { - unsigned mip1_width = align_int(minify(pt->width[0]), align_w) - + minify(minify(pt->width[0])); + unsigned mip1_nblocksx + = align_int(pf_get_nblocksx(&pt->block, minify(width)), align_x) + + pf_get_nblocksx(&pt->block, minify(minify(width))); - if (mip1_width > pt->width[0]) - tex->pitch = mip1_width; + if (mip1_nblocksx > nblocksx) + tex->stride = mip1_nblocksx * pt->block.size; } - /* Pitch must be a whole number of dwords, even though we - * express it in texels. + /* Pitch must be a whole number of dwords */ - tex->pitch = align_int(tex->pitch * pt->cpp, 64) / pt->cpp; - tex->total_height = 0; + tex->stride = align_int(tex->stride, 64); + tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { - unsigned img_height; - i915_miptree_set_level_info(tex, level, 1, width, height, 1); i915_miptree_set_image_offset(tex, level, 0, x, y); - if (pt->compressed) - img_height = MAX2(1, height/4); - else - img_height = align_int(height, align_h); - + nblocksy = align_int(nblocksy, align_y); /* Because the images are packed better, the final offset * might not be the maximal one: */ - tex->total_height = MAX2(tex->total_height, y + img_height); + tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); /* Layout_below: step right after second mipmap level. */ if (level == 1) { - x += align_int(width, align_w); + x += align_int(nblocksx, align_x); } else { - y += img_height; + y += nblocksy; } width = minify(width); height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); } } @@ -264,15 +265,16 @@ i945_miptree_layout_cube(struct i915_texture *tex) struct pipe_texture *pt = &tex->base; unsigned level; - const unsigned dim = pt->width[0]; + const unsigned nblocks = pt->nblocksx[0]; unsigned face; - unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; /* printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); */ - assert(lvlWidth == lvlHeight); /* cubemap images are square */ + assert(width == height); /* cubemap images are square */ /* * XXX Should only be used for compressed formats. But lets @@ -282,35 +284,32 @@ i945_miptree_layout_cube(struct i915_texture *tex) * determined either by the old-style packing of cubemap faces, * or the final row of 4x4, 2x2 and 1x1 faces below this. */ - if (dim > 32) - tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; + if (nblocks > 32) + tex->stride = round_up(nblocks * pt->block.size * 2, 4); else - tex->pitch = 14 * 8; + tex->stride = 14 * 8 * pt->block.size; - /* - * XXX The 4 is only needed for compressed formats. See above. - */ - tex->total_height = dim * 4 + 4; + tex->total_nblocksy = nblocks * 4; /* Set all the levels to effectively occupy the whole rectangular region. */ for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_level_info(tex, level, 6, lvlWidth, lvlHeight, 1); - lvlWidth /= 2; - lvlHeight /= 2; + i915_miptree_set_level_info(tex, level, 6, width, height, 1); + width /= 2; + height /= 2; } for (face = 0; face < 6; face++) { - unsigned x = initial_offsets[face][0] * dim; - unsigned y = initial_offsets[face][1] * dim; - unsigned d = dim; + unsigned x = initial_offsets[face][0] * nblocks; + unsigned y = initial_offsets[face][1] * nblocks; + unsigned d = nblocks; #if 0 /* Fix and enable this code for compressed formats */ - if (dim == 4 && face >= 4) { + if (nblocks == 4 && face >= 4) { y = tex->total_height - 4; x = (face - 4) * 8; } - else if (dim < 4 && (face > 0)) { + else if (nblocks < 4 && (face > 0)) { y = tex->total_height - 4; x = face * 8; } @@ -369,28 +368,28 @@ i915_miptree_layout(struct i915_texture * tex) switch (pt->target) { case PIPE_TEXTURE_CUBE: { - const unsigned dim = pt->width[0]; + const unsigned nblocks = pt->nblocksx[0]; unsigned face; - unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; + unsigned width = pt->width[0], height = pt->height[0]; - assert(lvlWidth == lvlHeight); /* cubemap images are square */ + assert(width == height); /* cubemap images are square */ /* double pitch for cube layouts */ - tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; - tex->total_height = dim * 4; + tex->stride = round_up(nblocks * pt->block.size * 2, 4); + tex->total_nblocksy = nblocks * 4; for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 6, - lvlWidth, lvlHeight, + width, height, 1); - lvlWidth /= 2; - lvlHeight /= 2; + width /= 2; + height /= 2; } for (face = 0; face < 6; face++) { - unsigned x = initial_offsets[face][0] * dim; - unsigned y = initial_offsets[face][1] * dim; - unsigned d = dim; + unsigned x = initial_offsets[face][0] * nblocks; + unsigned y = initial_offsets[face][1] * nblocks; + unsigned d = nblocks; for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_image_offset(tex, level, face, x, y); @@ -405,25 +404,29 @@ i915_miptree_layout(struct i915_texture * tex) unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; - unsigned stack_height = 0; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + unsigned stack_nblocksy = 0; /* Calculate the size of a single slice. */ - tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); /* XXX: hardware expects/requires 9 levels at minimum. */ for (level = 0; level <= MAX2(8, pt->last_level); level++) { i915_miptree_set_level_info(tex, level, depth, - width, height, depth); + width, height, depth); - stack_height += MAX2(2, height); + stack_nblocksy += MAX2(2, nblocksy); width = minify(width); height = minify(height); depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); } /* Fixup depth image_offsets: @@ -433,7 +436,7 @@ i915_miptree_layout(struct i915_texture * tex) unsigned i; for (i = 0; i < depth; i++) i915_miptree_set_image_offset(tex, level, i, - 0, i * stack_height); + 0, i * stack_nblocksy); depth = minify(depth); } @@ -443,33 +446,33 @@ i915_miptree_layout(struct i915_texture * tex) * remarkable how wasteful of memory the i915 texture layouts * are. They are largely fixed in the i945. */ - tex->total_height = stack_height * pt->depth[0]; + tex->total_nblocksy = stack_nblocksy * pt->depth[0]; break; } default:{ unsigned width = pt->width[0]; unsigned height = pt->height[0]; - unsigned img_height; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; - tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; - tex->total_height = 0; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 1, - width, height, 1); + width, height, 1); i915_miptree_set_image_offset(tex, level, 0, - 0, tex->total_height); + 0, tex->total_nblocksy); - if (pt->compressed) - img_height = MAX2(1, height / 4); - else - img_height = (MAX2(2, height) + 1) & ~1; + nblocksy = round_up(MAX2(2, nblocksy), 2); - tex->total_height += img_height; + tex->total_nblocksy += nblocksy; width = minify(width); height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); } break; } @@ -477,7 +480,7 @@ i915_miptree_layout(struct i915_texture * tex) /* DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, tex->pitch, - tex->total_height, pt->cpp, tex->pitch * tex->total_height * pt->cpp); + tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy); */ return TRUE; @@ -498,14 +501,16 @@ i945_miptree_layout(struct i915_texture * tex) unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; unsigned pack_x_pitch, pack_x_nr; unsigned pack_y_pitch; - tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; - tex->total_height = 0; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->total_nblocksy = 0; - pack_y_pitch = MAX2(pt->height[0], 2); - pack_x_pitch = tex->pitch; + pack_y_pitch = MAX2(pt->nblocksy[0], 2); + pack_x_pitch = tex->stride / pt->block.size; pack_x_nr = 1; for (level = 0; level <= pt->last_level; level++) { @@ -515,11 +520,11 @@ i945_miptree_layout(struct i915_texture * tex) unsigned q, j; i915_miptree_set_level_info(tex, level, nr_images, - width, height, depth); + width, height, depth); for (q = 0; q < nr_images;) { for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { - i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_height); + i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_nblocksy); x += pack_x_pitch; } @@ -528,12 +533,12 @@ i945_miptree_layout(struct i915_texture * tex) } - tex->total_height += y; + tex->total_nblocksy += y; if (pack_x_pitch > 4) { pack_x_pitch >>= 1; pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= tex->pitch); + assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); } if (pack_y_pitch > 2) { @@ -543,6 +548,8 @@ i945_miptree_layout(struct i915_texture * tex) width = minify(width); height = minify(height); depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); } break; } @@ -560,7 +567,7 @@ i945_miptree_layout(struct i915_texture * tex) /* DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, tex->pitch, - tex->total_height, pt->cpp, tex->pitch * tex->total_height * pt->cpp); + tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy); */ return TRUE; @@ -582,6 +589,9 @@ i915_texture_create(struct pipe_screen *screen, tex->base.refcount = 1; tex->base.screen = screen; + tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); + tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); + if (i915screen->is_i945) { if (!i945_miptree_layout(tex)) goto fail; @@ -592,8 +602,8 @@ i915_texture_create(struct pipe_screen *screen, tex->buffer = ws->buffer_create(ws, 64, PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); + tex->stride * + tex->total_nblocksy); if (!tex->buffer) goto fail; @@ -648,13 +658,13 @@ i915_get_tex_surface(struct pipe_screen *screen, unsigned offset; /* in bytes */ if (pt->target == PIPE_TEXTURE_CUBE) { - offset = tex->image_offset[level][face] * pt->cpp; + offset = tex->image_offset[level][face]; } else if (pt->target == PIPE_TEXTURE_3D) { - offset = tex->image_offset[level][zslice] * pt->cpp; + offset = tex->image_offset[level][zslice]; } else { - offset = tex->image_offset[level][0] * pt->cpp; + offset = tex->image_offset[level][0]; assert(face == 0); assert(zslice == 0); } @@ -666,10 +676,12 @@ i915_get_tex_surface(struct pipe_screen *screen, pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = tex->pitch; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = tex->stride; ps->offset = offset; ps->usage = flags; ps->status = PIPE_SURFACE_STATUS_DEFINED; @@ -680,7 +692,7 @@ i915_get_tex_surface(struct pipe_screen *screen, static struct pipe_texture * i915_texture_blanket(struct pipe_screen * screen, const struct pipe_texture *base, - const unsigned *pitch, + const unsigned *stride, struct pipe_buffer *buffer) { struct i915_texture *tex; @@ -699,7 +711,7 @@ i915_texture_blanket(struct pipe_screen * screen, tex->base = *base; - tex->pitch = pitch[0]; + tex->stride = stride[0]; i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 8ac6b4e6897..2cae7665f78 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -231,9 +231,9 @@ struct brw_texture { /* Derived from the above: */ - unsigned pitch; + unsigned stride; unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; + unsigned total_nblocksy; unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c index 925049ecc19..be812c5da92 100644 --- a/src/gallium/drivers/i965simple/brw_misc_state.c +++ b/src/gallium/drivers/i965simple/brw_misc_state.c @@ -224,7 +224,9 @@ static void upload_depthbuffer(struct brw_context *brw) } else { unsigned int format; - switch (depth_surface->cpp) { + assert(depth_surface->block.width == 1); + assert(depth_surface->block.height == 1); + switch (depth_surface->block.size) { case 2: format = BRW_DEPTHFORMAT_D16_UNORM; break; @@ -239,7 +241,7 @@ static void upload_depthbuffer(struct brw_context *brw) return; } - OUT_BATCH(((depth_surface->pitch * depth_surface->cpp) - 1) | + OUT_BATCH((depth_surface->stride - 1) | (format << 18) | (BRW_TILEWALK_YMAJOR << 26) | // (depth_surface->region->tiled << 27) | @@ -247,7 +249,7 @@ static void upload_depthbuffer(struct brw_context *brw) OUT_RELOC(depth_surface->buffer, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | - ((depth_surface->pitch - 1) << 6) | + ((depth_surface->stride/depth_surface->block.size - 1) << 6) | ((depth_surface->height - 1) << 19)); OUT_BATCH(0); } diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 3d98a2bf199..0be3dfc7438 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -47,8 +47,10 @@ brw_surface_copy(struct pipe_context *pipe, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - assert(dst != src); - assert(dst->cpp == src->cpp); + assert( dst != src ); + assert( dst->block.size == src->block.size ); + assert( dst->block.width == src->block.height ); + assert( dst->block.height == src->block.height ); if (0) { void *dst_map = pipe->screen->surface_map( pipe->screen, @@ -60,37 +62,30 @@ brw_surface_copy(struct pipe_context *pipe, PIPE_BUFFER_USAGE_CPU_READ ); pipe_copy_rect(dst_map, - dst->cpp, - dst->pitch, + &dst->block, + dst->stride, dstx, dsty, width, height, src_map, - do_flip ? -(int) src->pitch : src->pitch, + do_flip ? -(int) src->stride : src->stride, srcx, do_flip ? height - 1 - srcy : srcy); pipe->screen->surface_unmap(pipe->screen, src); pipe->screen->surface_unmap(pipe->screen, dst); } else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); brw_copy_blit(brw_context(pipe), do_flip, - dst->cpp, - (short) src->pitch, src->buffer, src->offset, FALSE, - (short) dst->pitch, dst->buffer, dst->offset, FALSE, + dst->block.size, + (short) src->stride/src->block.size, src->buffer, src->offset, FALSE, + (short) dst->stride/dst->block.size, dst->buffer, dst->offset, FALSE, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height, PIPE_LOGICOP_COPY); } } -/* Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -static void * -get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) -{ - return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; -} - static void brw_surface_fill(struct pipe_context *pipe, @@ -99,50 +94,20 @@ brw_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { if (0) { - unsigned i, j; void *dst_map = pipe->screen->surface_map( pipe->screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); - - switch (dst->cpp) { - case 1: { - ubyte *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - memset(row, value, width); - row += dst->pitch; - } - } - break; - case 2: { - ushort *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = (ushort) value; - row += dst->pitch; - } - } - break; - case 4: { - unsigned *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = value; - row += dst->pitch; - } - } - break; - default: - assert(0); - break; - } + pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); pipe->screen->surface_unmap(pipe->screen, dst); } else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); brw_fill_blit(brw_context(pipe), - dst->cpp, - (short) dst->pitch, + dst->block.size, + (short) dst->stride/dst->block.size, dst->buffer, dst->offset, FALSE, (short) dstx, (short) dsty, (short) width, (short) height, diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 78ae0b1223e..8c7725605be 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -81,7 +81,7 @@ static void intel_miptree_set_image_offset(struct brw_texture *tex, assert(x == 0 && y == 0); assert(img < tex->nr_images[level]); - tex->image_offset[level][img] = (x + y * tex->pitch) * pt->cpp; + tex->image_offset[level][img] = y * tex->stride + x * pt->block.size; } static void intel_miptree_set_level_info(struct brw_texture *tex, @@ -97,8 +97,11 @@ static void intel_miptree_set_level_info(struct brw_texture *tex, pt->width[level] = w; pt->height[level] = h; pt->depth[level] = d; + + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); - tex->level_offset[level] = (x + y * tex->pitch) * pt->cpp; + tex->level_offset[level] = y * tex->stride + x * tex->base.block.size; tex->nr_images[level] = nr_images; /* @@ -123,77 +126,60 @@ static void intel_miptree_set_level_info(struct brw_texture *tex, static void i945_miptree_layout_2d(struct brw_texture *tex) { struct pipe_texture *pt = &tex->base; - unsigned align_h = 2, align_w = 4; + const int align_x = 2, align_y = 4; unsigned level; unsigned x = 0; unsigned y = 0; unsigned width = pt->width[0]; unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; - tex->pitch = pt->width[0]; - -#if 0 - if (pt->compressed) { - align_w = intel_compressed_alignment(pt->internal_format); - tex->pitch = ALIGN(pt->width[0], align_w); - } -#endif + tex->stride = align(pt->nblocksx[0] * pt->block.size, 4); /* May need to adjust pitch to accomodate the placement of - * the 2nd mipmap. This occurs when the alignment + * the 2nd mipmap level. This occurs when the alignment * constraints of mipmap placement push the right edge of the - * 2nd mipmap out past the width of its parent. + * 2nd mipmap level out past the width of its parent. */ if (pt->last_level > 0) { - unsigned mip1_width; - - if (pt->compressed) { - mip1_width = align(minify(pt->width[0]), align_w) - + align(minify(minify(pt->width[0])), align_w); - } else { - mip1_width = align(minify(pt->width[0]), align_w) - + minify(minify(pt->width[0])); - } + unsigned mip1_nblocksx + = align_int(pf_get_nblocksx(&pt->block, minify(width)), align_x) + + pf_get_nblocksx(&pt->block, minify(minify(width))); - if (mip1_width > tex->pitch) { - tex->pitch = mip1_width; - } + if (mip1_nblocksx > nblocksx) + tex->stride = mip1_nblocksx * pt->block.size; } - /* Pitch must be a whole number of dwords, even though we - * express it in texels. + /* Pitch must be a whole number of dwords */ - tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp; - tex->total_height = 0; + tex->stride = align_int(tex->stride, 64); + tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { - unsigned img_height; - intel_miptree_set_level_info(tex, level, 1, x, y, width, height, 1); - if (pt->compressed) - img_height = MAX2(1, height/4); - else - img_height = align(height, align_h); - + nblocksy = align_int(nblocksy, align_y); /* Because the images are packed better, the final offset * might not be the maximal one: */ - tex->total_height = MAX2(tex->total_height, y + img_height); + tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); - /* Layout_below: step right after second mipmap. + /* Layout_below: step right after second mipmap level. */ if (level == 1) { - x += align(width, align_w); + x += align_int(nblocksx, align_x); } else { - y += img_height; + y += nblocksy; } width = minify(width); height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); } } @@ -210,26 +196,20 @@ static boolean brw_miptree_layout(struct brw_texture *tex) unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; unsigned pack_x_pitch, pack_x_nr; unsigned pack_y_pitch; unsigned level; unsigned align_h = 2; unsigned align_w = 4; - tex->total_height = 0; -#if 0 - if (pt->compressed) { - align_w = intel_compressed_alignment(pt->internal_format); - pt->pitch = align(width, align_w); - pack_y_pitch = (height + 3) / 4; - } else -#endif - { - tex->pitch = align(pt->width[0] * pt->cpp, 4) / pt->cpp; - pack_y_pitch = align(pt->height[0], align_h); - } + tex->total_nblocksy = 0; + + tex->stride = align(pt->nblocksx[0], 4); + pack_y_pitch = align(pt->nblocksy[0], align_h); - pack_x_pitch = tex->pitch; + pack_x_pitch = tex->stride / pt->block.size; pack_x_nr = 1; for (level = 0; level <= pt->last_level; level++) { @@ -239,7 +219,7 @@ static boolean brw_miptree_layout(struct brw_texture *tex) uint q, j; intel_miptree_set_level_info(tex, level, nr_images, - 0, tex->total_height, + 0, tex->total_nblocksy, width, height, depth); for (q = 0; q < nr_images;) { @@ -253,10 +233,12 @@ static boolean brw_miptree_layout(struct brw_texture *tex) } - tex->total_height += y; + tex->total_nblocksy += y; width = minify(width); height = minify(height); depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); if (pt->compressed) { pack_y_pitch = (height + 3) / 4; @@ -269,7 +251,7 @@ static boolean brw_miptree_layout(struct brw_texture *tex) if (pack_x_pitch > 4) { pack_x_pitch >>= 1; pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= tex->pitch); + assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); } if (pack_y_pitch > 2) { @@ -289,9 +271,9 @@ static boolean brw_miptree_layout(struct brw_texture *tex) #if 0 PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, pt->pitch, - pt->total_height, - pt->cpp, - pt->pitch * pt->total_height * pt->cpp ); + pt->total_nblocksy, + pt->block.size, + pt->stride * pt->total_nblocksy ); #endif return TRUE; @@ -309,11 +291,14 @@ brw_texture_create_screen(struct pipe_screen *screen, tex->base = *templat; tex->base.refcount = 1; + tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); + tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); + if (brw_miptree_layout(tex)) tex->buffer = ws->buffer_create(ws, 64, PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); + tex->stride * + tex->total_nblocksy); if (!tex->buffer) { FREE(tex); @@ -370,10 +355,10 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, offset = tex->level_offset[level]; if (pt->target == PIPE_TEXTURE_CUBE) { - offset += tex->image_offset[level][face] * pt->cpp; + offset += tex->image_offset[level][face]; } else if (pt->target == PIPE_TEXTURE_3D) { - offset += tex->image_offset[level][zslice] * pt->cpp; + offset += tex->image_offset[level][zslice]; } else { assert(face == 0); @@ -386,10 +371,12 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, assert(ps->refcount); pipe_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = tex->pitch; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = tex->stride; ps->offset = offset; } return ps; diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c index 69e56dc8bd3..1a326f99186 100644 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -160,7 +160,7 @@ void brw_update_texture_surface( struct brw_context *brw, surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; surf.ss3.tiled_surface = 0; /* always zero */ - surf.ss3.pitch = tObj->pitch - 1; + surf.ss3.pitch = tObj->stride - 1; surf.ss3.depth = tObj->base.depth[0] - 1; surf.ss4.min_lod = 0; @@ -197,7 +197,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) memset(&surf, 0, sizeof(surf)); if (pipe_surface != NULL) { - if (pipe_surface->cpp == 4) + if (pipe_surface->block.size == 4) surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; else surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -210,7 +210,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) surf.ss2.height = pipe_surface->height - 1; surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; surf.ss3.tiled_surface = 0; - surf.ss3.pitch = (pipe_surface->pitch * pipe_surface->cpp) - 1; + surf.ss3.pitch = pipe_surface->stride - 1; } else { surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; surf.ss0.surface_type = BRW_SURFACE_NULL; diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 9fd48aeccc5..7dc15c38d10 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -58,18 +58,20 @@ sp_surface_copy(struct pipe_context *pipe, src, PIPE_BUFFER_USAGE_CPU_READ ); - assert(dst->cpp == src->cpp); + assert(dst->block.size == src->block.size); + assert(dst->block.width == src->block.width); + assert(dst->block.height == src->block.height); assert(src_map); assert(dst_map); /* If do_flip, invert src_y position and pass negative src stride */ pipe_copy_rect(dst_map, - dst->cpp, - dst->pitch, + &dst->block, + dst->stride, dstx, dsty, width, height, src_map, - do_flip ? -(int) src->pitch : src->pitch, + do_flip ? -(int) src->stride : src->stride, srcx, do_flip ? src->height - 1 - srcy : srcy); pipe->screen->surface_unmap(pipe->screen, src); @@ -80,7 +82,7 @@ sp_surface_copy(struct pipe_context *pipe, static void * get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) { - return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; + return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size; } @@ -102,39 +104,14 @@ sp_surface_fill(struct pipe_context *pipe, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); - assert(dst->pitch > 0); - assert(width <= dst->pitch); + assert(dst->stride > 0); - switch (dst->cpp) { + switch (dst->block.size) { case 1: - { - ubyte *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - memset(row, value, width); - row += dst->pitch; - } - } - break; case 2: - { - ushort *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = (ushort) value; - row += dst->pitch; - } - } - break; case 4: - { - unsigned *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = value; - row += dst->pitch; - } - } + pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); break; case 8: { @@ -155,7 +132,7 @@ sp_surface_fill(struct pipe_context *pipe, row[j*4+2] = val2; row[j*4+3] = val3; } - row += dst->pitch * 4; + row += dst->stride/2; } } break; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 2ef17a220b4..4db045cdc3d 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -71,13 +71,15 @@ softpipe_texture_layout(struct pipe_screen *screen, pt->width[level] = width; pt->height[level] = height; pt->depth[level] = depth; - spt->pitch[level] = width; + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + spt->stride[level] = pt->nblocksx[level]*pt->block.size; spt->level_offset[level] = buffer_size; - buffer_size += (((pt->compressed) ? MAX2(1, height/4) : height) * + buffer_size += (pt->nblocksy[level] * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - width * pt->cpp); + spt->stride[level]); width = minify(width); height = minify(height); @@ -121,7 +123,7 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, /* Now extract the goodies: */ spt->buffer = surf.buffer; - spt->pitch[0] = surf.pitch; + spt->stride[0] = surf.stride; return spt->buffer != NULL; } @@ -195,10 +197,12 @@ softpipe_get_tex_surface(struct pipe_screen *screen, assert(ps->winsys); pipe_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; + ps->block = pt->block; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = spt->pitch[level]; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = spt->stride[level]; ps->offset = spt->level_offset[level]; ps->usage = usage; @@ -228,8 +232,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen, if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * - (pt->compressed ? ps->height/4 : ps->height) * - ps->width * ps->cpp; + ps->nblocksy * + ps->stride; } else { assert(face == 0); diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 0e1017632c9..bf437a7c618 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -42,7 +42,7 @@ struct softpipe_texture struct pipe_texture base; unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; /* The data is held here: */ diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index d973fb357be..a2c6155d018 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -501,6 +501,47 @@ pf_get_block(enum pipe_format format, struct pipe_format_block *block) } } +static INLINE unsigned +pf_get_nblocksx(const struct pipe_format_block *block, unsigned x) +{ + return (x + block->width - 1)/block->width; +} + +static INLINE unsigned +pf_get_nblocksy(const struct pipe_format_block *block, unsigned y) +{ + return (y + block->height - 1)/block->height; +} + +static INLINE unsigned +pf_get_nblocks(const struct pipe_format_block *block, unsigned width, unsigned height) +{ + return pf_get_nblocksx(block, width)*pf_get_nblocksy(block, height); +} + +static INLINE void +pipe_rect_to_blocks(const struct pipe_format_block *block, + unsigned *width, unsigned *height, + unsigned *src_x, unsigned *src_y, + unsigned *dst_x, unsigned *dst_y) +{ + assert(block->size > 0); + assert(block->width > 0); + assert(block->height > 0); + if(width) + *width = pf_get_nblocksx(block, *width); + if(height) + *height = pf_get_nblocksy(block, *height); + if(src_x) + *src_x /= block->width; + if(src_y) + *src_y /= block->height; + if(dst_x) + *dst_x /= block->width; + if(dst_y) + *dst_y /= block->height; +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index e7ee8c97ed7..2992e2f3b30 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -267,10 +267,12 @@ struct pipe_surface enum pipe_format format; /**< PIPE_FORMAT_x */ unsigned status; /**< PIPE_SURFACE_STATUS_x */ unsigned clear_value; /**< XXX may be temporary */ - unsigned cpp; /**< bytes per pixel */ unsigned width; unsigned height; - unsigned pitch; /**< in pixels */ + struct pipe_format_block block; + unsigned nblocksx; + unsigned nblocksy; + unsigned stride; /**< in bytes */ unsigned layout; /**< PIPE_SURFACE_LAYOUT_x */ unsigned offset; /**< offset from start of buffer, in bytes */ unsigned refcount; @@ -303,7 +305,10 @@ struct pipe_texture unsigned height[PIPE_MAX_TEXTURE_LEVELS]; unsigned depth[PIPE_MAX_TEXTURE_LEVELS]; - unsigned cpp:8; + struct pipe_format_block block; + unsigned nblocksx[PIPE_MAX_TEXTURE_LEVELS]; + unsigned nblocksy[PIPE_MAX_TEXTURE_LEVELS]; + unsigned last_level:8; /**< Index of last mipmap level present/defined */ unsigned compressed:1; diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index cf2447822ab..7dcdd282875 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -31,6 +31,7 @@ #include "p_config.h" #include "p_compiler.h" #include "p_debug.h" +#include "p_format.h" #include "p_pointer.h" #include <math.h> #include <stdarg.h> @@ -401,11 +402,15 @@ static INLINE int align(int value, int alignment) /* util/p_util.c */ -extern void pipe_copy_rect(ubyte * dst, unsigned cpp, unsigned dst_pitch, - unsigned dst_x, unsigned dst_y, unsigned width, - unsigned height, const ubyte * src, - int src_pitch, unsigned src_x, int src_y); - +extern void pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, const ubyte * src, + int src_stride, unsigned src_x, int src_y); + +extern void +pipe_fill_rect(ubyte * dst, const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, uint32_t value); #if defined(_MSC_VER) diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/brw_aub.c index 10eedd8402d..6e814ce5d11 100644 --- a/src/gallium/winsys/xlib/brw_aub.c +++ b/src/gallium/winsys/xlib/brw_aub.c @@ -322,7 +322,10 @@ void brw_aub_dump_bmp( struct brw_aubfile *aubfile, struct aub_dump_bmp db; unsigned format; - if (surface->cpp == 4) + assert(surface->block.width == 1); + assert(surface->block.height == 1); + + if (surface->block.size == 4) format = 0x7; else format = 0x3; @@ -331,8 +334,8 @@ void brw_aub_dump_bmp( struct brw_aubfile *aubfile, db.xmin = 0; db.ymin = 0; db.format = format; - db.bpp = surface->cpp * 8; - db.pitch = surface->pitch; + db.bpp = surface->block.size * 8; + db.pitch = surface->stride/surface->block.size; db.xsize = surface->width; db.ysize = surface->height; db.addr = gtt_offset; diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index b14758f3336..9225ee510df 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -364,9 +364,10 @@ xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf) return; } - if (XSHM_ENABLED(xm_buf) && (xm_buf->tempImage == NULL)) { - alloc_shm_ximage(xm_buf, b, surf->pitch, surf->height); + assert(surf->block.width == 1); + assert(surf->block.height == 1); + alloc_shm_ximage(xm_buf, b, surf->stride/surf->block.size, surf->height); } ximage = (XSHM_ENABLED(xm_buf)) ? xm_buf->tempImage : b->tempImage; @@ -386,7 +387,7 @@ xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf) /* update XImage's fields */ ximage->width = surf->width; ximage->height = surf->height; - ximage->bytes_per_line = surf->pitch * surf->cpp; + ximage->bytes_per_line = surf->stride; XPutImage(b->xm_visual->display, b->drawable, b->gc, ximage, 0, 0, 0, 0, surf->width, surf->height); @@ -497,18 +498,21 @@ xm_surface_alloc_storage(struct pipe_winsys *winsys, surf->width = width; surf->height = height; surf->format = format; - surf->cpp = pf_get_size(format); - surf->pitch = round_up(width, alignment / surf->cpp); + pf_get_block(format, &surf->block); + surf->nblocksx = pf_get_nblocksx(&surf->block, width); + surf->nblocksy = pf_get_nblocksy(&surf->block, height); + surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); surf->usage = flags; -#ifdef GALLIUM_CELL /* XXX a bit of a hack */ - height = round_up(height, TILE_SIZE); -#endif - assert(!surf->buffer); surf->buffer = winsys->buffer_create(winsys, alignment, PIPE_BUFFER_USAGE_PIXEL, - surf->pitch * surf->cpp * height); +#ifdef GALLIUM_CELL /* XXX a bit of a hack */ + surf->stride * round_up(surf->nblocksy, TILE_SIZE)); +#else + surf->stride * surf->nblocksy); +#endif + if(!surf->buffer) return -1; diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index 77376099f0d..7fc9debdd54 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -279,22 +279,25 @@ aub_i915_surface_alloc_storage(struct pipe_winsys *winsys, unsigned flags, unsigned tex_usage) { - const unsigned alignment = 64; - - surf->width = width; - surf->height = height; - surf->format = format; - surf->cpp = pf_get_size(format); - surf->pitch = round_up(width, alignment / surf->cpp); - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->pitch * surf->cpp * height); + const unsigned alignment = 64; + + surf->width = width; + surf->height = height; + surf->format = format; + pf_get_block(format, &surf->block); + surf->nblocksx = pf_get_nblocksx(&surf->block, width); + surf->nblocksy = pf_get_nblocksy(&surf->block, height); + surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); + surf->usage = flags; + + assert(!surf->buffer); + surf->buffer = winsys->buffer_create(winsys, alignment, + PIPE_BUFFER_USAGE_PIXEL, + surf->stride * surf->nblocksy); if(!surf->buffer) - return -1; + return -1; - return 0; + return 0; } static void diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 8098d75e18f..809a906ba35 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -66,15 +66,17 @@ acc_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, uint x, uint y, uint w, uint h, float *p) { const enum pipe_format f = acc_ps->format; - const int cpp = acc_ps->cpp; + const struct pipe_format_block b = acc_ps->block; acc_ps->format = DEFAULT_ACCUM_PIPE_FORMAT; - acc_ps->cpp = 8; + acc_ps->block.size = 8; + acc_ps->block.width = 1; + acc_ps->block.height = 1; pipe_get_tile_rgba(pipe, acc_ps, x, y, w, h, p); acc_ps->format = f; - acc_ps->cpp = cpp; + acc_ps->block = b; } @@ -88,15 +90,17 @@ acc_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, uint x, uint y, uint w, uint h, const float *p) { enum pipe_format f = acc_ps->format; - const int cpp = acc_ps->cpp; + const struct pipe_format_block b = acc_ps->block; acc_ps->format = DEFAULT_ACCUM_PIPE_FORMAT; - acc_ps->cpp = 8; + acc_ps->block.size = 8; + acc_ps->block.width = 1; + acc_ps->block.height = 1; pipe_put_tile_rgba(pipe, acc_ps, x, y, w, h, p); acc_ps->format = f; - acc_ps->cpp = cpp; + acc_ps->block = b; } @@ -111,7 +115,7 @@ st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) const GLint ypos = ctx->DrawBuffer->_Ymin; const GLint width = ctx->DrawBuffer->_Xmax - xpos; const GLint height = ctx->DrawBuffer->_Ymax - ypos; - GLvoid *map; + GLubyte *map; acc_ps = screen->get_tex_surface(screen, acc_strb->texture, 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE); @@ -128,8 +132,7 @@ st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) GLshort a = FLOAT_TO_SHORT(ctx->Accum.ClearColor[3]); int i, j; for (i = 0; i < height; i++) { - GLshort *dst = ((GLshort *) map - + ((ypos + i) * acc_ps->pitch + xpos) * 4); + GLshort *dst = (GLshort *) (map + (ypos + i) * acc_ps->stride + xpos * 8); for (j = 0; j < width; j++) { dst[0] = r; dst[1] = g; @@ -168,8 +171,7 @@ accum_mad(GLcontext *ctx, GLfloat scale, GLfloat bias, { int i, j; for (i = 0; i < height; i++) { - GLshort *acc = ((GLshort *) map - + ((ypos + i) * acc_ps->pitch + xpos) * 4); + GLshort *acc = (GLshort *) (map + (ypos + i) * acc_ps->stride + xpos * 8); for (j = 0; j < width * 4; j++) { float val = SHORT_TO_FLOAT(acc[j]) * scale + bias; acc[j] = FLOAT_TO_SHORT(val); diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 6fa3cbd533c..f3bc3b85841 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -341,9 +341,9 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, dest = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_WRITE); /* Put image into texture surface */ - memset(dest, 0xff, height * surface->pitch); + memset(dest, 0xff, height * surface->stride); unpack_bitmap(ctx->st, 0, 0, width, height, unpack, bitmap, - dest, surface->pitch); + dest, surface->stride); _mesa_unmap_bitmap_pbo(ctx, unpack); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index d8f1d2367c1..a7781f3ab2a 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -382,7 +382,7 @@ make_texture(struct st_context *st, mformat, /* gl_texture_format */ dest, /* dest */ 0, 0, 0, /* dstX/Y/Zoffset */ - surface->pitch * cpp, /* dstRowStride, bytes */ + surface->stride, /* dstRowStride, bytes */ &dstImageOffsets, /* dstImageOffsets */ width, height, 1, /* size */ format, type, /* src format/type */ @@ -786,13 +786,13 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, switch (ps->format) { case PIPE_FORMAT_S8_UNORM: { - ubyte *dest = stmap + spanY * ps->pitch + spanX; + ubyte *dest = stmap + spanY * ps->stride + spanX; memcpy(dest, values, spanWidth); } break; case PIPE_FORMAT_S8Z24_UNORM: { - uint *dest = (uint *) stmap + spanY * ps->pitch + spanX; + uint *dest = (uint *) (stmap + spanY * ps->stride + spanX*4); GLint k; for (k = 0; k < spanWidth; k++) { uint p = dest[k]; @@ -903,6 +903,9 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, psDraw = screen->get_tex_surface(screen, rbDraw->texture, 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(psDraw->block.width == 1); + assert(psDraw->block.height == 1); + /* map the stencil buffer */ drawMap = screen->surface_map(screen, psDraw, PIPE_BUFFER_USAGE_CPU_WRITE); @@ -919,7 +922,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, y = ctx->DrawBuffer->Height - y - 1; } - dst = drawMap + (y * psDraw->pitch + dstx) * psDraw->cpp; + dst = drawMap + y * psDraw->stride + dstx * psDraw->block.size; src = buffer + i * width; switch (psDraw->format) { diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index db25ddd6152..1067caf9b37 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -113,7 +113,7 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, template.target = PIPE_TEXTURE_2D; template.compressed = 0; - template.cpp = pf_get_size(template.format); + pf_get_block(template.format, &template.block); template.width[0] = width; template.height[0] = height; template.depth[0] = 1; @@ -171,10 +171,12 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, assert(strb->surface->buffer); assert(strb->surface->format); - assert(strb->surface->cpp); + assert(strb->surface->block.size); + assert(strb->surface->block.width); + assert(strb->surface->block.height); assert(strb->surface->width == width); assert(strb->surface->height == height); - assert(strb->surface->pitch); + assert(strb->surface->stride); return strb->surface != NULL; diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index c5193631a75..09d9c29e446 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -94,13 +94,13 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, switch (ps->format) { case PIPE_FORMAT_S8_UNORM: { - const ubyte *src = stmap + srcY * ps->pitch + x; + const ubyte *src = stmap + srcY * ps->stride + x; memcpy(values, src, width); } break; case PIPE_FORMAT_S8Z24_UNORM: { - const uint *src = (uint *) stmap + srcY * ps->pitch + x; + const uint *src = (uint *) (stmap + srcY * ps->stride + x*4); GLint k; for (k = 0; k < width; k++) { values[k] = src[k] >> 24; @@ -109,7 +109,7 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, break; case PIPE_FORMAT_Z24S8_UNORM: { - const uint *src = (uint *) stmap + srcY * ps->pitch + x; + const uint *src = (uint *) (stmap + srcY * ps->stride + x*4); GLint k; for (k = 0; k < width; k++) { values[k] = src[k] & 0xff; diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 7d52d1da1b6..b9aa513d723 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -658,7 +658,7 @@ st_TexImage(GLcontext * ctx, texImage->Data = st_texture_image_map(ctx->st, stImage, 0, PIPE_BUFFER_USAGE_CPU_WRITE); if (stImage->surface) - dstRowStride = stImage->surface->pitch * stImage->surface->cpp; + dstRowStride = stImage->surface->stride; } else { /* Allocate regular memory and store the image there temporarily. */ @@ -820,7 +820,7 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level, */ texImage->Data = st_texture_image_map(ctx->st, stImage, 0, PIPE_BUFFER_USAGE_CPU_READ); - texImage->RowStride = stImage->surface->pitch; + texImage->RowStride = stImage->surface->stride / stImage->pt->block.size; } else { /* Otherwise, the image should actually be stored in @@ -925,7 +925,7 @@ st_TexSubimage(GLcontext * ctx, texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset, PIPE_BUFFER_USAGE_CPU_WRITE); if (stImage->surface) - dstRowStride = stImage->surface->pitch * stImage->surface->cpp; + dstRowStride = stImage->surface->stride; } if (!texImage->Data) { @@ -1425,9 +1425,11 @@ copy_image_data_to_texture(struct st_context *st, stImage->face, dstLevel, stImage->base.Data, - stImage->base.RowStride, + stImage->base.RowStride * + stObj->pt->block.size, stImage->base.RowStride * - stImage->base.Height); + stImage->base.Height * + stObj->pt->block.size); _mesa_align_free(stImage->base.Data); stImage->base.Data = NULL; } @@ -1477,6 +1479,7 @@ st_finalize_texture(GLcontext *ctx, pipe_texture_reference(&stObj->pt, firstImage->pt); } + /* FIXME: determine format block instead of cpp */ if (firstImage->base.IsCompressed) { comp_byte = compressed_num_bytes(firstImage->base.TexFormat->MesaFormat); cpp = comp_byte; @@ -1497,7 +1500,9 @@ st_finalize_texture(GLcontext *ctx, stObj->pt->width[0] != firstImage->base.Width2 || stObj->pt->height[0] != firstImage->base.Height2 || stObj->pt->depth[0] != firstImage->base.Depth2 || - stObj->pt->cpp != cpp || + stObj->pt->block.size != cpp || + stObj->pt->block.width != 1 || + stObj->pt->block.height != 1 || stObj->pt->compressed != firstImage->base.IsCompressed) { pipe_texture_release(&stObj->pt); ctx->st->dirty.st |= ST_NEW_FRAMEBUFFER; diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 851f17c3b46..2fc00df4298 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -137,10 +137,10 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, _mesa_generate_mipmap_level(target, datatype, comps, 0 /*border*/, pt->width[srcLevel], pt->height[srcLevel], pt->depth[srcLevel], - srcSurf->pitch * srcSurf->cpp, /* stride in bytes */ + srcSurf->stride, /* stride in bytes */ srcData, pt->width[dstLevel], pt->height[dstLevel], pt->depth[dstLevel], - dstSurf->pitch * dstSurf->cpp, /* stride in bytes */ + dstSurf->stride, /* stride in bytes */ dstData); pipe_buffer_unmap(pipe, srcSurf->buffer); diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 9553b34e317..8222826e7a7 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -98,7 +98,7 @@ st_texture_create(struct st_context *st, pt.height[0] = height0; pt.depth[0] = depth0; pt.compressed = compress_byte ? 1 : 0; - pt.cpp = pt.compressed ? compress_byte : st_sizeof_format(format); + pf_get_block(format, &pt.block); pt.tex_usage = usage; newtex = screen->texture_create(screen, &pt); @@ -231,16 +231,19 @@ static void st_surface_data(struct pipe_context *pipe, struct pipe_surface *dst, unsigned dstx, unsigned dsty, - const void *src, unsigned src_pitch, + const void *src, unsigned src_stride, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { struct pipe_screen *screen = pipe->screen; void *map = screen->surface_map(screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE); pipe_copy_rect(map, - dst->cpp, - dst->pitch, - dstx, dsty, width, height, src, src_pitch, srcx, srcy); + &dst->block, + dst->stride, + dstx, dsty, + width, height, + src, src_stride, + srcx, srcy); screen->surface_unmap(screen, dst); } @@ -254,34 +257,29 @@ st_texture_image_data(struct pipe_context *pipe, GLuint face, GLuint level, void *src, - GLuint src_row_pitch, GLuint src_image_pitch) + GLuint src_row_stride, GLuint src_image_stride) { struct pipe_screen *screen = pipe->screen; GLuint depth = dst->depth[level]; GLuint i; - GLuint height = 0; const GLubyte *srcUB = src; struct pipe_surface *dst_surface; DBG("%s\n", __FUNCTION__); for (i = 0; i < depth; i++) { - height = dst->height[level]; - if(dst->compressed) - height /= 4; - dst_surface = screen->get_tex_surface(screen, dst, face, level, i, PIPE_BUFFER_USAGE_CPU_WRITE); st_surface_data(pipe, dst_surface, 0, 0, /* dstx, dsty */ srcUB, - src_row_pitch, + src_row_stride, 0, 0, /* source x, y */ - dst->width[level], height); /* width, height */ + dst->width[level], dst->height[level]); /* width, height */ screen->tex_surface_release(screen, &dst_surface); - srcUB += src_image_pitch * dst->cpp; + srcUB += src_image_stride; } } -- cgit v1.2.3 From 891469a3a5525a741094fd3f1e4a1270ec8b3c3b Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 27 Jun 2008 20:10:04 +0900 Subject: cell: Update for cpp removal. Not tested -- just mymic the softpipe changes. --- src/gallium/drivers/cell/ppu/cell_surface.c | 65 ++++------------------------- src/gallium/drivers/cell/ppu/cell_texture.c | 16 ++++--- 2 files changed, 18 insertions(+), 63 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 18f37919241..5549eb496d8 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -34,30 +34,6 @@ #include "cell_surface.h" -/* Upload data to a rectangular sub-region. Lots of choices how to do this: - * - * - memcpy by span to current destination - * - upload data as new buffer and blit - * - * Currently always memcpy. - */ -static void -cell_surface_data(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - const void *src, unsigned src_pitch, - unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - pipe_copy_rect(pipe_surface_map(dst), - dst->cpp, - dst->pitch, - dstx, dsty, width, height, src, src_pitch, srcx, srcy); - - pipe_surface_unmap(dst); -} - - static void cell_surface_copy(struct pipe_context *pipe, boolean do_flip, @@ -70,12 +46,12 @@ cell_surface_copy(struct pipe_context *pipe, assert( dst->cpp == src->cpp ); pipe_copy_rect(pipe_surface_map(dst), - dst->cpp, - dst->pitch, + &dst->block, + dst->stride, dstx, dsty, width, height, pipe_surface_map(src), - do_flip ? -src->pitch : src->pitch, + do_flip ? -src->stride : src->stride, srcx, do_flip ? height - 1 - srcy : srcy); pipe_surface_unmap(src); @@ -86,7 +62,7 @@ cell_surface_copy(struct pipe_context *pipe, static void * get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) { - return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; + return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size; } @@ -106,38 +82,13 @@ cell_surface_fill(struct pipe_context *pipe, unsigned i, j; void *dst_map = pipe_surface_map(dst); - assert(dst->pitch > 0); - assert(width <= dst->pitch); + assert(dst->stride > 0); - switch (dst->cpp) { + switch (dst->block.size) { case 1: - { - ubyte *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - memset(row, value, width); - row += dst->pitch; - } - } - break; case 2: - { - ushort *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = (ushort) value; - row += dst->pitch; - } - } - break; case 4: - { - unsigned *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = value; - row += dst->pitch; - } - } + pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); break; case 8: { @@ -158,7 +109,7 @@ cell_surface_fill(struct pipe_context *pipe, row[j*4+2] = val2; row[j*4+3] = val3; } - row += dst->pitch * 4; + row += dst->stride/2; } } break; diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 07717da8f6a..533b64227d6 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -65,12 +65,14 @@ cell_texture_layout(struct cell_texture * spt) pt->width[level] = width; pt->height[level] = height; pt->depth[level] = depth; + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); spt->level_offset[level] = spt->buffer_size; - spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * + spt->buffer_size += (pt->nblocksy[level] * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - width * pt->cpp; + pt->nblocksx[level] * pt->block.size; width = minify(width); height = minify(height); @@ -157,16 +159,18 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, assert(ps->winsys); pipe_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; + ps->block = pt->block; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = ps->width; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = spt->stride[level]; ps->offset = spt->level_offset[level]; if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * - (pt->compressed ? ps->height/4 : ps->height) * - ps->width * ps->cpp; + ps->nblocksy * + ps->stride; } else { assert(face == 0); assert(zslice == 0); -- cgit v1.2.3 From 5b9d823545ec588ea97cc599a278626b99430d81 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Fri, 27 Jun 2008 14:16:42 +0200 Subject: i915: Fix the last of the stride/pitch changes --- src/gallium/drivers/i915simple/i915_surface.c | 18 +++++++++--------- src/gallium/winsys/dri/intel/intel_screen.c | 4 ++-- src/gallium/winsys/dri/intel/intel_swapbuffers.c | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 0061b22f268..4430e816263 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -64,10 +64,10 @@ i915_surface_copy(struct pipe_context *pipe, pipe_copy_rect(dst_map, &dst->block, dst->stride, - dstx, dsty, - width, height, - src_map, - do_flip ? -(int) src->stride : src->stride, + dstx, dsty, + width, height, + src_map, + do_flip ? -(int) src->stride : src->stride, srcx, do_flip ? height - 1 - srcy : srcy); pipe->screen->surface_unmap(pipe->screen, src); @@ -79,8 +79,8 @@ i915_surface_copy(struct pipe_context *pipe, i915_copy_blit( i915_context(pipe), do_flip, dst->block.size, - (short) src->stride/src->block.size, src->buffer, src->offset, - (short) dst->stride/dst->block.size, dst->buffer, dst->offset, + (short) src->stride, src->buffer, src->offset, + (short) dst->stride, dst->buffer, dst->offset, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); } } @@ -106,10 +106,10 @@ i915_surface_fill(struct pipe_context *pipe, assert(dst->block.height == 1); i915_fill_blit( i915_context(pipe), dst->block.size, - (short) dst->stride/dst->block.size, + (short) dst->stride, dst->buffer, dst->offset, - (short) dstx, (short) dsty, - (short) width, (short) height, + (short) dstx, (short) dsty, + (short) width, (short) height, value ); } } diff --git a/src/gallium/winsys/dri/intel/intel_screen.c b/src/gallium/winsys/dri/intel/intel_screen.c index 18427a4586f..89de188ada8 100644 --- a/src/gallium/winsys/dri/intel/intel_screen.c +++ b/src/gallium/winsys/dri/intel/intel_screen.c @@ -78,10 +78,10 @@ intelCreateSurface(struct intel_screen *intelScreen, struct pipe_winsys *winsys, templat.last_level = 0; templat.depth[0] = 1; templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; - templat.cpp = intelScreen->front.cpp; templat.width[0] = intelScreen->front.width; templat.height[0] = intelScreen->front.height; - pitch = intelScreen->front.pitch / intelScreen->front.cpp; + pf_get_block(templat.format, &templat.block); + pitch = intelScreen->front.pitch; texture = screen->texture_blanket(screen, &templat, diff --git a/src/gallium/winsys/dri/intel/intel_swapbuffers.c b/src/gallium/winsys/dri/intel/intel_swapbuffers.c index 7f3babd98ef..f58da97c64a 100644 --- a/src/gallium/winsys/dri/intel/intel_swapbuffers.c +++ b/src/gallium/winsys/dri/intel/intel_swapbuffers.c @@ -89,7 +89,7 @@ intelDisplaySurface(__DRIdrawablePrivate *dPriv, const drm_clip_rect_t *pbox = dPriv->pClipRects; const int pitch = intelScreen->front.pitch / intelScreen->front.cpp; const int cpp = intelScreen->front.cpp; - const int srcpitch = surf->pitch; + const int srcpitch = surf->stride / cpp; int BR13, CMD; int i; -- cgit v1.2.3 From b6478021d572d9ec30212d6e6992496ee4cf347d Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 29 Jun 2008 13:26:04 +1000 Subject: nouveau: adapt to cpp->pf_block changes --- src/gallium/drivers/nv10/nv10_miptree.c | 12 +++-- src/gallium/drivers/nv10/nv10_state_emit.c | 4 +- src/gallium/drivers/nv30/nv30_miptree.c | 10 ++-- src/gallium/drivers/nv30/nv30_state.c | 10 ++-- src/gallium/drivers/nv30/nv30_state_fb.c | 8 +-- src/gallium/drivers/nv40/nv40_miptree.c | 29 ++++++++--- src/gallium/drivers/nv40/nv40_state_fb.c | 10 ++-- src/gallium/drivers/nv40/nv40_surface.c | 2 +- src/gallium/drivers/nv50/nv50_miptree.c | 6 ++- src/gallium/winsys/dri/nouveau/nouveau_context.c | 9 +++- src/gallium/winsys/dri/nouveau/nouveau_screen.c | 7 ++- src/gallium/winsys/dri/nouveau/nv04_surface.c | 63 +++++++++++++----------- src/gallium/winsys/dri/nouveau/nv50_surface.c | 42 +++++++++------- 13 files changed, 129 insertions(+), 83 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index 1b9947354d6..f1486a35df5 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -25,11 +25,13 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) pt->width[l] = width; pt->height[l] = height; pt->depth[l] = depth; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); if (swizzled) - nv10mt->level[l].pitch = pt->width[l] * pt->cpp; + nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size; else - nv10mt->level[l].pitch = pt->width[0] * pt->cpp; + nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size; nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63; nv10mt->level[l].image_offset = @@ -117,10 +119,12 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, return NULL; pipe_buffer_reference(ws, &ps->buffer, nv10mt->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = nv10mt->level[level].pitch / ps->cpp; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv10mt->level[level].pitch; if (pt->target == PIPE_TEXTURE_CUBE) { ps->offset = nv10mt->level[level].image_offset[face]; diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 41422c88825..d21368d33f3 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -143,10 +143,10 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) if (zeta) { BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING ( (rt->pitch * rt->cpp) | ( (zeta->pitch * zeta->cpp) << 16) ); + OUT_RING (rt->stride | (zeta->stride << 16)); } else { BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING ( (rt->pitch * rt->cpp) | ( (rt->pitch * rt->cpp) << 16) ); + OUT_RING (rt->stride | (rt->stride << 16)); } nv10->rt[0] = rt->buffer; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 6078b1865e2..ad0b257fe2b 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -28,12 +28,14 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) pt->width[l] = width; pt->height[l] = height; pt->depth[l] = depth; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); if (swizzled) pitch = pt->width[l]; pitch = (pitch + 63) & ~63; - nv30mt->level[l].pitch = pitch * pt->cpp; + nv30mt->level[l].pitch = pitch * pt->block.size; nv30mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); @@ -114,10 +116,12 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; pipe_buffer_reference(ws, &ps->buffer, nv30mt->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = nv30mt->level[level].pitch / ps->cpp; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->block = pt->block; + ps->stride = nv30mt->level[level].pitch; if (pt->target == PIPE_TEXTURE_CUBE) { ps->offset = nv30mt->level[level].image_offset[face]; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 84f016eeadc..c1618041bb0 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -631,21 +631,21 @@ nv30_set_framebuffer_state(struct pipe_context *pipe, } if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { - uint32_t pitch = rt[0]->pitch * rt[0]->cpp; + uint32_t pitch = rt[0]->stride; if (zeta) { - pitch |= (zeta->pitch * zeta->cpp)<<16; + pitch |= (zeta->stride << 16); } else { - pitch |= pitch<<16; + pitch |= (pitch << 16); } BEGIN_RING(rankine, NV34TCL_COLOR0_PITCH, 1); - OUT_RING ( pitch ); + OUT_RING (pitch); nv30->rt[0] = rt[0]->buffer; } if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { BEGIN_RING(rankine, NV34TCL_COLOR1_PITCH, 1); - OUT_RING (rt[1]->pitch * rt[1]->cpp); + OUT_RING (rt[1]->stride); nv30->rt[1] = rt[1]->buffer; } diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 73c97e298ad..a20df9f75d1 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -59,11 +59,11 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) } if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { - uint32_t pitch = rt[0]->pitch * rt[0]->cpp; + uint32_t pitch = rt[0]->stride; if (zeta) { - pitch |= (zeta->pitch * zeta->cpp)<<16; + pitch |= (zeta->stride << 16); } else { - pitch |= pitch<<16; + pitch |= (pitch << 16); } so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR0, 1); @@ -84,7 +84,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) so_method(so, nv30->screen->rankine, NV34TCL_COLOR1_OFFSET, 2); so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, rt[1]->pitch * rt[1]->cpp); + so_data (so, rt[1]->stride); } /* if (rt_enable & NV34TCL_RT_ENABLE_COLOR2) { diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 23da6e36a3e..38e1a5f04ca 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -28,19 +28,20 @@ nv40_miptree_layout(struct nv40_miptree *nv40mt) pt->width[l] = width; pt->height[l] = height; pt->depth[l] = depth; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); if (swizzled) - pitch = pt->width[l]; - pitch = (pitch + 63) & ~63; + pitch = pt->nblocksx[l]; + pitch = align_int(pitch, 64); - nv40mt->level[l].pitch = pitch * pt->cpp; + nv40mt->level[l].pitch = pitch * pt->block.size; nv40mt->level[l].image_offset = CALLOC(nr_faces, sizeof(unsigned)); width = MAX2(1, width >> 1); height = MAX2(1, height >> 1); depth = MAX2(1, depth >> 1); - } for (f = 0; f < nr_faces; f++) { @@ -109,15 +110,20 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, struct nv40_miptree *nv40mt = (struct nv40_miptree *)pt; struct pipe_surface *ps; - ps = ws->surface_alloc(ws); + ps = CALLOC_STRUCT(pipe_surface); if (!ps) return NULL; + pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(ws, &ps->buffer, nv40mt->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = nv40mt->level[level].pitch / ps->cpp; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv40mt->level[level].pitch; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; if (pt->target == PIPE_TEXTURE_CUBE) { ps->offset = nv40mt->level[level].image_offset[face]; @@ -135,6 +141,15 @@ static void nv40_miptree_surface_del(struct pipe_screen *pscreen, struct pipe_surface **psurface) { + struct pipe_surface *ps = *psurface; + + *psurface = NULL; + if (--ps->refcount > 0) + return; + + pipe_texture_reference(&ps->texture, NULL); + pipe_buffer_reference(pscreen->winsys, &ps->buffer, NULL); + FREE(ps); } void diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 88baf61ffb3..0e4e60eaa75 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -64,7 +64,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2); - so_data (so, rt[0]->pitch * rt[0]->cpp); + so_data (so, rt[0]->stride); so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } @@ -77,7 +77,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2); so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, rt[1]->pitch * rt[1]->cpp); + so_data (so, rt[1]->stride); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { @@ -89,7 +89,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1); - so_data (so, rt[2]->pitch * rt[2]->cpp); + so_data (so, rt[2]->stride); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { @@ -101,7 +101,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1); - so_data (so, rt[3]->pitch * rt[3]->cpp); + so_data (so, rt[3]->stride); } if (zeta_format) { @@ -113,7 +113,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) so_reloc (so, zeta->buffer, zeta->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1); - so_data (so, zeta->pitch * zeta->cpp); + so_data (so, zeta->stride); } so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1); diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c index c0d135eb36d..0916555d564 100644 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -34,7 +34,7 @@ #include "util/p_tile.h" static void -nv40_surface_copy(struct pipe_context *pipe, unsigned do_flip, +nv40_surface_copy(struct pipe_context *pipe, boolean do_flip, struct pipe_surface *dest, unsigned destx, unsigned desty, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index ccb916d6acc..6c838998fd0 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -72,10 +72,12 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, pipe_buffer_reference(ws, &ps->buffer, mt->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = ps->width; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = ps->width * ps->block.size; ps->offset = 0; return ps; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c index ef5fb7ec0d3..74413c408f3 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_context.c @@ -167,9 +167,14 @@ nouveau_context_create(const __GLcontextModes *glVis, fb_buf->bo = &fb_bo->base; fb_surf = calloc(1, sizeof(struct pipe_surface)); - fb_surf->cpp = nv_screen->front_cpp; - fb_surf->pitch = nv_screen->front_pitch / fb_surf->cpp; + if (nv_screen->front_cpp == 2) + fb_surf->format = PIPE_FORMAT_R5G6B5_UNORM; + else + fb_surf->format = PIPE_FORMAT_A8R8G8B8_UNORM; + pf_get_block(fb_surf->format, &fb_surf->block); + fb_surf->width = nv_screen->front_pitch / nv_screen->front_cpp; fb_surf->height = nv_screen->front_height; + fb_surf->stride = fb_surf->width * fb_surf->block.size; fb_surf->refcount = 1; fb_surf->buffer = &fb_buf->base; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.c b/src/gallium/winsys/dri/nouveau/nouveau_screen.c index 9041275a88f..b15ee7509cc 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_screen.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_screen.c @@ -209,8 +209,9 @@ nouveau_fill_in_modes(unsigned pixel_bits, unsigned depth_bits, GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML }; - u_int8_t depth_bits_array[4] = { 0, 16, 24, 24 }; - u_int8_t stencil_bits_array[4] = { 0, 0, 0, 8 }; + uint8_t depth_bits_array[4] = { 0, 16, 24, 24 }; + uint8_t stencil_bits_array[4] = { 0, 0, 0, 8 }; + uint8_t msaa_samples_array[1] = { 0 }; depth_buffer_factor = 4; back_buffer_factor = (have_back_buffer) ? 3 : 1; @@ -229,6 +230,7 @@ nouveau_fill_in_modes(unsigned pixel_bits, unsigned depth_bits, depth_buffer_factor, back_buffer_modes, back_buffer_factor, + msaa_samples_array, 1, GLX_TRUE_COLOR)) { fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__ ); @@ -242,6 +244,7 @@ nouveau_fill_in_modes(unsigned pixel_bits, unsigned depth_bits, depth_buffer_factor, back_buffer_modes, back_buffer_factor, + msaa_samples_array, 1, GLX_DIRECT_COLOR)) { fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__ ); diff --git a/src/gallium/winsys/dri/nouveau/nv04_surface.c b/src/gallium/winsys/dri/nouveau/nv04_surface.c index 83c790db17a..0085b1c345d 100644 --- a/src/gallium/winsys/dri/nouveau/nv04_surface.c +++ b/src/gallium/winsys/dri/nouveau/nv04_surface.c @@ -1,26 +1,35 @@ #include "pipe/p_context.h" +#include "pipe/p_format.h" #include "nouveau_context.h" static INLINE int -nv04_surface_format(int cpp) +nv04_surface_format(enum pipe_format format) { - switch (cpp) { - case 1: return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; - case 2: return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; - case 4: return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; + switch (format) { + case PIPE_FORMAT_A8_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; + case PIPE_FORMAT_R5G6B5_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; default: return -1; } } static INLINE int -nv04_rect_format(int cpp) +nv04_rect_format(enum pipe_format format) { - switch (cpp) { - case 1: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; - case 2: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; - case 4: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + switch (format) { + case PIPE_FORMAT_A8_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + case PIPE_FORMAT_R5G6B5_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; default: return -1; } @@ -35,8 +44,8 @@ nv04_surface_copy_m2mf(struct nouveau_context *nv, unsigned dx, unsigned dy, struct pipe_surface *src = nv->surf_src; unsigned dst_offset, src_offset; - dst_offset = dst->offset + (dy * dst->pitch + dx) * dst->cpp; - src_offset = src->offset + (sy * src->pitch + sx) * src->cpp; + dst_offset = dst->offset + (dy * dst->stride) + (dx * dst->block.size); + src_offset = src->offset + (sy * src->stride) + (sx * src->block.size); while (h) { int count = (h > 2047) ? 2047 : h; @@ -47,16 +56,16 @@ nv04_surface_copy_m2mf(struct nouveau_context *nv, unsigned dx, unsigned dy, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, src->pitch * src->cpp); - OUT_RING (chan, dst->pitch * dst->cpp); - OUT_RING (chan, w * src->cpp); + OUT_RING (chan, src->stride); + OUT_RING (chan, dst->stride); + OUT_RING (chan, w * src->block.size); OUT_RING (chan, count); OUT_RING (chan, 0x0101); OUT_RING (chan, 0); h -= count; - src_offset += src->pitch * src->cpp * count; - dst_offset += dst->pitch * dst->cpp * count; + src_offset += src->stride * count; + dst_offset += dst->stride * count; } } @@ -79,7 +88,7 @@ nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, struct nouveau_channel *chan = nv->nvc->channel; int format; - if (src->cpp != dst->cpp) + if (src->format != dst->format) return 1; /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback @@ -100,8 +109,8 @@ nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, } - if ((format = nv04_surface_format(dst->cpp)) < 0) { - NOUVEAU_ERR("Bad cpp = %d\n", dst->cpp); + if ((format = nv04_surface_format(dst->format)) < 0) { + NOUVEAU_ERR("Bad surface format 0x%x\n", dst->format); return 1; } nv->surface_copy = nv04_surface_copy_blit; @@ -116,8 +125,7 @@ nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, BEGIN_RING(chan, nv->nvc->NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); OUT_RING (chan, format); - OUT_RING (chan, ((dst->pitch * dst->cpp) << 16) | - (src->pitch * src->cpp)); + OUT_RING (chan, (dst->stride << 16) | src->stride); OUT_RELOCl(chan, nouveau_buffer(src->buffer)->bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, @@ -142,13 +150,13 @@ nv04_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, struct nouveau_grobj *rect = nv->nvc->NvGdiRect; int cs2d_format, gdirect_format; - if ((cs2d_format = nv04_surface_format(dst->cpp)) < 0) { - NOUVEAU_ERR("Bad cpp = %d\n", dst->cpp); + if ((cs2d_format = nv04_surface_format(dst->format)) < 0) { + NOUVEAU_ERR("Bad format = %d\n", dst->format); return 1; } - if ((gdirect_format = nv04_rect_format(dst->cpp)) < 0) { - NOUVEAU_ERR("Bad cpp = %d\n", dst->cpp); + if ((gdirect_format = nv04_rect_format(dst->format)) < 0) { + NOUVEAU_ERR("Bad format = %d\n", dst->format); return 1; } @@ -159,8 +167,7 @@ nv04_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); OUT_RING (chan, cs2d_format); - OUT_RING (chan, ((dst->pitch * dst->cpp) << 16) | - (dst->pitch * dst->cpp)); + OUT_RING (chan, (dst->stride << 16) | dst->stride); OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, diff --git a/src/gallium/winsys/dri/nouveau/nv50_surface.c b/src/gallium/winsys/dri/nouveau/nv50_surface.c index d6da116b774..cf76d76cf52 100644 --- a/src/gallium/winsys/dri/nouveau/nv50_surface.c +++ b/src/gallium/winsys/dri/nouveau/nv50_surface.c @@ -1,15 +1,21 @@ #include "pipe/p_context.h" +#include "pipe/p_format.h" #include "nouveau_context.h" static INLINE int -nv50_format(int cpp) +nv50_format(enum pipe_format format) { - switch (cpp) { - case 4: return NV50_2D_DST_FORMAT_32BPP; - case 3: return NV50_2D_DST_FORMAT_24BPP; - case 2: return NV50_2D_DST_FORMAT_16BPP; - case 1: return NV50_2D_DST_FORMAT_8BPP; + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return NV50_2D_DST_FORMAT_32BPP; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return NV50_2D_DST_FORMAT_24BPP; + case PIPE_FORMAT_R5G6B5_UNORM: + return NV50_2D_DST_FORMAT_16BPP; + case PIPE_FORMAT_A8_UNORM: + return NV50_2D_DST_FORMAT_8BPP; default: return -1; } @@ -23,9 +29,9 @@ nv50_surface_copy_prep(struct nouveau_context *nv, struct nouveau_grobj *eng2d = nv->nvc->Nv2D; int surf_format; - assert(src->cpp == dst->cpp); + assert(src->format == dst->format); - surf_format = nv50_format(dst->cpp); + surf_format = nv50_format(dst->format); assert(surf_format >= 0); BEGIN_RING(chan, eng2d, NV50_2D_DMA_IN_MEMORY0, 2); @@ -38,8 +44,8 @@ nv50_surface_copy_prep(struct nouveau_context *nv, OUT_RING (chan, surf_format); OUT_RING (chan, 1); BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 5); - OUT_RING (chan, dst->pitch * dst->cpp); - OUT_RING (chan, dst->pitch); + OUT_RING (chan, dst->stride); + OUT_RING (chan, dst->width); OUT_RING (chan, dst->height); OUT_RELOCh(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); @@ -48,15 +54,15 @@ nv50_surface_copy_prep(struct nouveau_context *nv, BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); OUT_RING (chan, 0); OUT_RING (chan, 0); - OUT_RING (chan, dst->pitch); + OUT_RING (chan, dst->width); OUT_RING (chan, dst->height); BEGIN_RING(chan, eng2d, NV50_2D_SRC_FORMAT, 2); OUT_RING (chan, surf_format); OUT_RING (chan, 1); BEGIN_RING(chan, eng2d, NV50_2D_SRC_PITCH, 5); - OUT_RING (chan, src->pitch * src->cpp); - OUT_RING (chan, src->pitch); + OUT_RING (chan, src->stride); + OUT_RING (chan, src->width); OUT_RING (chan, src->height); OUT_RELOCh(chan, nouveau_buffer(src->buffer)->bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); @@ -105,11 +111,11 @@ nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, struct nouveau_grobj *eng2d = nv->nvc->Nv2D; int surf_format, rect_format; - surf_format = nv50_format(dst->cpp); + surf_format = nv50_format(dst->format); if (surf_format < 0) return 1; - rect_format = nv50_format(dst->cpp); + rect_format = nv50_format(dst->format); if (rect_format < 0) return 1; @@ -120,8 +126,8 @@ nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, OUT_RING (chan, surf_format); OUT_RING (chan, 1); BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 5); - OUT_RING (chan, dst->pitch * dst->cpp); - OUT_RING (chan, dst->pitch); + OUT_RING (chan, dst->stride); + OUT_RING (chan, dst->width); OUT_RING (chan, dst->height); OUT_RELOCh(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); @@ -130,7 +136,7 @@ nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); OUT_RING (chan, 0); OUT_RING (chan, 0); - OUT_RING (chan, dst->pitch); + OUT_RING (chan, dst->width); OUT_RING (chan, dst->height); BEGIN_RING(chan, eng2d, 0x0580, 3); -- cgit v1.2.3 From f722fd937db2f3cacf1947d538c66528fd16eb89 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 1 Jun 2008 22:41:40 +1000 Subject: nv50: import current "state of the art" nv50 code --- src/gallium/drivers/nv50/Makefile | 1 + src/gallium/drivers/nv50/nv50_context.h | 18 + src/gallium/drivers/nv50/nv50_program.c | 742 +++++++++++++++++++++++++ src/gallium/drivers/nv50/nv50_screen.c | 68 ++- src/gallium/drivers/nv50/nv50_screen.h | 3 + src/gallium/drivers/nv50/nv50_state.c | 54 +- src/gallium/drivers/nv50/nv50_state.h | 30 + src/gallium/drivers/nv50/nv50_state_validate.c | 29 +- src/gallium/drivers/nv50/nv50_vbo.c | 85 ++- 9 files changed, 1017 insertions(+), 13 deletions(-) create mode 100644 src/gallium/drivers/nv50/nv50_program.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile index a62a4d961bc..2cc6e9de287 100644 --- a/src/gallium/drivers/nv50/Makefile +++ b/src/gallium/drivers/nv50/Makefile @@ -9,6 +9,7 @@ DRIVER_SOURCES = \ nv50_draw.c \ nv50_miptree.c \ nv50_query.c \ + nv50_program.c \ nv50_screen.c \ nv50_state.c \ nv50_state_validate.c \ diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index e68c702deae..d4d716b94bf 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -9,6 +9,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_stateobj.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv50_screen *ctx = nv50->screen @@ -30,6 +31,9 @@ #define NV50_NEW_VIEWPORT (1 << 5) #define NV50_NEW_RASTERIZER (1 << 6) #define NV50_NEW_FRAMEBUFFER (1 << 7) +#define NV50_NEW_VERTPROG (1 << 8) +#define NV50_NEW_FRAGPROG (1 << 9) +#define NV50_NEW_ARRAYS (1 << 10) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -63,6 +67,13 @@ struct nv50_context { struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; + struct nv50_program *vertprog; + struct nv50_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + unsigned vtxelt_nr; }; static INLINE struct nv50_context * @@ -88,11 +99,18 @@ extern boolean nv50_draw_elements(struct pipe_context *pipe, unsigned indexSize, unsigned mode, unsigned start, unsigned count); +extern void nv50_vbo_validate(struct nv50_context *nv50); /* nv50_clear.c */ extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue); +/* nv50_program.c */ +extern void nv50_vertprog_validate(struct nv50_context *nv50); +extern void nv50_fragprog_validate(struct nv50_context *nv50); +extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); + +/* nv50_state_validate.c */ extern boolean nv50_state_validate(struct nv50_context *nv50); #endif diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c new file mode 100644 index 00000000000..30953b7d8af --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -0,0 +1,742 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" + +#include "nv50_context.h" +#include "nv50_state.h" + +#define OP_MOV 0x001 +#define OP_RCP 0x009 +#define OP_ADD 0x00b +#define OP_MUL 0x00c +#define NV50_SU_MAX_TEMP 64 + +struct nv50_reg { + enum { + P_TEMP, + P_ATTR, + P_RESULT, + P_CONST, + P_IMMD + } type; + int index; + + int hw; +}; + +struct nv50_pc { + struct nv50_program *p; + + /* hw resources */ + struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; + + /* tgsi resources */ + struct nv50_reg *temp; + int temp_nr; + struct nv50_reg *attr; + int attr_nr; + struct nv50_reg *result; + int result_nr; + struct nv50_reg *param; + int param_nr; + struct nv50_reg *immd; + float *immd_buf; + int immd_nr; +}; + +static void +alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) +{ + int i; + + if (reg->type != P_TEMP || reg->hw >= 0) + return; + + for (i = 0; i < NV50_SU_MAX_TEMP; i++) { + if (!(pc->r_temp[i])) { + pc->r_temp[i] = reg; + reg->hw = i; + if (pc->p->cfg.vp.high_temp < (i + 1)) + pc->p->cfg.vp.high_temp = i + 1; + return; + } + } + + assert(0); +} + +static struct nv50_reg * +alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) +{ + struct nv50_reg *r; + int i; + + if (dst && dst->type == P_TEMP && dst->hw == -1) + return dst; + + for (i = 0; i < NV50_SU_MAX_TEMP; i++) { + if (!pc->r_temp[i]) { + r = CALLOC_STRUCT(nv50_reg); + r->type = P_TEMP; + r->index = -1; + r->hw = i; + pc->r_temp[i] = r; + return r; + } + } + + assert(0); + return NULL; +} + +static void +free_temp(struct nv50_pc *pc, struct nv50_reg *r) +{ + if (r->index == -1) { + FREE(pc->r_temp[r->hw]); + pc->r_temp[r->hw] = NULL; + } +} + +#if 0 +static struct nv50_reg * +constant(struct nv50_pc *pc, int pipe, int c, float v) +{ + struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); + struct nv50_program *p = pc->p; + struct nv50_program_data *pd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < p->nr_consts; idx++) { + if (p->consts[idx].index == pipe) + return nv40_sr(NV40SR_CONST, idx); + } + } + + idx = p->nr_consts++; + p->consts = realloc(p->consts, sizeof(*pd) * p->nr_consts); + pd = &p->consts[idx]; + + pd->index = pipe; + pd->component = c; + pd->value = v; + return nv40_sr(NV40SR_CONST, idx); +} +#endif + +static void +emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program *p = pc->p; + struct nv50_reg *tmp = NULL, *tmp2 = NULL; + unsigned inst[2] = { 0, 0 }; + + /* Grr.. Fun restrictions on where attribs can be sourced from.. */ + if (src1 && src1->type == P_ATTR) { + tmp = alloc_temp(pc, dst); + emit(pc, 1, tmp, src1, NULL, NULL); + src1 = tmp; + } + + if (src2 && src2->type == P_ATTR) { + tmp2 = alloc_temp(pc, dst); + emit(pc, 1, tmp2, src2, NULL, NULL); + src2 = tmp2; + } + + /* Get this out of the way first. What type of opcode do we + * want/need to build? + */ + if ((op & 0x3f0) || dst->type == P_RESULT || + (src0 && src0->type == P_ATTR) || src1 || src2) + inst[0] |= 0x00000001; + + if (inst[0] & 0x00000001) { + inst[0] |= ((op & 0xf) << 28); + inst[1] |= ((op >> 4) << 26); + + alloc_reg(pc, dst); + if (dst->type == P_RESULT) + inst[1] |= 0x00000008; + inst[0] |= (dst->hw << 2); + + if (src0) { + if (src0->type == P_ATTR) + inst[1] |= 0x00200000; + else + if (src0->type == P_CONST || src0->type == P_IMMD) + assert(0); + alloc_reg(pc, src0); + inst[0] |= (src0->hw << 9); + } + + if (src1) { + if (src1->type == P_CONST || src1->type == P_IMMD) { + inst[0] |= 0x00800000; /* src1 is const */ + /*XXX: does src1 come from "src2" now? */ + alloc_reg(pc, src1); + inst[0] |= (src1->hw << 16); + } else { + alloc_reg(pc, src1); + if (op == 0xc || op == 0xe) + inst[0] |= (src1->hw << 16); + else + inst[1] |= (src1->hw << 14); + } + } else { + inst[1] |= 0x0003c000; /*XXX FIXME */ + } + + if (src2) { + if (src2->type == P_CONST || src2->type == P_IMMD) { + inst[0] |= 0x01000000; /* src2 is const */ + inst[1] |= (src2->hw << 14); + } else { + alloc_reg(pc, src2); + if (inst[0] & 0x00800000 || op ==0xe) + inst[1] |= (src2->hw << 14); + else + inst[0] |= (src2->hw << 16); + } + } + + /*XXX: FIXME */ + if (op == 0xb || op == 0xc || op == 0x9 || op == 0xe) { + /* 0x04000000 negates arg0 */ + /* 0x08000000 negates arg1 */ + /*XXX: true for !0xb also ? */ + inst[1] |= 0x00000780; + } else { + /* 0x04000000 == arg0 32 bit, otherwise 16 bit */ + inst[1] |= 0x04000780; + } + } else { + inst[0] |= ((op & 0xf) << 28); + + alloc_reg(pc, dst); + inst[0] |= (dst->hw << 2); + + if (src0) { + alloc_reg(pc, src0); + inst[0] |= (src0->hw << 9); + } + + /*XXX: NFI if this even works - probably not.. */ + if (src1) { + alloc_reg(pc, src1); + inst[0] |= (src1->hw << 16); + } + } + + if (tmp) free_temp(pc, tmp); + if (tmp2) free_temp(pc, tmp2); + + if (inst[0] & 1) { + p->insns_nr += 2; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); + } else { + p->insns_nr += 1; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); + } +} + +static struct nv50_reg * +tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) +{ + switch (dst->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + return &pc->temp[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_OUTPUT: + return &pc->result[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_NULL: + return NULL; + default: + break; + } + + return NULL; +} + +static struct nv50_reg * +tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src) +{ + /* Handle swizzling */ + switch (c) { + case 0: c = src->SrcRegister.SwizzleX; break; + case 1: c = src->SrcRegister.SwizzleY; break; + case 2: c = src->SrcRegister.SwizzleZ; break; + case 3: c = src->SrcRegister.SwizzleW; break; + default: + assert(0); + } + + switch (src->SrcRegister.File) { + case TGSI_FILE_INPUT: + return &pc->attr[src->SrcRegister.Index * 4 + c]; + case TGSI_FILE_TEMPORARY: + return &pc->temp[src->SrcRegister.Index * 4 + c]; + case TGSI_FILE_CONSTANT: + return &pc->param[src->SrcRegister.Index * 4 + c]; + case TGSI_FILE_IMMEDIATE: + return &pc->immd[src->SrcRegister.Index * 4 + c]; + default: + break; + } + + return NULL; +} + +static boolean +nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +{ + const struct tgsi_full_instruction *inst = &tok->FullInstruction; + struct nv50_reg *dst[4], *src[3][4], *none = NULL, *tmp; + unsigned mask; + int i, c; + + NOUVEAU_ERR("insn %p\n", tok); + + mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); + else + dst[c] = NULL; + } + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + for (c = 0; c < 4; c++) + src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) { + emit(pc, 0x0b, dst[c], + src[0][c], src[1][c], none); + } + } + break; + case TGSI_OPCODE_DP3: + tmp = alloc_temp(pc, NULL); + emit(pc, 0x0c, tmp, src[0][0], src[1][0], NULL); + emit(pc, 0x0e, tmp, src[0][1], src[1][1], tmp); + emit(pc, 0x0e, tmp, src[0][2], src[1][2], tmp); + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x01, dst[c], tmp, none, none); + } + free_temp(pc, tmp); + break; + case TGSI_OPCODE_DP4: + tmp = alloc_temp(pc, NULL); + emit(pc, 0x0c, tmp, src[0][0], src[1][0], NULL); + emit(pc, 0x0e, tmp, src[0][1], src[1][1], tmp); + emit(pc, 0x0e, tmp, src[0][2], src[1][2], tmp); + emit(pc, 0x0e, tmp, src[0][3], src[1][3], tmp); + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x01, dst[c], tmp, none, none); + } + free_temp(pc, tmp); + break; + case TGSI_OPCODE_MAD: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x0e, dst[c], + src[0][c], src[1][c], src[2][c]); + } + break; + case TGSI_OPCODE_MOV: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x01, dst[c], src[0][c], none, none); + } + break; + case TGSI_OPCODE_MUL: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x0c, dst[c], + src[0][c], src[1][c], none); + } + break; + case TGSI_OPCODE_RCP: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, 0x09, dst[c], + src[0][c], none, none); + } + break; + case TGSI_OPCODE_END: + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv50_program_tx_prep(struct nv50_pc *pc) +{ + struct tgsi_parse_context p; + boolean ret = FALSE; + unsigned i, c; + + tgsi_parse_init(&p, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch (tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm = + &p.FullToken.FullImmediate; + + pc->immd_nr++; + pc->immd_buf = realloc(pc->immd_buf, 4 * pc->immd_nr * + sizeof(float)); + pc->immd_buf[4 * (pc->immd_nr - 1) + 0] = + imm->u.ImmediateFloat32[0].Float; + pc->immd_buf[4 * (pc->immd_nr - 1) + 1] = + imm->u.ImmediateFloat32[1].Float; + pc->immd_buf[4 * (pc->immd_nr - 1) + 2] = + imm->u.ImmediateFloat32[2].Float; + pc->immd_buf[4 * (pc->immd_nr - 1) + 3] = + imm->u.ImmediateFloat32[3].Float; + } + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *d; + unsigned last; + + d = &p.FullToken.FullDeclaration; + last = d->u.DeclarationRange.Last; + + switch (d->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (pc->temp_nr < (last + 1)) + pc->temp_nr = last + 1; + break; + case TGSI_FILE_OUTPUT: + if (pc->result_nr < (last + 1)) + pc->result_nr = last + 1; + break; + case TGSI_FILE_INPUT: + if (pc->attr_nr < (last + 1)) + pc->attr_nr = last + 1; + break; + case TGSI_FILE_CONSTANT: + if (pc->param_nr < (last + 1)) + pc->param_nr = last + 1; + break; + default: + NOUVEAU_ERR("bad decl file %d\n", + d->Declaration.File); + goto out_err; + } + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + break; + default: + break; + } + } + + NOUVEAU_ERR("%d temps\n", pc->temp_nr); + if (pc->temp_nr) { + pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); + if (!pc->temp) + goto out_err; + + for (i = 0; i < pc->temp_nr; i++) { + for (c = 0; c < 4; c++) { + pc->temp[i*4+c].type = P_TEMP; + pc->temp[i*4+c].hw = -1; + pc->temp[i*4+c].index = i; + } + } + } + + NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); + if (pc->attr_nr) { + int aid = 0; + + pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); + if (!pc->attr) + goto out_err; + + for (i = 0; i < pc->attr_nr; i++) { + for (c = 0; c < 4; c++) { + pc->p->cfg.vp.attr[aid/32] |= (1 << (aid % 32)); + pc->attr[i*4+c].type = P_ATTR; + pc->attr[i*4+c].hw = aid++; + pc->attr[i*4+c].index = i; + } + } + } + + NOUVEAU_ERR("%d result regs\n", pc->result_nr); + if (pc->result_nr) { + int rid = 0; + + pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); + if (!pc->result) + goto out_err; + + for (i = 0; i < pc->result_nr; i++) { + for (c = 0; c < 4; c++) { + pc->result[i*4+c].type = P_RESULT; + pc->result[i*4+c].hw = rid++; + pc->result[i*4+c].index = i; + } + } + } + + NOUVEAU_ERR("%d param regs\n", pc->param_nr); + if (pc->param_nr) { + int rid = 0; + + pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); + if (!pc->param) + goto out_err; + + for (i = 0; i < pc->param_nr; i++) { + for (c = 0; c < 4; c++) { + pc->param[i*4+c].type = P_CONST; + pc->param[i*4+c].hw = rid++; + pc->param[i*4+c].index = i; + } + } + } + + if (pc->immd_nr) { + int rid = pc->param_nr * 4; + + pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); + if (!pc->immd) + goto out_err; + + for (i = 0; i < pc->immd_nr; i++) { + for (c = 0; c < 4; c++) { + pc->immd[i*4+c].type = P_IMMD; + pc->immd[i*4+c].hw = rid++; + pc->immd[i*4+c].index = i; + } + } + } + + ret = TRUE; +out_err: + tgsi_parse_free(&p); + return ret; +} + +static boolean +nv50_program_tx(struct nv50_program *p) +{ + struct tgsi_parse_context parse; + struct nv50_pc *pc; + boolean ret; + + pc = CALLOC_STRUCT(nv50_pc); + if (!pc) + return FALSE; + pc->p = p; + pc->p->cfg.vp.high_temp = 4; + + ret = nv50_program_tx_prep(pc); + if (ret == FALSE) + goto out_cleanup; + + tgsi_parse_init(&parse, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + const union tgsi_full_token *tok = &parse.FullToken; + + tgsi_parse_token(&parse); + + switch (tok->Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + ret = nv50_program_tx_insn(pc, tok); + if (ret == FALSE) + goto out_err; + break; + default: + break; + } + } + + p->param_nr = pc->param_nr * 4; + p->immd_nr = pc->immd_nr * 4; + p->immd = pc->immd_buf; + +out_err: + tgsi_parse_free(&parse); + +out_cleanup: + return ret; +} + +static void +nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) +{ + struct tgsi_parse_context pc; + + tgsi_parse_init(&pc, p->pipe.tokens); + + if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + p->insns_nr = 8; + p->insns = malloc(p->insns_nr * sizeof(unsigned)); + p->insns[0] = 0x80000000; + p->insns[1] = 0x9000000c; + p->insns[2] = 0x82010600; + p->insns[3] = 0x82020604; + p->insns[4] = 0x80030609; + p->insns[5] = 0x00020780; + p->insns[6] = 0x8004060d; + p->insns[7] = 0x00020781; + } else + if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + int i; + + if (nv50_program_tx(p) == FALSE) + assert(0); + p->insns[p->insns_nr - 1] |= 0x00000001; + + for (i = 0; i < p->insns_nr; i++) + NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]); + } else { + NOUVEAU_ERR("invalid TGSI processor\n"); + tgsi_parse_free(&pc); + return; + } + + tgsi_parse_free(&pc); + + p->translated = TRUE; +} + +void +nv50_vertprog_validate(struct nv50_context *nv50) +{ + struct pipe_winsys *ws = nv50->pipe.winsys; + struct nouveau_winsys *nvws = nv50->screen->nvws; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->vertprog; + struct nouveau_stateobj *so; + void *map; + int i; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + if (!p->buffer) + p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(map, p->insns, p->insns_nr * 4); + ws->buffer_unmap(ws, p->buffer); + + if (p->param_nr) { + float *cb; + + cb = ws->buffer_map(ws, nv50->constbuf[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < p->param_nr; i++) { + BEGIN_RING(tesla, 0x0f00, 2); + OUT_RING (i << 8); + OUT_RING (fui(cb[i])); + } + ws->buffer_unmap(ws, nv50->constbuf[PIPE_SHADER_VERTEX]); + } + + + for (i = 0; i < p->immd_nr; i++) { + BEGIN_RING(tesla, 0x0f00, 2); + OUT_RING ((p->param_nr + i) << 8); + OUT_RING (fui(p->immd[i])); + } + + so = so_new(11, 2); + so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 0x1650, 2); + so_data (so, p->cfg.vp.attr[0]); + so_data (so, p->cfg.vp.attr[1]); + so_method(so, tesla, 0x16ac, 2); + so_data (so, 8); + so_data (so, p->cfg.vp.high_temp); + so_method(so, tesla, 0x140c, 1); + so_data (so, 0); /* program start offset */ + so_emit(nv50->screen->nvws, so); + so_ref(NULL, &so); +} + +void +nv50_fragprog_validate(struct nv50_context *nv50) +{ + struct pipe_winsys *ws = nv50->pipe.winsys; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->fragprog; + struct nouveau_stateobj *so; + void *map; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + if (!p->buffer) + p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(map, p->insns, p->insns_nr * 4); + ws->buffer_unmap(ws, p->buffer); + + so = so_new(3, 2); + so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_emit(nv50->screen->nvws, so); + so_ref(NULL, &so); +} + +void +nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) +{ + struct pipe_winsys *ws = nv50->pipe.winsys; + + if (p->insns_nr) { + if (p->insns) + FREE(p->insns); + p->insns_nr = 0; + } + + if (p->buffer) + pipe_buffer_reference(ws, &p->buffer, NULL); + + p->translated = 0; +} + diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 29c057a1453..fc3eeed9133 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -203,8 +203,19 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) return NULL; } + /* Static constant buffer */ + screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4); + if (nvws->res_init(&screen->vp_data_heap, 0, 256)) { + NOUVEAU_ERR("Error initialising constant buffer\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + /* Static tesla init */ - so = so_new(128, 0); + so = so_new(256, 20); + + so_method(so, screen->tesla, 0x1558, 1); + so_data (so, 1); so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->tesla, NV50TCL_DMA_IN_MEMORY0(0), @@ -215,6 +226,61 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) NV50TCL_DMA_IN_MEMORY1__SIZE); for (i = 0; i < NV50TCL_DMA_IN_MEMORY1__SIZE; i++) so_data(so, nvws->channel->vram->handle); + so_method(so, screen->tesla, 0x121c, 1); + so_data (so, 1); + + so_method(so, screen->tesla, 0x13bc, 1); + so_data (so, 0x54); + so_method(so, screen->tesla, 0x13ac, 1); + so_data (so, 1); + so_method(so, screen->tesla, 0x16bc, 2); + so_data (so, 0x03020100); + so_data (so, 0x07060504); + so_method(so, screen->tesla, 0x1988, 2); + so_data (so, 0x08040404); + so_data (so, 0x00000004); + so_method(so, screen->tesla, 0x16ac, 2); + so_data (so, 8); + so_data (so, 4); + so_method(so, screen->tesla, 0x16b8, 1); + so_data (so, 8); + + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00001000); + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00014000); + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00024000); + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00034000); + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00040100); + + for (i = 0; i < 16; i++) { + so_method(so, screen->tesla, 0x1080 + (i * 8), 2); + so_data (so, 0x000000ff); + so_data (so, 0xffffffff); + } so_emit(nvws, so); so_ref(NULL, &so); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 5a2732e37f4..d63dd485085 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -12,6 +12,9 @@ struct nv50_screen { struct nouveau_grobj *tesla; struct nouveau_notifier *sync; + + struct pipe_buffer *constbuf; + struct nouveau_resource *vp_data_heap; }; static INLINE struct nv50_screen * diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index f42bae0c28d..fd10a383782 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -248,8 +248,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, struct nouveau_stateobj *so = so_new(64, 0); so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); - so_data (so, cso->depth.writemask ? 1 : 0); - if (cso->depth.enabled) { + so_data (so, 0); //cso->depth.writemask ? 1 : 0); + if (0 && cso->depth.enabled) { so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); so_data (so, 1); so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1); @@ -328,34 +328,58 @@ static void * nv50_vp_state_create(struct pipe_context *pipe, const struct pipe_shader_state *cso) { - return NULL; + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe = *cso; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; } static void nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->vertprog = hwcso; + nv50->dirty |= NV50_NEW_VERTPROG; } static void nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50_program_destroy(nv50, hwcso); + FREE(hwcso); } static void * nv50_fp_state_create(struct pipe_context *pipe, const struct pipe_shader_state *cso) { - return NULL; + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe = *cso; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; } static void nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_FRAGPROG; } static void nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) { + struct nv50_context *nv50 = nv50_context(pipe); + + nv50_program_destroy(nv50, hwcso); + FREE(hwcso); } static void @@ -378,6 +402,16 @@ static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, const struct pipe_constant_buffer *buf ) { + struct nv50_context *nv50 = nv50_context(pipe); + + if (shader == PIPE_SHADER_VERTEX) { + nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; + nv50->dirty |= NV50_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; + nv50->dirty |= NV50_NEW_FRAGPROG; + } } static void @@ -424,12 +458,24 @@ static void nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_buffer *vb) { + struct nv50_context *nv50 = nv50_context(pipe); + + memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); + nv50->vtxbuf_nr = count; + + nv50->dirty |= NV50_NEW_ARRAYS; } static void nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *ve) { + struct nv50_context *nv50 = nv50_context(pipe); + + memcpy(nv50->vtxelt, ve, sizeof(*ve) * count); + nv50->vtxelt_nr = count; + + nv50->dirty |= NV50_NEW_ARRAYS; } void diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h index a3b781d4c61..be0c75ad6ea 100644 --- a/src/gallium/drivers/nv50/nv50_state.h +++ b/src/gallium/drivers/nv50/nv50_state.h @@ -2,6 +2,36 @@ #define __NV50_STATE_H__ #include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" +struct nv50_program_data { + int index; + int component; + float value; +}; + +struct nv50_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + boolean translated; + + unsigned *insns; + unsigned insns_nr; + + struct pipe_buffer *buffer; + + unsigned param_nr; + float *immd; + unsigned immd_nr; + + struct nouveau_resource *data; + + union { + struct { + unsigned high_temp; + unsigned attr[2]; + } vp; + } cfg; +}; #endif diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 68d419f9fc8..4a548378b71 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -15,8 +15,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) h = fb->cbufs[i]->height; gw = 1; } else { - assert(w != fb->cbufs[i]->width); - assert(h != fb->cbufs[i]->height); + assert(w == fb->cbufs[i]->width); + assert(h == fb->cbufs[i]->height); } so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2); @@ -46,6 +46,9 @@ nv50_state_validate_fb(struct nv50_context *nv50) } so_data(so, 0x00000040); so_data(so, 0x00000000); + + so_method(so, tesla, 0x1224, 1); + so_data (so, 1); } if (fb->zsbuf) { @@ -54,11 +57,11 @@ nv50_state_validate_fb(struct nv50_context *nv50) h = fb->zsbuf->height; gw = 1; } else { - assert(w != fb->zsbuf->width); - assert(h != fb->zsbuf->height); + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); } - so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); + so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset, @@ -83,9 +86,12 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); so_data (so, w << 16); so_data (so, h << 16); - so_method(so, tesla, 0xff0, 2); + so_method(so, tesla, 0xff4, 2); so_data (so, w << 16); so_data (so, h << 16); + so_method(so, tesla, 0xdf8, 2); + so_data (so, 0); + so_data (so, h); so_emit(nv50->screen->nvws, so); so_ref(NULL, &so); @@ -108,6 +114,12 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & NV50_NEW_ZSA) so_emit(nvws, nv50->zsa->so); + if (nv50->dirty & NV50_NEW_VERTPROG) + nv50_vertprog_validate(nv50); + + if (nv50->dirty & NV50_NEW_FRAGPROG) + nv50_fragprog_validate(nv50); + if (nv50->dirty & NV50_NEW_RASTERIZER) so_emit(nvws, nv50->rasterizer->so); @@ -150,12 +162,15 @@ nv50_state_validate(struct nv50_context *nv50) so_data (so, fui(nv50->viewport.translate[2])); so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); so_data (so, fui(nv50->viewport.scale[0])); - so_data (so, fui(nv50->viewport.scale[1])); + so_data (so, fui(-nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); so_emit(nvws, so); so_ref(NULL, &so); } + if (nv50->dirty & NV50_NEW_ARRAYS) + nv50_vbo_validate(nv50); + nv50->dirty = 0; return TRUE; } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index b01ce1d42c5..f086c762582 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -5,6 +5,29 @@ #include "nv50_context.h" #include "nv50_state.h" +static INLINE unsigned +nv50_prim(unsigned mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS; + case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES; + case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP; + case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; + case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; + case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; + default: + break; + } + + NOUVEAU_ERR("invalid primitive type %d\n", mode); + return NV50TCL_VERTEX_BEGIN_POINTS; +} + boolean nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -12,8 +35,22 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, struct nv50_context *nv50 = nv50_context(pipe); nv50_state_validate(nv50); - NOUVEAU_ERR("unimplemented\n"); + + BEGIN_RING(tesla, 0x142c, 1); + OUT_RING (0); + BEGIN_RING(tesla, 0x142c, 1); + OUT_RING (0); + + BEGIN_RING(tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (nv50_prim(mode)); + BEGIN_RING(tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (start); + OUT_RING (count); + BEGIN_RING(tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (0); + + pipe->flush(pipe, 0, NULL); return TRUE; } @@ -30,3 +67,49 @@ nv50_draw_elements(struct pipe_context *pipe, return TRUE; } +void +nv50_vbo_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *vtxbuf, *vtxfmt; + int i, vpi = 0; + + vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2); + vtxfmt = so_new(nv50->vtxelt_nr + 1, 0); + so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr); + + for (i = 0; i < nv50->vtxelt_nr; i++) { + struct pipe_vertex_element *ve = &nv50->vtxelt[i]; + struct pipe_vertex_buffer *vb = + &nv50->vtxbuf[ve->vertex_buffer_index]; + + switch (ve->src_format) { + case PIPE_FORMAT_R32G32B32_FLOAT: + so_data(vtxfmt, 0x7e100000 | i); + break; + default: + { + char fmt[128]; + pf_sprint_name(fmt, ve->src_format); + NOUVEAU_ERR("invalid vbo format %s\n", fmt); + assert(0); + return; + } + } + + so_method(vtxbuf, tesla, 0x900 + (i * 16), 3); + so_data (vtxbuf, 0x20000000 | vb->pitch); + so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + } + + so_emit(nv50->screen->nvws, vtxfmt); + so_ref (NULL, &vtxfmt); + so_emit(nv50->screen->nvws, vtxbuf); + so_ref (NULL, &vtxbuf); +} + -- cgit v1.2.3 From 716c1cd2ecbc1e86c0fd747c9fa9e095ded5fd5d Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 1 Jun 2008 23:10:31 +1000 Subject: nv50: use "real" constbufs for shaders + tcb uploads --- src/gallium/drivers/nv50/nv50_context.h | 14 ++++++- src/gallium/drivers/nv50/nv50_program.c | 27 +++++------- src/gallium/drivers/nv50/nv50_screen.c | 57 +++++++++++++++----------- src/gallium/drivers/nv50/nv50_screen.h | 3 ++ src/gallium/drivers/nv50/nv50_state.c | 4 +- src/gallium/drivers/nv50/nv50_state.h | 1 - src/gallium/drivers/nv50/nv50_state_validate.c | 29 +++++++++++++ 7 files changed, 90 insertions(+), 45 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index d4d716b94bf..c4a8a4c064e 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -23,6 +23,14 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +/* Constant buffer assignment */ +#define NV50_CB_PMISC 0 +#define NV50_CB_PVP 1 +#define NV50_CB_PFP 2 +#define NV50_CB_PGP 3 +#define NV50_CB_TIC 4 +#define NV50_CB_TSC 5 + #define NV50_NEW_BLEND (1 << 0) #define NV50_NEW_ZSA (1 << 1) #define NV50_NEW_BLEND_COLOUR (1 << 2) @@ -32,8 +40,10 @@ #define NV50_NEW_RASTERIZER (1 << 6) #define NV50_NEW_FRAMEBUFFER (1 << 7) #define NV50_NEW_VERTPROG (1 << 8) -#define NV50_NEW_FRAGPROG (1 << 9) -#define NV50_NEW_ARRAYS (1 << 10) +#define NV50_NEW_VERTPROG_CB (1 << 9) +#define NV50_NEW_FRAGPROG (1 << 10) +#define NV50_NEW_FRAGPROG_CB (1 << 11) +#define NV50_NEW_ARRAYS (1 << 12) struct nv50_blend_stateobj { struct pipe_blend_state pipe; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 30953b7d8af..0a436469238 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -179,6 +179,10 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, if (src1) { if (src1->type == P_CONST || src1->type == P_IMMD) { + if (src1->type == P_IMMD) + inst[1] |= (NV50_CB_PMISC << 22); + else + inst[1] |= (NV50_CB_PVP << 22); inst[0] |= 0x00800000; /* src1 is const */ /*XXX: does src1 come from "src2" now? */ alloc_reg(pc, src1); @@ -196,6 +200,10 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, if (src2) { if (src2->type == P_CONST || src2->type == P_IMMD) { + if (src2->type == P_IMMD) + inst[1] |= (NV50_CB_PMISC << 22); + else + inst[1] |= (NV50_CB_PVP << 22); inst[0] |= 0x01000000; /* src2 is const */ inst[1] |= (src2->hw << 14); } else { @@ -526,7 +534,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) } if (pc->immd_nr) { - int rid = pc->param_nr * 4; + int rid = 0; pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); if (!pc->immd) @@ -581,7 +589,6 @@ nv50_program_tx(struct nv50_program *p) } } - p->param_nr = pc->param_nr * 4; p->immd_nr = pc->immd_nr * 4; p->immd = pc->immd_buf; @@ -654,23 +661,9 @@ nv50_vertprog_validate(struct nv50_context *nv50) memcpy(map, p->insns, p->insns_nr * 4); ws->buffer_unmap(ws, p->buffer); - if (p->param_nr) { - float *cb; - - cb = ws->buffer_map(ws, nv50->constbuf[PIPE_SHADER_VERTEX], - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < p->param_nr; i++) { - BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING (i << 8); - OUT_RING (fui(cb[i])); - } - ws->buffer_unmap(ws, nv50->constbuf[PIPE_SHADER_VERTEX]); - } - - for (i = 0; i < p->immd_nr; i++) { BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((p->param_nr + i) << 8); + OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); OUT_RING (fui(p->immd[i])); } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index fc3eeed9133..6c0810a9cfe 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -203,14 +203,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) return NULL; } - /* Static constant buffer */ - screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4); - if (nvws->res_init(&screen->vp_data_heap, 0, 256)) { - NOUVEAU_ERR("Error initialising constant buffer\n"); - nv50_screen_destroy(&screen->pipe); - return NULL; - } - /* Static tesla init */ so = so_new(256, 20); @@ -245,37 +237,56 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); + /* Shared constant buffer */ + screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4); + if (nvws->res_init(&screen->vp_data_heap, 0, 256)) { + NOUVEAU_ERR("Error initialising constant buffer\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + so_method(so, screen->tesla, 0x1280, 3); so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00001000); + so_data (so, (NV50_CB_PMISC << 16) | 0x00001000); + + /* Texture sampler/image unit setup - we abuse the constant buffer + * upload mechanism for the moment to upload data to the tex config + * blocks. At some point we *may* want to go the NVIDIA way of doing + * things? + */ + screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00014000); - so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_data (so, (NV50_CB_TIC << 16) | 0x0800); + so_method(so, screen->tesla, 0x1574, 3); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00024000); + so_data (so, 0x00000800); + + screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00034000); - so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_data (so, (NV50_CB_TSC << 16) | 0x0800); + so_method(so, screen->tesla, 0x155c, 3); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00040100); + so_data (so, 0x00000800); + + /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { so_method(so, screen->tesla, 0x1080 + (i * 8), 2); so_data (so, 0x000000ff); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index d63dd485085..5acb5003ba4 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -15,6 +15,9 @@ struct nv50_screen { struct pipe_buffer *constbuf; struct nouveau_resource *vp_data_heap; + + struct pipe_buffer *tic; + struct pipe_buffer *tsc; }; static INLINE struct nv50_screen * diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index fd10a383782..ba3d04cede3 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -406,11 +406,11 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (shader == PIPE_SHADER_VERTEX) { nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; - nv50->dirty |= NV50_NEW_VERTPROG; + nv50->dirty |= NV50_NEW_VERTPROG_CB; } else if (shader == PIPE_SHADER_FRAGMENT) { nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; - nv50->dirty |= NV50_NEW_FRAGPROG; + nv50->dirty |= NV50_NEW_FRAGPROG_CB; } } diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h index be0c75ad6ea..9e3876871bd 100644 --- a/src/gallium/drivers/nv50/nv50_state.h +++ b/src/gallium/drivers/nv50/nv50_state.h @@ -20,7 +20,6 @@ struct nv50_program { struct pipe_buffer *buffer; - unsigned param_nr; float *immd; unsigned immd_nr; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 4a548378b71..05395c6df7c 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -168,6 +168,35 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(NULL, &so); } + if (nv50->dirty & NV50_NEW_VERTPROG_CB) { + so = so_new(4, 2); + so_method(so, tesla, 0x1280, 3); + so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_data (so, (NV50_CB_PVP << 16) | 0x1000); + so_emit(nvws, so); + so_ref(NULL, &so); + } + + if (nv50->dirty & NV50_NEW_FRAGPROG_CB) { + so = so_new(4, 2); + so_method(so, tesla, 0x1280, 3); + so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, + 0, 0); + so_data (so, (NV50_CB_PFP << 16) | 0x1000); + so_emit(nvws, so); + so_ref(NULL, &so); + } + + if (nv50->dirty & NV50_NEW_ARRAYS) nv50_vbo_validate(nv50); -- cgit v1.2.3 From 41cd9bddf77ea60f84a957e83ddf098818c95c41 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 1 Jun 2008 23:16:17 +1000 Subject: nv50: fucking horrible hack, I really hate G8x shaders.. --- src/gallium/drivers/nv50/nv50_program.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 0a436469238..1b192b897c4 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -135,10 +135,18 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { struct nv50_program *p = pc->p; - struct nv50_reg *tmp = NULL, *tmp2 = NULL; + struct nv50_reg *tmp0 = NULL, *tmp = NULL, *tmp2 = NULL; unsigned inst[2] = { 0, 0 }; /* Grr.. Fun restrictions on where attribs can be sourced from.. */ + if (src0 && (src0->type == P_CONST || src0->type == P_IMMD) && + (op == 0xc || op == 0xe)) { + tmp = src1; + src1 = src0; + src0 = tmp; + tmp = NULL; + } + if (src1 && src1->type == P_ATTR) { tmp = alloc_temp(pc, dst); emit(pc, 1, tmp, src1, NULL, NULL); @@ -243,6 +251,7 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, } } + if (tmp0) free_temp(pc, tmp0); if (tmp) free_temp(pc, tmp); if (tmp2) free_temp(pc, tmp2); -- cgit v1.2.3 From 38ce697e5942550888c28bd4859ca2a92f247bf7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 2 Jun 2008 12:08:06 +1000 Subject: nv50: implement SUB --- src/gallium/drivers/nv50/nv50_program.c | 63 +++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 1b192b897c4..41ff05dabe7 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -11,9 +11,11 @@ #include "nv50_state.h" #define OP_MOV 0x001 +#define OP_INTERP 0x008 #define OP_RCP 0x009 #define OP_ADD 0x00b #define OP_MUL 0x00c +#define OP_MAD 0x00e #define NV50_SU_MAX_TEMP 64 struct nv50_reg { @@ -27,6 +29,7 @@ struct nv50_reg { int index; int hw; + int neg; }; struct nv50_pc { @@ -140,7 +143,7 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, /* Grr.. Fun restrictions on where attribs can be sourced from.. */ if (src0 && (src0->type == P_CONST || src0->type == P_IMMD) && - (op == 0xc || op == 0xe)) { + (op == OP_MUL || op == OP_MAD)) { tmp = src1; src1 = src0; src0 = tmp; @@ -149,12 +152,14 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, if (src1 && src1->type == P_ATTR) { tmp = alloc_temp(pc, dst); + tmp->neg = src1->neg; src1->neg = 0; emit(pc, 1, tmp, src1, NULL, NULL); src1 = tmp; } if (src2 && src2->type == P_ATTR) { tmp2 = alloc_temp(pc, dst); + tmp2->neg = src2->neg; src2->neg = 0; emit(pc, 1, tmp2, src2, NULL, NULL); src2 = tmp2; } @@ -197,7 +202,7 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, inst[0] |= (src1->hw << 16); } else { alloc_reg(pc, src1); - if (op == 0xc || op == 0xe) + if (op == OP_MUL || op == OP_MAD) inst[0] |= (src1->hw << 16); else inst[1] |= (src1->hw << 14); @@ -216,7 +221,7 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, inst[1] |= (src2->hw << 14); } else { alloc_reg(pc, src2); - if (inst[0] & 0x00800000 || op ==0xe) + if (inst[0] & 0x00800000 || op == OP_MAD) inst[1] |= (src2->hw << 14); else inst[0] |= (src2->hw << 16); @@ -224,14 +229,24 @@ emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, } /*XXX: FIXME */ - if (op == 0xb || op == 0xc || op == 0x9 || op == 0xe) { + switch (op) { + case OP_ADD: + case OP_MUL: + case OP_RCP: + case OP_MAD: /* 0x04000000 negates arg0 */ /* 0x08000000 negates arg1 */ /*XXX: true for !0xb also ? */ + if (src0 && src0->neg) + inst[1] |= 0x04000000; + if (src1 && src1->neg) + inst[1] |= 0x08000000; inst[1] |= 0x00000780; - } else { + break; + default: /* 0x04000000 == arg0 32 bit, otherwise 16 bit */ inst[1] |= 0x04000780; + break; } } else { inst[0] |= ((op & 0xf) << 28); @@ -340,61 +355,71 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) case TGSI_OPCODE_ADD: for (c = 0; c < 4; c++) { if (mask & (1 << c)) { - emit(pc, 0x0b, dst[c], + emit(pc, OP_ADD, dst[c], src[0][c], src[1][c], none); } } break; case TGSI_OPCODE_DP3: tmp = alloc_temp(pc, NULL); - emit(pc, 0x0c, tmp, src[0][0], src[1][0], NULL); - emit(pc, 0x0e, tmp, src[0][1], src[1][1], tmp); - emit(pc, 0x0e, tmp, src[0][2], src[1][2], tmp); + emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); + emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); + emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x01, dst[c], tmp, none, none); + emit(pc, OP_MOV, dst[c], tmp, none, none); } free_temp(pc, tmp); break; case TGSI_OPCODE_DP4: tmp = alloc_temp(pc, NULL); - emit(pc, 0x0c, tmp, src[0][0], src[1][0], NULL); - emit(pc, 0x0e, tmp, src[0][1], src[1][1], tmp); - emit(pc, 0x0e, tmp, src[0][2], src[1][2], tmp); - emit(pc, 0x0e, tmp, src[0][3], src[1][3], tmp); + emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); + emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); + emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); + emit(pc, OP_MAD, tmp, src[0][3], src[1][3], tmp); for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x01, dst[c], tmp, none, none); + emit(pc, OP_MOV, dst[c], tmp, none, none); } free_temp(pc, tmp); break; case TGSI_OPCODE_MAD: for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x0e, dst[c], + emit(pc, OP_MAD, dst[c], src[0][c], src[1][c], src[2][c]); } break; case TGSI_OPCODE_MOV: for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x01, dst[c], src[0][c], none, none); + emit(pc, OP_MOV, dst[c], src[0][c], none, none); } break; case TGSI_OPCODE_MUL: for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x0c, dst[c], + emit(pc, OP_MUL, dst[c], src[0][c], src[1][c], none); } break; case TGSI_OPCODE_RCP: for (c = 0; c < 4; c++) { if (mask & (1 << c)) - emit(pc, 0x09, dst[c], + emit(pc, OP_RCP, dst[c], src[0][c], none, none); } break; + case TGSI_OPCODE_SUB: + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) { + src[1][c]->neg = 1; + emit(pc, OP_ADD, dst[c], + src[0][c], src[1][c], none); + src[1][c]->neg = 0; + } + } + break; case TGSI_OPCODE_END: break; default: -- cgit v1.2.3 From 207b7974723c6b88aacfa3703a1e049ff35db6a8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 2 Jun 2008 12:12:16 +1000 Subject: nv50: DPH --- src/gallium/drivers/nv50/nv50_program.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 41ff05dabe7..12bd419f4cd 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -383,6 +383,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, tmp); break; + case TGSI_OPCODE_DPH: + tmp = alloc_temp(pc, NULL); + emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); + emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); + emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); + emit(pc, OP_ADD, tmp, src[1][3], tmp, NULL); + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + emit(pc, OP_MOV, dst[c], tmp, none, none); + } + free_temp(pc, tmp); + break; case TGSI_OPCODE_MAD: for (c = 0; c < 4; c++) { if (mask & (1 << c)) -- cgit v1.2.3 From 22e0acc466947b203574c88f4964f61ef46ae3fd Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 2 Jun 2008 13:01:09 +1000 Subject: nv50: split code/data upload out, fp will use it later on --- src/gallium/drivers/nv50/nv50_program.c | 42 +++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 12bd419f4cd..97e3eab906f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -684,16 +684,37 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) p->translated = TRUE; } +static void +nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) +{ + int i; + + for (i = 0; i < p->immd_nr; i++) { + BEGIN_RING(tesla, 0x0f00, 2); + OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); + OUT_RING (fui(p->immd[i])); + } +} + +static void +nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) +{ + struct pipe_winsys *ws = nv50->pipe.winsys; + void *map; + + if (!p->buffer) + p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(map, p->insns, p->insns_nr * 4); + ws->buffer_unmap(ws, p->buffer); +} + void nv50_vertprog_validate(struct nv50_context *nv50) { - struct pipe_winsys *ws = nv50->pipe.winsys; - struct nouveau_winsys *nvws = nv50->screen->nvws; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *p = nv50->vertprog; struct nouveau_stateobj *so; - void *map; - int i; if (!p->translated) { nv50_program_validate(nv50, p); @@ -701,17 +722,8 @@ nv50_vertprog_validate(struct nv50_context *nv50) assert(0); } - if (!p->buffer) - p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); - map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(map, p->insns, p->insns_nr * 4); - ws->buffer_unmap(ws, p->buffer); - - for (i = 0; i < p->immd_nr; i++) { - BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); - OUT_RING (fui(p->immd[i])); - } + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); so = so_new(11, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); -- cgit v1.2.3 From 55b2fe1047b37d0d86641a252e1c745111030393 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 2 Jun 2008 23:52:58 +1000 Subject: nv50: drop the majority of the old shader code, reimplement, only MOV so far. --- src/gallium/drivers/nv50/nv50_program.c | 411 ++++++++++++-------------------- 1 file changed, 146 insertions(+), 265 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 97e3eab906f..aa848faa496 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -10,13 +10,8 @@ #include "nv50_context.h" #include "nv50_state.h" -#define OP_MOV 0x001 -#define OP_INTERP 0x008 -#define OP_RCP 0x009 -#define OP_ADD 0x00b -#define OP_MUL 0x00c -#define OP_MAD 0x00e #define NV50_SU_MAX_TEMP 64 +#define TX_FRAGPROG 0 struct nv50_reg { enum { @@ -106,181 +101,6 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r) } } -#if 0 -static struct nv50_reg * -constant(struct nv50_pc *pc, int pipe, int c, float v) -{ - struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); - struct nv50_program *p = pc->p; - struct nv50_program_data *pd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < p->nr_consts; idx++) { - if (p->consts[idx].index == pipe) - return nv40_sr(NV40SR_CONST, idx); - } - } - - idx = p->nr_consts++; - p->consts = realloc(p->consts, sizeof(*pd) * p->nr_consts); - pd = &p->consts[idx]; - - pd->index = pipe; - pd->component = c; - pd->value = v; - return nv40_sr(NV40SR_CONST, idx); -} -#endif - -static void -emit(struct nv50_pc *pc, unsigned op, struct nv50_reg *dst, - struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) -{ - struct nv50_program *p = pc->p; - struct nv50_reg *tmp0 = NULL, *tmp = NULL, *tmp2 = NULL; - unsigned inst[2] = { 0, 0 }; - - /* Grr.. Fun restrictions on where attribs can be sourced from.. */ - if (src0 && (src0->type == P_CONST || src0->type == P_IMMD) && - (op == OP_MUL || op == OP_MAD)) { - tmp = src1; - src1 = src0; - src0 = tmp; - tmp = NULL; - } - - if (src1 && src1->type == P_ATTR) { - tmp = alloc_temp(pc, dst); - tmp->neg = src1->neg; src1->neg = 0; - emit(pc, 1, tmp, src1, NULL, NULL); - src1 = tmp; - } - - if (src2 && src2->type == P_ATTR) { - tmp2 = alloc_temp(pc, dst); - tmp2->neg = src2->neg; src2->neg = 0; - emit(pc, 1, tmp2, src2, NULL, NULL); - src2 = tmp2; - } - - /* Get this out of the way first. What type of opcode do we - * want/need to build? - */ - if ((op & 0x3f0) || dst->type == P_RESULT || - (src0 && src0->type == P_ATTR) || src1 || src2) - inst[0] |= 0x00000001; - - if (inst[0] & 0x00000001) { - inst[0] |= ((op & 0xf) << 28); - inst[1] |= ((op >> 4) << 26); - - alloc_reg(pc, dst); - if (dst->type == P_RESULT) - inst[1] |= 0x00000008; - inst[0] |= (dst->hw << 2); - - if (src0) { - if (src0->type == P_ATTR) - inst[1] |= 0x00200000; - else - if (src0->type == P_CONST || src0->type == P_IMMD) - assert(0); - alloc_reg(pc, src0); - inst[0] |= (src0->hw << 9); - } - - if (src1) { - if (src1->type == P_CONST || src1->type == P_IMMD) { - if (src1->type == P_IMMD) - inst[1] |= (NV50_CB_PMISC << 22); - else - inst[1] |= (NV50_CB_PVP << 22); - inst[0] |= 0x00800000; /* src1 is const */ - /*XXX: does src1 come from "src2" now? */ - alloc_reg(pc, src1); - inst[0] |= (src1->hw << 16); - } else { - alloc_reg(pc, src1); - if (op == OP_MUL || op == OP_MAD) - inst[0] |= (src1->hw << 16); - else - inst[1] |= (src1->hw << 14); - } - } else { - inst[1] |= 0x0003c000; /*XXX FIXME */ - } - - if (src2) { - if (src2->type == P_CONST || src2->type == P_IMMD) { - if (src2->type == P_IMMD) - inst[1] |= (NV50_CB_PMISC << 22); - else - inst[1] |= (NV50_CB_PVP << 22); - inst[0] |= 0x01000000; /* src2 is const */ - inst[1] |= (src2->hw << 14); - } else { - alloc_reg(pc, src2); - if (inst[0] & 0x00800000 || op == OP_MAD) - inst[1] |= (src2->hw << 14); - else - inst[0] |= (src2->hw << 16); - } - } - - /*XXX: FIXME */ - switch (op) { - case OP_ADD: - case OP_MUL: - case OP_RCP: - case OP_MAD: - /* 0x04000000 negates arg0 */ - /* 0x08000000 negates arg1 */ - /*XXX: true for !0xb also ? */ - if (src0 && src0->neg) - inst[1] |= 0x04000000; - if (src1 && src1->neg) - inst[1] |= 0x08000000; - inst[1] |= 0x00000780; - break; - default: - /* 0x04000000 == arg0 32 bit, otherwise 16 bit */ - inst[1] |= 0x04000780; - break; - } - } else { - inst[0] |= ((op & 0xf) << 28); - - alloc_reg(pc, dst); - inst[0] |= (dst->hw << 2); - - if (src0) { - alloc_reg(pc, src0); - inst[0] |= (src0->hw << 9); - } - - /*XXX: NFI if this even works - probably not.. */ - if (src1) { - alloc_reg(pc, src1); - inst[0] |= (src1->hw << 16); - } - } - - if (tmp0) free_temp(pc, tmp0); - if (tmp) free_temp(pc, tmp); - if (tmp2) free_temp(pc, tmp2); - - if (inst[0] & 1) { - p->insns_nr += 2; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); - } else { - p->insns_nr += 1; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); - } -} - static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -327,11 +147,147 @@ tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src) return NULL; } +static void +emit(struct nv50_pc *pc, unsigned *inst) +{ + struct nv50_program *p = pc->p; + + if (inst[0] & 1) { + p->insns_nr += 2; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); + } else { + p->insns_nr += 1; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); + } +} + +static INLINE void set_long(struct nv50_pc *, unsigned *); + +static boolean +is_long(unsigned *inst) +{ + if (inst[0] & 1) + return TRUE; + return FALSE; +} + +static boolean +is_immd(unsigned *inst) +{ + if (is_long(inst) && (inst[1] & 3) == 3) + return TRUE; + return FALSE; +} + +static INLINE void +set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) +{ + set_long(pc, inst); + inst[1] &= ~((0x1f << 7) | (0x3 << 12)); + inst[1] |= (pred << 7) | (idx << 12); +} + +static INLINE void +set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) +{ + set_long(pc, inst); + inst[1] &= ~((0x3 << 4) | (1 << 6)); + inst[1] |= (idx << 4) | (on << 6); +} + +static INLINE void +set_long(struct nv50_pc *pc, unsigned *inst) +{ + if (is_long(inst)) + return; + + inst[0] |= 1; + set_pred(pc, 0xf, 0, inst); + set_pred_wr(pc, 0, 0, inst); +} + +static INLINE void +set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) +{ + if (dst->type == P_RESULT) { + set_long(pc, inst); + inst[1] |= 0x00000008; + } + + alloc_reg(pc, dst); + inst[0] |= (dst->hw << 2); +} + +static INLINE void +set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) +{ + unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ + + set_long(pc, inst); + /*XXX: can't be predicated - bits overlap.. catch cases where both + * are required and avoid them. */ + set_pred(pc, 0, 0, inst); + set_pred_wr(pc, 0, 0, inst); + + inst[1] |= 0x00000002 | 0x00000001; + inst[0] |= (val & 0x3f) << 16; + inst[1] |= (val >> 6) << 2; +} + +static void +emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + int i; + + inst[0] |= 0x10000000; + + set_dst(pc, dst, inst); + + if (dst->type != P_RESULT && src->type == P_IMMD) { + set_immd(pc, src, inst); + /*XXX: 32-bit, but steals part of "half" reg space - need to + * catch and handle this case if/when we do half-regs + */ + inst[0] |= 0x00008000; + } else + if (src->type == P_IMMD || src->type == P_CONST) { + set_long(pc, inst); + if (src->type == P_IMMD) + inst[1] |= (NV50_CB_PMISC << 22); + else + inst[1] |= (NV50_CB_PVP << 22); + inst[0] |= (src->hw << 9); + inst[1] |= 0x20000000; /* src0 const? */ + } else { + if (src->type == P_ATTR) { + set_long(pc, inst); + inst[1] |= 0x00200000; + } + + alloc_reg(pc, src); + inst[0] |= (src->hw << 9); + } + + /* We really should support "half" instructions here at some point, + * but I don't feel confident enough about them yet. + */ + set_long(pc, inst); + if (is_long(inst) && !is_immd(inst)) { + inst[1] |= 0x04000000; /* 32-bit */ + inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ + } + + emit(pc, inst); +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { const struct tgsi_full_instruction *inst = &tok->FullInstruction; - struct nv50_reg *dst[4], *src[3][4], *none = NULL, *tmp; + struct nv50_reg *dst[4], *src[3][4]; unsigned mask; int i, c; @@ -352,85 +308,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ADD: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) { - emit(pc, OP_ADD, dst[c], - src[0][c], src[1][c], none); - } - } - break; - case TGSI_OPCODE_DP3: - tmp = alloc_temp(pc, NULL); - emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); - emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); - emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MOV, dst[c], tmp, none, none); - } - free_temp(pc, tmp); - break; - case TGSI_OPCODE_DP4: - tmp = alloc_temp(pc, NULL); - emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); - emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); - emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); - emit(pc, OP_MAD, tmp, src[0][3], src[1][3], tmp); - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MOV, dst[c], tmp, none, none); - } - free_temp(pc, tmp); - break; - case TGSI_OPCODE_DPH: - tmp = alloc_temp(pc, NULL); - emit(pc, OP_MUL, tmp, src[0][0], src[1][0], NULL); - emit(pc, OP_MAD, tmp, src[0][1], src[1][1], tmp); - emit(pc, OP_MAD, tmp, src[0][2], src[1][2], tmp); - emit(pc, OP_ADD, tmp, src[1][3], tmp, NULL); - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MOV, dst[c], tmp, none, none); - } - free_temp(pc, tmp); - break; - case TGSI_OPCODE_MAD: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MAD, dst[c], - src[0][c], src[1][c], src[2][c]); - } - break; case TGSI_OPCODE_MOV: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MOV, dst[c], src[0][c], none, none); - } - break; - case TGSI_OPCODE_MUL: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_MUL, dst[c], - src[0][c], src[1][c], none); - } - break; - case TGSI_OPCODE_RCP: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) - emit(pc, OP_RCP, dst[c], - src[0][c], none, none); - } - break; - case TGSI_OPCODE_SUB: - for (c = 0; c < 4; c++) { - if (mask & (1 << c)) { - src[1][c]->neg = 1; - emit(pc, OP_ADD, dst[c], - src[0][c], src[1][c], none); - src[1][c]->neg = 0; - } - } + for (c = 0; c < 4; c++) + emit_mov(pc, dst[c], src[0][c]); break; case TGSI_OPCODE_END: break; @@ -648,6 +528,7 @@ out_cleanup: static void nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) { +#if TX_FRAGPROG == 0 struct tgsi_parse_context pc; tgsi_parse_init(&pc, p->pipe.tokens); @@ -665,6 +546,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) p->insns[7] = 0x00020781; } else if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { +#endif int i; if (nv50_program_tx(p) == FALSE) @@ -673,6 +555,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) for (i = 0; i < p->insns_nr; i++) NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]); +#if TX_FRAGPROG == 0 } else { NOUVEAU_ERR("invalid TGSI processor\n"); tgsi_parse_free(&pc); @@ -680,6 +563,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) } tgsi_parse_free(&pc); +#endif p->translated = TRUE; } @@ -758,11 +642,8 @@ nv50_fragprog_validate(struct nv50_context *nv50) assert(0); } - if (!p->buffer) - p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); - map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(map, p->insns, p->insns_nr * 4); - ws->buffer_unmap(ws, p->buffer); + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); so = so_new(3, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); -- cgit v1.2.3 From 8ec6415e9fcf876c67bc1624f3eb7dd7624b7791 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 3 Jun 2008 12:37:29 +1000 Subject: nv50: start using interpreter for fragprog too, not hardcoded passthrough --- src/gallium/drivers/nv50/nv50_program.c | 96 ++++++++++++++++++++++++++++----- src/gallium/drivers/nv50/nv50_state.c | 2 + src/gallium/drivers/nv50/nv50_state.h | 8 ++- 3 files changed, 92 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index aa848faa496..9d89f6d0609 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -11,7 +11,7 @@ #include "nv50_state.h" #define NV50_SU_MAX_TEMP 64 -#define TX_FRAGPROG 0 +#define TX_FRAGPROG 1 struct nv50_reg { enum { @@ -52,15 +52,23 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) { int i; - if (reg->type != P_TEMP || reg->hw >= 0) + if (reg->type != P_TEMP) return; + if (reg->hw >= 0) { + /*XXX: do this here too to catch FP temp-as-attr usage.. + * not clean, but works */ + if (pc->p->cfg.high_temp < (reg->hw + 1)) + pc->p->cfg.high_temp = reg->hw + 1; + return; + } + for (i = 0; i < NV50_SU_MAX_TEMP; i++) { if (!(pc->r_temp[i])) { pc->r_temp[i] = reg; reg->hw = i; - if (pc->p->cfg.vp.high_temp < (i + 1)) - pc->p->cfg.vp.high_temp = i + 1; + if (pc->p->cfg.high_temp < (i + 1)) + pc->p->cfg.high_temp = i + 1; return; } } @@ -236,6 +244,24 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) inst[1] |= (val >> 6) << 2; } +static void +emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0x80000000; + set_dst(pc, dst, inst); + alloc_reg(pc, iv); + inst[0] |= (iv->hw << 9); + alloc_reg(pc, src); + inst[0] |= (src->hw << 16); + if (noperspective) + inst[0] |= (1 << 25); + + emit(pc, inst); +} + static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { @@ -409,20 +435,57 @@ nv50_program_tx_prep(struct nv50_pc *pc) NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); if (pc->attr_nr) { + struct nv50_reg *iv = NULL, *tmp = NULL; int aid = 0; pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); if (!pc->attr) goto out_err; + if (pc->p->type == NV50_PROG_FRAGMENT) { + iv = alloc_temp(pc, NULL); + aid++; + } + for (i = 0; i < pc->attr_nr; i++) { + struct nv50_reg *a = &pc->attr[i*4]; + for (c = 0; c < 4; c++) { - pc->p->cfg.vp.attr[aid/32] |= (1 << (aid % 32)); - pc->attr[i*4+c].type = P_ATTR; - pc->attr[i*4+c].hw = aid++; - pc->attr[i*4+c].index = i; + if (pc->p->type == NV50_PROG_FRAGMENT) { + struct nv50_reg *at = + alloc_temp(pc, NULL); + pc->attr[i*4+c].type = at->type; + pc->attr[i*4+c].hw = at->hw; + pc->attr[i*4+c].index = at->index; + } else { + pc->p->cfg.vp.attr[aid/32] |= + (1 << (aid % 32)); + pc->attr[i*4+c].type = P_ATTR; + pc->attr[i*4+c].hw = aid++; + pc->attr[i*4+c].index = i; + } } + + if (pc->p->type != NV50_PROG_FRAGMENT) + continue; + + emit_interp(pc, iv, iv, iv, FALSE); + tmp = alloc_temp(pc, NULL); + { + unsigned inst[2] = { 0, 0 }; + inst[0] = 0x90000000; + inst[0] |= (tmp->hw << 2); + emit(pc, inst); + } + emit_interp(pc, &a[0], &a[0], tmp, TRUE); + emit_interp(pc, &a[1], &a[1], tmp, TRUE); + emit_interp(pc, &a[2], &a[2], tmp, TRUE); + emit_interp(pc, &a[3], &a[3], tmp, TRUE); + free_temp(pc, tmp); } + + if (iv) + free_temp(pc, iv); } NOUVEAU_ERR("%d result regs\n", pc->result_nr); @@ -435,7 +498,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0; i < pc->result_nr; i++) { for (c = 0; c < 4; c++) { - pc->result[i*4+c].type = P_RESULT; + if (pc->p->type == NV50_PROG_FRAGMENT) + pc->result[i*4+c].type = P_TEMP; + else + pc->result[i*4+c].type = P_RESULT; pc->result[i*4+c].hw = rid++; pc->result[i*4+c].index = i; } @@ -492,7 +558,7 @@ nv50_program_tx(struct nv50_program *p) if (!pc) return FALSE; pc->p = p; - pc->p->cfg.vp.high_temp = 4; + pc->p->cfg.high_temp = 4; ret = nv50_program_tx_prep(pc); if (ret == FALSE) @@ -551,6 +617,8 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) if (nv50_program_tx(p) == FALSE) assert(0); + /* *not* sufficient, it's fine if last inst is long and + * NOT immd - otherwise it's fucked fucked fucked */ p->insns[p->insns_nr - 1] |= 0x00000001; for (i = 0; i < p->insns_nr; i++) @@ -620,7 +688,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.vp.attr[1]); so_method(so, tesla, 0x16ac, 2); so_data (so, 8); - so_data (so, p->cfg.vp.high_temp); + so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x140c, 1); so_data (so, 0); /* program start offset */ so_emit(nv50->screen->nvws, so); @@ -645,12 +713,16 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(3, 2); + so = so_new(7, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 0x198c, 1); + so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x1414, 1); + so_data (so, 0); /* program start offset */ so_emit(nv50->screen->nvws, so); so_ref(NULL, &so); } diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index ba3d04cede3..48d09c296bb 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -331,6 +331,7 @@ nv50_vp_state_create(struct pipe_context *pipe, struct nv50_program *p = CALLOC_STRUCT(nv50_program); p->pipe = *cso; + p->type = NV50_PROG_VERTEX; tgsi_scan_shader(p->pipe.tokens, &p->info); return (void *)p; } @@ -360,6 +361,7 @@ nv50_fp_state_create(struct pipe_context *pipe, struct nv50_program *p = CALLOC_STRUCT(nv50_program); p->pipe = *cso; + p->type = NV50_PROG_FRAGMENT; tgsi_scan_shader(p->pipe.tokens, &p->info); return (void *)p; } diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h index 9e3876871bd..dd352b65856 100644 --- a/src/gallium/drivers/nv50/nv50_state.h +++ b/src/gallium/drivers/nv50/nv50_state.h @@ -15,6 +15,10 @@ struct nv50_program { struct tgsi_shader_info info; boolean translated; + enum { + NV50_PROG_VERTEX, + NV50_PROG_FRAGMENT + } type; unsigned *insns; unsigned insns_nr; @@ -25,9 +29,9 @@ struct nv50_program { struct nouveau_resource *data; - union { + struct { + unsigned high_temp; struct { - unsigned high_temp; unsigned attr[2]; } vp; } cfg; -- cgit v1.2.3 From e55964099b0d47dea80920765daac09b9e2a61a7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 3 Jun 2008 12:38:12 +1000 Subject: nv50: remove hardcoded fp stuff --- src/gallium/drivers/nv50/nv50_program.c | 45 ++++++--------------------------- 1 file changed, 8 insertions(+), 37 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 9d89f6d0609..df29069e628 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -11,7 +11,6 @@ #include "nv50_state.h" #define NV50_SU_MAX_TEMP 64 -#define TX_FRAGPROG 1 struct nv50_reg { enum { @@ -594,44 +593,16 @@ out_cleanup: static void nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) { -#if TX_FRAGPROG == 0 - struct tgsi_parse_context pc; - - tgsi_parse_init(&pc, p->pipe.tokens); - - if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - p->insns_nr = 8; - p->insns = malloc(p->insns_nr * sizeof(unsigned)); - p->insns[0] = 0x80000000; - p->insns[1] = 0x9000000c; - p->insns[2] = 0x82010600; - p->insns[3] = 0x82020604; - p->insns[4] = 0x80030609; - p->insns[5] = 0x00020780; - p->insns[6] = 0x8004060d; - p->insns[7] = 0x00020781; - } else - if (pc.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { -#endif - int i; - - if (nv50_program_tx(p) == FALSE) - assert(0); - /* *not* sufficient, it's fine if last inst is long and - * NOT immd - otherwise it's fucked fucked fucked */ - p->insns[p->insns_nr - 1] |= 0x00000001; + int i; - for (i = 0; i < p->insns_nr; i++) - NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]); -#if TX_FRAGPROG == 0 - } else { - NOUVEAU_ERR("invalid TGSI processor\n"); - tgsi_parse_free(&pc); - return; - } + if (nv50_program_tx(p) == FALSE) + assert(0); + /* *not* sufficient, it's fine if last inst is long and + * NOT immd - otherwise it's fucked fucked fucked */ + p->insns[p->insns_nr - 1] |= 0x00000001; - tgsi_parse_free(&pc); -#endif + for (i = 0; i < p->insns_nr; i++) + NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]); p->translated = TRUE; } -- cgit v1.2.3 From 2a1fb44d75364f2492a1ae5d232218a92b8ca807 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 4 Jun 2008 21:23:14 +1000 Subject: nv50: checkpoint: shader code now exceeds caps of "old" code --- src/gallium/drivers/nv50/nv50_program.c | 279 +++++++++++++++++++++++++++++++- 1 file changed, 272 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index df29069e628..bc0b7b2827d 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -44,6 +44,9 @@ struct nv50_pc { struct nv50_reg *immd; float *immd_buf; int immd_nr; + + struct nv50_reg *temp_temp[4]; + unsigned temp_temp_nr; }; static void @@ -108,6 +111,26 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r) } } +static struct nv50_reg * +temp_temp(struct nv50_pc *pc) +{ + if (pc->temp_temp_nr >= 4) + assert(0); + + pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); + return pc->temp_temp[pc->temp_temp_nr++]; +} + +static void +kill_temp_temp(struct nv50_pc *pc) +{ + int i; + + for (i = 0; i < pc->temp_temp_nr; i++) + free_temp(pc, pc->temp_temp[i]); + pc->temp_temp_nr = 0; +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -168,6 +191,8 @@ emit(struct nv50_pc *pc, unsigned *inst) p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); } + + kill_temp_temp(pc); } static INLINE void set_long(struct nv50_pc *, unsigned *); @@ -261,11 +286,24 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, emit(pc, inst); } +static void +set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +{ + set_long(pc, inst); + if (src->type == P_IMMD) { + inst[1] |= (NV50_CB_PMISC << 22); + } else { + if (pc->p->type == NV50_PROG_VERTEX) + inst[1] |= (NV50_CB_PVP << 22); + else + inst[1] |= (NV50_CB_PFP << 22); + } +} + static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { unsigned inst[2] = { 0, 0 }; - int i; inst[0] |= 0x10000000; @@ -280,10 +318,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) } else if (src->type == P_IMMD || src->type == P_CONST) { set_long(pc, inst); - if (src->type == P_IMMD) - inst[1] |= (NV50_CB_PMISC << 22); - else - inst[1] |= (NV50_CB_PVP << 22); + set_cseg(pc, src, inst); inst[0] |= (src->hw << 9); inst[1] |= 0x20000000; /* src0 const? */ } else { @@ -308,11 +343,177 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } +static boolean +check_swap_src_0_1(struct nv50_pc *pc, + struct nv50_reg **s0, struct nv50_reg **s1) +{ + struct nv50_reg *src0 = *s0, *src1 = *s1; + + if (src0->type == P_CONST) { + if (src1->type != P_CONST) { + *s0 = src1; + *s1 = src0; + return TRUE; + } + } else + if (src1->type == P_ATTR) { + if (src0->type != P_ATTR) { + *s0 = src1; + *s1 = src0; + return TRUE; + } + } + + return FALSE; +} + +static void +set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +{ + if (src->type == P_ATTR) { + set_long(pc, inst); + inst[1] |= 0x00200000; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } + + alloc_reg(pc, src); + inst[0] |= (src->hw << 9); +} + +static void +set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +{ + if (src->type == P_ATTR) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + set_cseg(pc, src, inst); + inst[0] |= 0x00800000; + } + + alloc_reg(pc, src); + inst[0] |= (src->hw << 16); +} + +static void +set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +{ + set_long(pc, inst); + + if (src->type == P_ATTR) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + set_cseg(pc, src, inst); + inst[0] |= 0x01000000; + } + + alloc_reg(pc, src); + inst[1] |= (src->hw << 14); +} + +static void +emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xc0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_1(pc, src1, inst); + + emit(pc, inst); +} + +static void +emit_add(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xb0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_2(pc, src1, inst); + + emit(pc, inst); +} + +static void +emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xb0000000; + + set_long(pc, inst); + if (check_swap_src_0_1(pc, &src0, &src1)) + inst[1] |= 0x04000000; + else + inst[1] |= 0x08000000; + + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_2(pc, src1, inst); + + emit(pc, inst); +} + +static void +emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, struct nv50_reg *src2) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xe0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_1(pc, src1, inst); + set_src_2(pc, src2, inst); + + emit(pc, inst); +} + +static void +emit_flop(struct nv50_pc *pc, unsigned sub, + struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + set_long(pc, inst); + inst[0] |= 0x90000000; + inst[1] |= (sub << 29); + + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + + emit(pc, inst); +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { const struct tgsi_full_instruction *inst = &tok->FullInstruction; - struct nv50_reg *dst[4], *src[3][4]; + struct nv50_reg *dst[4], *src[3][4], *temp; unsigned mask; int i, c; @@ -333,10 +534,69 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + for (c = 0; c < 4; c++) + emit_add(pc, dst[c], src[0][c], src[1][c]); + break; + case TGSI_OPCODE_COS: + for (c = 0; c < 4; c++) + emit_flop(pc, 5, dst[c], src[0][c]); + break; + case TGSI_OPCODE_DP3: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + for (c = 0; c < 4; c++) + emit_mov(pc, dst[c], temp); + free_temp(pc, temp); + break; + case TGSI_OPCODE_DP4: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + emit_mad(pc, temp, src[0][3], src[1][3], temp); + for (c = 0; c < 4; c++) + emit_mov(pc, dst[c], temp); + free_temp(pc, temp); + break; + case TGSI_OPCODE_EX2: + for (c = 0; c < 4; c++) + emit_flop(pc, 6, dst[c], src[0][c]); + break; + case TGSI_OPCODE_LG2: + for (c = 0; c < 4; c++) + emit_flop(pc, 3, dst[c], src[0][c]); + break; + case TGSI_OPCODE_MAD: + for (c = 0; c < 4; c++) + emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + break; case TGSI_OPCODE_MOV: for (c = 0; c < 4; c++) emit_mov(pc, dst[c], src[0][c]); break; + case TGSI_OPCODE_MUL: + for (c = 0; c < 4; c++) + emit_mul(pc, dst[c], src[0][c], src[1][c]); + break; + case TGSI_OPCODE_RCP: + for (c = 0; c < 4; c++) + emit_flop(pc, 0, dst[c], src[0][c]); + break; + case TGSI_OPCODE_RSQ: + for (c = 0; c < 4; c++) + emit_flop(pc, 2, dst[c], src[0][c]); + break; + case TGSI_OPCODE_SIN: + for (c = 0; c < 4; c++) + emit_flop(pc, 4, dst[c], src[0][c]); + break; + case TGSI_OPCODE_SUB: + for (c = 0; c < 4; c++) + emit_sub(pc, dst[c], src[0][c], src[1][c]); + break; case TGSI_OPCODE_END: break; default: @@ -601,8 +861,13 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) * NOT immd - otherwise it's fucked fucked fucked */ p->insns[p->insns_nr - 1] |= 0x00000001; + if (p->type == NV50_PROG_VERTEX) { + for (i = 0; i < p->insns_nr; i++) + NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); + } else { for (i = 0; i < p->insns_nr; i++) - NOUVEAU_ERR("%d 0x%08x\n", i, p->insns[i]); + NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); + } p->translated = TRUE; } -- cgit v1.2.3 From 52a69196c1680ff16d1ad1fc88e5869bc6055d00 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 4 Jun 2008 21:45:32 +1000 Subject: nv50: some fixes + MIN/MAX --- src/gallium/drivers/nv50/nv50_program.c | 48 ++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bc0b7b2827d..afc911cd265 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -45,7 +45,7 @@ struct nv50_pc { float *immd_buf; int immd_nr; - struct nv50_reg *temp_temp[4]; + struct nv50_reg *temp_temp[8]; unsigned temp_temp_nr; }; @@ -114,7 +114,7 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r) static struct nv50_reg * temp_temp(struct nv50_pc *pc) { - if (pc->temp_temp_nr >= 4) + if (pc->temp_temp_nr >= 8) assert(0); pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); @@ -191,8 +191,6 @@ emit(struct nv50_pc *pc, unsigned *inst) p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); } - - kill_temp_temp(pc); } static INLINE void set_long(struct nv50_pc *, unsigned *); @@ -440,8 +438,8 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, } static void -emit_add(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, - struct nv50_reg *src1) +emit_add(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) { unsigned inst[2] = { 0, 0 }; @@ -450,7 +448,28 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, inst); set_src_0(pc, src0, inst); - set_src_2(pc, src1, inst); + if (is_long(inst)) + set_src_2(pc, src1, inst); + else + set_src_1(pc, src1, inst); + + emit(pc, inst); +} + +static void +emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + unsigned inst[2] = { 0, 0 }; + + set_long(pc, inst); + inst[0] |= 0xb0000000; + inst[1] |= (sub << 29); + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_1(pc, src1, inst); emit(pc, inst); } @@ -499,9 +518,11 @@ emit_flop(struct nv50_pc *pc, unsigned sub, { unsigned inst[2] = { 0, 0 }; - set_long(pc, inst); inst[0] |= 0x90000000; - inst[1] |= (sub << 29); + if (sub) { + set_long(pc, inst); + inst[1] |= (sub << 29); + } set_dst(pc, dst, inst); set_src_0(pc, src, inst); @@ -573,6 +594,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (c = 0; c < 4; c++) emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); break; + case TGSI_OPCODE_MAX: + for (c = 0; c < 4; c++) + emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + break; + case TGSI_OPCODE_MIN: + for (c = 0; c < 4; c++) + emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + break; case TGSI_OPCODE_MOV: for (c = 0; c < 4; c++) emit_mov(pc, dst[c], src[0][c]); @@ -604,6 +633,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return FALSE; } + kill_temp_temp(pc); return TRUE; } -- cgit v1.2.3 From 7df7f7bb99441ed8e2fba2840e0459e72691f272 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 5 Jun 2008 18:39:00 +1000 Subject: nv50: big fuckup, fix it --- src/gallium/drivers/nv50/nv50_program.c | 75 ++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index afc911cd265..a293db6f723 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -556,20 +556,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ADD: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_add(pc, dst[c], src[0][c], src[1][c]); + } break; case TGSI_OPCODE_COS: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 5, dst[c], src[0][c]); + } break; case TGSI_OPCODE_DP3: temp = alloc_temp(pc, NULL); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_mov(pc, dst[c], temp); + } free_temp(pc, temp); break; case TGSI_OPCODE_DP4: @@ -578,53 +587,89 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); emit_mad(pc, temp, src[0][3], src[1][3], temp); - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_mov(pc, dst[c], temp); + } free_temp(pc, temp); break; case TGSI_OPCODE_EX2: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 6, dst[c], src[0][c]); + } break; case TGSI_OPCODE_LG2: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 3, dst[c], src[0][c]); + } break; case TGSI_OPCODE_MAD: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + } break; case TGSI_OPCODE_MAX: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + } break; case TGSI_OPCODE_MIN: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + } break; case TGSI_OPCODE_MOV: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_mov(pc, dst[c], src[0][c]); + } break; case TGSI_OPCODE_MUL: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_mul(pc, dst[c], src[0][c], src[1][c]); + } break; case TGSI_OPCODE_RCP: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 0, dst[c], src[0][c]); + } break; case TGSI_OPCODE_RSQ: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 2, dst[c], src[0][c]); + } break; case TGSI_OPCODE_SIN: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_flop(pc, 4, dst[c], src[0][c]); + } break; case TGSI_OPCODE_SUB: - for (c = 0; c < 4; c++) + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; emit_sub(pc, dst[c], src[0][c], src[1][c]); + } break; case TGSI_OPCODE_END: break; -- cgit v1.2.3 From fbf4027dd9b279ec159906dcad134f71e34aaec8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 5 Jun 2008 18:52:16 +1000 Subject: nv50: fix EX2.. somehow --- src/gallium/drivers/nv50/nv50_program.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index a293db6f723..6d3d8ff302b 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -595,11 +595,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) free_temp(pc, temp); break; case TGSI_OPCODE_EX2: + temp = alloc_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 6, dst[c], src[0][c]); + { + unsigned inst[2] = { 0, 0 }; + inst[0] |= 0xb0000000; + set_dst(pc, temp, inst); + set_src_0(pc, src[0][c], inst); + set_long(pc, inst); + inst[1] |= (6 << 29) | 0x00004000; + emit(pc, inst); + } + emit_flop(pc, 6, dst[c], temp); } + free_temp(pc, temp); break; case TGSI_OPCODE_LG2: for (c = 0; c < 4; c++) { -- cgit v1.2.3 From 21e18e2b74d71c93af06ef4c603ca371c4614237 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 5 Jun 2008 23:48:23 +1000 Subject: nv50: DPH, XPD, some TODOs --- src/gallium/drivers/nv50/nv50_program.c | 61 +++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 6d3d8ff302b..91f24369f14 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -12,6 +12,25 @@ #define NV50_SU_MAX_TEMP 64 +/* ABS + * ARL + * DST - const(1.0) + * FLR + * FRC + * LIT + * POW + * SGE - cvt + * SLT - cvt + * SWZ + * + * MSB - Like MAD, but MUL+SUB + * - Fuck it off, introduce a way to negate args for ops that + * support it. + * + * Need ability to specifiy driver IMMD values, like nv40 constant() + * + * Look into inlining IMMD for ops other than MOV + */ struct nv50_reg { enum { P_TEMP, @@ -512,6 +531,25 @@ emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, emit(pc, inst); } +static void +emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, struct nv50_reg *src2) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xe0000000; + set_long(pc, inst); + inst[1] |= 0x08000000; /* src0 * src1 - src2 */ + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_1(pc, src1, inst); + set_src_2(pc, src2, inst); + + emit(pc, inst); +} + static void emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src) @@ -594,6 +632,19 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, temp); break; + case TGSI_OPCODE_DPH: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + emit_add(pc, temp, src[1][3], temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; case TGSI_OPCODE_EX2: temp = alloc_temp(pc, NULL); for (c = 0; c < 4; c++) { @@ -682,6 +733,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_sub(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_XPD: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][2], src[1][1]); + emit_msb(pc, dst[0], src[0][1], src[1][2], temp); + emit_mul(pc, temp, src[0][0], src[1][2]); + emit_msb(pc, dst[1], src[0][2], src[1][0], temp); + emit_mul(pc, temp, src[0][1], src[1][0]); + emit_msb(pc, dst[2], src[0][0], src[1][1], temp); + free_temp(pc, temp); + break; case TGSI_OPCODE_END: break; default: -- cgit v1.2.3 From d69f33423087fc054181c60724f4bcbe29195e08 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 6 Jun 2008 13:57:59 +1000 Subject: nv50: small cleanup --- src/gallium/drivers/nv50/nv50_program.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 91f24369f14..7295c2314cf 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -568,6 +568,21 @@ emit_flop(struct nv50_pc *pc, unsigned sub, emit(pc, inst); } +static void +emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xb0000000; + + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + set_long(pc, inst); + inst[1] |= (6 << 29) | 0x00004000; + + emit(pc, inst); +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { @@ -650,15 +665,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - { - unsigned inst[2] = { 0, 0 }; - inst[0] |= 0xb0000000; - set_dst(pc, temp, inst); - set_src_0(pc, src[0][c], inst); - set_long(pc, inst); - inst[1] |= (6 << 29) | 0x00004000; - emit(pc, inst); - } + emit_preex2(pc, temp, src[0][c]); emit_flop(pc, 6, dst[c], temp); } free_temp(pc, temp); @@ -1081,11 +1088,9 @@ nv50_vertprog_validate(struct nv50_context *nv50) void nv50_fragprog_validate(struct nv50_context *nv50) { - struct pipe_winsys *ws = nv50->pipe.winsys; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *p = nv50->fragprog; struct nouveau_stateobj *so; - void *map; if (!p->translated) { nv50_program_validate(nv50, p); -- cgit v1.2.3 From bdd31c20abb27665ca701a5a46e29d4cfa71f679 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 11:41:47 +1000 Subject: nv50: SGE/SLT --- src/gallium/drivers/nv50/nv50_program.c | 82 ++++++++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7295c2314cf..fa16c7522e1 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -19,8 +19,6 @@ * FRC * LIT * POW - * SGE - cvt - * SLT - cvt * SWZ * * MSB - Like MAD, but MUL+SUB @@ -201,15 +199,15 @@ emit(struct nv50_pc *pc, unsigned *inst) { struct nv50_program *p = pc->p; - if (inst[0] & 1) { - p->insns_nr += 2; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); - } else { - p->insns_nr += 1; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); - } + if (inst[0] & 1) { + p->insns_nr += 2; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); + } else { + p->insns_nr += 1; + p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); + memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); + } } static INLINE void set_long(struct nv50_pc *, unsigned *); @@ -582,6 +580,54 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } +/*XXX: inaccurate results.. why? */ +#define ALLOW_SET_SWAP 0 + +static void +emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + unsigned inst[2] = { 0, 0 }; +#if ALLOW_SET_SWAP + unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; +#endif + struct nv50_reg *rdst; + +#if ALLOW_SET_SWAP + assert(c_op <= 7); + if (check_swap_src_0_1(pc, &src0, &src1)) + c_op = inv_cop[c_op]; +#endif + + rdst = dst; + if (dst->type != P_TEMP) + dst = alloc_temp(pc, NULL); + + /* set.u32 */ + set_long(pc, inst); + inst[0] |= 0xb0000000; + inst[1] |= (3 << 29); + inst[1] |= (c_op << 14); + /*XXX: breaks things, .u32 by default? + * decuda will disasm as .u16 and use .lo/.hi regs, but this + * doesn't seem to match what the hw actually does. + inst[1] |= 0x04000000; << breaks things.. .u32 by default? + */ + set_dst(pc, dst, inst); + set_src_0(pc, src0, inst); + set_src_1(pc, src1, inst); + emit(pc, inst); + + /* cvt.f32.u32 */ + inst[0] = 0xa0000001; + inst[1] = 0x64014780; + set_dst(pc, rdst, inst); + set_src_0(pc, dst, inst); + emit(pc, inst); + + if (dst != rdst) + free_temp(pc, dst); +} static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) @@ -726,6 +772,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_flop(pc, 2, dst[c], src[0][c]); } break; + case TGSI_OPCODE_SGE: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_set(pc, 6, dst[c], src[0][c], src[1][c]); + } + break; case TGSI_OPCODE_SIN: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -733,6 +786,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_flop(pc, 4, dst[c], src[0][c]); } break; + case TGSI_OPCODE_SLT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_set(pc, 1, dst[c], src[0][c], src[1][c]); + } + break; case TGSI_OPCODE_SUB: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) -- cgit v1.2.3 From b4f7463585071236d633e4c857dbbdf67b03dc94 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 12:55:06 +1000 Subject: nv50: FLR/FRC --- src/gallium/drivers/nv50/nv50_program.c | 37 +++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index fa16c7522e1..448062e767f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -15,8 +15,6 @@ /* ABS * ARL * DST - const(1.0) - * FLR - * FRC * LIT * POW * SWZ @@ -629,6 +627,24 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, free_temp(pc, dst); } +static void +emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + set_long(pc, inst); + inst[0] = 0xa0000000; /* cvt */ + inst[1] |= (6 << 29); /* cvt */ + inst[1] |= 0x08000000; /* integer mode */ + inst[1] |= 0x04000000; /* 32 bit */ + inst[1] |= ((0x1 << 3)) << 14; /* .rn */ + inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + + emit(pc, inst); +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { @@ -716,6 +732,23 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, temp); break; + case TGSI_OPCODE_FLR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flr(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_FRC: + temp = alloc_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flr(pc, temp, src[0][c]); + emit_sub(pc, dst[c], src[0][c], temp); + } + free_temp(pc, temp); + break; case TGSI_OPCODE_LG2: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) -- cgit v1.2.3 From 68091b0c89310c309b668c9d6d80640dc6040ab7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 13:01:58 +1000 Subject: nv50: ABS --- src/gallium/drivers/nv50/nv50_program.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 448062e767f..7c2177d42db 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -12,8 +12,7 @@ #define NV50_SU_MAX_TEMP 64 -/* ABS - * ARL +/* ARL * DST - const(1.0) * LIT * POW @@ -670,6 +669,21 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + for (c = 0; c < 4; c++) { + unsigned inst[2] = { 0, 0 }; + + set_long(pc, inst); + inst[0] = 0xa0000000; /* cvt */ + inst[1] |= (6 << 29); /* cvt */ + inst[1] |= 0x04000000; /* 32 bit */ + inst[1] |= (1 << 14); /* src .f32 */ + inst[1] |= ((1 << 6) << 14); /* .abs */ + set_dst(pc, dst[c], inst); + set_src_0(pc, src[0][c], inst); + emit(pc, inst); + } + break; case TGSI_OPCODE_ADD: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) -- cgit v1.2.3 From 33e4d30d50344be26398a51365bea1be37487403 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 14:10:48 +1000 Subject: nv50: DST --- src/gallium/drivers/nv50/nv50_program.c | 61 ++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7c2177d42db..49a731842f7 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -13,18 +13,18 @@ #define NV50_SU_MAX_TEMP 64 /* ARL - * DST - const(1.0) - * LIT + * LIT - other buggery * POW - * SWZ + * SWZ - negation ARGH + * SAT * * MSB - Like MAD, but MUL+SUB * - Fuck it off, introduce a way to negate args for ops that * support it. * - * Need ability to specifiy driver IMMD values, like nv40 constant() - * * Look into inlining IMMD for ops other than MOV + * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, + * but can emit to P_TEMP first - then MOV later. NVIDIA does this */ struct nv50_reg { enum { @@ -145,6 +145,32 @@ kill_temp_temp(struct nv50_pc *pc) pc->temp_temp_nr = 0; } +static int +ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) +{ + pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * + sizeof(float)); + pc->immd_buf[(pc->immd_nr * 4) + 0] = x; + pc->immd_buf[(pc->immd_nr * 4) + 1] = x; + pc->immd_buf[(pc->immd_nr * 4) + 2] = x; + pc->immd_buf[(pc->immd_nr * 4) + 3] = x; + + return pc->immd_nr++; +} + +static struct nv50_reg * +alloc_immd(struct nv50_pc *pc, float f) +{ + struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); + unsigned hw; + + hw = ctor_immd(pc, f, 0, 0, 0); + r->type = P_IMMD; + r->hw = hw; + r->index = -1; + return r; +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -736,6 +762,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, temp); break; + case TGSI_OPCODE_DST: + { + struct nv50_reg *one = alloc_immd(pc, 1.0); + emit_mov(pc, dst[0], one); + emit_mul(pc, dst[1], src[0][1], src[1][1]); + emit_mov(pc, dst[2], src[0][2]); + emit_mov(pc, dst[3], src[1][3]); + FREE(one); + } + break; case TGSI_OPCODE_EX2: temp = alloc_temp(pc, NULL); for (c = 0; c < 4; c++) { @@ -886,17 +922,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) const struct tgsi_full_immediate *imm = &p.FullToken.FullImmediate; - pc->immd_nr++; - pc->immd_buf = realloc(pc->immd_buf, 4 * pc->immd_nr * - sizeof(float)); - pc->immd_buf[4 * (pc->immd_nr - 1) + 0] = - imm->u.ImmediateFloat32[0].Float; - pc->immd_buf[4 * (pc->immd_nr - 1) + 1] = - imm->u.ImmediateFloat32[1].Float; - pc->immd_buf[4 * (pc->immd_nr - 1) + 2] = - imm->u.ImmediateFloat32[2].Float; - pc->immd_buf[4 * (pc->immd_nr - 1) + 3] = - imm->u.ImmediateFloat32[3].Float; + ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); } break; case TGSI_TOKEN_TYPE_DECLARATION: -- cgit v1.2.3 From 34abb858e2aaef2c1a066a7cdb3e0376d6c9f6bd Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 15:21:05 +1000 Subject: nv50: handle 0/1 SWZ --- src/gallium/drivers/nv50/nv50_program.c | 56 +++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 49a731842f7..cad1d9d6795 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -15,7 +15,6 @@ /* ARL * LIT - other buggery * POW - * SWZ - negation ARGH * SAT * * MSB - Like MAD, but MUL+SUB @@ -189,32 +188,55 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) } static struct nv50_reg * -tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src) +tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) { - /* Handle swizzling */ + struct nv50_reg *r = NULL; + unsigned c; + + c = tgsi_util_get_full_src_register_extswizzle(src, chan); switch (c) { - case 0: c = src->SrcRegister.SwizzleX; break; - case 1: c = src->SrcRegister.SwizzleY; break; - case 2: c = src->SrcRegister.SwizzleZ; break; - case 3: c = src->SrcRegister.SwizzleW; break; + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (src->SrcRegister.File) { + case TGSI_FILE_INPUT: + r = &pc->attr[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_TEMPORARY: + r = &pc->temp[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_CONSTANT: + r = &pc->param[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_IMMEDIATE: + r = &pc->immd[src->SrcRegister.Index * 4 + c]; + break; + default: + assert(0); + break; + } + break; + case TGSI_EXTSWIZZLE_ZERO: + r = alloc_immd(pc, 0.0); + break; + case TGSI_EXTSWIZZLE_ONE: + r = alloc_immd(pc, 1.0); + break; default: assert(0); + break; } - switch (src->SrcRegister.File) { - case TGSI_FILE_INPUT: - return &pc->attr[src->SrcRegister.Index * 4 + c]; - case TGSI_FILE_TEMPORARY: - return &pc->temp[src->SrcRegister.Index * 4 + c]; - case TGSI_FILE_CONSTANT: - return &pc->param[src->SrcRegister.Index * 4 + c]; - case TGSI_FILE_IMMEDIATE: - return &pc->immd[src->SrcRegister.Index * 4 + c]; + switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { + case TGSI_UTIL_SIGN_KEEP: + break; default: + assert(0); break; } - return NULL; + return r; } static void -- cgit v1.2.3 From 688064236ba8b5997014493eb6c6e3fe0739813e Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 16:01:06 +1000 Subject: nv50: fixes + untested _SAT modifier --- src/gallium/drivers/nv50/nv50_program.c | 36 ++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index cad1d9d6795..9fcd7c36c61 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -15,7 +15,6 @@ /* ARL * LIT - other buggery * POW - * SAT * * MSB - Like MAD, but MUL+SUB * - Fuck it off, introduce a way to negate args for ops that @@ -679,8 +678,8 @@ emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { unsigned inst[2] = { 0, 0 }; - set_long(pc, inst); inst[0] = 0xa0000000; /* cvt */ + set_long(pc, inst); inst[1] |= (6 << 29); /* cvt */ inst[1] |= 0x08000000; /* integer mode */ inst[1] |= 0x04000000; /* 32 bit */ @@ -696,13 +695,14 @@ static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { const struct tgsi_full_instruction *inst = &tok->FullInstruction; - struct nv50_reg *dst[4], *src[3][4], *temp; - unsigned mask; + struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; + unsigned mask, sat; int i, c; NOUVEAU_ERR("insn %p\n", tok); mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; for (c = 0; c < 4; c++) { if (mask & (1 << c)) @@ -716,13 +716,20 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); } + if (sat) { + for (c = 0; c < 4; c++) { + rdst[c] = dst[c]; + dst[c] = temp_temp(pc); + } + } + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: for (c = 0; c < 4; c++) { unsigned inst[2] = { 0, 0 }; - set_long(pc, inst); inst[0] = 0xa0000000; /* cvt */ + set_long(pc, inst); inst[1] |= (6 << 29); /* cvt */ inst[1] |= 0x04000000; /* 32 bit */ inst[1] |= (1 << 14); /* src .f32 */ @@ -922,6 +929,25 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return FALSE; } + if (sat) { + for (c = 0; c < 4; c++) { + unsigned inst[2] = { 0, 0 }; + + if (!(mask & (1 << c))) + continue; + + inst[0] = 0xa0000000; /* cvt */ + set_long(pc, inst); + inst[1] |= (6 << 29); /* cvt */ + inst[1] |= 0x04000000; /* 32 bit */ + inst[1] |= (1 << 14); /* src .f32 */ + inst[1] |= ((1 << 5) << 14); /* .sat */ + set_dst(pc, rdst[c], inst); + set_src_0(pc, dst[c], inst); + emit(pc, inst); + } + } + kill_temp_temp(pc); return TRUE; } -- cgit v1.2.3 From ea4b09cbcbd9db82648ab30f18c0f46a66ab9f69 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 16:08:36 +1000 Subject: nv50: POW! --- src/gallium/drivers/nv50/nv50_program.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 9fcd7c36c61..79893a9ad4f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -14,15 +14,17 @@ /* ARL * LIT - other buggery - * POW * * MSB - Like MAD, but MUL+SUB * - Fuck it off, introduce a way to negate args for ops that * support it. * - * Look into inlining IMMD for ops other than MOV + * Look into inlining IMMD for ops other than MOV (make it general?) * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, * but can emit to P_TEMP first - then MOV later. NVIDIA does this + * + * Verify half-insns work where expected - and force disable them where they + * don't work - MUL has it forcibly disabled atm as it fixes POW.. */ struct nv50_reg { enum { @@ -489,6 +491,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, unsigned inst[2] = { 0, 0 }; inst[0] |= 0xc0000000; + set_long(pc, inst); check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, inst); @@ -870,6 +873,19 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mul(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_POW: + temp = alloc_temp(pc, NULL); + emit_flop(pc, 3, temp, src[0][0]); + emit_mul(pc, temp, temp, src[1][0]); + emit_preex2(pc, temp, temp); + emit_flop(pc, 6, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; case TGSI_OPCODE_RCP: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) -- cgit v1.2.3 From 686bc00c05094e8678747c111a6a70ad4b7063e3 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 16:51:07 +1000 Subject: nv50: oops, copy+pasto --- src/gallium/drivers/nv50/nv50_program.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 79893a9ad4f..b013435f990 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -151,9 +151,9 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * sizeof(float)); pc->immd_buf[(pc->immd_nr * 4) + 0] = x; - pc->immd_buf[(pc->immd_nr * 4) + 1] = x; - pc->immd_buf[(pc->immd_nr * 4) + 2] = x; - pc->immd_buf[(pc->immd_nr * 4) + 3] = x; + pc->immd_buf[(pc->immd_nr * 4) + 1] = y; + pc->immd_buf[(pc->immd_nr * 4) + 2] = z; + pc->immd_buf[(pc->immd_nr * 4) + 3] = w; return pc->immd_nr++; } -- cgit v1.2.3 From faa1c02546db00f69c66db18076b5b0ac86d7138 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 17:36:10 +1000 Subject: nv50: create emit_pow() - emit_lit() will need to use it --- src/gallium/drivers/nv50/nv50_program.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b013435f990..bc0a834aee6 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -627,6 +627,7 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } + /*XXX: inaccurate results.. why? */ #define ALLOW_SET_SWAP 0 @@ -694,6 +695,20 @@ emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } +static void +emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *v, struct nv50_reg *e) +{ + struct nv50_reg *temp = alloc_temp(pc, NULL); + + emit_flop(pc, 3, temp, v); + emit_mul(pc, temp, temp, e); + emit_preex2(pc, temp, temp); + emit_flop(pc, 6, dst, temp); + + free_temp(pc, temp); +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { @@ -875,10 +890,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) break; case TGSI_OPCODE_POW: temp = alloc_temp(pc, NULL); - emit_flop(pc, 3, temp, src[0][0]); - emit_mul(pc, temp, temp, src[1][0]); - emit_preex2(pc, temp, temp); - emit_flop(pc, 6, temp, temp); + emit_pow(pc, temp, src[0][0], src[1][0]); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; -- cgit v1.2.3 From fe90cc509f75772ce202930c934bade1d4b116c8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 17:42:00 +1000 Subject: nv50: obey per-source abs (TGSI_UTIL_SIGN_CLEAR) --- src/gallium/drivers/nv50/nv50_program.c | 175 +++++++++++++++++--------------- 1 file changed, 95 insertions(+), 80 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bc0a834aee6..fabd0164914 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -171,75 +171,6 @@ alloc_immd(struct nv50_pc *pc, float f) return r; } -static struct nv50_reg * -tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) -{ - switch (dst->DstRegister.File) { - case TGSI_FILE_TEMPORARY: - return &pc->temp[dst->DstRegister.Index * 4 + c]; - case TGSI_FILE_OUTPUT: - return &pc->result[dst->DstRegister.Index * 4 + c]; - case TGSI_FILE_NULL: - return NULL; - default: - break; - } - - return NULL; -} - -static struct nv50_reg * -tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) -{ - struct nv50_reg *r = NULL; - unsigned c; - - c = tgsi_util_get_full_src_register_extswizzle(src, chan); - switch (c) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch (src->SrcRegister.File) { - case TGSI_FILE_INPUT: - r = &pc->attr[src->SrcRegister.Index * 4 + c]; - break; - case TGSI_FILE_TEMPORARY: - r = &pc->temp[src->SrcRegister.Index * 4 + c]; - break; - case TGSI_FILE_CONSTANT: - r = &pc->param[src->SrcRegister.Index * 4 + c]; - break; - case TGSI_FILE_IMMEDIATE: - r = &pc->immd[src->SrcRegister.Index * 4 + c]; - break; - default: - assert(0); - break; - } - break; - case TGSI_EXTSWIZZLE_ZERO: - r = alloc_immd(pc, 0.0); - break; - case TGSI_EXTSWIZZLE_ONE: - r = alloc_immd(pc, 1.0); - break; - default: - assert(0); - break; - } - - switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { - case TGSI_UTIL_SIGN_KEEP: - break; - default: - assert(0); - break; - } - - return r; -} - static void emit(struct nv50_pc *pc, unsigned *inst) { @@ -709,6 +640,98 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, free_temp(pc, temp); } +static void +emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] = 0xa0000000; /* cvt */ + set_long(pc, inst); + inst[1] |= (6 << 29); /* cvt */ + inst[1] |= 0x04000000; /* 32 bit */ + inst[1] |= (1 << 14); /* src .f32 */ + inst[1] |= ((1 << 6) << 14); /* .abs */ + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + + emit(pc, inst); +} + +static struct nv50_reg * +tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) +{ + switch (dst->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + return &pc->temp[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_OUTPUT: + return &pc->result[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_NULL: + return NULL; + default: + break; + } + + return NULL; +} + +static struct nv50_reg * +tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) +{ + struct nv50_reg *r = NULL; + struct nv50_reg *temp; + unsigned c; + + c = tgsi_util_get_full_src_register_extswizzle(src, chan); + switch (c) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (src->SrcRegister.File) { + case TGSI_FILE_INPUT: + r = &pc->attr[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_TEMPORARY: + r = &pc->temp[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_CONSTANT: + r = &pc->param[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_IMMEDIATE: + r = &pc->immd[src->SrcRegister.Index * 4 + c]; + break; + default: + assert(0); + break; + } + break; + case TGSI_EXTSWIZZLE_ZERO: + r = alloc_immd(pc, 0.0); + break; + case TGSI_EXTSWIZZLE_ONE: + r = alloc_immd(pc, 1.0); + break; + default: + assert(0); + break; + } + + switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { + case TGSI_UTIL_SIGN_KEEP: + break; + case TGSI_UTIL_SIGN_CLEAR: + temp = temp_temp(pc); + emit_abs(pc, temp, r); + r = temp; + break; + default: + assert(0); + break; + } + + return r; +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { @@ -744,17 +767,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: for (c = 0; c < 4; c++) { - unsigned inst[2] = { 0, 0 }; - - inst[0] = 0xa0000000; /* cvt */ - set_long(pc, inst); - inst[1] |= (6 << 29); /* cvt */ - inst[1] |= 0x04000000; /* 32 bit */ - inst[1] |= (1 << 14); /* src .f32 */ - inst[1] |= ((1 << 6) << 14); /* .abs */ - set_dst(pc, dst[c], inst); - set_src_0(pc, src[0][c], inst); - emit(pc, inst); + if (!(mask & (1 << c))) + continue; + emit_abs(pc, dst[c], src[0][c]); } break; case TGSI_OPCODE_ADD: -- cgit v1.2.3 From 01e36eb531dfb4b1b3fd38d3fc00c6770833b5ea Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sat, 7 Jun 2008 19:54:04 +1000 Subject: nv50: LIT - sort-of *somehow* we have the exact same bug here as on nv4x, the difference being on nv4x the hw actually has a LIT opcode.. NVIDIA doesn't have the bug on either arch FWIW. --- src/gallium/drivers/nv50/nv50_program.c | 36 +++++++++++++++++++++++++++++++-- src/gallium/drivers/nv50/nv50_vbo.c | 1 - 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index fabd0164914..186eea5e13b 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -13,7 +13,6 @@ #define NV50_SU_MAX_TEMP 64 /* ARL - * LIT - other buggery * * MSB - Like MAD, but MUL+SUB * - Fuck it off, introduce a way to negate args for ops that @@ -23,6 +22,8 @@ * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, * but can emit to P_TEMP first - then MOV later. NVIDIA does this * + * Hmmm.. what happens if we have src1+src2 both consts.. ouch ! + * * Verify half-insns work where expected - and force disable them where they * don't work - MUL has it forcibly disabled atm as it fixes POW.. */ @@ -164,7 +165,7 @@ alloc_immd(struct nv50_pc *pc, float f) struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); unsigned hw; - hw = ctor_immd(pc, f, 0, 0, 0); + hw = ctor_immd(pc, f, 0, 0, 0) * 4; r->type = P_IMMD; r->hw = hw; r->index = -1; @@ -657,6 +658,34 @@ emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } +static void +emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) +{ + struct nv50_reg *one = alloc_immd(pc, 1.0); + struct nv50_reg *zero = alloc_immd(pc, 0.0); + struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); + struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); + struct nv50_reg *tmp[4]; + + emit_mov(pc, dst[0], one); + emit_mov(pc, dst[3], one); + + tmp[0] = temp_temp(pc); + emit_minmax(pc, 4, dst[1], src[0], zero); + set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); + + tmp[1] = temp_temp(pc); + emit_minmax(pc, 4, tmp[1], src[1], zero); + + tmp[3] = temp_temp(pc); + emit_minmax(pc, 4, tmp[3], src[3], neg128); + emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + + emit_pow(pc, dst[2], tmp[1], tmp[3]); + emit_mov(pc, dst[2], zero); + set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -861,6 +890,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } free_temp(pc, temp); break; + case TGSI_OPCODE_LIT: + emit_lit(pc, &dst[0], &src[0][0]); + break; case TGSI_OPCODE_LG2: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f086c762582..badbef53df5 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -35,7 +35,6 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, struct nv50_context *nv50 = nv50_context(pipe); nv50_state_validate(nv50); - NOUVEAU_ERR("unimplemented\n"); BEGIN_RING(tesla, 0x142c, 1); OUT_RING (0); -- cgit v1.2.3 From 9a37a56c8ab8c64bdadb1e1e807f885d6a5e3121 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 8 Jun 2008 11:23:06 +1000 Subject: nv50: obey writemask in a couple of places --- src/gallium/drivers/nv50/nv50_program.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 186eea5e13b..7206e5d280a 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -12,7 +12,7 @@ #define NV50_SU_MAX_TEMP 64 -/* ARL +/* ARL - gallium craps itself on progs/vp/arl.txt * * MSB - Like MAD, but MUL+SUB * - Fuck it off, introduce a way to negate args for ops that @@ -856,10 +856,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) case TGSI_OPCODE_DST: { struct nv50_reg *one = alloc_immd(pc, 1.0); - emit_mov(pc, dst[0], one); - emit_mul(pc, dst[1], src[0][1], src[1][1]); - emit_mov(pc, dst[2], src[0][2]); - emit_mov(pc, dst[3], src[1][3]); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + if (mask & (1 << 1)) + emit_mul(pc, dst[1], src[0][1], src[1][1]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], src[0][2]); + if (mask & (1 << 3)) + emit_mov(pc, dst[3], src[1][3]); FREE(one); } break; @@ -891,6 +895,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) free_temp(pc, temp); break; case TGSI_OPCODE_LIT: + /*XXX: writemask */ emit_lit(pc, &dst[0], &src[0][0]); break; case TGSI_OPCODE_LG2: @@ -989,12 +994,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) break; case TGSI_OPCODE_XPD: temp = alloc_temp(pc, NULL); - emit_mul(pc, temp, src[0][2], src[1][1]); - emit_msb(pc, dst[0], src[0][1], src[1][2], temp); - emit_mul(pc, temp, src[0][0], src[1][2]); - emit_msb(pc, dst[1], src[0][2], src[1][0], temp); - emit_mul(pc, temp, src[0][1], src[1][0]); - emit_msb(pc, dst[2], src[0][0], src[1][1], temp); + if (mask & (1 << 0)) { + emit_mul(pc, temp, src[0][2], src[1][1]); + emit_msb(pc, dst[0], src[0][1], src[1][2], temp); + } + if (mask & (1 << 1)) { + emit_mul(pc, temp, src[0][0], src[1][2]); + emit_msb(pc, dst[1], src[0][2], src[1][0], temp); + } + if (mask & (1 << 2)) { + emit_mul(pc, temp, src[0][1], src[1][0]); + emit_msb(pc, dst[2], src[0][0], src[1][1], temp); + } free_temp(pc, temp); break; case TGSI_OPCODE_END: -- cgit v1.2.3 From 6ca31aa55eada38ad8b8a249b9e79a83b9784c04 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 8 Jun 2008 12:27:36 +1000 Subject: nv50: hehe, damage from tex-surfaces.. surface_fill() reenabled now :) --- src/gallium/drivers/nv50/nv50_miptree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 6c838998fd0..be85c3fd5ce 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -21,7 +21,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { struct pipe_winsys *ws = pscreen->winsys; struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); - + NOUVEAU_ERR("unimplemented\n"); mt->base = *pt; @@ -29,7 +29,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->base.screen = pscreen; mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, - 512*32*4); + pt->width[0] * pt->cpp * pt->height[0]); if (!mt->buffer) { FREE(mt); return NULL; -- cgit v1.2.3 From afcaeaa0e4dc3ced40621c76304a2c0c5a3ab403 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 8 Jun 2008 14:12:01 +1000 Subject: nv50: note a critical bug --- src/gallium/drivers/nv50/nv50_program.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7206e5d280a..b151b540bf4 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -26,6 +26,9 @@ * * Verify half-insns work where expected - and force disable them where they * don't work - MUL has it forcibly disabled atm as it fixes POW.. + * + * FUCK! watch dst==src vectors, can overwrite components that are needed. + * ie. SUB R0, R0.yzxw, R0 */ struct nv50_reg { enum { -- cgit v1.2.3 From 34a039ae7b158cacb5b20d91067e9d6458d30a56 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 8 Jun 2008 15:51:54 +1000 Subject: nv50: fix src1 & src2 == const --- src/gallium/drivers/nv50/nv50_program.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b151b540bf4..936a8ef9b35 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -22,8 +22,6 @@ * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, * but can emit to P_TEMP first - then MOV later. NVIDIA does this * - * Hmmm.. what happens if we have src1+src2 both consts.. ouch ! - * * Verify half-insns work where expected - and force disable them where they * don't work - MUL has it forcibly disabled atm as it fixes POW.. * @@ -391,8 +389,16 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - set_cseg(pc, src, inst); - inst[0] |= 0x00800000; + assert(!(inst[0] & 0x00800000)); + if (inst[0] & 0x01000000) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else { + set_cseg(pc, src, inst); + inst[0] |= 0x00800000; + } } alloc_reg(pc, src); @@ -411,8 +417,16 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - set_cseg(pc, src, inst); - inst[0] |= 0x01000000; + assert(!(inst[0] & 0x01000000)); + if (inst[0] & 0x00800000) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else { + set_cseg(pc, src, inst); + inst[0] |= 0x01000000; + } } alloc_reg(pc, src); -- cgit v1.2.3 From 776e9581d16fc0fd28058fbcd879756fd5d40b96 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 9 Jun 2008 13:07:38 +1000 Subject: nv50: delayed write of fragprog result regs until end of program --- src/gallium/drivers/nv50/nv50_program.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 936a8ef9b35..1733cf97b76 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -27,6 +27,8 @@ * * FUCK! watch dst==src vectors, can overwrite components that are needed. * ie. SUB R0, R0.yzxw, R0 + * + * NV50_PROG* -> PIPE_SHADER* */ struct nv50_reg { enum { @@ -1198,11 +1200,13 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0; i < pc->result_nr; i++) { for (c = 0; c < 4; c++) { - if (pc->p->type == NV50_PROG_FRAGMENT) + if (pc->p->type == NV50_PROG_FRAGMENT) { pc->result[i*4+c].type = P_TEMP; - else + pc->result[i*4+c].hw = -1; + } else { pc->result[i*4+c].type = P_RESULT; - pc->result[i*4+c].hw = rid++; + pc->result[i*4+c].hw = rid++; + } pc->result[i*4+c].index = i; } } @@ -1281,6 +1285,14 @@ nv50_program_tx(struct nv50_program *p) } } + if (p->type == NV50_PROG_FRAGMENT) { + struct nv50_reg out; + + out.type = P_TEMP; + for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) + emit_mov(pc, &out, &pc->result[out.hw]); + } + p->immd_nr = pc->immd_nr * 4; p->immd = pc->immd_buf; -- cgit v1.2.3 From b5bbf09c42a9d563984fad875ced5c4814033a3d Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 9 Jun 2008 13:09:55 +1000 Subject: nv50: remove NV50_PROG_{VERTEX,FRAGMENT} --- src/gallium/drivers/nv50/nv50_program.c | 19 +++++++++++-------- src/gallium/drivers/nv50/nv50_state.c | 4 ++-- src/gallium/drivers/nv50/nv50_state.h | 5 +---- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 1733cf97b76..6b8ba1bdfe5 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -28,7 +28,10 @@ * FUCK! watch dst==src vectors, can overwrite components that are needed. * ie. SUB R0, R0.yzxw, R0 * - * NV50_PROG* -> PIPE_SHADER* + * Things to check with renouveau: + * SGE/SLT with needed src0/1 swap + * FP attr/result assignment - how? + * FP/VP constbuf usage */ struct nv50_reg { enum { @@ -289,7 +292,7 @@ set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) if (src->type == P_IMMD) { inst[1] |= (NV50_CB_PMISC << 22); } else { - if (pc->p->type == NV50_PROG_VERTEX) + if (pc->p->type == PIPE_SHADER_VERTEX) inst[1] |= (NV50_CB_PVP << 22); else inst[1] |= (NV50_CB_PFP << 22); @@ -1144,7 +1147,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (!pc->attr) goto out_err; - if (pc->p->type == NV50_PROG_FRAGMENT) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { iv = alloc_temp(pc, NULL); aid++; } @@ -1153,7 +1156,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) struct nv50_reg *a = &pc->attr[i*4]; for (c = 0; c < 4; c++) { - if (pc->p->type == NV50_PROG_FRAGMENT) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { struct nv50_reg *at = alloc_temp(pc, NULL); pc->attr[i*4+c].type = at->type; @@ -1168,7 +1171,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - if (pc->p->type != NV50_PROG_FRAGMENT) + if (pc->p->type != PIPE_SHADER_FRAGMENT) continue; emit_interp(pc, iv, iv, iv, FALSE); @@ -1200,7 +1203,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (i = 0; i < pc->result_nr; i++) { for (c = 0; c < 4; c++) { - if (pc->p->type == NV50_PROG_FRAGMENT) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { pc->result[i*4+c].type = P_TEMP; pc->result[i*4+c].hw = -1; } else { @@ -1285,7 +1288,7 @@ nv50_program_tx(struct nv50_program *p) } } - if (p->type == NV50_PROG_FRAGMENT) { + if (p->type == PIPE_SHADER_FRAGMENT) { struct nv50_reg out; out.type = P_TEMP; @@ -1314,7 +1317,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) * NOT immd - otherwise it's fucked fucked fucked */ p->insns[p->insns_nr - 1] |= 0x00000001; - if (p->type == NV50_PROG_VERTEX) { + if (p->type == PIPE_SHADER_VERTEX) { for (i = 0; i < p->insns_nr; i++) NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); } else { diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 48d09c296bb..774117326ad 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -331,7 +331,7 @@ nv50_vp_state_create(struct pipe_context *pipe, struct nv50_program *p = CALLOC_STRUCT(nv50_program); p->pipe = *cso; - p->type = NV50_PROG_VERTEX; + p->type = PIPE_SHADER_VERTEX; tgsi_scan_shader(p->pipe.tokens, &p->info); return (void *)p; } @@ -361,7 +361,7 @@ nv50_fp_state_create(struct pipe_context *pipe, struct nv50_program *p = CALLOC_STRUCT(nv50_program); p->pipe = *cso; - p->type = NV50_PROG_FRAGMENT; + p->type = PIPE_SHADER_FRAGMENT; tgsi_scan_shader(p->pipe.tokens, &p->info); return (void *)p; } diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h index dd352b65856..d568bf610d5 100644 --- a/src/gallium/drivers/nv50/nv50_state.h +++ b/src/gallium/drivers/nv50/nv50_state.h @@ -15,10 +15,7 @@ struct nv50_program { struct tgsi_shader_info info; boolean translated; - enum { - NV50_PROG_VERTEX, - NV50_PROG_FRAGMENT - } type; + unsigned type; unsigned *insns; unsigned insns_nr; -- cgit v1.2.3 From 713ef6ccd2590bd866598bb6d4f646e9ec29ba78 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 9 Jun 2008 13:31:42 +1000 Subject: nv50: use emit_flop() instead of building RCP manually on interp --- src/gallium/drivers/nv50/nv50_program.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 6b8ba1bdfe5..592435585d9 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1176,12 +1176,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) emit_interp(pc, iv, iv, iv, FALSE); tmp = alloc_temp(pc, NULL); - { - unsigned inst[2] = { 0, 0 }; - inst[0] = 0x90000000; - inst[0] |= (tmp->hw << 2); - emit(pc, inst); - } + emit_flop(pc, 0, tmp, iv); emit_interp(pc, &a[0], &a[0], tmp, TRUE); emit_interp(pc, &a[1], &a[1], tmp, TRUE); emit_interp(pc, &a[2], &a[2], tmp, TRUE); -- cgit v1.2.3 From 6d0f7ea95475009ee17862786469f7b9a34a797f Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 9 Jun 2008 16:26:51 +1000 Subject: nv50: note some things discovered during renouveau session --- src/gallium/drivers/nv50/nv50_program.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 592435585d9..0274545131e 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -28,10 +28,29 @@ * FUCK! watch dst==src vectors, can overwrite components that are needed. * ie. SUB R0, R0.yzxw, R0 * + * MOV dst, -src + * "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0) + * mov dst, tmp + * * Things to check with renouveau: - * SGE/SLT with needed src0/1 swap * FP attr/result assignment - how? - * FP/VP constbuf usage + * attrib + * - 0x16bc maps vp output onto fp hpos + * - 0x16c0 maps vp output onto fp col0 + * result + * - colr always 0-3 + * - depr always 4 + * 0x16bc->0x16e8 --> some binding between vp/fp regs + * 0x16b8 --> VP output count + * + * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 + * "MOV rcol.x, fcol.y" = 0x00000004 + * 0x19a8 --> as above but 0x00000100 and 0x00000000 + * - 0x00100000 used when KIL used + * 0x196c --> as above but 0x00000011 and 0x00000000 + * + * 0x1988 --> 0xXXNNNNNN + * - XX == FP high something */ struct nv50_reg { enum { -- cgit v1.2.3 From 31f6a24b59b0ac18e04336d2e3cbaa643358c88a Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 9 Jun 2008 16:35:07 +1000 Subject: nv50: support the other TGSI_UTIL_SIGN modes --- src/gallium/drivers/nv50/nv50_program.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 0274545131e..44914f41b66 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -727,6 +727,22 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); } +static void +emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + set_long(pc, inst); + inst[0] |= 0xa0000000; /* delta */ + inst[1] |= (7 << 29); /* delta */ + inst[1] |= 0x04000000; /* negate arg0? probably not */ + inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + + emit(pc, inst); +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -794,6 +810,17 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) emit_abs(pc, temp, r); r = temp; break; + case TGSI_UTIL_SIGN_TOGGLE: + temp = temp_temp(pc); + emit_neg(pc, temp, r); + r = temp; + break; + case TGSI_UTIL_SIGN_SET: + temp = temp_temp(pc); + emit_abs(pc, temp, r); + emit_neg(pc, temp, r); + r = temp; + break; default: assert(0); break; -- cgit v1.2.3 From 51ea3aae03154046316b814053f7493bdb10c853 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 9 Jun 2008 16:41:08 +1000 Subject: nv50: fix SGE/SLT when sources need swapping --- src/gallium/drivers/nv50/nv50_program.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 44914f41b66..21341619de4 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -601,24 +601,17 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } -/*XXX: inaccurate results.. why? */ -#define ALLOW_SET_SWAP 0 - static void emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { unsigned inst[2] = { 0, 0 }; -#if ALLOW_SET_SWAP - unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; -#endif + unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; struct nv50_reg *rdst; -#if ALLOW_SET_SWAP assert(c_op <= 7); if (check_swap_src_0_1(pc, &src0, &src1)) c_op = inv_cop[c_op]; -#endif rdst = dst; if (dst->type != P_TEMP) -- cgit v1.2.3 From 454394e749feca5ac00e7a270e6ca5529581d228 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 9 Jun 2008 17:36:22 +1000 Subject: nv50: quick hack so progs/fp/fp-tri works for the moment --- src/gallium/drivers/nv50/nv50_context.h | 4 ++-- src/gallium/drivers/nv50/nv50_program.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index c4a8a4c064e..69c9dba675b 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -24,8 +24,8 @@ fprintf(stderr, "nouveau: "fmt, ##args); /* Constant buffer assignment */ -#define NV50_CB_PMISC 0 -#define NV50_CB_PVP 1 +#define NV50_CB_PMISC 1 +#define NV50_CB_PVP 0 #define NV50_CB_PFP 2 #define NV50_CB_PGP 3 #define NV50_CB_TIC 4 diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 21341619de4..843e0362514 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1369,7 +1369,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) for (i = 0; i < p->immd_nr; i++) { BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); + OUT_RING ((NV50_CB_PMISC << 0) | (i << 8)); OUT_RING (fui(p->immd[i])); } } -- cgit v1.2.3 From 21e688e0a3faeef18b07c4d860bd71cc6e3ddf4a Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 9 Jun 2008 18:01:03 +1000 Subject: nv50: LRP --- src/gallium/drivers/nv50/nv50_program.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 843e0362514..33822a1a788 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -85,7 +85,7 @@ struct nv50_pc { float *immd_buf; int immd_nr; - struct nv50_reg *temp_temp[8]; + struct nv50_reg *temp_temp[16]; unsigned temp_temp_nr; }; @@ -154,7 +154,7 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r) static struct nv50_reg * temp_temp(struct nv50_pc *pc) { - if (pc->temp_temp_nr >= 8) + if (pc->temp_temp_nr >= 16) assert(0); pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); @@ -966,6 +966,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_flop(pc, 3, dst[c], src[0][c]); } break; + case TGSI_OPCODE_LRP: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + /*XXX: we can do better than this */ + temp = alloc_temp(pc, NULL); + emit_neg(pc, temp, src[0][c]); + emit_mad(pc, temp, temp, src[2][c], src[2][c]); + emit_mad(pc, dst[c], src[0][c], src[1][c], temp); + free_temp(pc, temp); + } + break; case TGSI_OPCODE_MAD: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) -- cgit v1.2.3 From 7b7df34781844c39998d60bbb60880d960da3fb1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 9 Jun 2008 19:50:17 +1000 Subject: nv50: various fixes + SCS --- src/gallium/drivers/nv50/nv50_program.c | 89 ++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 33822a1a788..d69eb4b86be 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -601,6 +601,21 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, inst); } +static void +emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + unsigned inst[2] = { 0, 0 }; + + inst[0] |= 0xb0000000; + + set_dst(pc, dst, inst); + set_src_0(pc, src, inst); + set_long(pc, inst); + inst[1] |= (6 << 29); + + emit(pc, inst); +} + static void emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) @@ -693,7 +708,8 @@ emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) } static void -emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) +emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, + struct nv50_reg **src) { struct nv50_reg *one = alloc_immd(pc, 1.0); struct nv50_reg *zero = alloc_immd(pc, 0.0); @@ -701,23 +717,34 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); struct nv50_reg *tmp[4]; - emit_mov(pc, dst[0], one); - emit_mov(pc, dst[3], one); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + + if (mask & (1 << 3)) + emit_mov(pc, dst[3], one); + + if (mask & (3 << 1)) { + if (mask & (1 << 1)) + tmp[0] = dst[1]; + else + tmp[0] = temp_temp(pc); + emit_minmax(pc, 4, tmp[0], src[0], zero); + } - tmp[0] = temp_temp(pc); - emit_minmax(pc, 4, dst[1], src[0], zero); - set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); + if (mask & (1 << 2)) { + set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); - tmp[1] = temp_temp(pc); - emit_minmax(pc, 4, tmp[1], src[1], zero); + tmp[1] = temp_temp(pc); + emit_minmax(pc, 4, tmp[1], src[1], zero); - tmp[3] = temp_temp(pc); - emit_minmax(pc, 4, tmp[3], src[3], neg128); - emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + tmp[3] = temp_temp(pc); + emit_minmax(pc, 4, tmp[3], src[3], neg128); + emit_minmax(pc, 5, tmp[3], tmp[3], pos128); - emit_pow(pc, dst[2], tmp[1], tmp[3]); - emit_mov(pc, dst[2], zero); - set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); + emit_pow(pc, dst[2], tmp[1], tmp[3]); + emit_mov(pc, dst[2], zero); + set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); + } } static void @@ -870,10 +897,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_COS: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 5, temp, temp); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 5, dst[c], src[0][c]); + emit_mov(pc, dst[c], temp); } break; case TGSI_OPCODE_DP3: @@ -930,11 +960,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) break; case TGSI_OPCODE_EX2: temp = alloc_temp(pc, NULL); + emit_preex2(pc, temp, src[0][0]); + emit_flop(pc, 6, temp, temp); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_preex2(pc, temp, src[0][c]); - emit_flop(pc, 6, dst[c], temp); + emit_mov(pc, dst[c], temp); } free_temp(pc, temp); break; @@ -956,14 +987,15 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) free_temp(pc, temp); break; case TGSI_OPCODE_LIT: - /*XXX: writemask */ - emit_lit(pc, &dst[0], &src[0][0]); + emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: + temp = alloc_temp(pc, NULL); + emit_flop(pc, 3, temp, src[0][0]); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 3, dst[c], src[0][c]); + emit_mov(pc, dst[c], temp); } break; case TGSI_OPCODE_LRP: @@ -1027,16 +1059,24 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 0, dst[c], src[0][c]); + emit_flop(pc, 0, dst[c], src[0][0]); } break; case TGSI_OPCODE_RSQ: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 2, dst[c], src[0][c]); + emit_flop(pc, 2, dst[c], src[0][0]); } break; + case TGSI_OPCODE_SCS: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + if (mask & (1 << 0)) + emit_flop(pc, 5, dst[0], temp); + if (mask & (1 << 1)) + emit_flop(pc, 4, dst[1], temp); + break; case TGSI_OPCODE_SGE: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -1045,10 +1085,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_SIN: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 4, temp, temp); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_flop(pc, 4, dst[c], src[0][c]); + emit_mov(pc, dst[c], temp); } break; case TGSI_OPCODE_SLT: -- cgit v1.2.3 From 7e9f6e290da23d7963857241b541e00c1fcf20dc Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 11 Jun 2008 13:00:20 +1000 Subject: nv50: move some magics --- src/gallium/drivers/nv50/nv50_program.c | 16 ++++++++++++++-- src/gallium/drivers/nv50/nv50_screen.c | 9 --------- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index d69eb4b86be..934b67fb711 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1492,14 +1492,26 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(7, 2); + so = so_new(64, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, 0x198c, 1); + so_method(so, tesla, 0x1904, 4); + so_data (so, 0x01040404); /* p: 0x01000404 */ + so_data (so, 0x00000004); + so_data (so, 0x00000000); + so_data (so, 0x00000000); + so_method(so, tesla, 0x16bc, 2); + so_data (so, 0x03020100); + so_data (so, 0x07060504); + so_method(so, tesla, 0x1988, 2); + so_data (so, 0x08040404); /* p: 0x0f000401 */ so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x16ac, 2); + so_data (so, 0x00000008); /* p: 0x00000004 */ + so_data (so, 0x00000004); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ so_emit(nv50->screen->nvws, so); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 6c0810a9cfe..2417bf80013 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -225,15 +225,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 0x54); so_method(so, screen->tesla, 0x13ac, 1); so_data (so, 1); - so_method(so, screen->tesla, 0x16bc, 2); - so_data (so, 0x03020100); - so_data (so, 0x07060504); - so_method(so, screen->tesla, 0x1988, 2); - so_data (so, 0x08040404); - so_data (so, 0x00000004); - so_method(so, screen->tesla, 0x16ac, 2); - so_data (so, 8); - so_data (so, 4); so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); -- cgit v1.2.3 From bce558b37cde4be5c70117f49a2570f2988e5849 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 11 Jun 2008 13:15:23 +1000 Subject: Revert "nv50: move some magics" This reverts commit 0a38de30429d3075fc6dfc9ff3729c5ca11f0c2f. --- src/gallium/drivers/nv50/nv50_program.c | 16 ++-------------- src/gallium/drivers/nv50/nv50_screen.c | 9 +++++++++ 2 files changed, 11 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 934b67fb711..d69eb4b86be 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1492,26 +1492,14 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(64, 2); + so = so_new(7, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, 0x1904, 4); - so_data (so, 0x01040404); /* p: 0x01000404 */ - so_data (so, 0x00000004); - so_data (so, 0x00000000); - so_data (so, 0x00000000); - so_method(so, tesla, 0x16bc, 2); - so_data (so, 0x03020100); - so_data (so, 0x07060504); - so_method(so, tesla, 0x1988, 2); - so_data (so, 0x08040404); /* p: 0x0f000401 */ + so_method(so, tesla, 0x198c, 1); so_data (so, p->cfg.high_temp); - so_method(so, tesla, 0x16ac, 2); - so_data (so, 0x00000008); /* p: 0x00000004 */ - so_data (so, 0x00000004); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ so_emit(nv50->screen->nvws, so); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 2417bf80013..6c0810a9cfe 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -225,6 +225,15 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 0x54); so_method(so, screen->tesla, 0x13ac, 1); so_data (so, 1); + so_method(so, screen->tesla, 0x16bc, 2); + so_data (so, 0x03020100); + so_data (so, 0x07060504); + so_method(so, screen->tesla, 0x1988, 2); + so_data (so, 0x08040404); + so_data (so, 0x00000004); + so_method(so, screen->tesla, 0x16ac, 2); + so_data (so, 8); + so_data (so, 4); so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); -- cgit v1.2.3 From 585ae74d87f3d04a4b5b7c068b865292afd1a16b Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 11 Jun 2008 13:24:32 +1000 Subject: nv50: move magics take 2 --- src/gallium/drivers/nv50/nv50_program.c | 13 +++++++++++-- src/gallium/drivers/nv50/nv50_screen.c | 9 --------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index d69eb4b86be..b73003123d8 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1492,13 +1492,22 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(7, 2); + so = so_new(64, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_method(so, tesla, 0x198c, 1); + so_method(so, tesla, 0x1904, 4); + so_data (so, 0x01040404); /* p: 0x01000404 */ + so_data (so, 0x00000004); + so_data (so, 0x00000000); + so_data (so, 0x00000000); + so_method(so, tesla, 0x16bc, 2); /*XXX: fixme */ + so_data (so, 0x03020100); + so_data (so, 0x07060504); + so_method(so, tesla, 0x1988, 2); + so_data (so, 0x08040404); /* p: 0x0f000401 */ so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 6c0810a9cfe..2417bf80013 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -225,15 +225,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 0x54); so_method(so, screen->tesla, 0x13ac, 1); so_data (so, 1); - so_method(so, screen->tesla, 0x16bc, 2); - so_data (so, 0x03020100); - so_data (so, 0x07060504); - so_method(so, screen->tesla, 0x1988, 2); - so_data (so, 0x08040404); - so_data (so, 0x00000004); - so_method(so, screen->tesla, 0x16ac, 2); - so_data (so, 8); - so_data (so, 4); so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); -- cgit v1.2.3 From 19a1e9015e4ae429ab26e56848104fa209590338 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 11 Jun 2008 14:59:19 +1000 Subject: nv50: rename nv50_state.h to nv50_program.h --- src/gallium/drivers/nv50/nv50_context.h | 2 +- src/gallium/drivers/nv50/nv50_program.c | 1 - src/gallium/drivers/nv50/nv50_program.h | 37 +++++++++++++++++++++++++++++++++ src/gallium/drivers/nv50/nv50_state.c | 1 - src/gallium/drivers/nv50/nv50_state.h | 37 --------------------------------- src/gallium/drivers/nv50/nv50_vbo.c | 1 - 6 files changed, 38 insertions(+), 41 deletions(-) create mode 100644 src/gallium/drivers/nv50/nv50_program.h delete mode 100644 src/gallium/drivers/nv50/nv50_state.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 69c9dba675b..72d859c4685 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -15,8 +15,8 @@ struct nv50_screen *ctx = nv50->screen #include "nouveau/nouveau_push.h" -#include "nv50_state.h" #include "nv50_screen.h" +#include "nv50_program.h" #define NOUVEAU_ERR(fmt, args...) \ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b73003123d8..2118d8ef85f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -8,7 +8,6 @@ #include "tgsi/util/tgsi_util.h" #include "nv50_context.h" -#include "nv50_state.h" #define NV50_SU_MAX_TEMP 64 diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h new file mode 100644 index 00000000000..dd5aed799aa --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -0,0 +1,37 @@ +#ifndef __NV50_PROGRAM_H__ +#define __NV50_PROGRAM_H__ + +#include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" + +struct nv50_program_data { + int index; + int component; + float value; +}; + +struct nv50_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + boolean translated; + + unsigned type; + unsigned *insns; + unsigned insns_nr; + + struct pipe_buffer *buffer; + + float *immd; + unsigned immd_nr; + + struct nouveau_resource *data; + + struct { + unsigned high_temp; + struct { + unsigned attr[2]; + } vp; + } cfg; +}; + +#endif diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 774117326ad..ed6cca9a0dd 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -3,7 +3,6 @@ #include "pipe/p_util.h" #include "nv50_context.h" -#include "nv50_state.h" #include "nouveau/nouveau_stateobj.h" diff --git a/src/gallium/drivers/nv50/nv50_state.h b/src/gallium/drivers/nv50/nv50_state.h deleted file mode 100644 index d568bf610d5..00000000000 --- a/src/gallium/drivers/nv50/nv50_state.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef __NV50_STATE_H__ -#define __NV50_STATE_H__ - -#include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" - -struct nv50_program_data { - int index; - int component; - float value; -}; - -struct nv50_program { - struct pipe_shader_state pipe; - struct tgsi_shader_info info; - boolean translated; - - unsigned type; - unsigned *insns; - unsigned insns_nr; - - struct pipe_buffer *buffer; - - float *immd; - unsigned immd_nr; - - struct nouveau_resource *data; - - struct { - unsigned high_temp; - struct { - unsigned attr[2]; - } vp; - } cfg; -}; - -#endif diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index badbef53df5..140d60cc9a6 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -3,7 +3,6 @@ #include "pipe/p_util.h" #include "nv50_context.h" -#include "nv50_state.h" static INLINE unsigned nv50_prim(unsigned mode) -- cgit v1.2.3 From f50e78e83cf7bda3537ac82de863096d829f13ee Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 11 Jun 2008 15:28:41 +1000 Subject: nv50: get vp working again, fp is broken regardless somehow.. g8x sucks :) --- src/gallium/drivers/nv50/nv50_context.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 72d859c4685..19ec492eb3f 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -24,8 +24,8 @@ fprintf(stderr, "nouveau: "fmt, ##args); /* Constant buffer assignment */ -#define NV50_CB_PMISC 1 -#define NV50_CB_PVP 0 +#define NV50_CB_PMISC 0 +#define NV50_CB_PVP 1 #define NV50_CB_PFP 2 #define NV50_CB_PGP 3 #define NV50_CB_TIC 4 -- cgit v1.2.3 From 40137ea2631a0c8158f99ae30ca90ed038b72076 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 12:16:43 +1000 Subject: nv50: carry instructions around in nv50_program_exec, not a flat array --- src/gallium/drivers/nv50/nv50_program.c | 482 +++++++++++++++++--------------- src/gallium/drivers/nv50/nv50_program.h | 22 +- 2 files changed, 268 insertions(+), 236 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2118d8ef85f..3df38487617 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -196,168 +196,175 @@ alloc_immd(struct nv50_pc *pc, float f) return r; } +static struct nv50_program_exec * +exec(struct nv50_pc *pc) +{ + struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec); + + return e; +} + static void -emit(struct nv50_pc *pc, unsigned *inst) +emit(struct nv50_pc *pc, struct nv50_program_exec *e) { struct nv50_program *p = pc->p; - if (inst[0] & 1) { - p->insns_nr += 2; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); - } else { - p->insns_nr += 1; - p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); - memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); - } + if (p->exec_tail) + p->exec_tail->next = e; + if (!p->exec_head) + p->exec_head = e; + p->exec_tail = e; + p->exec_size += (e->inst[0] & 1) ? 2 : 1; } -static INLINE void set_long(struct nv50_pc *, unsigned *); +static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); static boolean -is_long(unsigned *inst) +is_long(struct nv50_program_exec *e) { - if (inst[0] & 1) + if (e->inst[0] & 1) return TRUE; return FALSE; } static boolean -is_immd(unsigned *inst) +is_immd(struct nv50_program_exec *e) { - if (is_long(inst) && (inst[1] & 3) == 3) + if (is_long(e) && (e->inst[1] & 3) == 3) return TRUE; return FALSE; } static INLINE void -set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) +set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, + struct nv50_program_exec *e) { - set_long(pc, inst); - inst[1] &= ~((0x1f << 7) | (0x3 << 12)); - inst[1] |= (pred << 7) | (idx << 12); + set_long(pc, e); + e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); + e->inst[1] |= (pred << 7) | (idx << 12); } static INLINE void -set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) +set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, + struct nv50_program_exec *e) { - set_long(pc, inst); - inst[1] &= ~((0x3 << 4) | (1 << 6)); - inst[1] |= (idx << 4) | (on << 6); + set_long(pc, e); + e->inst[1] &= ~((0x3 << 4) | (1 << 6)); + e->inst[1] |= (idx << 4) | (on << 6); } static INLINE void -set_long(struct nv50_pc *pc, unsigned *inst) +set_long(struct nv50_pc *pc, struct nv50_program_exec *e) { - if (is_long(inst)) + if (is_long(e)) return; - inst[0] |= 1; - set_pred(pc, 0xf, 0, inst); - set_pred_wr(pc, 0, 0, inst); + e->inst[0] |= 1; + set_pred(pc, 0xf, 0, e); + set_pred_wr(pc, 0, 0, e); } static INLINE void -set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) +set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) { if (dst->type == P_RESULT) { - set_long(pc, inst); - inst[1] |= 0x00000008; + set_long(pc, e); + e->inst[1] |= 0x00000008; } alloc_reg(pc, dst); - inst[0] |= (dst->hw << 2); + e->inst[0] |= (dst->hw << 2); } static INLINE void -set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) +set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ - set_long(pc, inst); + set_long(pc, e); /*XXX: can't be predicated - bits overlap.. catch cases where both * are required and avoid them. */ - set_pred(pc, 0, 0, inst); - set_pred_wr(pc, 0, 0, inst); + set_pred(pc, 0, 0, e); + set_pred_wr(pc, 0, 0, e); - inst[1] |= 0x00000002 | 0x00000001; - inst[0] |= (val & 0x3f) << 16; - inst[1] |= (val >> 6) << 2; + e->inst[1] |= 0x00000002 | 0x00000001; + e->inst[0] |= (val & 0x3f) << 16; + e->inst[1] |= (val >> 6) << 2; } static void emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0x80000000; - set_dst(pc, dst, inst); + e->inst[0] |= 0x80000000; + set_dst(pc, dst, e); alloc_reg(pc, iv); - inst[0] |= (iv->hw << 9); + e->inst[0] |= (iv->hw << 9); alloc_reg(pc, src); - inst[0] |= (src->hw << 16); + e->inst[0] |= (src->hw << 16); if (noperspective) - inst[0] |= (1 << 25); + e->inst[0] |= (1 << 25); - emit(pc, inst); + emit(pc, e); } static void -set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +set_cseg(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { - set_long(pc, inst); + set_long(pc, e); if (src->type == P_IMMD) { - inst[1] |= (NV50_CB_PMISC << 22); + e->inst[1] |= (NV50_CB_PMISC << 22); } else { if (pc->p->type == PIPE_SHADER_VERTEX) - inst[1] |= (NV50_CB_PVP << 22); + e->inst[1] |= (NV50_CB_PVP << 22); else - inst[1] |= (NV50_CB_PFP << 22); + e->inst[1] |= (NV50_CB_PFP << 22); } } static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0x10000000; + e->inst[0] |= 0x10000000; - set_dst(pc, dst, inst); + set_dst(pc, dst, e); if (dst->type != P_RESULT && src->type == P_IMMD) { - set_immd(pc, src, inst); + set_immd(pc, src, e); /*XXX: 32-bit, but steals part of "half" reg space - need to * catch and handle this case if/when we do half-regs */ - inst[0] |= 0x00008000; + e->inst[0] |= 0x00008000; } else if (src->type == P_IMMD || src->type == P_CONST) { - set_long(pc, inst); - set_cseg(pc, src, inst); - inst[0] |= (src->hw << 9); - inst[1] |= 0x20000000; /* src0 const? */ + set_long(pc, e); + set_cseg(pc, src, e); + e->inst[0] |= (src->hw << 9); + e->inst[1] |= 0x20000000; /* src0 const? */ } else { if (src->type == P_ATTR) { - set_long(pc, inst); - inst[1] |= 0x00200000; + set_long(pc, e); + e->inst[1] |= 0x00200000; } alloc_reg(pc, src); - inst[0] |= (src->hw << 9); + e->inst[0] |= (src->hw << 9); } /* We really should support "half" instructions here at some point, * but I don't feel confident enough about them yet. */ - set_long(pc, inst); - if (is_long(inst) && !is_immd(inst)) { - inst[1] |= 0x04000000; /* 32-bit */ - inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ + set_long(pc, e); + if (is_long(e) && !is_immd(e)) { + e->inst[1] |= 0x04000000; /* 32-bit */ + e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ } - emit(pc, inst); + emit(pc, e); } static boolean @@ -385,11 +392,11 @@ check_swap_src_0_1(struct nv50_pc *pc, } static void -set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { - set_long(pc, inst); - inst[1] |= 0x00200000; + set_long(pc, e); + e->inst[1] |= 0x00200000; } else if (src->type == P_CONST || src->type == P_IMMD) { struct nv50_reg *temp = temp_temp(pc); @@ -399,11 +406,11 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) } alloc_reg(pc, src); - inst[0] |= (src->hw << 9); + e->inst[0] |= (src->hw << 9); } static void -set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { struct nv50_reg *temp = temp_temp(pc); @@ -412,26 +419,26 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(inst[0] & 0x00800000)); - if (inst[0] & 0x01000000) { + assert(!(e->inst[0] & 0x00800000)); + if (e->inst[0] & 0x01000000) { struct nv50_reg *temp = temp_temp(pc); emit_mov(pc, temp, src); src = temp; } else { - set_cseg(pc, src, inst); - inst[0] |= 0x00800000; + set_cseg(pc, src, e); + e->inst[0] |= 0x00800000; } } alloc_reg(pc, src); - inst[0] |= (src->hw << 16); + e->inst[0] |= (src->hw << 16); } static void -set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) +set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { - set_long(pc, inst); + set_long(pc, e); if (src->type == P_ATTR) { struct nv50_reg *temp = temp_temp(pc); @@ -440,186 +447,186 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(inst[0] & 0x01000000)); - if (inst[0] & 0x00800000) { + assert(!(e->inst[0] & 0x01000000)); + if (e->inst[0] & 0x00800000) { struct nv50_reg *temp = temp_temp(pc); emit_mov(pc, temp, src); src = temp; } else { - set_cseg(pc, src, inst); - inst[0] |= 0x01000000; + set_cseg(pc, src, e); + e->inst[0] |= 0x01000000; } } alloc_reg(pc, src); - inst[1] |= (src->hw << 14); + e->inst[1] |= (src->hw << 14); } static void emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xc0000000; - set_long(pc, inst); + e->inst[0] |= 0xc0000000; + set_long(pc, e); check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_1(pc, src1, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); - emit(pc, inst); + emit(pc, e); } static void emit_add(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xb0000000; + e->inst[0] |= 0xb0000000; check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - if (is_long(inst)) - set_src_2(pc, src1, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + if (is_long(e)) + set_src_2(pc, src1, e); else - set_src_1(pc, src1, inst); + set_src_1(pc, src1, e); - emit(pc, inst); + emit(pc, e); } static void emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - set_long(pc, inst); - inst[0] |= 0xb0000000; - inst[1] |= (sub << 29); + set_long(pc, e); + e->inst[0] |= 0xb0000000; + e->inst[1] |= (sub << 29); check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_1(pc, src1, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); - emit(pc, inst); + emit(pc, e); } static void emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xb0000000; + e->inst[0] |= 0xb0000000; - set_long(pc, inst); + set_long(pc, e); if (check_swap_src_0_1(pc, &src0, &src1)) - inst[1] |= 0x04000000; + e->inst[1] |= 0x04000000; else - inst[1] |= 0x08000000; + e->inst[1] |= 0x08000000; - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_2(pc, src1, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_2(pc, src1, e); - emit(pc, inst); + emit(pc, e); } static void emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xe0000000; + e->inst[0] |= 0xe0000000; check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_1(pc, src1, inst); - set_src_2(pc, src2, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + set_src_2(pc, src2, e); - emit(pc, inst); + emit(pc, e); } static void emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xe0000000; - set_long(pc, inst); - inst[1] |= 0x08000000; /* src0 * src1 - src2 */ + e->inst[0] |= 0xe0000000; + set_long(pc, e); + e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */ check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_1(pc, src1, inst); - set_src_2(pc, src2, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + set_src_2(pc, src2, e); - emit(pc, inst); + emit(pc, e); } static void emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0x90000000; + e->inst[0] |= 0x90000000; if (sub) { - set_long(pc, inst); - inst[1] |= (sub << 29); + set_long(pc, e); + e->inst[1] |= (sub << 29); } - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); + set_dst(pc, dst, e); + set_src_0(pc, src, e); - emit(pc, inst); + emit(pc, e); } static void emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xb0000000; + e->inst[0] |= 0xb0000000; - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); - set_long(pc, inst); - inst[1] |= (6 << 29) | 0x00004000; + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_long(pc, e); + e->inst[1] |= (6 << 29) | 0x00004000; - emit(pc, inst); + emit(pc, e); } static void emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - inst[0] |= 0xb0000000; + e->inst[0] |= 0xb0000000; - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); - set_long(pc, inst); - inst[1] |= (6 << 29); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_long(pc, e); + e->inst[1] |= (6 << 29); - emit(pc, inst); + emit(pc, e); } static void emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; struct nv50_reg *rdst; @@ -632,26 +639,27 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, dst = alloc_temp(pc, NULL); /* set.u32 */ - set_long(pc, inst); - inst[0] |= 0xb0000000; - inst[1] |= (3 << 29); - inst[1] |= (c_op << 14); + set_long(pc, e); + e->inst[0] |= 0xb0000000; + e->inst[1] |= (3 << 29); + e->inst[1] |= (c_op << 14); /*XXX: breaks things, .u32 by default? * decuda will disasm as .u16 and use .lo/.hi regs, but this * doesn't seem to match what the hw actually does. inst[1] |= 0x04000000; << breaks things.. .u32 by default? */ - set_dst(pc, dst, inst); - set_src_0(pc, src0, inst); - set_src_1(pc, src1, inst); - emit(pc, inst); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + emit(pc, e); /* cvt.f32.u32 */ - inst[0] = 0xa0000001; - inst[1] = 0x64014780; - set_dst(pc, rdst, inst); - set_src_0(pc, dst, inst); - emit(pc, inst); + e = exec(pc); + e->inst[0] = 0xa0000001; + e->inst[1] = 0x64014780; + set_dst(pc, rdst, e); + set_src_0(pc, dst, e); + emit(pc, e); if (dst != rdst) free_temp(pc, dst); @@ -660,19 +668,19 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, static void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; - - inst[0] = 0xa0000000; /* cvt */ - set_long(pc, inst); - inst[1] |= (6 << 29); /* cvt */ - inst[1] |= 0x08000000; /* integer mode */ - inst[1] |= 0x04000000; /* 32 bit */ - inst[1] |= ((0x1 << 3)) << 14; /* .rn */ - inst[1] |= (1 << 14); /* src .f32 */ - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); - - emit(pc, inst); + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x08000000; /* integer mode */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ + e->inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); } static void @@ -692,18 +700,18 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, static void emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; - - inst[0] = 0xa0000000; /* cvt */ - set_long(pc, inst); - inst[1] |= (6 << 29); /* cvt */ - inst[1] |= 0x04000000; /* 32 bit */ - inst[1] |= (1 << 14); /* src .f32 */ - inst[1] |= ((1 << 6) << 14); /* .abs */ - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); - - emit(pc, inst); + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= (1 << 14); /* src .f32 */ + e->inst[1] |= ((1 << 6) << 14); /* .abs */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); } static void @@ -731,7 +739,7 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, } if (mask & (1 << 2)) { - set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); + set_pred_wr(pc, 1, 0, pc->p->exec_tail); tmp[1] = temp_temp(pc); emit_minmax(pc, 4, tmp[1], src[1], zero); @@ -742,24 +750,24 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, emit_pow(pc, dst[2], tmp[1], tmp[3]); emit_mov(pc, dst[2], zero); - set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); + set_pred(pc, 3, 0, pc->p->exec_tail); } } static void emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e = exec(pc); - set_long(pc, inst); - inst[0] |= 0xa0000000; /* delta */ - inst[1] |= (7 << 29); /* delta */ - inst[1] |= 0x04000000; /* negate arg0? probably not */ - inst[1] |= (1 << 14); /* src .f32 */ - set_dst(pc, dst, inst); - set_src_0(pc, src, inst); + set_long(pc, e); + e->inst[0] |= 0xa0000000; /* delta */ + e->inst[1] |= (7 << 29); /* delta */ + e->inst[1] |= 0x04000000; /* negate arg0? probably not */ + e->inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); - emit(pc, inst); + emit(pc, e); } static struct nv50_reg * @@ -1132,20 +1140,21 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (sat) { for (c = 0; c < 4; c++) { - unsigned inst[2] = { 0, 0 }; + struct nv50_program_exec *e; if (!(mask & (1 << c))) continue; - - inst[0] = 0xa0000000; /* cvt */ - set_long(pc, inst); - inst[1] |= (6 << 29); /* cvt */ - inst[1] |= 0x04000000; /* 32 bit */ - inst[1] |= (1 << 14); /* src .f32 */ - inst[1] |= ((1 << 5) << 14); /* .sat */ - set_dst(pc, rdst[c], inst); - set_src_0(pc, dst[c], inst); - emit(pc, inst); + e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= (1 << 14); /* src .f32 */ + e->inst[1] |= ((1 << 5) << 14); /* .sat */ + set_dst(pc, rdst[c], e); + set_src_0(pc, dst[c], e); + emit(pc, e); } } @@ -1384,6 +1393,9 @@ nv50_program_tx(struct nv50_program *p) emit_mov(pc, &out, &pc->result[out.hw]); } + assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); + pc->p->exec_tail->inst[1] |= 0x00000001; + p->immd_nr = pc->immd_nr * 4; p->immd = pc->immd_buf; @@ -1397,20 +1409,17 @@ out_cleanup: static void nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) { - int i; + struct nv50_program_exec *e; if (nv50_program_tx(p) == FALSE) assert(0); - /* *not* sufficient, it's fine if last inst is long and - * NOT immd - otherwise it's fucked fucked fucked */ - p->insns[p->insns_nr - 1] |= 0x00000001; - if (p->type == PIPE_SHADER_VERTEX) { - for (i = 0; i < p->insns_nr; i++) - NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); - } else { - for (i = 0; i < p->insns_nr; i++) - NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); + e = p->exec_head; + while (e) { + NOUVEAU_ERR("0x%08x\n", e->inst[0]); + if (is_long(e)) + NOUVEAU_ERR("0x%08x\n", e->inst[1]); + e = e->next; } p->translated = TRUE; @@ -1432,12 +1441,18 @@ static void nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { struct pipe_winsys *ws = nv50->pipe.winsys; - void *map; + struct nv50_program_exec *e; + unsigned *map; if (!p->buffer) - p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); + p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(map, p->insns, p->insns_nr * 4); + for (e = p->exec_head; e; e = e->next) { + *(map++) = e->inst[0]; + if (is_long(e)) + *(map++) = e->inst[1]; + } ws->buffer_unmap(ws, p->buffer); } @@ -1519,11 +1534,14 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) { struct pipe_winsys *ws = nv50->pipe.winsys; - if (p->insns_nr) { - if (p->insns) - FREE(p->insns); - p->insns_nr = 0; + while (p->exec_head) { + struct nv50_program_exec *e = p->exec_head; + + p->exec_head = e->next; + FREE(e); } + p->exec_tail = NULL; + p->exec_size = 0; if (p->buffer) pipe_buffer_reference(ws, &p->buffer, NULL); diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index dd5aed799aa..6dafe56fbb6 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -10,22 +10,36 @@ struct nv50_program_data { float value; }; +struct nv50_program_exec { + struct nv50_program_exec *next; + + unsigned inst[2]; + struct { + int index; + unsigned mask; + unsigned shift; + } param; +}; + struct nv50_program { struct pipe_shader_state pipe; struct tgsi_shader_info info; boolean translated; unsigned type; - unsigned *insns; - unsigned insns_nr; + struct nv50_program_exec *exec_head; + struct nv50_program_exec *exec_tail; + unsigned exec_size; + struct nv50_program_data *data; + struct nouveau_resource *data_res; + unsigned data_nr; + unsigned data_start; struct pipe_buffer *buffer; float *immd; unsigned immd_nr; - struct nouveau_resource *data; - struct { unsigned high_temp; struct { -- cgit v1.2.3 From 1c7489bd7e5391136d0f2e68b467de89eb2d2bfc Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 12:26:43 +1000 Subject: nv50: allow relocating a shader's constants at upload time --- src/gallium/drivers/nv50/nv50_program.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 3df38487617..6c5aeb237db 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -201,6 +201,7 @@ exec(struct nv50_pc *pc) { struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec); + e->param.index = -1; return e; } @@ -311,7 +312,8 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, } static void -set_cseg(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, + struct nv50_program_exec *e) { set_long(pc, e); if (src->type == P_IMMD) { @@ -322,6 +324,10 @@ set_cseg(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) else e->inst[1] |= (NV50_CB_PFP << 22); } + + e->param.index = src->hw; + e->param.shift = s; + e->param.mask = m << (s % 32); } static void @@ -342,8 +348,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) } else if (src->type == P_IMMD || src->type == P_CONST) { set_long(pc, e); - set_cseg(pc, src, e); - e->inst[0] |= (src->hw << 9); + set_data(pc, src, 0x7f, 9, e); e->inst[1] |= 0x20000000; /* src0 const? */ } else { if (src->type == P_ATTR) { @@ -426,7 +431,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) emit_mov(pc, temp, src); src = temp; } else { - set_cseg(pc, src, e); + set_data(pc, src, 0x7f, 16, e); e->inst[0] |= 0x00800000; } } @@ -454,7 +459,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) emit_mov(pc, temp, src); src = temp; } else { - set_cseg(pc, src, e); + set_data(pc, src, 0x7f, 32+14, e); e->inst[0] |= 0x01000000; } } @@ -1449,6 +1454,12 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); for (e = p->exec_head; e; e = e->next) { + if (e->param.index >= 0) { + e->inst[e->param.shift / 32] &= ~e->param.mask; + e->inst[e->param.shift / 32] |= (e->param.index << + e->param.shift); + } + *(map++) = e->inst[0]; if (is_long(e)) *(map++) = e->inst[1]; -- cgit v1.2.3 From aea1669ff221f97682f0be6a60632e40c2739d09 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 12:39:35 +1000 Subject: nv50: use constbuf segment 0 for everything - I can't make the others work.. --- src/gallium/drivers/nv50/nv50_program.c | 70 ++++++++++++++++++++++++++------- src/gallium/drivers/nv50/nv50_program.h | 11 +----- 2 files changed, 57 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 6c5aeb237db..cbbecafb028 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -10,6 +10,7 @@ #include "nv50_context.h" #define NV50_SU_MAX_TEMP 64 +#define NV50_PROGRAM_DUMP /* ARL - gallium craps itself on progs/vp/arl.txt * @@ -316,6 +317,9 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, struct nv50_program_exec *e) { set_long(pc, e); +#if 1 + e->inst[1] |= (1 << 22); +#else if (src->type == P_IMMD) { e->inst[1] |= (NV50_CB_PMISC << 22); } else { @@ -324,6 +328,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, else e->inst[1] |= (NV50_CB_PFP << 22); } +#endif e->param.index = src->hw; e->param.shift = s; @@ -1335,7 +1340,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) } if (pc->immd_nr) { - int rid = 0; + int rid = pc->param_nr * 4; pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); if (!pc->immd) @@ -1401,6 +1406,7 @@ nv50_program_tx(struct nv50_program *p) assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); pc->p->exec_tail->inst[1] |= 0x00000001; + p->param_nr = pc->param_nr * 4; p->immd_nr = pc->immd_nr * 4; p->immd = pc->immd_buf; @@ -1418,26 +1424,47 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) if (nv50_program_tx(p) == FALSE) assert(0); - - e = p->exec_head; - while (e) { - NOUVEAU_ERR("0x%08x\n", e->inst[0]); - if (is_long(e)) - NOUVEAU_ERR("0x%08x\n", e->inst[1]); - e = e->next; - } - p->translated = TRUE; } static void nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) { + struct nouveau_winsys *nvws = nv50->screen->nvws; + struct pipe_winsys *ws = nv50->pipe.winsys; + unsigned nr = p->param_nr + p->immd_nr; int i; + if (!p->data && nr) { + struct nouveau_resource *heap = nv50->screen->vp_data_heap; + + if (nvws->res_alloc(heap, nr, p, &p->data)) { + while (heap->next && heap->size < nr) { + struct nv50_program *evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, nr, p, &p->data)) + assert(0); + } + } + + if (p->param_nr) { + float *map = ws->buffer_map(ws, nv50->constbuf[p->type], + PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < p->param_nr; i++) { + BEGIN_RING(tesla, 0x0f00, 2); + OUT_RING ((NV50_CB_PMISC << 0) | + ((p->data->start + i) << 8)); + OUT_RING (fui(map[i])); + } + ws->buffer_unmap(ws, nv50->constbuf[p->type]); + } + for (i = 0; i < p->immd_nr; i++) { BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((NV50_CB_PMISC << 0) | (i << 8)); + OUT_RING ((NV50_CB_PMISC << 0) | + ((p->data_start + p->param_nr + i) << 8)); OUT_RING (fui(p->immd[i])); } } @@ -1452,17 +1479,30 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (!p->buffer) p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); - map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - for (e = p->exec_head; e; e = e->next) { - if (e->param.index >= 0) { + if (p->data && p->data->start != p->data_start) { + for (e = p->exec_head; e; e = e->next) { + if (e->param.index < 0) + continue; e->inst[e->param.shift / 32] &= ~e->param.mask; e->inst[e->param.shift / 32] |= (e->param.index << e->param.shift); } + p->data_start = p->data->start; + } + + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + for (e = p->exec_head; e; e = e->next) { +#ifdef NV50_PROGRAM_DUMP + NOUVEAU_ERR("0x%08x\n", e->inst[0]); +#endif *(map++) = e->inst[0]; - if (is_long(e)) + if (is_long(e)) { +#ifdef NV50_PROGRAM_DUMP + NOUVEAU_ERR("0x%08x\n", e->inst[1]); +#endif *(map++) = e->inst[1]; + } } ws->buffer_unmap(ws, p->buffer); } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 6dafe56fbb6..d143ae97979 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -4,12 +4,6 @@ #include "pipe/p_state.h" #include "tgsi/util/tgsi_scan.h" -struct nv50_program_data { - int index; - int component; - float value; -}; - struct nv50_program_exec { struct nv50_program_exec *next; @@ -30,15 +24,14 @@ struct nv50_program { struct nv50_program_exec *exec_head; struct nv50_program_exec *exec_tail; unsigned exec_size; - struct nv50_program_data *data; - struct nouveau_resource *data_res; - unsigned data_nr; + struct nouveau_resource *data; unsigned data_start; struct pipe_buffer *buffer; float *immd; unsigned immd_nr; + unsigned param_nr; struct { unsigned high_temp; -- cgit v1.2.3 From ab3d55e2e3578db8deba84dcf47a024071486bd8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 13:11:41 +1000 Subject: nv50: more efficient const upload + fixes (fp/* works now!) --- src/gallium/drivers/nv50/nv50_program.c | 48 +++++++++++++++++++++------------ src/gallium/drivers/nv50/nv50_screen.c | 4 +-- 2 files changed, 33 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index cbbecafb028..4c41e5a7c24 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1420,20 +1420,35 @@ out_cleanup: static void nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) { - struct nv50_program_exec *e; - if (nv50_program_tx(p) == FALSE) assert(0); p->translated = TRUE; } +static void +nv50_program_upload_data(struct nv50_context *nv50, float *map, + unsigned start, unsigned count) +{ + while (count) { + unsigned nr = count > 2047 ? 2047 : count; + + BEGIN_RING(tesla, 0x00000f00, 1); + OUT_RING ((NV50_CB_PMISC << 0) | (start << 8)); + BEGIN_RING(tesla, 0x40000f04, nr); + OUT_RINGp (map, nr); + + map += nr; + start += nr; + count -= nr; + } +} + static void nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) { struct nouveau_winsys *nvws = nv50->screen->nvws; struct pipe_winsys *ws = nv50->pipe.winsys; unsigned nr = p->param_nr + p->immd_nr; - int i; if (!p->data && nr) { struct nouveau_resource *heap = nv50->screen->vp_data_heap; @@ -1452,20 +1467,15 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) if (p->param_nr) { float *map = ws->buffer_map(ws, nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < p->param_nr; i++) { - BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((NV50_CB_PMISC << 0) | - ((p->data->start + i) << 8)); - OUT_RING (fui(map[i])); - } + nv50_program_upload_data(nv50, map, p->data->start, + p->param_nr); ws->buffer_unmap(ws, nv50->constbuf[p->type]); } - for (i = 0; i < p->immd_nr; i++) { - BEGIN_RING(tesla, 0x0f00, 2); - OUT_RING ((NV50_CB_PMISC << 0) | - ((p->data_start + p->param_nr + i) << 8)); - OUT_RING (fui(p->immd[i])); + if (p->immd_nr) { + nv50_program_upload_data(nv50, p->immd, + p->data->start + p->param_nr, + p->immd_nr); } } @@ -1481,11 +1491,15 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (p->data && p->data->start != p->data_start) { for (e = p->exec_head; e; e = e->next) { + unsigned ei, ci; + if (e->param.index < 0) continue; - e->inst[e->param.shift / 32] &= ~e->param.mask; - e->inst[e->param.shift / 32] |= (e->param.index << - e->param.shift); + ei = e->param.shift >> 5; + ci = e->param.index + p->data->start; + + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << e->param.shift); } p->data_start = p->data->start; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 2417bf80013..5f10fe2e440 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -229,8 +229,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 8); /* Shared constant buffer */ - screen->constbuf = ws->buffer_create(ws, 0, 0, 256 * 4 * 4); - if (nvws->res_init(&screen->vp_data_heap, 0, 256)) { + screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4); + if (nvws->res_init(&screen->vp_data_heap, 0, 128)) { NOUVEAU_ERR("Error initialising constant buffer\n"); nv50_screen_destroy(&screen->pipe); return NULL; -- cgit v1.2.3 From f700d6be6335a4d4394296891f783687b6f2d4f2 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 13:50:56 +1000 Subject: nv50: remove some cruft, don't upload program unless really needed --- src/gallium/drivers/nv50/nv50_program.c | 9 ++++++- src/gallium/drivers/nv50/nv50_state_validate.c | 33 ++------------------------ 2 files changed, 10 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 4c41e5a7c24..f71aa19312f 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1484,10 +1484,13 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { struct pipe_winsys *ws = nv50->pipe.winsys; struct nv50_program_exec *e; + boolean upload = FALSE; unsigned *map; - if (!p->buffer) + if (!p->buffer) { p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); + upload = TRUE; + } if (p->data && p->data->start != p->data_start) { for (e = p->exec_head; e; e = e->next) { @@ -1503,8 +1506,12 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) } p->data_start = p->data->start; + upload = TRUE; } + if (!upload) + return FALSE; + map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); for (e = p->exec_head; e; e = e->next) { #ifdef NV50_PROGRAM_DUMP diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 05395c6df7c..5f2244e3e80 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -114,10 +114,10 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & NV50_NEW_ZSA) so_emit(nvws, nv50->zsa->so); - if (nv50->dirty & NV50_NEW_VERTPROG) + if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB)) nv50_vertprog_validate(nv50); - if (nv50->dirty & NV50_NEW_FRAGPROG) + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) @@ -168,35 +168,6 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(NULL, &so); } - if (nv50->dirty & NV50_NEW_VERTPROG_CB) { - so = so_new(4, 2); - so_method(so, tesla, 0x1280, 3); - so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0, - NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, - 0, 0); - so_reloc (so, nv50->constbuf[PIPE_SHADER_VERTEX], 0, - NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, - 0, 0); - so_data (so, (NV50_CB_PVP << 16) | 0x1000); - so_emit(nvws, so); - so_ref(NULL, &so); - } - - if (nv50->dirty & NV50_NEW_FRAGPROG_CB) { - so = so_new(4, 2); - so_method(so, tesla, 0x1280, 3); - so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0, - NOUVEAU_BO_HIGH | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, - 0, 0); - so_reloc (so, nv50->constbuf[PIPE_SHADER_FRAGMENT], 0, - NOUVEAU_BO_LOW | NOUVEAU_BO_RD | NOUVEAU_BO_VRAM, - 0, 0); - so_data (so, (NV50_CB_PFP << 16) | 0x1000); - so_emit(nvws, so); - so_ref(NULL, &so); - } - - if (nv50->dirty & NV50_NEW_ARRAYS) nv50_vbo_validate(nv50); -- cgit v1.2.3 From 163d9aa1fe33612e7806549e47b257b61ca5045e Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 13:59:51 +1000 Subject: nv50: support a couple more common VBO formats --- src/gallium/drivers/nv50/nv50_vbo.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 140d60cc9a6..74519489f7d 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -82,9 +82,15 @@ nv50_vbo_validate(struct nv50_context *nv50) &nv50->vtxbuf[ve->vertex_buffer_index]; switch (ve->src_format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + so_data(vtxfmt, 0x7e080000 | i); + break; case PIPE_FORMAT_R32G32B32_FLOAT: so_data(vtxfmt, 0x7e100000 | i); break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + so_data(vtxfmt, 0x24500000 | i); + break; default: { char fmt[128]; -- cgit v1.2.3 From 027ed25c12f69b39e205d3bbd26b68e9a02bea81 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 14:15:38 +1000 Subject: nv50: draw_elements() - inline only for the moment --- src/gallium/drivers/nv50/nv50_vbo.c | 96 ++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 74519489f7d..586a6c49de0 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -52,16 +52,110 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, return TRUE; } +static INLINE void +nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + BEGIN_RING(tesla, 0x15e8, 1); + OUT_RING (map[0]); + map++; + count--; + } + + while (count) { + unsigned nr = count > 2046 ? 2046 : count; + int i; + + BEGIN_RING(tesla, 0x400015f0, nr >> 1); + for (i = 0; i < nr; i += 2) + OUT_RING ((map[1] << 16) | map[0]); + + count -= nr; + map += nr; + } +} + +static INLINE void +nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + BEGIN_RING(tesla, 0x15e8, 1); + OUT_RING (map[0]); + map++; + count--; + } + + while (count) { + unsigned nr = count > 2046 ? 2046 : count; + int i; + + BEGIN_RING(tesla, 0x400015f0, nr >> 1); + for (i = 0; i < nr; i += 2) + OUT_RING ((map[1] << 16) | map[0]); + + count -= nr; + map += nr; + } +} + +static INLINE void +nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map, + unsigned start, unsigned count) +{ + map += start; + + while (count) { + unsigned nr = count > 2047 ? 2047 : count; + + BEGIN_RING(tesla, 0x400015e8, nr); + OUT_RINGp (map, nr); + + count -= nr; + map += nr; + } +} + boolean nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { struct nv50_context *nv50 = nv50_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); nv50_state_validate(nv50); - NOUVEAU_ERR("unimplemented\n"); + BEGIN_RING(tesla, 0x142c, 1); + OUT_RING (0); + BEGIN_RING(tesla, 0x142c, 1); + OUT_RING (0); + + BEGIN_RING(tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (nv50_prim(mode)); + switch (indexSize) { + case 1: + nv50_draw_elements_inline_u08(nv50, map, start, count); + break; + case 2: + nv50_draw_elements_inline_u16(nv50, map, start, count); + break; + case 4: + nv50_draw_elements_inline_u32(nv50, map, start, count); + break; + default: + assert(0); + } + BEGIN_RING(tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (0); + + pipe->flush(pipe, 0, NULL); return TRUE; } -- cgit v1.2.3 From 619549a6377a58d54c9cf55f8863beed56b09566 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 14:21:28 +1000 Subject: nv50: valgrind complaint --- src/gallium/drivers/nv50/nv50_program.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index f71aa19312f..af5ca84c8e7 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -146,8 +146,10 @@ static void free_temp(struct nv50_pc *pc, struct nv50_reg *r) { if (r->index == -1) { - FREE(pc->r_temp[r->hw]); - pc->r_temp[r->hw] = NULL; + unsigned hw = r->hw; + + FREE(pc->r_temp[hw]); + pc->r_temp[hw] = NULL; } } -- cgit v1.2.3 From 4bde3a72ab0b4246cd779a6d1e2a72943f25c0f6 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 14:40:20 +1000 Subject: nv50: fix blend cso --- src/gallium/drivers/nv50/nv50_state.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index ed6cca9a0dd..b56a3992bc4 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -29,12 +29,12 @@ nv50_blend_state_create(struct pipe_context *pipe, so_data(so, 1); so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5); so_data (so, nvgl_blend_eqn(cso->rgb_func)); - so_data (so, nvgl_blend_func(cso->rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->rgb_dst_factor)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_dst_factor)); so_data (so, nvgl_blend_eqn(cso->alpha_func)); - so_data (so, nvgl_blend_func(cso->alpha_src_factor)); + so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_src_factor)); so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1); - so_data (so, nvgl_blend_func(cso->alpha_dst_factor)); + so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_dst_factor)); } if (cso->logicop_enable == 0 ) { -- cgit v1.2.3 From 4d520e0b76cf54ae8eb5464afc126c6cc5c6bfdc Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 14:47:17 +1000 Subject: nv50: another vbo format --- src/gallium/drivers/nv50/nv50_vbo.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 586a6c49de0..7a1c2f24ef9 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -182,6 +182,9 @@ nv50_vbo_validate(struct nv50_context *nv50) case PIPE_FORMAT_R32G32B32_FLOAT: so_data(vtxfmt, 0x7e100000 | i); break; + case PIPE_FORMAT_R32G32_FLOAT: + so_data(vtxfmt, 0x7e200000 | i); + break; case PIPE_FORMAT_R8G8B8A8_UNORM: so_data(vtxfmt, 0x24500000 | i); break; -- cgit v1.2.3 From 2fdeb4d5a5cc8b93bf885ba646e3a29a68c755ed Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 12 Jun 2008 23:49:26 +1000 Subject: nv50: comment on a so-far unseen bug --- src/gallium/drivers/nv50/nv50_program.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index af5ca84c8e7..39597c54814 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -22,6 +22,9 @@ * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, * but can emit to P_TEMP first - then MOV later. NVIDIA does this * + * In ops such as ADD it's possible to construct a bad opcode in the !is_long() + * case, if the emit_src() causes the inst to suddenly become long. + * * Verify half-insns work where expected - and force disable them where they * don't work - MUL has it forcibly disabled atm as it fixes POW.. * -- cgit v1.2.3 From 101305f37f7268354a50b825bcb66894e4a0b777 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 13 Jun 2008 10:58:27 +1000 Subject: nv50: flag to indicate to winsys we want a surface for use as a zeta buffer NVIDIA love to make life difficult.. we need different flags in PTEs for zeta.. yay.. not. --- src/gallium/drivers/nouveau/nouveau_winsys.h | 1 + src/gallium/drivers/nv50/nv50_miptree.c | 13 ++++++++++++- src/gallium/drivers/nv50/nv50_screen.c | 4 ++++ src/gallium/drivers/nv50/nv50_state.c | 4 ++-- src/gallium/drivers/nv50/nv50_state_validate.c | 10 ++++++++++ 5 files changed, 29 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 07a41dcf4a1..13810460bfe 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -18,6 +18,7 @@ #define NOUVEAU_CAP_HW_IDXBUF (0xbeef0001) #define NOUVEAU_BUFFER_USAGE_TEXTURE (1 << 16) +#define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17) struct nouveau_winsys { struct nouveau_context *nv; diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index be85c3fd5ce..459e0ff9dd6 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -21,6 +21,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { struct pipe_winsys *ws = pscreen->winsys; struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); + unsigned usage; NOUVEAU_ERR("unimplemented\n"); @@ -28,7 +29,17 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->base.refcount = 1; mt->base.screen = pscreen; - mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + usage = PIPE_BUFFER_USAGE_PIXEL; + switch (pt->format) { + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + usage |= NOUVEAU_BUFFER_USAGE_ZETA; + break; + default: + break; + } + + mt->buffer = ws->buffer_create(ws, 256, usage, pt->width[0] * pt->cpp * pt->height[0]); if (!mt->buffer) { FREE(mt); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 5f10fe2e440..d76cce9f791 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -284,6 +284,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 0xffffffff); } + so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_emit(nvws, so); so_ref(NULL, &so); nvws->push_flush(nvws, 0, NULL); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index b56a3992bc4..176ebff5da4 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -247,8 +247,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, struct nouveau_stateobj *so = so_new(64, 0); so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); - so_data (so, 0); //cso->depth.writemask ? 1 : 0); - if (0 && cso->depth.enabled) { + so_data (so, cso->depth.writemask ? 1 : 0); + if (cso->depth.enabled) { so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); so_data (so, 1); so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1); diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 5f2244e3e80..a3f399a139f 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -70,6 +70,9 @@ nv50_state_validate_fb(struct nv50_context *nv50) case PIPE_FORMAT_Z24S8_UNORM: so_data(so, 0x16); break; + case PIPE_FORMAT_Z16_UNORM: + so_data(so, 0x15); + break; default: { char fmt[128]; @@ -81,6 +84,13 @@ nv50_state_validate_fb(struct nv50_context *nv50) } so_data(so, 0x00000040); so_data(so, 0x00000000); + + so_method(so, tesla, 0x1538, 1); + so_data (so, 1); + so_method(so, tesla, 0x1228, 3); + so_data (so, fb->zsbuf->width); + so_data (so, fb->zsbuf->height); + so_data (so, 0x00010001); } so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); -- cgit v1.2.3 From 0d7f25c890e1f1505625542c256d4512c065449a Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 13 Jun 2008 11:50:13 +1000 Subject: nv50: disable ztest for now - it doesn't work still --- src/gallium/drivers/nv50/nv50_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 176ebff5da4..0129b0fb144 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -247,10 +247,10 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, struct nouveau_stateobj *so = so_new(64, 0); so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); - so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, 0); //cso->depth.writemask ? 1 : 0); if (cso->depth.enabled) { so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); - so_data (so, 1); + so_data (so, 0); //1); so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1); so_data (so, nvgl_comparison_op(cso->depth.func)); } else { -- cgit v1.2.3 From c0ed6a871cd3513e17a1fab960f5626485ffed13 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 13 Jun 2008 12:09:46 +1000 Subject: nv50: do tsc/tic upload + stub out shader TEX stuff --- src/gallium/drivers/nv50/nv50_context.h | 17 +++++++++++++ src/gallium/drivers/nv50/nv50_miptree.c | 11 --------- src/gallium/drivers/nv50/nv50_program.c | 6 +++++ src/gallium/drivers/nv50/nv50_screen.c | 1 + src/gallium/drivers/nv50/nv50_state.c | 27 ++++++++++++++++++++- src/gallium/drivers/nv50/nv50_state_validate.c | 33 ++++++++++++++++++++++++++ 6 files changed, 83 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 19ec492eb3f..5214ce1a69c 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -44,6 +44,8 @@ #define NV50_NEW_FRAGPROG (1 << 10) #define NV50_NEW_FRAGPROG_CB (1 << 11) #define NV50_NEW_ARRAYS (1 << 12) +#define NV50_NEW_SAMPLER (1 << 13) +#define NV50_NEW_TEXTURE (1 << 14) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -60,6 +62,17 @@ struct nv50_rasterizer_stateobj { struct nouveau_stateobj *so; }; +struct nv50_miptree { + struct pipe_texture base; + struct pipe_buffer *buffer; +}; + +static INLINE struct nv50_miptree * +nv50_miptree(struct pipe_texture *pt) +{ + return (struct nv50_miptree *)pt; +} + struct nv50_context { struct pipe_context pipe; @@ -84,6 +97,10 @@ struct nv50_context { unsigned vtxbuf_nr; struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; unsigned vtxelt_nr; + unsigned *sampler[PIPE_MAX_SAMPLERS]; + unsigned sampler_nr; + struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS]; + unsigned miptree_nr; }; static INLINE struct nv50_context * diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 459e0ff9dd6..cdd98844f94 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -5,17 +5,6 @@ #include "nv50_context.h" -struct nv50_miptree { - struct pipe_texture base; - struct pipe_buffer *buffer; -}; - -static INLINE struct nv50_miptree * -nv50_miptree(struct pipe_texture *pt) -{ - return (struct nv50_miptree *)pt; -} - static struct pipe_texture * nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 39597c54814..5eec68e5e19 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -828,6 +828,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) case TGSI_FILE_IMMEDIATE: r = &pc->immd[src->SrcRegister.Index * 4 + c]; break; + case TGSI_FILE_SAMPLER: + break; default: assert(0); break; @@ -1130,6 +1132,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_sub(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_TEX: + break; case TGSI_OPCODE_XPD: temp = alloc_temp(pc, NULL); if (mask & (1 << 0)) { @@ -1226,6 +1230,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->param_nr < (last + 1)) pc->param_nr = last + 1; break; + case TGSI_FILE_SAMPLER: + break; default: NOUVEAU_ERR("bad decl file %d\n", d->Declaration.File); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index d76cce9f791..b9cecb77f1f 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -28,6 +28,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, break; case PIPE_TEXTURE: switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_I8_UNORM: return TRUE; default: diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 0129b0fb144..f36299db4d2 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "nv50_context.h" @@ -85,23 +86,47 @@ static void * nv50_sampler_state_create(struct pipe_context *pipe, const struct pipe_sampler_state *cso) { - return NULL; + unsigned *tsc = CALLOC(8, sizeof(unsigned)); + + tsc[0] = 0x00024080; + tsc[1] = 0x00000062; + + return (void *)tsc; } static void nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) { + struct nv50_context *nv50 = nv50_context(pipe); + int i; + + nv50->sampler_nr = nr; + for (i = 0; i < nv50->sampler_nr; i++) + nv50->sampler[i] = sampler[i]; + + nv50->dirty |= NV50_NEW_SAMPLER; } static void nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { + FREE(hwcso); } static void nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr, struct pipe_texture **pt) { + struct nv50_context *nv50 = nv50_context(pipe); + int i; + + for (i = 0; i < nr; i++) + pipe_texture_reference(&nv50->miptree[i], pt[i]); + for (i = nr; i < nv50->miptree_nr; i++) + pipe_texture_reference(&nv50->miptree[i], NULL); + + nv50->miptree_nr = nr; + nv50->dirty |= NV50_NEW_TEXTURE; } static void * diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index a3f399a139f..63c1756bcbf 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -178,6 +178,39 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(NULL, &so); } + if (nv50->dirty & NV50_NEW_SAMPLER) { + int i; + + BEGIN_RING(tesla, 0x0f00, 1); + OUT_RING ((NV50_CB_TSC << 0) | (0 << 8)); + BEGIN_RING(tesla, 0x40000f04, nv50->sampler_nr * 8); + for (i = 0; i < nv50->sampler_nr; i++) + OUT_RINGp(nv50->sampler[i], 8); + } + + if (nv50->dirty & NV50_NEW_TEXTURE) { + int i; + + BEGIN_RING(tesla, 0x0f00, 1); + OUT_RING ((NV50_CB_TIC << 0) | (0 << 8)); + BEGIN_RING(tesla, 0x40000f04, nv50->miptree_nr * 8); + for (i = 0; i < nv50->sampler_nr; i++) { + struct nv50_miptree *mt = nv50->miptree[i]; + + OUT_RING (0x2a712488); + OUT_RELOCl(mt->buffer, 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW); + OUT_RING (0xd0c05000); + OUT_RING (0x00300000); + OUT_RING (mt->base.width[0]); + OUT_RING ((mt->base.depth[0] << 16) | + mt->base.height[0]); + OUT_RING (0x03000000); + OUT_RELOCh(mt->buffer, 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH); + } + } + if (nv50->dirty & NV50_NEW_ARRAYS) nv50_vbo_validate(nv50); -- cgit v1.2.3 From fa5cd63f96d2b69ded48d40b9cb7e57c147f7332 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 15 Jun 2008 15:49:25 +1000 Subject: nv50: simplify interp crap a bit... hopefully there wasn't a good reason I went the route I did.. can't recall.. --- src/gallium/drivers/nv50/nv50_program.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5eec68e5e19..5800a347bf6 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1263,7 +1263,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); if (pc->attr_nr) { - struct nv50_reg *iv = NULL, *tmp = NULL; + struct nv50_reg *iv = NULL; int aid = 0; pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); @@ -1272,6 +1272,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->p->type == PIPE_SHADER_FRAGMENT) { iv = alloc_temp(pc, NULL); + emit_interp(pc, iv, iv, iv, FALSE); + emit_flop(pc, 0, iv, iv); aid++; } @@ -1297,14 +1299,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->p->type != PIPE_SHADER_FRAGMENT) continue; - emit_interp(pc, iv, iv, iv, FALSE); - tmp = alloc_temp(pc, NULL); - emit_flop(pc, 0, tmp, iv); - emit_interp(pc, &a[0], &a[0], tmp, TRUE); - emit_interp(pc, &a[1], &a[1], tmp, TRUE); - emit_interp(pc, &a[2], &a[2], tmp, TRUE); - emit_interp(pc, &a[3], &a[3], tmp, TRUE); - free_temp(pc, tmp); + emit_interp(pc, &a[0], &a[0], iv, TRUE); + emit_interp(pc, &a[1], &a[1], iv, TRUE); + emit_interp(pc, &a[2], &a[2], iv, TRUE); + emit_interp(pc, &a[3], &a[3], iv, TRUE); } if (iv) -- cgit v1.2.3 From da66b8a2f4c3c052ad71b2b6d5a845c2fd267c6e Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 15 Jun 2008 15:53:22 +1000 Subject: nv50: disable inline IMMD for now, IMMD+pred == BANG! fixes progs/fp/lit.txt --- src/gallium/drivers/nv50/nv50_program.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5800a347bf6..845fc611905 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -31,10 +31,6 @@ * FUCK! watch dst==src vectors, can overwrite components that are needed. * ie. SUB R0, R0.yzxw, R0 * - * MOV dst, -src - * "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0) - * mov dst, tmp - * * Things to check with renouveau: * FP attr/result assignment - how? * attrib @@ -349,7 +345,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_dst(pc, dst, e); - if (dst->type != P_RESULT && src->type == P_IMMD) { + if (0 && dst->type != P_RESULT && src->type == P_IMMD) { set_immd(pc, src, e); /*XXX: 32-bit, but steals part of "half" reg space - need to * catch and handle this case if/when we do half-regs -- cgit v1.2.3 From fea0b1651677444fc6c135e1a4b8ab6463a9fdf9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 16 Jun 2008 12:55:53 +1000 Subject: nv50: a couple more bits'n'pieces --- src/gallium/drivers/nv50/nv50_program.c | 10 +++++++++- src/gallium/drivers/nv50/nv50_program.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 845fc611905..2fe60ad51ff 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -93,6 +93,11 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) { int i; + if (reg->type == P_RESULT) { + if (pc->p->cfg.high_result < (reg->hw + 1)) + pc->p->cfg.high_result = reg->hw + 1; + } + if (reg->type != P_TEMP) return; @@ -1558,6 +1563,8 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_method(so, tesla, 0x1650, 2); so_data (so, p->cfg.vp.attr[0]); so_data (so, p->cfg.vp.attr[1]); + so_method(so, tesla, 0x16b8, 1); + so_data (so, p->cfg.high_result); so_method(so, tesla, 0x16ac, 2); so_data (so, 8); so_data (so, p->cfg.high_temp); @@ -1594,9 +1601,10 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, 0x00000004); so_data (so, 0x00000000); so_data (so, 0x00000000); - so_method(so, tesla, 0x16bc, 2); /*XXX: fixme */ + so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */ so_data (so, 0x03020100); so_data (so, 0x07060504); + so_data (so, 0x0b0a0908); so_method(so, tesla, 0x1988, 2); so_data (so, 0x08040404); /* p: 0x0f000401 */ so_data (so, p->cfg.high_temp); diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index d143ae97979..d643e8db218 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -35,6 +35,7 @@ struct nv50_program { struct { unsigned high_temp; + unsigned high_result; struct { unsigned attr[2]; } vp; -- cgit v1.2.3 From cae38d0fcc6c936d3a4dc25ca2dbef3d106d05a5 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 16 Jun 2008 16:29:40 +1000 Subject: nv50: abuse constbuf upload for program upload --- src/gallium/drivers/nv50/nv50_context.h | 1 + src/gallium/drivers/nv50/nv50_program.c | 50 +++++++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 5214ce1a69c..cbb473410a5 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -30,6 +30,7 @@ #define NV50_CB_PGP 3 #define NV50_CB_TIC 4 #define NV50_CB_TSC 5 +#define NV50_CB_PUPLOAD 6 #define NV50_NEW_BLEND (1 << 0) #define NV50_NEW_ZSA (1 << 1) diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2fe60ad51ff..2cf6275730c 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1494,8 +1494,10 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { struct pipe_winsys *ws = nv50->pipe.winsys; struct nv50_program_exec *e; + struct nouveau_stateobj *so; + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; + unsigned start, count, *up, *ptr; boolean upload = FALSE; - unsigned *map; if (!p->buffer) { p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); @@ -1522,20 +1524,44 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (!upload) return FALSE; - map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + up = ptr = MALLOC(p->exec_size * 4); for (e = p->exec_head; e; e = e->next) { -#ifdef NV50_PROGRAM_DUMP - NOUVEAU_ERR("0x%08x\n", e->inst[0]); -#endif - *(map++) = e->inst[0]; - if (is_long(e)) { -#ifdef NV50_PROGRAM_DUMP - NOUVEAU_ERR("0x%08x\n", e->inst[1]); -#endif - *(map++) = e->inst[1]; + *(ptr++) = e->inst[0]; + if (is_long(e)) + *(ptr++) = e->inst[1]; + } + + so = so_new(3,2); + so_method(so, nv50->screen->tesla, 0x1280, 3); + so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); + + start = 0; count = p->exec_size; + while (count) { + struct nouveau_winsys *nvws = nv50->screen->nvws; + unsigned nr; + + so_emit(nvws, so); + + nr = MIN2(count, 2047); + nr = MIN2(nvws->channel->pushbuf->remaining, nr); + if (nvws->channel->pushbuf->remaining < (nr + 3)) { + FIRE_RING(NULL); + continue; } + + BEGIN_RING(tesla, 0x0f00, 1); + OUT_RING ((start << 8) | NV50_CB_PUPLOAD); + BEGIN_RING(tesla, 0x40000f04, nr); + OUT_RINGp (up + start, nr); + + start += nr; + count -= nr; } - ws->buffer_unmap(ws, p->buffer); + + FREE(up); + so_ref(NULL, &so); } void -- cgit v1.2.3 From bcbe6baac37915563bc120ad558cd930bc1ddec1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 16 Jun 2008 18:03:29 +1000 Subject: nv50: hacks for stuff I don't really get yet --- src/gallium/drivers/nv50/nv50_program.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2cf6275730c..fa5e24d3e90 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1592,7 +1592,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_method(so, tesla, 0x16b8, 1); so_data (so, p->cfg.high_result); so_method(so, tesla, 0x16ac, 2); - so_data (so, 8); + so_data (so, p->cfg.high_result); //8); so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x140c, 1); so_data (so, 0); /* program start offset */ @@ -1632,7 +1632,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, 0x07060504); so_data (so, 0x0b0a0908); so_method(so, tesla, 0x1988, 2); - so_data (so, 0x08040404); /* p: 0x0f000401 */ + so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */ so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ -- cgit v1.2.3 From 431504b99cd55948522e86a249e656e78598ddbd Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 16 Jun 2008 18:56:39 +1000 Subject: nv50: hack of a TEX opcode --- src/gallium/drivers/nv50/nv50_program.c | 19 +++++++++++++++++++ src/gallium/drivers/nv50/nv50_screen.c | 5 +++++ 2 files changed, 24 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index fa5e24d3e90..21945410b17 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1134,6 +1134,25 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_TEX: + { + struct nv50_reg *t0, *t1; + struct nv50_program_exec *e; + + t0 = alloc_temp(pc, NULL); + t0 = alloc_temp(pc, NULL); + t1 = alloc_temp(pc, NULL); + emit_mov(pc, t0, src[0][0]); + emit_mov(pc, t1, src[0][1]); + + e = exec(pc); + e->inst[0] = 0xf0400000; + set_long(pc, e); + e->inst[1] |= 0x0000c004; + set_dst(pc, t0, e); + emit(pc, e); + free_temp(pc, t0); + free_temp(pc, t1); + } break; case TGSI_OPCODE_XPD: temp = alloc_temp(pc, NULL); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index b9cecb77f1f..8affb0f073f 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -289,6 +289,11 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, fui(0.0)); so_data (so, fui(1.0)); + so_method(so, screen->tesla, 0x1234, 1); + so_data (so, 1); + so_method(so, screen->tesla, 0x1458, 1); + so_data (so, 1); + so_emit(nvws, so); so_ref(NULL, &so); nvws->push_flush(nvws, 0, NULL); -- cgit v1.2.3 From 5d3070149267251bafc1ff982b77e7f422554f50 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 16 Jun 2008 22:02:02 +1000 Subject: nv50: use stateobjs for sampler/image_control uploads --- src/gallium/drivers/nouveau/nouveau_stateobj.h | 18 +++++++++++ src/gallium/drivers/nv50/nv50_state_validate.c | 45 +++++++++++++++----------- 2 files changed, 44 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index d501b76b51e..78b27380702 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -2,6 +2,7 @@ #define __NOUVEAU_STATEOBJ_H__ #include "pipe/p_util.h" +#include "pipe/p_debug.h" struct nouveau_stateobj_reloc { struct pipe_buffer *bo; @@ -67,6 +68,14 @@ so_data(struct nouveau_stateobj *so, unsigned data) so->cur_packet += 4; } +static INLINE void +so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size) +{ + so->cur_packet += (4 * size); + while (size--) + (*so->cur++) = (*data++); +} + static INLINE void so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, unsigned mthd, unsigned size) @@ -91,6 +100,15 @@ so_reloc(struct nouveau_stateobj *so, struct pipe_buffer *bo, so_data(so, data); } +static INLINE void +so_dump(struct nouveau_stateobj *so) +{ + unsigned i, nr = so->cur - so->push; + + for (i = 0; i < nr; i++) + debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]); +} + static INLINE void so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) { diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 63c1756bcbf..b5a9195231b 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -181,34 +181,41 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & NV50_NEW_SAMPLER) { int i; - BEGIN_RING(tesla, 0x0f00, 1); - OUT_RING ((NV50_CB_TSC << 0) | (0 << 8)); - BEGIN_RING(tesla, 0x40000f04, nv50->sampler_nr * 8); + so = so_new(nv50->sampler_nr * 8 + 3, 0); + so_method(so, tesla, 0x0f00, 1); + so_data (so, NV50_CB_TSC); + so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8); for (i = 0; i < nv50->sampler_nr; i++) - OUT_RINGp(nv50->sampler[i], 8); + so_datap (so, nv50->sampler[i], 8); + so_emit(nvws, so); + so_ref(NULL, &so); } if (nv50->dirty & NV50_NEW_TEXTURE) { int i; - BEGIN_RING(tesla, 0x0f00, 1); - OUT_RING ((NV50_CB_TIC << 0) | (0 << 8)); - BEGIN_RING(tesla, 0x40000f04, nv50->miptree_nr * 8); - for (i = 0; i < nv50->sampler_nr; i++) { + so = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2); + so_method(so, tesla, 0x0f00, 1); + so_data (so, NV50_CB_TIC); + so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8); + for (i = 0; i < nv50->miptree_nr; i++) { struct nv50_miptree *mt = nv50->miptree[i]; - OUT_RING (0x2a712488); - OUT_RELOCl(mt->buffer, 0, - NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW); - OUT_RING (0xd0c05000); - OUT_RING (0x00300000); - OUT_RING (mt->base.width[0]); - OUT_RING ((mt->base.depth[0] << 16) | - mt->base.height[0]); - OUT_RING (0x03000000); - OUT_RELOCh(mt->buffer, 0, - NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH); + so_data (so, 0x2a712488); + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0xd0c05000); + so_data (so, 0x00300000); + so_data (so, mt->base.width[0]); + so_data (so, (mt->base.depth[0] << 16) | + mt->base.height[0]); + so_data (so, 0x03000000); + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_HIGH, 0, 0); } + + so_emit(nvws, so); + so_ref(NULL, &so); } if (nv50->dirty & NV50_NEW_ARRAYS) -- cgit v1.2.3 From 94999d39d43d24a702f4cb55b515906d03a57277 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 16 Jun 2008 22:06:54 +1000 Subject: nv50: fix blend colour --- src/gallium/drivers/nv50/nv50_state_validate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index b5a9195231b..9d93c04da7d 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -135,11 +135,11 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { so = so_new(5, 0); - so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 8); - so_data (so, fui(nv50->blend_colour.color[3])); + so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); so_data (so, fui(nv50->blend_colour.color[0])); so_data (so, fui(nv50->blend_colour.color[1])); so_data (so, fui(nv50->blend_colour.color[2])); + so_data (so, fui(nv50->blend_colour.color[3])); so_emit(nvws, so); so_ref(NULL, &so); } -- cgit v1.2.3 From bb9efb5534a652878161e28bd73039eff5b11014 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 16 Jun 2008 22:24:16 +1000 Subject: nv50: separate state validation and upload, similar to nv40 --- src/gallium/drivers/nv50/nv50_context.h | 21 +++++++ src/gallium/drivers/nv50/nv50_program.c | 6 +- src/gallium/drivers/nv50/nv50_state_validate.c | 80 ++++++++++++++++++++------ src/gallium/drivers/nv50/nv50_vbo.c | 6 +- 4 files changed, 86 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index cbb473410a5..3e0e523ee3e 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -74,6 +74,25 @@ nv50_miptree(struct pipe_texture *pt) return (struct nv50_miptree *)pt; } +struct nv50_state { + unsigned dirty; + + struct nouveau_stateobj *fb; + struct nouveau_stateobj *blend; + struct nouveau_stateobj *blend_colour; + struct nouveau_stateobj *zsa; + struct nouveau_stateobj *rast; + struct nouveau_stateobj *stipple; + struct nouveau_stateobj *scissor; + struct nouveau_stateobj *viewport; + struct nouveau_stateobj *tsc_upload; + struct nouveau_stateobj *tic_upload; + struct nouveau_stateobj *vertprog; + struct nouveau_stateobj *fragprog; + struct nouveau_stateobj *vtxfmt; + struct nouveau_stateobj *vtxbuf; +}; + struct nv50_context { struct pipe_context pipe; @@ -82,6 +101,8 @@ struct nv50_context { struct draw_context *draw; + struct nv50_state state; + unsigned dirty; struct nv50_blend_stateobj *blend; struct nv50_zsa_stateobj *zsa; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 21945410b17..7bb2f454bb9 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1615,8 +1615,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x140c, 1); so_data (so, 0); /* program start offset */ - so_emit(nv50->screen->nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.vertprog); } void @@ -1655,8 +1654,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ - so_emit(nv50->screen->nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.fragprog); } void diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 9d93c04da7d..566f95f682c 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -103,14 +103,59 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data (so, 0); so_data (so, h); - so_emit(nv50->screen->nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.fb); +} + +static void +nv50_state_emit(struct nv50_context *nv50) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_winsys *nvws = screen->nvws; + + if (nv50->pctx_id != screen->cur_pctx) { + nv50->state.dirty |= 0xffffffff; + screen->cur_pctx = nv50->pctx_id; + } + + if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER) + so_emit(nvws, nv50->state.fb); + if (nv50->state.dirty & NV50_NEW_BLEND) + so_emit(nvws, nv50->state.blend); + if (nv50->state.dirty & NV50_NEW_ZSA) + so_emit(nvws, nv50->state.zsa); + if (nv50->state.dirty & NV50_NEW_VERTPROG) + so_emit(nvws, nv50->state.vertprog); + if (nv50->state.dirty & NV50_NEW_FRAGPROG) + so_emit(nvws, nv50->state.fragprog); + if (nv50->state.dirty & NV50_NEW_RASTERIZER) + so_emit(nvws, nv50->state.rast); + if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) + so_emit(nvws, nv50->state.blend_colour); + if (nv50->state.dirty & NV50_NEW_STIPPLE) + so_emit(nvws, nv50->state.stipple); + if (nv50->state.dirty & NV50_NEW_SCISSOR) + so_emit(nvws, nv50->state.scissor); + if (nv50->state.dirty & NV50_NEW_VIEWPORT) + so_emit(nvws, nv50->state.viewport); + if (nv50->state.dirty & NV50_NEW_SAMPLER) + so_emit(nvws, nv50->state.tsc_upload); + if (nv50->state.dirty & NV50_NEW_TEXTURE) + so_emit(nvws, nv50->state.tic_upload); + if (nv50->state.dirty & NV50_NEW_ARRAYS) { + so_emit(nvws, nv50->state.vtxfmt); + so_emit(nvws, nv50->state.vtxbuf); + } + nv50->state.dirty = 0; + + so_emit_reloc_markers(nvws, nv50->state.fb); + so_emit_reloc_markers(nvws, nv50->state.vertprog); + so_emit_reloc_markers(nvws, nv50->state.fragprog); + so_emit_reloc_markers(nvws, nv50->state.vtxbuf); } boolean nv50_state_validate(struct nv50_context *nv50) { - struct nouveau_winsys *nvws = nv50->screen->nvws; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_stateobj *so; unsigned i; @@ -119,10 +164,10 @@ nv50_state_validate(struct nv50_context *nv50) nv50_state_validate_fb(nv50); if (nv50->dirty & NV50_NEW_BLEND) - so_emit(nvws, nv50->blend->so); + so_ref(nv50->blend->so, &nv50->state.blend); if (nv50->dirty & NV50_NEW_ZSA) - so_emit(nvws, nv50->zsa->so); + so_ref(nv50->zsa->so, &nv50->state.zsa); if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB)) nv50_vertprog_validate(nv50); @@ -131,7 +176,7 @@ nv50_state_validate(struct nv50_context *nv50) nv50_fragprog_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) - so_emit(nvws, nv50->rasterizer->so); + so_ref(nv50->rasterizer->so, &nv50->state.rast); if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { so = so_new(5, 0); @@ -140,8 +185,7 @@ nv50_state_validate(struct nv50_context *nv50) so_data (so, fui(nv50->blend_colour.color[1])); so_data (so, fui(nv50->blend_colour.color[2])); so_data (so, fui(nv50->blend_colour.color[3])); - so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.blend_colour); } if (nv50->dirty & NV50_NEW_STIPPLE) { @@ -149,8 +193,7 @@ nv50_state_validate(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv50->stipple.stipple[i]); - so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.stipple); } if (nv50->dirty & NV50_NEW_SCISSOR) { @@ -160,8 +203,7 @@ nv50_state_validate(struct nv50_context *nv50) nv50->scissor.minx); so_data (so, (nv50->scissor.maxy << 16) | nv50->scissor.miny); - so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.scissor); } if (nv50->dirty & NV50_NEW_VIEWPORT) { @@ -174,8 +216,7 @@ nv50_state_validate(struct nv50_context *nv50) so_data (so, fui(nv50->viewport.scale[0])); so_data (so, fui(-nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); - so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.viewport); } if (nv50->dirty & NV50_NEW_SAMPLER) { @@ -187,8 +228,7 @@ nv50_state_validate(struct nv50_context *nv50) so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8); for (i = 0; i < nv50->sampler_nr; i++) so_datap (so, nv50->sampler[i], 8); - so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &nv50->state.tsc_upload); } if (nv50->dirty & NV50_NEW_TEXTURE) { @@ -213,15 +253,17 @@ nv50_state_validate(struct nv50_context *nv50) so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); } - - so_emit(nvws, so); - so_ref(NULL, &so); + + so_ref(so, &nv50->state.tic_upload); } if (nv50->dirty & NV50_NEW_ARRAYS) nv50_vbo_validate(nv50); + nv50->state.dirty |= nv50->dirty; nv50->dirty = 0; + nv50_state_emit(nv50); + return TRUE; } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 7a1c2f24ef9..3f0b66ae365 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -208,9 +208,7 @@ nv50_vbo_validate(struct nv50_context *nv50) NOUVEAU_BO_LOW, 0, 0); } - so_emit(nv50->screen->nvws, vtxfmt); - so_ref (NULL, &vtxfmt); - so_emit(nv50->screen->nvws, vtxbuf); - so_ref (NULL, &vtxbuf); + so_ref (vtxfmt, &nv50->state.vtxfmt); + so_ref (vtxbuf, &nv50->state.vtxbuf); } -- cgit v1.2.3 From 035a04d9c11e0e90e2dbcdba25f39c3156a64115 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 16 Jun 2008 22:34:50 +1000 Subject: nv50: get tri-scissor-tri working --- src/gallium/drivers/nv50/nv50_context.h | 1 + src/gallium/drivers/nv50/nv50_state_validate.c | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 3e0e523ee3e..4ea01009fb6 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -84,6 +84,7 @@ struct nv50_state { struct nouveau_stateobj *rast; struct nouveau_stateobj *stipple; struct nouveau_stateobj *scissor; + unsigned scissor_enabled; struct nouveau_stateobj *viewport; struct nouveau_stateobj *tsc_upload; struct nouveau_stateobj *tic_upload; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 566f95f682c..d99cdc73cac 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -196,15 +196,28 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(so, &nv50->state.stipple); } - if (nv50->dirty & NV50_NEW_SCISSOR) { + if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) { + struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe; + struct pipe_scissor_state *s = &nv50->scissor; + + if (nv50->state.scissor && + (rast->scissor == 0 && nv50->state.scissor_enabled == 0)) + goto scissor_uptodate; + nv50->state.scissor_enabled = rast->scissor; + so = so_new(3, 0); - so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2); - so_data (so, (nv50->scissor.maxx << 16) | - nv50->scissor.minx); - so_data (so, (nv50->scissor.maxy << 16) | - nv50->scissor.miny); + so_method(so, tesla, 0xff4, 2); //NV50TCL_SCISSOR_HORIZ, 2); + if (nv50->state.scissor_enabled) { + so_data(so, ((s->maxx - s->minx) << 16) | s->minx); + so_data(so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data(so, (8192 << 16)); + so_data(so, (8192 << 16)); + } so_ref(so, &nv50->state.scissor); + nv50->state.dirty |= NV50_NEW_SCISSOR; } +scissor_uptodate: if (nv50->dirty & NV50_NEW_VIEWPORT) { so = so_new(8, 0); -- cgit v1.2.3 From 598b2a51052913521e3059cdef7cf0c66a5adb90 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 17 Jun 2008 01:36:36 +1000 Subject: nv50: make TEX a halfie --- src/gallium/drivers/nv50/nv50_program.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7bb2f454bb9..5ef09542397 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1145,9 +1145,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mov(pc, t1, src[0][1]); e = exec(pc); - e->inst[0] = 0xf0400000; - set_long(pc, e); - e->inst[1] |= 0x0000c004; + e->inst[0] = 0xf0400100; set_dst(pc, t0, e); emit(pc, e); free_temp(pc, t0); -- cgit v1.2.3 From fd7412a7f1beab8b81ce307b1054331eee102e8b Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 17 Jun 2008 01:51:13 +1000 Subject: nv50: some people are just born stupid.. really.. --- src/gallium/drivers/nv50/nv50_program.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5ef09542397..ba60b8c5338 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1135,21 +1135,33 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) break; case TGSI_OPCODE_TEX: { - struct nv50_reg *t0, *t1; + struct nv50_reg *t0, *t1, *t2, *t3; struct nv50_program_exec *e; t0 = alloc_temp(pc, NULL); t0 = alloc_temp(pc, NULL); t1 = alloc_temp(pc, NULL); + t2 = alloc_temp(pc, NULL); + t3 = alloc_temp(pc, NULL); emit_mov(pc, t0, src[0][0]); emit_mov(pc, t1, src[0][1]); e = exec(pc); - e->inst[0] = 0xf0400100; + e->inst[0] = 0xf0400000; + set_long(pc, e); + e->inst[1] |= 0x0000c004; set_dst(pc, t0, e); emit(pc, e); + + if (mask & (1 << 0)) emit_mov(pc, dst[0], t0); + if (mask & (1 << 1)) emit_mov(pc, dst[1], t1); + if (mask & (1 << 2)) emit_mov(pc, dst[2], t2); + if (mask & (1 << 3)) emit_mov(pc, dst[3], t3); + free_temp(pc, t0); free_temp(pc, t1); + free_temp(pc, t2); + free_temp(pc, t3); } break; case TGSI_OPCODE_XPD: -- cgit v1.2.3 From 65ad8176ca91b5ed2a01b1b3ee145cfdce369419 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 17 Jun 2008 23:43:38 +1000 Subject: nv50: move surface_map/unmap into nv50_surface.c --- src/gallium/drivers/nv50/nv50_screen.c | 26 +------------------------ src/gallium/drivers/nv50/nv50_screen.h | 2 ++ src/gallium/drivers/nv50/nv50_surface.c | 34 +++++++++++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 8affb0f073f..18f22c59609 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -118,28 +118,6 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, int param) } } -static void * -nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, - unsigned flags ) -{ - struct pipe_winsys *ws = screen->winsys; - void *map; - - map = ws->buffer_map(ws, surface->buffer, flags); - if (!map) - return NULL; - - return map + surface->offset; -} - -static void -nv50_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) -{ - struct pipe_winsys *ws = screen->winsys; - - ws->buffer_unmap(ws, surface->buffer); -} - static void nv50_screen_destroy(struct pipe_screen *pscreen) { @@ -309,10 +287,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) screen->pipe.is_format_supported = nv50_screen_is_format_supported; - screen->pipe.surface_map = nv50_surface_map; - screen->pipe.surface_unmap = nv50_surface_unmap; - nv50_screen_init_miptree_functions(&screen->pipe); + nv50_surface_init_screen_functions(&screen->pipe); return &screen->pipe; } diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 5acb5003ba4..08d1f450891 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -26,4 +26,6 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +void nv50_surface_init_screen_functions(struct pipe_screen *); + #endif diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 4e294198b07..acd7b501ef4 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -57,9 +57,39 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, nvws->surface_fill(nvws, dest, destx, desty, width, height, value); } +static void * +nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + + map = ws->buffer_map(ws, surface->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv50_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + + ws->buffer_unmap(ws, surface->buffer); +} + void nv50_init_surface_functions(struct nv50_context *nv50) { - nv50->pipe.surface_copy = nv50_surface_copy; - nv50->pipe.surface_fill = nv50_surface_fill; + nv50->pipe.surface_copy = nv50_surface_copy; + nv50->pipe.surface_fill = nv50_surface_fill; } + +void +nv50_surface_init_screen_functions(struct pipe_screen *pscreen) +{ + pscreen->surface_map = nv50_surface_map; + pscreen->surface_unmap = nv50_surface_unmap; +} + -- cgit v1.2.3 From 3b88c3f4112a8bac52b7f7e613b1c2df8a14b752 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 17 Jun 2008 23:52:41 +1000 Subject: nv50: R32_FLOAT vbo format --- src/gallium/drivers/nv50/nv50_vbo.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 3f0b66ae365..c16bfe3a937 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -185,6 +185,9 @@ nv50_vbo_validate(struct nv50_context *nv50) case PIPE_FORMAT_R32G32_FLOAT: so_data(vtxfmt, 0x7e200000 | i); break; + case PIPE_FORMAT_R32_FLOAT: + so_data(vtxfmt, 0x7e900000 | i); + break; case PIPE_FORMAT_R8G8B8A8_UNORM: so_data(vtxfmt, 0x24500000 | i); break; -- cgit v1.2.3 From 714cb4a86c1f503334b37ca6c24272fa1bdf7899 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 17 Jun 2008 23:55:23 +1000 Subject: nv50: make sure static buffers (constbuf, tex control etc) get on reloc list --- src/gallium/drivers/nv50/nv50_screen.c | 2 +- src/gallium/drivers/nv50/nv50_screen.h | 2 ++ src/gallium/drivers/nv50/nv50_state_validate.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 18f22c59609..5b4c5f96ac3 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -273,7 +273,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 1); so_emit(nvws, so); - so_ref(NULL, &so); + so_ref(so, &screen->static_init); nvws->push_flush(nvws, 0, NULL); screen->pipe.winsys = ws; diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 08d1f450891..400ddcef06d 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -18,6 +18,8 @@ struct nv50_screen { struct pipe_buffer *tic; struct pipe_buffer *tsc; + + struct nouveau_stateobj *static_init; }; static INLINE struct nv50_screen * diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index d99cdc73cac..8229bce89ee 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -151,6 +151,7 @@ nv50_state_emit(struct nv50_context *nv50) so_emit_reloc_markers(nvws, nv50->state.vertprog); so_emit_reloc_markers(nvws, nv50->state.fragprog); so_emit_reloc_markers(nvws, nv50->state.vtxbuf); + so_emit_reloc_markers(nvws, nv50->screen->static_init); } boolean -- cgit v1.2.3 From 5a3ea9ee59ac586955f7784eb25e7fd70d0c8882 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 18 Jun 2008 13:23:00 +1000 Subject: nv50: simplify emit_interp a bit --- src/gallium/drivers/nv50/nv50_program.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index ba60b8c5338..3248f2aa3c3 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -302,18 +302,19 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) static void emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, - struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) + struct nv50_reg *src, struct nv50_reg *iv) { struct nv50_program_exec *e = exec(pc); e->inst[0] |= 0x80000000; set_dst(pc, dst, e); - alloc_reg(pc, iv); - e->inst[0] |= (iv->hw << 9); alloc_reg(pc, src); e->inst[0] |= (src->hw << 16); - if (noperspective) + if (iv) { e->inst[0] |= (1 << 25); + alloc_reg(pc, iv); + e->inst[0] |= (iv->hw << 9); + } emit(pc, e); } @@ -1147,7 +1148,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mov(pc, t1, src[0][1]); e = exec(pc); - e->inst[0] = 0xf0400000; + e->inst[0] = 0xf6400000; set_long(pc, e); e->inst[1] |= 0x0000c004; set_dst(pc, t0, e); @@ -1302,7 +1303,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->p->type == PIPE_SHADER_FRAGMENT) { iv = alloc_temp(pc, NULL); - emit_interp(pc, iv, iv, iv, FALSE); + emit_interp(pc, iv, iv, NULL); emit_flop(pc, 0, iv, iv); aid++; } @@ -1329,10 +1330,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->p->type != PIPE_SHADER_FRAGMENT) continue; - emit_interp(pc, &a[0], &a[0], iv, TRUE); - emit_interp(pc, &a[1], &a[1], iv, TRUE); - emit_interp(pc, &a[2], &a[2], iv, TRUE); - emit_interp(pc, &a[3], &a[3], iv, TRUE); + emit_interp(pc, &a[0], &a[0], iv); + emit_interp(pc, &a[1], &a[1], iv); + emit_interp(pc, &a[2], &a[2], iv); + emit_interp(pc, &a[3], &a[3], iv); } if (iv) -- cgit v1.2.3 From e90130257527aff43f807ae16d802c5515d29e8e Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 23 Jun 2008 23:42:53 +1000 Subject: nv50: remove some debug --- src/gallium/drivers/nv50/nv50_program.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 3248f2aa3c3..148de3b7eaf 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -883,8 +883,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) unsigned mask, sat; int i, c; - NOUVEAU_ERR("insn %p\n", tok); - mask = inst->FullDstRegisters[0].DstRegister.WriteMask; sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; @@ -1277,7 +1275,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - NOUVEAU_ERR("%d temps\n", pc->temp_nr); if (pc->temp_nr) { pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); if (!pc->temp) @@ -1292,7 +1289,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); if (pc->attr_nr) { struct nv50_reg *iv = NULL; int aid = 0; @@ -1340,7 +1336,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) free_temp(pc, iv); } - NOUVEAU_ERR("%d result regs\n", pc->result_nr); if (pc->result_nr) { int rid = 0; @@ -1362,7 +1357,6 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - NOUVEAU_ERR("%d param regs\n", pc->param_nr); if (pc->param_nr) { int rid = 0; -- cgit v1.2.3 From 47771bcd2fb5bcfecfa076c19360436351c21c95 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 23 Jun 2008 23:43:34 +1000 Subject: nv50: maintain pipe surface status field --- src/gallium/drivers/nv50/nv50_clear.c | 1 + src/gallium/drivers/nv50/nv50_state_validate.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c index 552b92f72e2..f35087b37ef 100644 --- a/src/gallium/drivers/nv50/nv50_clear.c +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -9,4 +9,5 @@ nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_CLEAR; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 8229bce89ee..f5c734699fc 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -157,10 +157,17 @@ nv50_state_emit(struct nv50_context *nv50) boolean nv50_state_validate(struct nv50_context *nv50) { + const struct pipe_framebuffer_state *fb = &nv50->framebuffer; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_stateobj *so; unsigned i; + for (i = 0; i < fb->num_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + if (nv50->dirty & NV50_NEW_FRAMEBUFFER) nv50_state_validate_fb(nv50); -- cgit v1.2.3 From 5a3362521de5e17e4f340fd9136af1d5e3891e23 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 25 Jun 2008 03:56:57 +1000 Subject: nv50: turn on depth test/write again, not 100% but winsys handles it better --- src/gallium/drivers/nv50/nv50_miptree.c | 7 ++++--- src/gallium/drivers/nv50/nv50_state.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index cdd98844f94..6ee09fc4dd4 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -10,7 +10,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { struct pipe_winsys *ws = pscreen->winsys; struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); - unsigned usage; + unsigned usage, pitch; NOUVEAU_ERR("unimplemented\n"); @@ -28,8 +28,9 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) break; } - mt->buffer = ws->buffer_create(ws, 256, usage, - pt->width[0] * pt->cpp * pt->height[0]); + pitch = ((pt->width[0] + 63) & ~63) * pt->cpp; + + mt->buffer = ws->buffer_create(ws, 256, usage, pitch * pt->height[0]); if (!mt->buffer) { FREE(mt); return NULL; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index f36299db4d2..1b765cb5c7d 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -272,10 +272,10 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, struct nouveau_stateobj *so = so_new(64, 0); so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); - so_data (so, 0); //cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.writemask ? 1 : 0); if (cso->depth.enabled) { so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); - so_data (so, 0); //1); + so_data (so, 1); so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1); so_data (so, nvgl_comparison_op(cso->depth.func)); } else { -- cgit v1.2.3 From 2c2cb8646168c8709e51d7ff583a86044e3f2040 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 25 Jun 2008 04:53:34 +1000 Subject: nv50: rework miptree/texture/texsurf code a bit --- src/gallium/drivers/nv50/nv50_miptree.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 6ee09fc4dd4..ec6e09aea08 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -12,8 +12,6 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); unsigned usage, pitch; - NOUVEAU_ERR("unimplemented\n"); - mt->base = *pt; mt->base.refcount = 1; mt->base.screen = pscreen; @@ -45,9 +43,8 @@ nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) struct pipe_winsys *ws = pscreen->winsys; struct pipe_texture *pt = *ppt; - NOUVEAU_ERR("unimplemented\n"); - *ppt = NULL; + if (--pt->refcount <= 0) { struct nv50_miptree *mt = nv50_miptree(pt); @@ -65,13 +62,9 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, struct nv50_miptree *mt = nv50_miptree(pt); struct pipe_surface *ps; - NOUVEAU_ERR("unimplemented\n"); - - ps = ws->surface_alloc(ws); - if (!ps) - return NULL; - - pipe_buffer_reference(ws, &ps->buffer, mt->buffer); + ps = CALLOC_STRUCT(pipe_surface); + ps->refcount = 1; + ps->winsys = ws; ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -80,6 +73,11 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->nblocksy = pt->nblocksy[level]; ps->stride = ps->width * ps->block.size; ps->offset = 0; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + + pipe_texture_reference(&ps->texture, pt); + pipe_buffer_reference(ws, &ps->buffer, mt->buffer); return ps; } @@ -88,6 +86,16 @@ static void nv50_miptree_surface_del(struct pipe_screen *pscreen, struct pipe_surface **psurface) { + struct pipe_winsys *ws = pscreen->winsys; + struct pipe_surface *surf = *psurface; + + *psurface = NULL; + + if (--surf->refcount <= 0) { + pipe_texture_reference(&surf->texture, NULL); + pipe_buffer_reference(ws, &surf->buffer, NULL); + FREE(surf); + } } void -- cgit v1.2.3 From e002ad77398fbe14a0efbd91824c3325ca09b4c1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 25 Jun 2008 04:57:27 +1000 Subject: nv50: whoops --- src/gallium/drivers/nv50/nv50_program.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 148de3b7eaf..bc45fe0bdfd 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1555,7 +1555,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) *(ptr++) = e->inst[1]; } - so = so_new(3,2); + so = so_new(4,2); so_method(so, nv50->screen->tesla, 0x1280, 3); so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0); @@ -1604,7 +1604,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(11, 2); + so = so_new(13, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); -- cgit v1.2.3 From 95d64ceb5a2b20032e757d6c1b0b5ef5e2b973e2 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 25 Jun 2008 05:11:46 +1000 Subject: nv50: vpt translate/scale backwards --- src/gallium/drivers/nv50/nv50_state_validate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f5c734699fc..eba13f8e621 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -229,11 +229,11 @@ scissor_uptodate: if (nv50->dirty & NV50_NEW_VIEWPORT) { so = so_new(8, 0); - so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); so_data (so, fui(nv50->viewport.translate[0])); so_data (so, fui(nv50->viewport.translate[1])); so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3); so_data (so, fui(nv50->viewport.scale[0])); so_data (so, fui(-nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); -- cgit v1.2.3 From e05f67cbe6d852d01da3c4e0c4d52b28723f3684 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 25 Jun 2008 05:17:08 +1000 Subject: nv50: maybe some scissor fixes.. --- src/gallium/drivers/nv50/nv50_state_validate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index eba13f8e621..cf5a071a8d6 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -96,7 +96,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); so_data (so, w << 16); so_data (so, h << 16); - so_method(so, tesla, 0xff4, 2); + so_method(so, tesla, 0x0e04, 2); so_data (so, w << 16); so_data (so, h << 16); so_method(so, tesla, 0xdf8, 2); @@ -214,7 +214,7 @@ nv50_state_validate(struct nv50_context *nv50) nv50->state.scissor_enabled = rast->scissor; so = so_new(3, 0); - so_method(so, tesla, 0xff4, 2); //NV50TCL_SCISSOR_HORIZ, 2); + so_method(so, tesla, 0x0ff4, 2); if (nv50->state.scissor_enabled) { so_data(so, ((s->maxx - s->minx) << 16) | s->minx); so_data(so, ((s->maxy - s->miny) << 16) | s->miny); -- cgit v1.2.3 From fea9eb284248adda65afdc3833385d4b03bb25aa Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 25 Jun 2008 05:58:35 +1000 Subject: nv50: don't multiply polygon offset units by 2.0 like on nv40 --- src/gallium/drivers/nv50/nv50_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 1b765cb5c7d..cffcbc3e7d3 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -237,7 +237,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); so_data (so, fui(cso->offset_scale)); so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); - so_data (so, fui(cso->offset_units * 2)); + so_data (so, fui(cso->offset_units)); } rso->pipe = *cso; -- cgit v1.2.3 From e52d37d56c91e152bc149230410ed700ff1cffe2 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 25 Jun 2008 06:01:30 +1000 Subject: nv50: fix line stipple --- src/gallium/drivers/nv50/nv50_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index cffcbc3e7d3..d72886ac596 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -157,7 +157,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1); - so_data (so, (cso->line_stipple_pattern << 16) | + so_data (so, (cso->line_stipple_pattern << 8) | cso->line_stipple_factor); } else { so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); -- cgit v1.2.3 From 70f0f0ebdfa40de0fe03ca94294d372b9fa4642d Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 25 Jun 2008 06:07:02 +1000 Subject: nv50: reverse stencil sides, header is wrong --- src/gallium/drivers/nv50/nv50_state.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index d72886ac596..665e3cc80d6 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -283,24 +283,25 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, 0); } + /*XXX: yes, I know they're backwards.. header needs fixing */ if (cso->stencil[0].enabled) { - so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 5); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 3); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); so_data (so, cso->stencil[0].ref_value); so_data (so, cso->stencil[0].write_mask); so_data (so, cso->stencil[0].value_mask); } else { - so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 8); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); @@ -310,7 +311,7 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, cso->stencil[1].write_mask); so_data (so, cso->stencil[1].value_mask); } else { - so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } -- cgit v1.2.3 From bf94027fdde51aed476e9bfdd4326aa9040440b0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Sun, 29 Jun 2008 15:59:24 +1000 Subject: nv50: fixes after rebase + commits note on the code that was just pushed. OK, seems a lot of people have been getting the idea that nouveau is dying lately - I decided to commit some of the work I've been doing lately to prove them wrong :) Progress on my side is slow due to lack of time mainly, but I'm still around. Firstly, don't even bother trying to use gallium on G8x/G9x yet, it won't work. I've deliberately left all the necessary winsys changes out of the commits for a very good reason - I don't know what we're going to need from the DRM exactly yet and don't want to be continually breaking interfaces as I discover additional requirements. On my side, I think I've gone through about 3 different DRM interface changes, and have just discovered that I may need more yet. It'd be very annoying for everyone who uses nouveau to keep things in sync. Once I've got it sorted - I'll commit a lot of cool stuff. Stay tuned. Also, don't look at the shader code.. it's horribly nasty and full of hacks, I used it as an opportunity to learn G8x GPU programs at the same time. New semi-decent code is in works, and will follow at some point. :) --- src/gallium/drivers/nv50/nv50_miptree.c | 2 +- src/gallium/drivers/nv50/nv50_program.c | 4 ++-- src/gallium/drivers/nv50/nv50_state.c | 4 ++-- src/gallium/winsys/dri/nouveau/nouveau_screen.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index ec6e09aea08..f936a1f0e20 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -26,7 +26,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) break; } - pitch = ((pt->width[0] + 63) & ~63) * pt->cpp; + pitch = ((pt->width[0] + 63) & ~63) * pt->block.size; mt->buffer = ws->buffer_create(ws, 256, usage, pitch * pt->height[0]); if (!mt->buffer) { diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index bc45fe0bdfd..cd6967b3668 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1240,7 +1240,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) unsigned last; d = &p.FullToken.FullDeclaration; - last = d->u.DeclarationRange.Last; + last = d->DeclarationRange.Last; switch (d->Declaration.File) { case TGSI_FILE_TEMPORARY: @@ -1546,7 +1546,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) } if (!upload) - return FALSE; + return; up = ptr = MALLOC(p->exec_size * 4); for (e = p->exec_head; e; e = e->next) { diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 665e3cc80d6..743de089d6f 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -121,9 +121,9 @@ nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr, int i; for (i = 0; i < nr; i++) - pipe_texture_reference(&nv50->miptree[i], pt[i]); + pipe_texture_reference((void *)&nv50->miptree[i], pt[i]); for (i = nr; i < nv50->miptree_nr; i++) - pipe_texture_reference(&nv50->miptree[i], NULL); + pipe_texture_reference((void *)&nv50->miptree[i], NULL); nv50->miptree_nr = nr; nv50->dirty |= NV50_NEW_TEXTURE; diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.c b/src/gallium/winsys/dri/nouveau/nouveau_screen.c index b15ee7509cc..df1fe7e69b4 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_screen.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_screen.c @@ -13,7 +13,7 @@ #include "nouveau_screen.h" #include "nouveau_swapbuffers.h" -#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 10 +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 11 #error nouveau_drm.h version does not match expected version #endif -- cgit v1.2.3 From 8fb4d602db48d425380e5508e3fd71cbdc2e7095 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 2 Jul 2008 19:05:18 -0600 Subject: gallium: nr_attrs was off by one, updated comments, minor code movement --- src/gallium/drivers/softpipe/sp_setup.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 543d86a5cb9..e99df9d0181 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1266,12 +1266,14 @@ void setup_prepare( struct setup_context *setup ) sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; } + /* Note: nr_attrs is only used for debugging (vertex printing) */ { const struct sp_fragment_shader *fs = setup->softpipe->fs; - setup->quad.nr_attrs = fs->info.num_inputs; - sp->quad.first->begin(sp->quad.first); + setup->quad.nr_attrs = fs->info.num_inputs + 1; /* +1 for vert pos */ } + sp->quad.first->begin(sp->quad.first); + if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && sp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { -- cgit v1.2.3 From 36488ed052a18f7eafef1d1c5c18b20ad508b2b7 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 3 Jul 2008 20:58:31 +0200 Subject: nv30: Emit framebuffer state using state objects --- src/gallium/drivers/nv30/nv30_context.h | 3 + src/gallium/drivers/nv30/nv30_state.c | 105 +-------------------------- src/gallium/drivers/nv30/nv30_state_emit.c | 111 ++++++++++++++++++++--------- src/gallium/drivers/nv30/nv30_state_fb.c | 35 ++------- src/gallium/drivers/nv30/nv30_vbo.c | 2 + 5 files changed, 92 insertions(+), 164 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index dce077b2b4b..4a6c1d2f47b 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -99,6 +99,7 @@ struct nv30_state { unsigned stipple_enabled; unsigned viewport_bypass; + uint64_t dirty; struct nouveau_stateobj *hw[NV30_STATE_MAX]; }; @@ -208,8 +209,10 @@ extern void nv30_fragprog_destroy(struct nv30_context *, extern void nv30_fragtex_bind(struct nv30_context *); /* nv30_state.c and friends */ +extern boolean nv30_state_validate(struct nv30_context *nv30); extern void nv30_emit_hw_state(struct nv30_context *nv30); extern void nv30_state_tex_update(struct nv30_context *nv30); +extern struct nv30_state_entry nv30_state_framebuffer; /* nv30_vbo.c */ extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index c1618041bb0..3edfb0874da 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -565,110 +565,9 @@ nv30_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct nv30_context *nv30 = nv30_context(pipe); - struct pipe_surface *rt[2], *zeta = NULL; - uint32_t rt_enable, rt_format, w = 0, h = 0; - int i, colour_format = 0, zeta_format = 0; - - rt_enable = 0; - for (i = 0; i < 2; i++) { - if (!fb->cbufs[i]) - continue; - - if (colour_format) { - assert(w == fb->cbufs[i]->width); - assert(h == fb->cbufs[i]->height); - assert(colour_format == fb->cbufs[i]->format); - } else { - w = fb->cbufs[i]->width; - h = fb->cbufs[i]->height; - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); - rt[i] = fb->cbufs[i]; - } - } - - if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | NV34TCL_RT_ENABLE_COLOR2 | - NV34TCL_RT_ENABLE_COLOR3)) - rt_enable |= NV34TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - if (colour_format) { - assert(w == fb->zsbuf->width); - assert(h == fb->zsbuf->height); - } else { - w = fb->zsbuf->width; - h = fb->zsbuf->height; - } - - zeta_format = fb->zsbuf->format; - zeta = fb->zsbuf; - } - - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - - switch (colour_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { - uint32_t pitch = rt[0]->stride; - if (zeta) { - pitch |= (zeta->stride << 16); - } else { - pitch |= (pitch << 16); - } - - BEGIN_RING(rankine, NV34TCL_COLOR0_PITCH, 1); - OUT_RING (pitch); - nv30->rt[0] = rt[0]->buffer; - } - - if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - BEGIN_RING(rankine, NV34TCL_COLOR1_PITCH, 1); - OUT_RING (rt[1]->stride); - nv30->rt[1] = rt[1]->buffer; - } - - if (zeta_format) - { - nv30->zeta = zeta->buffer; - } - nv30->rt_enable = rt_enable; - BEGIN_RING(rankine, NV34TCL_RT_ENABLE, 1); - OUT_RING (rt_enable); - BEGIN_RING(rankine, NV34TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); - OUT_RING (rt_format); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_HORIZ, 2); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0); - OUT_RING (((h - 1) << 16) | 0); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1); - OUT_RING (0); + nv30->framebuffer = *fb; + nv30->dirty |= NV30_NEW_FB; } static void diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index eda2fd45f56..9a3954594a7 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -1,10 +1,55 @@ #include "nv30_context.h" #include "nv30_state.h" +static struct nv30_state_entry *render_states[] = { + &nv30_state_framebuffer, + NULL +}; + +static void +nv30_state_do_validate(struct nv30_context *nv30, + struct nv30_state_entry **states) +{ + const struct pipe_framebuffer_state *fb = &nv30->framebuffer; + unsigned i; + + for (i = 0; i < fb->num_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + + while (*states) { + struct nv30_state_entry *e = *states; + + if (nv30->dirty & e->dirty.pipe) { + if (e->validate(nv30)) { + nv30->state.dirty |= (1ULL << e->dirty.hw); + } + } + + states++; + } + +/* TODO: uncomment when finished converting + nv30->dirty = 0; +*/ +} + void nv30_emit_hw_state(struct nv30_context *nv30) { - int i; + struct nv30_state *state = &nv30->state; + unsigned i; + uint64 states; + + for (i = 0, states = state->dirty; states; i++) { + if (!(states & (1ULL << i))) + continue; + so_ref (state->hw[i], &nv30->screen->state[i]); + if (state->hw[i]) + so_emit(nv30->nvws, nv30->screen->state[i]); + states &= ~(1ULL << i); + } if (nv30->dirty & NV30_NEW_FRAGPROG) { nv30_fragprog_bind(nv30, nv30->fragprog.current); @@ -28,37 +73,7 @@ nv30_emit_hw_state(struct nv30_context *nv30) nv30->dirty_samplers = 0; - /* Emit relocs for every referenced buffer. - * This is to ensure the bufmgr has an accurate idea of how - * the buffer is used. This isn't very efficient, but we don't - * seem to take a significant performance hit. Will be improved - * at some point. Vertex arrays are emitted by nv30_vbo.c - */ - - /* Render targets */ - if (nv30->rt_enable & NV34TCL_RT_ENABLE_COLOR0) { - BEGIN_RING(rankine, NV34TCL_DMA_COLOR0, 1); - OUT_RELOCo(nv30->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(rankine, NV34TCL_COLOR0_OFFSET, 1); - OUT_RELOCl(nv30->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - } - - if (nv30->rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - BEGIN_RING(rankine, NV34TCL_DMA_COLOR1, 1); - OUT_RELOCo(nv30->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(rankine, NV34TCL_COLOR1_OFFSET, 1); - OUT_RELOCl(nv30->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - } - - if (nv30->zeta) { - BEGIN_RING(rankine, NV34TCL_DMA_ZETA, 1); - OUT_RELOCo(nv30->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(rankine, NV34TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv30->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - /* XXX allocate LMA */ -/* BEGIN_RING(rankine, NV34TCL_LMA_DEPTH_OFFSET, 1); - OUT_RING(0);*/ - } + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]); /* Texture images, emitted in nv30_fragtex_build */ #if 0 @@ -83,3 +98,35 @@ nv30_emit_hw_state(struct nv30_context *nv30) NV34TCL_FP_ACTIVE_PROGRAM_DMA1); } +boolean +nv30_state_validate(struct nv30_context *nv30) +{ +#if 0 + boolean was_sw = nv30->fallback_swtnl ? TRUE : FALSE; + + if (nv30->render_mode != HW) { + /* Don't even bother trying to go back to hw if none + * of the states that caused swtnl previously have changed. + */ + if ((nv30->fallback_swtnl & nv30->dirty) + != nv30->fallback_swtnl) + return FALSE; + + /* Attempt to go to hwtnl again */ + nv30->pipe.flush(&nv30->pipe, 0, NULL); + nv30->dirty |= (NV30_NEW_VIEWPORT | + NV30_NEW_VERTPROG | + NV30_NEW_ARRAYS); + nv30->render_mode = HW; + } +#endif + nv30_state_do_validate(nv30, render_states); +#if 0 + if (nv30->fallback_swtnl || nv30->fallback_swrast) + return FALSE; + + if (was_sw) + NOUVEAU_ERR("swtnl->hw\n"); +#endif + return TRUE; +} diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index a20df9f75d1..d93e2bb5c89 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -4,7 +4,7 @@ static boolean nv30_state_framebuffer_validate(struct nv30_context *nv30) { struct pipe_framebuffer_state *fb = &nv30->framebuffer; - struct pipe_surface *rt[4], *zeta = NULL; + struct pipe_surface *rt[2], *zeta = NULL; uint32_t rt_enable, rt_format; int i, colour_format = 0, zeta_format = 0; struct nouveau_stateobj *so = so_new(64, 10); @@ -23,8 +23,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) } } - if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | NV34TCL_RT_ENABLE_COLOR2 | - NV34TCL_RT_ENABLE_COLOR3)) + if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) rt_enable |= NV34TCL_RT_ENABLE_MRT; if (fb->zsbuf) { @@ -86,31 +85,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) NOUVEAU_BO_LOW, 0, 0); so_data (so, rt[1]->stride); } -/* - if (rt_enable & NV34TCL_RT_ENABLE_COLOR2) { - so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR2, 1); - so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv30->nvws->channel->vram->handle, - nv30->nvws->channel->gart->handle); - so_method(so, nv30->screen->rankine, NV34TCL_COLOR2_OFFSET, 1); - so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv30->screen->rankine, NV34TCL_COLOR2_PITCH, 1); - so_data (so, rt[2]->pitch * rt[2]->cpp); - } - if (rt_enable & NV34TCL_RT_ENABLE_COLOR3) { - so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR3, 1); - so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, - nv30->nvws->channel->vram->handle, - nv30->nvws->channel->gart->handle); - so_method(so, nv30->screen->rankine, NV34TCL_COLOR3_OFFSET, 1); - so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | - NOUVEAU_BO_LOW, 0, 0); - so_method(so, nv30->screen->rankine, NV34TCL_COLOR3_PITCH, 1); - so_data (so, rt[3]->pitch * rt[3]->cpp); - } -*/ if (zeta_format) { so_method(so, nv30->screen->rankine, NV34TCL_DMA_ZETA, 1); so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, @@ -119,8 +94,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) so_method(so, nv30->screen->rankine, NV34TCL_ZETA_OFFSET, 1); so_reloc (so, zeta->buffer, zeta->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - /*so_method(so, nv30->screen->rankine, NV34TCL_ZETA_PITCH, 1); - so_data (so, zeta->pitch * zeta->cpp);*/ + /* TODO: allocate LMA depth buffer */ } so_method(so, nv30->screen->rankine, NV34TCL_RT_ENABLE, 1); @@ -137,6 +111,9 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) so_data (so, ((h - 1) << 16) | 0); so_method(so, nv30->screen->rankine, 0x1d88, 1); so_data (so, (1 << 12) | h); + /* Wonder why this is needed, context should all be set to zero on init */ + so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1); + so_data (so, 0); so_ref(so, &nv30->state.hw[NV30_STATE_FB]); return TRUE; diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index 359e443bb1c..d23164eb485 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -154,6 +154,8 @@ nv30_vbo_validate_state(struct nv30_context *nv30, { unsigned inputs; + nv30_state_validate(nv30); + nv30_emit_hw_state(nv30); if (nv30->dirty & NV30_NEW_ARRAYS) { -- cgit v1.2.3 From 52cf7a6c1ccc987859834b640a5ec0a62f84134a Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 3 Jul 2008 21:11:07 +0200 Subject: nv30: Emit blend color state using state objects --- src/gallium/drivers/nv30/nv30_context.h | 1 + src/gallium/drivers/nv30/nv30_state.c | 7 ++----- src/gallium/drivers/nv30/nv30_state_emit.c | 1 + 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 4a6c1d2f47b..57cc991cd7a 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -212,6 +212,7 @@ extern void nv30_fragtex_bind(struct nv30_context *); extern boolean nv30_state_validate(struct nv30_context *nv30); extern void nv30_emit_hw_state(struct nv30_context *nv30); extern void nv30_state_tex_update(struct nv30_context *nv30); +extern struct nv30_state_entry nv30_state_blend_colour; extern struct nv30_state_entry nv30_state_framebuffer; /* nv30_vbo.c */ diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 3edfb0874da..e67a701809a 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -531,11 +531,8 @@ nv30_set_blend_color(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); - BEGIN_RING(rankine, NV34TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0)); + nv30->blend_colour = *bcol; + nv30->dirty |= NV30_NEW_BCOL; } static void diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 9a3954594a7..541ad0da3e8 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -3,6 +3,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_framebuffer, + &nv30_state_blend_colour, NULL }; -- cgit v1.2.3 From c0e9eb3b095c9769d3deacf4ad4470bd155acdcd Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 3 Jul 2008 21:25:47 +0200 Subject: nv30: Emit blend state using state objects --- src/gallium/drivers/nv30/nv30_context.h | 6 +-- src/gallium/drivers/nv30/nv30_state.c | 84 +++++++++++++++-------------- src/gallium/drivers/nv30/nv30_state.h | 14 ----- src/gallium/drivers/nv30/nv30_state_blend.c | 2 +- src/gallium/drivers/nv30/nv30_state_emit.c | 1 + 5 files changed, 48 insertions(+), 59 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 57cc991cd7a..eedebc91b23 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -87,8 +87,7 @@ struct nv30_zsa_state { struct nouveau_stateobj *so; }; -/* TODO: rename when removing the old state emitter */ -struct nv30_blend_state_new { +struct nv30_blend_state { struct pipe_blend_state pipe; struct nouveau_stateobj *so; }; @@ -125,7 +124,7 @@ struct nv30_context { unsigned vp_samplers; /* Context state */ - struct nv30_blend_state_new *blend; + struct nv30_blend_state *blend; struct pipe_blend_color blend_colour; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; @@ -212,6 +211,7 @@ extern void nv30_fragtex_bind(struct nv30_context *); extern boolean nv30_state_validate(struct nv30_context *nv30); extern void nv30_emit_hw_state(struct nv30_context *nv30); extern void nv30_state_tex_update(struct nv30_context *nv30); +extern struct nv30_state_entry nv30_state_blend; extern struct nv30_state_entry nv30_state_blend_colour; extern struct nv30_state_entry nv30_state_framebuffer; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index e67a701809a..cf46dcef936 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -9,63 +9,65 @@ static void * nv30_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) { - struct nv30_blend_state *cb; - - cb = malloc(sizeof(struct nv30_blend_state)); - - cb->b_enable = cso->blend_enable ? 1 : 0; - cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | - (nvgl_blend_func(cso->rgb_src_factor))); - cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | - (nvgl_blend_func(cso->rgb_dst_factor))); - cb->b_eqn = ((nvgl_blend_eqn(cso->alpha_func) << 16) | - (nvgl_blend_eqn(cso->rgb_func))); + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); + struct nouveau_stateobj *so = so_new(16, 0); + + if (cso->blend_enable) { + so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); + so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | + nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rgb_dst_factor)); + so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 | + nvgl_blend_eqn(cso->rgb_func)); + } else { + so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1); + so_data (so, 0); + } - cb->l_enable = cso->logicop_enable ? 1 : 0; - cb->l_op = nvgl_logicop_func(cso->logicop_func); + so_method(so, rankine, NV34TCL_COLOR_MASK, 1); + so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); - cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | - ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | - ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | - ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); + if (cso->logicop_enable) { + so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } - cb->d_enable = cso->dither ? 1 : 0; + so_method(so, rankine, NV34TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 1 : 0); - return (void *)cb; + so_ref(so, &bso->so); + bso->pipe = *cso; + return (void *)bso; } static void nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_blend_state *cb = hwcso; - - if (!hwcso) { - return; - } - - BEGIN_RING(rankine, NV34TCL_DITHER_ENABLE, 1); - OUT_RING (cb->d_enable); - BEGIN_RING(rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); - OUT_RING (cb->b_enable); - OUT_RING (cb->b_srcfunc); - OUT_RING (cb->b_dstfunc); - BEGIN_RING(rankine, NV34TCL_BLEND_EQUATION, 1); - OUT_RING (cb->b_eqn); - - BEGIN_RING(rankine, NV34TCL_COLOR_MASK, 1); - OUT_RING (cb->c_mask); - - BEGIN_RING(rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); - OUT_RING (cb->l_enable); - OUT_RING (cb->l_op); + nv30->blend = hwcso; + nv30->dirty |= NV30_NEW_BLEND; } static void nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso) { - free(hwcso); + struct nv30_blend_state *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); } diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h index 117520dd13c..8489b7b7969 100644 --- a/src/gallium/drivers/nv30/nv30_state.h +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -3,20 +3,6 @@ #include "pipe/p_state.h" -struct nv30_blend_state { - uint32_t b_enable; - uint32_t b_srcfunc; - uint32_t b_dstfunc; - uint32_t b_eqn; - - uint32_t l_enable; - uint32_t l_op; - - uint32_t c_mask; - - uint32_t d_enable; -}; - struct nv30_sampler_state { uint32_t fmt; uint32_t wrap; diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c index a1b0100472d..44d43e132a5 100644 --- a/src/gallium/drivers/nv30/nv30_state_blend.c +++ b/src/gallium/drivers/nv30/nv30_state_blend.c @@ -7,7 +7,7 @@ nv30_state_blend_validate(struct nv30_context *nv30) return TRUE; } -struct nv30_state_entry nv30_state_blend_new = { +struct nv30_state_entry nv30_state_blend = { .validate = nv30_state_blend_validate, .dirty = { .pipe = NV30_NEW_BLEND, diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 541ad0da3e8..492f4110117 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -3,6 +3,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_framebuffer, + &nv30_state_blend, &nv30_state_blend_colour, NULL }; -- cgit v1.2.3 From 360f7a3e239553fc0e1aff3b38c06c2e3d0a698c Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 3 Jul 2008 21:48:18 +0200 Subject: nv30: Emit rasterizer state using state objects --- src/gallium/drivers/nv30/nv30_context.h | 6 +- src/gallium/drivers/nv30/nv30_state.c | 177 +++++++++++++++-------------- src/gallium/drivers/nv30/nv30_state.h | 23 ---- src/gallium/drivers/nv30/nv30_state_emit.c | 1 + 4 files changed, 98 insertions(+), 109 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index eedebc91b23..692d8dd5230 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -76,8 +76,7 @@ enum nv30_state_index { #define NV30_NEW_ARRAYS (1 << 11) #define NV30_NEW_UCP (1 << 12) -/* TODO: rename when removing the old state emitter */ -struct nv30_rasterizer_state_new { +struct nv30_rasterizer_state { struct pipe_rasterizer_state pipe; struct nouveau_stateobj *so; }; @@ -124,11 +123,11 @@ struct nv30_context { unsigned vp_samplers; /* Context state */ + struct nv30_rasterizer_state *rasterizer; struct nv30_blend_state *blend; struct pipe_blend_color blend_colour; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; - struct nv30_rasterizer_state_new *rasterizer; struct nv30_zsa_state *zsa; unsigned stipple[32]; @@ -211,6 +210,7 @@ extern void nv30_fragtex_bind(struct nv30_context *); extern boolean nv30_state_validate(struct nv30_context *nv30); extern void nv30_emit_hw_state(struct nv30_context *nv30); extern void nv30_state_tex_update(struct nv30_context *nv30); +extern struct nv30_state_entry nv30_state_rasterizer; extern struct nv30_state_entry nv30_state_blend; extern struct nv30_state_entry nv30_state_blend_colour; extern struct nv30_state_entry nv30_state_framebuffer; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index cf46dcef936..8c7a902e28f 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -275,123 +275,134 @@ static void * nv30_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { - struct nv30_rasterizer_state *rs; - int i; + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *rankine = nv30->screen->rankine; /*XXX: ignored: * light_twoside - * offset_cw/ccw -nohw - * scissor * point_smooth -nohw * multisample - * offset_units / offset_scale */ - rs = malloc(sizeof(struct nv30_rasterizer_state)); - - rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01; - rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; - rs->line_smooth_en = cso->line_smooth ? 1 : 0; - rs->line_stipple_en = cso->line_stipple_enable ? 1 : 0; - rs->line_stipple = (cso->line_stipple_pattern << 16) | - cso->line_stipple_factor; + so_method(so, rankine, NV34TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : + NV34TCL_SHADE_MODEL_SMOOTH); - rs->point_size = *(uint32_t*)&cso->point_size; + so_method(so, rankine, NV34TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); - rs->poly_smooth_en = cso->poly_smooth ? 1 : 0; - rs->poly_stipple_en = cso->poly_stipple_enable ? 1 : 0; + so_method(so, rankine, NV34TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + so_method(so, rankine, NV34TCL_POLYGON_MODE_FRONT, 6); if (cso->front_winding == PIPE_WINDING_CCW) { - rs->front_face = NV34TCL_FRONT_FACE_CCW; - rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); - rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CCW); } else { - rs->front_face = NV34TCL_FRONT_FACE_CW; - rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); - rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CW); } - - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - rs->cull_face_en = 1; - if (cso->front_winding == PIPE_WINDING_CCW) - rs->cull_face = NV34TCL_CULL_FACE_FRONT; - else - rs->cull_face = NV34TCL_CULL_FACE_BACK; - break; - case PIPE_WINDING_CW: - rs->cull_face_en = 1; - if (cso->front_winding == PIPE_WINDING_CW) - rs->cull_face = NV34TCL_CULL_FACE_FRONT; - else - rs->cull_face = NV34TCL_CULL_FACE_BACK; - break; - case PIPE_WINDING_BOTH: - rs->cull_face_en = 1; - rs->cull_face = NV34TCL_CULL_FACE_FRONT_AND_BACK; - break; - case PIPE_WINDING_NONE: - default: - rs->cull_face_en = 0; - rs->cull_face = 0; - break; + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, rankine, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, rankine, NV34TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); } + so_method(so, rankine, NV34TCL_POINT_SPRITE, 1); if (cso->point_sprite) { - rs->point_sprite = (1 << 0); + unsigned psctl = (1 << 0), i; + for (i = 0; i < 8; i++) { if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) - rs->point_sprite |= (1 << (8 + i)); + psctl |= (1 << (8 + i)); } + + so_data(so, psctl); } else { - rs->point_sprite = 0; + so_data(so, 0); } - return (void *)rs; + so_ref(so, &rsso->so); + rsso->pipe = *cso; + return (void *)rsso; } static void nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_rasterizer_state *rs = hwcso; - if (!hwcso) { - return; - } - - BEGIN_RING(rankine, NV34TCL_SHADE_MODEL, 1); - OUT_RING (rs->shade_model); - - BEGIN_RING(rankine, NV34TCL_LINE_WIDTH, 2); - OUT_RING (rs->line_width); - OUT_RING (rs->line_smooth_en); - BEGIN_RING(rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2); - OUT_RING (rs->line_stipple_en); - OUT_RING (rs->line_stipple); - - BEGIN_RING(rankine, NV34TCL_POINT_SIZE, 1); - OUT_RING (rs->point_size); - - BEGIN_RING(rankine, NV34TCL_POLYGON_MODE_FRONT, 6); - OUT_RING (rs->poly_mode_front); - OUT_RING (rs->poly_mode_back); - OUT_RING (rs->cull_face); - OUT_RING (rs->front_face); - OUT_RING (rs->poly_smooth_en); - OUT_RING (rs->cull_face_en); - - BEGIN_RING(rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - OUT_RING (rs->poly_stipple_en); - - BEGIN_RING(rankine, NV34TCL_POINT_SPRITE, 1); - OUT_RING (rs->point_sprite); + nv30->rasterizer = hwcso; + nv30->dirty |= NV30_NEW_RAST; + /*nv30->draw_dirty |= NV30_NEW_RAST;*/ } static void nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) { - free(hwcso); + struct nv30_rasterizer_state *rsso = hwcso; + + so_ref(NULL, &rsso->so); + FREE(rsso); } static void diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h index 8489b7b7969..b33075aceb6 100644 --- a/src/gallium/drivers/nv30/nv30_state.h +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -11,29 +11,6 @@ struct nv30_sampler_state { uint32_t bcol; }; -struct nv30_rasterizer_state { - uint32_t shade_model; - - uint32_t line_width; - uint32_t line_smooth_en; - uint32_t line_stipple_en; - uint32_t line_stipple; - - uint32_t point_size; - - uint32_t poly_smooth_en; - uint32_t poly_stipple_en; - - uint32_t poly_mode_front; - uint32_t poly_mode_back; - - uint32_t front_face; - uint32_t cull_face; - uint32_t cull_face_en; - - uint32_t point_sprite; -}; - struct nv30_vertex_program_exec { uint32_t data[4]; boolean has_branch_offset; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 492f4110117..7d77adbfdf0 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -3,6 +3,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_framebuffer, + &nv30_state_rasterizer, &nv30_state_blend, &nv30_state_blend_colour, NULL -- cgit v1.2.3 From c66f376e271427799f777c39bc9221df7c961f77 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 3 Jul 2008 22:10:53 +0200 Subject: nv30: Emit depth/stencil/alpha state using state objects --- src/gallium/drivers/nv30/nv30_context.h | 1 + src/gallium/drivers/nv30/nv30_state.c | 91 +++++++++++++++++------------- src/gallium/drivers/nv30/nv30_state.h | 30 ---------- src/gallium/drivers/nv30/nv30_state_emit.c | 1 + 4 files changed, 53 insertions(+), 70 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 692d8dd5230..b8a26712a8b 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -213,6 +213,7 @@ extern void nv30_state_tex_update(struct nv30_context *nv30); extern struct nv30_state_entry nv30_state_rasterizer; extern struct nv30_state_entry nv30_state_blend; extern struct nv30_state_entry nv30_state_blend_colour; +extern struct nv30_state_entry nv30_state_zsa; extern struct nv30_state_entry nv30_state_framebuffer; /* nv30_vbo.c */ diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 8c7a902e28f..8121e62fb92 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -405,65 +405,76 @@ nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) FREE(rsso); } -static void -nv30_translate_stencil(const struct pipe_depth_stencil_alpha_state *cso, - unsigned idx, struct nv30_stencil_push *hw) -{ - hw->enable = cso->stencil[idx].enabled ? 1 : 0; - hw->wmask = cso->stencil[idx].write_mask; - hw->func = nvgl_comparison_op(cso->stencil[idx].func); - hw->ref = cso->stencil[idx].ref_value; - hw->vmask = cso->stencil[idx].value_mask; - hw->fail = nvgl_stencil_op(cso->stencil[idx].fail_op); - hw->zfail = nvgl_stencil_op(cso->stencil[idx].zfail_op); - hw->zpass = nvgl_stencil_op(cso->stencil[idx].zpass_op); -} - static void * nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *cso) { - struct nv30_depth_stencil_alpha_state *hw; - - hw = malloc(sizeof(struct nv30_depth_stencil_alpha_state)); - - hw->depth.func = nvgl_comparison_op(cso->depth.func); - hw->depth.write_enable = cso->depth.writemask ? 1 : 0; - hw->depth.test_enable = cso->depth.enabled ? 1 : 0; + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *rankine = nv30->screen->rankine; - nv30_translate_stencil(cso, 0, &hw->stencil.front); - nv30_translate_stencil(cso, 1, &hw->stencil.back); + so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); + so_data (so, nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 1 : 0); + + so_method(so, rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref)); + + if (cso->stencil[0].enabled) { + so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 8); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].write_mask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].value_mask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } - hw->alpha.enabled = cso->alpha.enabled ? 1 : 0; - hw->alpha.func = nvgl_comparison_op(cso->alpha.func); - hw->alpha.ref = float_to_ubyte(cso->alpha.ref); + if (cso->stencil[1].enabled) { + so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 8); + so_data (so, cso->stencil[1].enabled ? 1 : 0); + so_data (so, cso->stencil[1].write_mask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].value_mask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } - return (void *)hw; + so_ref(so, &zsaso->so); + zsaso->pipe = *cso; + return (void *)zsaso; } static void nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_depth_stencil_alpha_state *hw = hwcso; - - if (!hwcso) { - return; - } - BEGIN_RING(rankine, NV34TCL_DEPTH_FUNC, 3); - OUT_RINGp ((uint32_t *)&hw->depth, 3); - BEGIN_RING(rankine, NV34TCL_STENCIL_BACK_ENABLE, 16); - OUT_RINGp ((uint32_t *)&hw->stencil.back, 8); - OUT_RINGp ((uint32_t *)&hw->stencil.front, 8); - BEGIN_RING(rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3); - OUT_RINGp ((uint32_t *)&hw->alpha.enabled, 3); + nv30->zsa = hwcso; + nv30->dirty |= NV30_NEW_ZSA; } static void nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) { - free(hwcso); + struct nv30_zsa_state *zsaso = hwcso; + + so_ref(NULL, &zsaso->so); + FREE(zsaso); } static void * diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h index b33075aceb6..c65a937467e 100644 --- a/src/gallium/drivers/nv30/nv30_state.h +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -65,36 +65,6 @@ struct nv30_fragment_program { uint32_t fp_reg_control; }; -struct nv30_stencil_push { - uint32_t enable; - uint32_t wmask; - uint32_t func; - uint32_t ref; - uint32_t vmask; - uint32_t fail; - uint32_t zfail; - uint32_t zpass; -}; - -struct nv30_depth_stencil_alpha_state { - struct { - uint32_t func; - uint32_t write_enable; - uint32_t test_enable; - } depth; - - struct { - struct nv30_stencil_push back; - struct nv30_stencil_push front; - } stencil; - - struct { - uint32_t enabled; - uint32_t func; - uint32_t ref; - } alpha; -}; - struct nv30_miptree { struct pipe_texture base; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 7d77adbfdf0..83bb3408a5a 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -6,6 +6,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_rasterizer, &nv30_state_blend, &nv30_state_blend_colour, + &nv30_state_zsa, NULL }; -- cgit v1.2.3 From f1d24c1d27255e4ff5ba451e6d58558f9ccdc801 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 3 Jul 2008 22:18:38 +0200 Subject: nv30: Emit scissor state using state objects --- src/gallium/drivers/nv30/nv30_context.h | 1 + src/gallium/drivers/nv30/nv30_state.c | 5 ++--- src/gallium/drivers/nv30/nv30_state_emit.c | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index b8a26712a8b..2a48903e32a 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -211,6 +211,7 @@ extern boolean nv30_state_validate(struct nv30_context *nv30); extern void nv30_emit_hw_state(struct nv30_context *nv30); extern void nv30_state_tex_update(struct nv30_context *nv30); extern struct nv30_state_entry nv30_state_rasterizer; +extern struct nv30_state_entry nv30_state_scissor; extern struct nv30_state_entry nv30_state_blend; extern struct nv30_state_entry nv30_state_blend_colour; extern struct nv30_state_entry nv30_state_zsa; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 8121e62fb92..f2413a3372c 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -607,9 +607,8 @@ nv30_set_scissor_state(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); - BEGIN_RING(rankine, NV34TCL_SCISSOR_HORIZ, 2); - OUT_RING (((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (((s->maxy - s->miny) << 16) | s->miny); + nv30->scissor = *s; + nv30->dirty |= NV30_NEW_SCISSOR; } static void diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 83bb3408a5a..f45a340b5d6 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -4,6 +4,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_framebuffer, &nv30_state_rasterizer, + &nv30_state_scissor, &nv30_state_blend, &nv30_state_blend_colour, &nv30_state_zsa, -- cgit v1.2.3 From e7e231a5116aed9f1ca685a297032a3e3e6a2433 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 3 Jul 2008 22:31:23 +0200 Subject: nv30: Emit polygon stipple state using state objects --- src/gallium/drivers/nv30/nv30_context.h | 10 +++++----- src/gallium/drivers/nv30/nv30_state.c | 4 ++-- src/gallium/drivers/nv30/nv30_state_emit.c | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 2a48903e32a..ed7b0759ad4 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -112,9 +112,6 @@ struct nv30_context { /* HW state derived from pipe states */ struct nv30_state state; - struct pipe_scissor_state scissor; - - uint32_t dirty; struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; @@ -123,13 +120,15 @@ struct nv30_context { unsigned vp_samplers; /* Context state */ + unsigned dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; struct nv30_rasterizer_state *rasterizer; + struct nv30_zsa_state *zsa; struct nv30_blend_state *blend; struct pipe_blend_color blend_colour; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; - struct nv30_zsa_state *zsa; - unsigned stipple[32]; uint32_t rt_enable; struct pipe_buffer *rt[2]; @@ -212,6 +211,7 @@ extern void nv30_emit_hw_state(struct nv30_context *nv30); extern void nv30_state_tex_update(struct nv30_context *nv30); extern struct nv30_state_entry nv30_state_rasterizer; extern struct nv30_state_entry nv30_state_scissor; +extern struct nv30_state_entry nv30_state_stipple; extern struct nv30_state_entry nv30_state_blend; extern struct nv30_state_entry nv30_state_blend_colour; extern struct nv30_state_entry nv30_state_zsa; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index f2413a3372c..7109deae552 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -597,8 +597,8 @@ nv30_set_polygon_stipple(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); - BEGIN_RING(rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); - OUT_RINGp ((uint32_t *)stipple->stipple, 32); + memcpy(nv30->stipple, stipple->stipple, 4 * 32); + nv30->dirty |= NV30_NEW_STIPPLE; } static void diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index f45a340b5d6..4f9079184d4 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -5,6 +5,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_framebuffer, &nv30_state_rasterizer, &nv30_state_scissor, + &nv30_state_stipple, &nv30_state_blend, &nv30_state_blend_colour, &nv30_state_zsa, -- cgit v1.2.3 From 568b477b9c118e5ace831b8ecf1811da1c0961cd Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 3 Jul 2008 22:42:30 +0200 Subject: nv30: Emit viewport state using state objects --- src/gallium/drivers/nv30/nv30_context.h | 1 + src/gallium/drivers/nv30/nv30_state.c | 12 +++--------- src/gallium/drivers/nv30/nv30_state_emit.c | 1 + 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index ed7b0759ad4..0ad1dc6f912 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -215,6 +215,7 @@ extern struct nv30_state_entry nv30_state_stipple; extern struct nv30_state_entry nv30_state_blend; extern struct nv30_state_entry nv30_state_blend_colour; extern struct nv30_state_entry nv30_state_zsa; +extern struct nv30_state_entry nv30_state_viewport; extern struct nv30_state_entry nv30_state_framebuffer; /* nv30_vbo.c */ diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 7109deae552..e65c4b215d4 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -617,15 +617,9 @@ nv30_set_viewport_state(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); - BEGIN_RING(rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); - OUT_RINGf (vpt->translate[0]); - OUT_RINGf (vpt->translate[1]); - OUT_RINGf (vpt->translate[2]); - OUT_RINGf (vpt->translate[3]); - OUT_RINGf (vpt->scale[0]); - OUT_RINGf (vpt->scale[1]); - OUT_RINGf (vpt->scale[2]); - OUT_RINGf (vpt->scale[3]); + nv30->viewport = *vpt; + nv30->dirty |= NV30_NEW_VIEWPORT; + /*nv30->draw_dirty |= NV30_NEW_VIEWPORT;*/ } static void diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 4f9079184d4..cb7f1a7c310 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -9,6 +9,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_blend, &nv30_state_blend_colour, &nv30_state_zsa, + &nv30_state_viewport, NULL }; -- cgit v1.2.3 From 6f56b527d866506a323feb19f9d8529d40034af2 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 3 Jul 2008 22:47:15 +0200 Subject: nv30: Reemit state when changing context --- src/gallium/drivers/nv30/nv30_state_emit.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index cb7f1a7c310..eca1f0652cc 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -46,9 +46,19 @@ void nv30_emit_hw_state(struct nv30_context *nv30) { struct nv30_state *state = &nv30->state; + struct nv30_screen *screen = nv30->screen; unsigned i; uint64 states; + if (nv30->pctx_id != screen->cur_pctx) { + for (i = 0; i < NV30_STATE_MAX; i++) { + if (state->hw[i] && screen->state[i] != state->hw[i]) + state->dirty |= (1ULL << i); + } + + screen->cur_pctx = nv30->pctx_id; + } + for (i = 0, states = state->dirty; states; i++) { if (!(states & (1ULL << i))) continue; -- cgit v1.2.3 From 1942e29bf7e4df34164ed815ccd77b08cd28ed56 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 5 Jul 2008 00:55:18 +0900 Subject: softpipe: Implement texture blankets. --- src/gallium/drivers/softpipe/sp_texture.c | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 4db045cdc3d..20ad336b4fa 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -162,6 +162,39 @@ softpipe_texture_create(struct pipe_screen *screen, } +static struct pipe_texture * +softpipe_texture_blanket(struct pipe_screen * screen, + const struct pipe_texture *base, + const unsigned *stride, + struct pipe_buffer *buffer) +{ + struct softpipe_texture *spt; + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth[0] != 1) { + return NULL; + } + + spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *base; + spt->base.refcount = 1; + spt->base.screen = screen; + spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); + spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); + spt->stride[0] = stride[0]; + + pipe_buffer_reference(screen->winsys, &spt->buffer, buffer); + + return &spt->base; +} + + static void softpipe_texture_release(struct pipe_screen *screen, struct pipe_texture **pt) @@ -309,6 +342,7 @@ void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = softpipe_texture_create; + screen->texture_blanket = softpipe_texture_blanket; screen->texture_release = softpipe_texture_release; screen->get_tex_surface = softpipe_get_tex_surface; -- cgit v1.2.3 From 4a18324c0bca4ae806b1c62cad44e859f0924a9b Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 5 Jul 2008 00:56:36 +0900 Subject: psb: Fill all texture fields when creating texture blanket. --- src/gallium/drivers/i915simple/i915_texture.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 2815e61345c..5d6769924c3 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -710,6 +710,8 @@ i915_texture_blanket(struct pipe_screen * screen, return NULL; tex->base = *base; + tex->base.refcount = 1; + tex->base.screen = screen; tex->stride = stride[0]; -- cgit v1.2.3 From f99643ca6ea3aa05a0b16dc5d99e11fa00185684 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 4 Jul 2008 18:53:44 +0200 Subject: nv30: Emit sampler state using state objects --- src/gallium/drivers/nv30/nv30_context.h | 3 +++ src/gallium/drivers/nv30/nv30_state.c | 42 +++++++++++++++++++++++++----- src/gallium/drivers/nv30/nv30_state_emit.c | 9 ++++++- 3 files changed, 46 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 0ad1dc6f912..5404685da25 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -96,6 +96,7 @@ struct nv30_state { unsigned scissor_enabled; unsigned stipple_enabled; unsigned viewport_bypass; + unsigned fp_samplers; uint64_t dirty; struct nouveau_stateobj *hw[NV30_STATE_MAX]; @@ -129,6 +130,8 @@ struct nv30_context { struct pipe_blend_color blend_colour; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; + unsigned nr_samplers; + unsigned nr_textures; uint32_t rt_enable; struct pipe_buffer *rt[2]; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index e65c4b215d4..97becd2be37 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -1,6 +1,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "nv30_context.h" #include "nv30_state.h" @@ -116,7 +117,7 @@ nv30_sampler_state_create(struct pipe_context *pipe, struct nv30_sampler_state *ps; uint32_t filter = 0; - ps = malloc(sizeof(struct nv30_sampler_state)); + ps = MALLOC(sizeof(struct nv30_sampler_state)); ps->fmt = 0; if (!cso->normalized_coords) @@ -197,6 +198,19 @@ nv30_sampler_state_create(struct pipe_context *pipe, ps->filt = filter; + /*{ + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 7; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 19; + }*/ + /* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { switch (cso->compare_func) { case PIPE_FUNC_NEVER: @@ -242,20 +256,24 @@ nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) struct nv30_context *nv30 = nv30_context(pipe); unsigned unit; - if (!sampler) { - return; - } - for (unit = 0; unit < nr; unit++) { nv30->tex_sampler[unit] = sampler[unit]; nv30->dirty_samplers |= (1 << unit); } + + for (unit = nr; unit < nv30->nr_samplers; unit++) { + nv30->tex_sampler[unit] = NULL; + nv30->dirty_samplers |= (1 << unit); + } + + nv30->nr_samplers = nr; + nv30->dirty |= NV30_NEW_SAMPLER; } static void nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { - free(hwcso); + FREE(hwcso); } static void @@ -266,9 +284,19 @@ nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr, unsigned unit; for (unit = 0; unit < nr; unit++) { - nv30->tex_miptree[unit] = (struct nv30_miptree *)miptree[unit]; + pipe_texture_reference((struct pipe_texture **) + &nv30->tex_miptree[unit], miptree[unit]); nv30->dirty_samplers |= (1 << unit); } + + for (unit = nr; unit < nv30->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv30->tex_miptree[unit], NULL); + nv30->dirty_samplers |= (1 << unit); + } + + nv30->nr_textures = nr; + nv30->dirty |= NV30_NEW_SAMPLER; } static void * diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index eca1f0652cc..0ea78571972 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -47,7 +47,7 @@ nv30_emit_hw_state(struct nv30_context *nv30) { struct nv30_state *state = &nv30->state; struct nv30_screen *screen = nv30->screen; - unsigned i; + unsigned i, samplers; uint64 states; if (nv30->pctx_id != screen->cur_pctx) { @@ -91,6 +91,13 @@ nv30_emit_hw_state(struct nv30_context *nv30) nv30->dirty_samplers = 0; so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; + so_emit_reloc_markers(nv30->nvws, + state->hw[NV30_STATE_FRAGTEX0+i]); + samplers &= ~(1ULL << i); + } /* Texture images, emitted in nv30_fragtex_build */ #if 0 -- cgit v1.2.3 From e6c24539c3d482271a8c1bcba697b08e24344e05 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Fri, 4 Jul 2008 19:23:52 +0200 Subject: i915: Added debug filling code of texture, not active --- src/gallium/drivers/i915simple/i915_texture.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 5d6769924c3..cf4964b26b3 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -581,6 +581,7 @@ i915_texture_create(struct pipe_screen *screen, struct i915_screen *i915screen = i915_screen(screen); struct pipe_winsys *ws = screen->winsys; struct i915_texture *tex = CALLOC_STRUCT(i915_texture); + size_t tex_size; if (!tex) return NULL; @@ -600,14 +601,22 @@ i915_texture_create(struct pipe_screen *screen, goto fail; } + tex_size = tex->stride * tex->total_nblocksy; + tex->buffer = ws->buffer_create(ws, 64, PIPE_BUFFER_USAGE_PIXEL, - tex->stride * - tex->total_nblocksy); + tex_size); if (!tex->buffer) goto fail; +#if 0 + void *ptr = ws->buffer_map(ws, tex->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); + memset(ptr, 0x80, tex_size); + ws->buffer_unmap(ws, tex->buffer); +#endif + return &tex->base; fail: -- cgit v1.2.3 From c23b64f1646ac5349c395ede47707906ddff7b4c Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 5 Jul 2008 03:19:56 +0900 Subject: softpipe: Compute block size for display targets. --- src/gallium/drivers/softpipe/sp_texture.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 20ad336b4fa..f775591352b 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -122,8 +122,10 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, /* Now extract the goodies: */ - spt->buffer = surf.buffer; + spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); + spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); spt->stride[0] = surf.stride; + spt->buffer = surf.buffer; return spt->buffer != NULL; } -- cgit v1.2.3 From 152ed98b84acf500f4b5122cf3fde82c6e5206f2 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 5 Jul 2008 03:21:27 +0900 Subject: softpipe: Prevent NULL ptr derreference on takedown. --- src/gallium/drivers/softpipe/sp_state_fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 24b91fbc794..901c8f83e75 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -154,8 +154,8 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, /* note: reference counting */ pipe_buffer_reference(ws, &softpipe->constants[shader].buffer, - buf->buffer); - softpipe->constants[shader].size = buf->size; + buf ? buf->buffer : NULL); + softpipe->constants[shader].size = buf ? buf->size : 0; softpipe->dirty |= SP_NEW_CONSTANTS; } -- cgit v1.2.3 From 52a68dd9eb1d347aa01ce09db9375793d0d0ceaf Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Tue, 8 Jul 2008 01:06:18 +0200 Subject: nv04. --- configs/default | 2 +- src/gallium/drivers/nouveau/nouveau_winsys.h | 6 + src/gallium/drivers/nv04/Makefile | 28 ++ src/gallium/drivers/nv04/nv04_clear.c | 12 + src/gallium/drivers/nv04/nv04_context.c | 107 ++++++ src/gallium/drivers/nv04/nv04_context.h | 142 ++++++++ src/gallium/drivers/nv04/nv04_fragprog.c | 22 ++ src/gallium/drivers/nv04/nv04_fragtex.c | 100 ++++++ src/gallium/drivers/nv04/nv04_miptree.c | 138 ++++++++ src/gallium/drivers/nv04/nv04_prim_vbuf.c | 312 +++++++++++++++++ src/gallium/drivers/nv04/nv04_screen.c | 216 ++++++++++++ src/gallium/drivers/nv04/nv04_screen.h | 25 ++ src/gallium/drivers/nv04/nv04_state.c | 494 +++++++++++++++++++++++++++ src/gallium/drivers/nv04/nv04_state.h | 71 ++++ src/gallium/drivers/nv04/nv04_state_emit.c | 156 +++++++++ src/gallium/drivers/nv04/nv04_surface.c | 65 ++++ src/gallium/drivers/nv04/nv04_vbo.c | 72 ++++ 17 files changed, 1967 insertions(+), 1 deletion(-) create mode 100644 src/gallium/drivers/nv04/Makefile create mode 100644 src/gallium/drivers/nv04/nv04_clear.c create mode 100644 src/gallium/drivers/nv04/nv04_context.c create mode 100644 src/gallium/drivers/nv04/nv04_context.h create mode 100644 src/gallium/drivers/nv04/nv04_fragprog.c create mode 100644 src/gallium/drivers/nv04/nv04_fragtex.c create mode 100644 src/gallium/drivers/nv04/nv04_miptree.c create mode 100644 src/gallium/drivers/nv04/nv04_prim_vbuf.c create mode 100644 src/gallium/drivers/nv04/nv04_screen.c create mode 100644 src/gallium/drivers/nv04/nv04_screen.h create mode 100644 src/gallium/drivers/nv04/nv04_state.c create mode 100644 src/gallium/drivers/nv04/nv04_state.h create mode 100644 src/gallium/drivers/nv04/nv04_state_emit.c create mode 100644 src/gallium/drivers/nv04/nv04_surface.c create mode 100644 src/gallium/drivers/nv04/nv04_vbo.c (limited to 'src/gallium/drivers') diff --git a/configs/default b/configs/default index a8d1fe6bec2..c1bc5fb01d0 100644 --- a/configs/default +++ b/configs/default @@ -70,7 +70,7 @@ PROGRAM_DIRS = demos redbook samples glsl xdemos # Gallium directories and GALLIUM_AUXILIARY_DIRS = draw cso_cache pipebuffer tgsi sct translate rtasm util GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) -GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple nv10 nv30 nv40 nv50 failover +GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple nv04 nv10 nv30 nv40 nv50 failover GALLIUM_WINSYS_COMMON_DIRS = GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVER_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) GALLIUM_WINSYS_DIRS = xlib egl_xlib diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 13810460bfe..48feeba309d 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -57,6 +57,12 @@ struct nouveau_winsys { unsigned, unsigned, unsigned, unsigned, unsigned); }; +extern struct pipe_screen * +nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv04_create(struct pipe_screen *, unsigned pctx_id); + extern struct pipe_screen * nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); diff --git a/src/gallium/drivers/nv04/Makefile b/src/gallium/drivers/nv04/Makefile new file mode 100644 index 00000000000..5ea51a2f420 --- /dev/null +++ b/src/gallium/drivers/nv04/Makefile @@ -0,0 +1,28 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv04 + +DRIVER_SOURCES = \ + nv04_clear.c \ + nv04_context.c \ + nv04_fragprog.c \ + nv04_fragtex.c \ + nv04_miptree.c \ + nv04_prim_vbuf.c \ + nv04_screen.c \ + nv04_state.c \ + nv04_state_emit.c \ + nv04_surface.c \ + nv04_vbo.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv04/nv04_clear.c b/src/gallium/drivers/nv04/nv04_clear.c new file mode 100644 index 00000000000..01cacd36fe1 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv04_context.h" + +void +nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c new file mode 100644 index 00000000000..852a8edf5ff --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -0,0 +1,107 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static void +nv04_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_flush(nv04->draw); + + FIRE_RING(fence); +} + +static void +nv04_destroy(struct pipe_context *pipe) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + if (nv04->draw) + draw_destroy(nv04->draw); + + FREE(nv04); +} + +static void +nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +static boolean +nv04_init_hwctx(struct nv04_context *nv04) +{ + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1); + OUT_RING(0); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1); + OUT_RING(0); + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(0x40182800); +// OUT_RING(1<<20/*no cull*/); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); +// OUT_RING(0x24|(1<<6)|(1<<8)); + OUT_RING(0x120001a4); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1); + OUT_RING(0x332213a1); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1); + OUT_RING(0x11001010); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1); + OUT_RING(0x0); +// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1); +// OUT_RING(SCREEN_OFFSET); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1); + OUT_RING(0xff000000); + + + + FIRE_RING (NULL); + return TRUE; +} + +struct pipe_context * +nv04_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv04_screen *screen = nv04_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv04_context *nv04; + struct nouveau_winsys *nvws = screen->nvws; + + nv04 = CALLOC(1, sizeof(struct nv04_context)); + if (!nv04) + return NULL; + nv04->screen = screen; + nv04->pctx_id = pctx_id; + + nv04->nvws = nvws; + + nv04->pipe.winsys = ws; + nv04->pipe.screen = pscreen; + nv04->pipe.destroy = nv04_destroy; + nv04->pipe.set_edgeflags = nv04_set_edgeflags; + nv04->pipe.draw_arrays = nv04_draw_arrays; + nv04->pipe.draw_elements = nv04_draw_elements; + nv04->pipe.clear = nv04_clear; + nv04->pipe.flush = nv04_flush; + + nv04_init_surface_functions(nv04); + nv04_init_state_functions(nv04); + + nv04->draw = draw_create(); + assert(nv04->draw); + draw_wide_point_threshold(nv04->draw, 0.0); + draw_wide_line_threshold(nv04->draw, 0.0); + draw_enable_line_stipple(nv04->draw, FALSE); + draw_enable_point_sprites(nv04->draw, FALSE); + draw_set_rasterize_stage(nv04->draw, nv04_draw_vbuf_stage(nv04)); + + nv04_init_hwctx(nv04); + + return &nv04->pipe; +} + diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h new file mode 100644 index 00000000000..5ba1d4ecdc1 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -0,0 +1,142 @@ +#ifndef __NV04_CONTEXT_H__ +#define __NV04_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv04_screen *ctx = nv04->screen +#include "nouveau/nouveau_push.h" + +#include "nv04_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +#include "nv04_screen.h" + +#define NV04_NEW_VERTPROG (1 << 1) +#define NV04_NEW_FRAGPROG (1 << 2) +#define NV04_NEW_BLEND (1 << 3) +#define NV04_NEW_RAST (1 << 4) +#define NV04_NEW_CONTROL (1 << 5) +#define NV04_NEW_VIEWPORT (1 << 6) +#define NV04_NEW_SAMPLER (1 << 7) + +struct nv04_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv04_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + int chipset; + struct nouveau_notifier *sync; + + uint32_t dirty; + + struct nv04_blend_state *blend; + struct nv04_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct nv04_fragtex_state fragtex; + struct nv04_rasterizer_state *rast; + struct nv04_depth_stencil_alpha_state *dsa; + + struct nv04_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned dirty_samplers; + unsigned fp_samplers; + unsigned vp_samplers; + + uint32_t rt_enable; + struct pipe_buffer *rt[4]; + struct pipe_buffer *zeta; + + struct { + struct pipe_buffer *buffer; + uint32_t format; + } tex[16]; + + unsigned vb_enable; + struct { + struct pipe_buffer *buffer; + unsigned delta; + } vb[16]; + + struct vertex_info vertex_info; + struct { + + struct nouveau_resource *exec_heap; + struct nouveau_resource *data_heap; + + struct nv04_vertex_program *active; + + struct nv04_vertex_program *current; + struct pipe_buffer *constant_buf; + } vertprog; + + struct { + struct nv04_fragment_program *active; + + struct nv04_fragment_program *current; + struct pipe_buffer *constant_buf; + } fragprog; + + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned num_vertex_buffers; + unsigned num_vertex_elements; + + struct pipe_viewport_state viewport; +}; + +static INLINE struct nv04_context * +nv04_context(struct pipe_context *pipe) +{ + return (struct nv04_context *)pipe; +} + +extern void nv04_init_state_functions(struct nv04_context *nv04); +extern void nv04_init_surface_functions(struct nv04_context *nv04); +extern void nv04_init_miptree_functions(struct pipe_screen *screen); + +/* nv04_clear.c */ +extern void nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv04_draw.c */ +extern struct draw_stage *nv04_draw_render_stage(struct nv04_context *nv04); + +/* nv04_fragprog.c */ +extern void nv04_fragprog_bind(struct nv04_context *, + struct nv04_fragment_program *); +extern void nv04_fragprog_destroy(struct nv04_context *, + struct nv04_fragment_program *); + +/* nv04_fragtex.c */ +extern void nv04_fragtex_bind(struct nv04_context *); + +/* nv04_prim_vbuf.c */ +struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 ); + +/* nv04_state.c and friends */ +extern void nv04_emit_hw_state(struct nv04_context *nv04); +extern void nv04_state_tex_update(struct nv04_context *nv04); + +/* nv04_vbo.c */ +extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv04_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count); + + +#endif diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c new file mode 100644 index 00000000000..11f4360ed80 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_fragprog.c @@ -0,0 +1,22 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" + +#include "nv04_context.h" + +void +nv04_fragprog_bind(struct nv04_context *nv04, struct nv04_fragment_program *fp) +{ +} + +void +nv04_fragprog_destroy(struct nv04_context *nv04, + struct nv04_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c new file mode 100644 index 00000000000..3db673cd2c1 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -0,0 +1,100 @@ +#include "nv04_context.h" + +static INLINE int log2i(int i) +{ + int r = 0; + + if (i & 0xffff0000) { + i >>= 16; + r += 16; + } + if (i & 0x0000ff00) { + i >>= 8; + r += 8; + } + if (i & 0x000000f0) { + i >>= 4; + r += 4; + } + if (i & 0x0000000c) { + i >>= 2; + r += 2; + } + if (i & 0x00000002) { + r += 1; + } + return r; +} + +#define _(m,tf) \ +{ \ + PIPE_FORMAT_##m, \ + NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ +} + +struct nv04_texture_format { + uint pipe; + int format; +}; + +static struct nv04_texture_format +nv04_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8), + _(X8R8G8B8_UNORM, X8R8G8B8), + _(A1R5G5B5_UNORM, A1R5G5B5), + _(A4R4G4B4_UNORM, A4R4G4B4), + _(L8_UNORM, Y8 ), + _(A8_UNORM, Y8 ), +}; + +static uint32_t +nv04_fragtex_format(uint pipe_format) +{ + struct nv04_texture_format *tf = nv04_texture_formats; + char fs[128]; + int i; + + for (i=0; i< sizeof(nv04_texture_formats)/sizeof(nv04_texture_formats[0]); i++) { + if (tf->pipe == pipe_format) + return tf->format; + tf++; + } + + pf_sprint_name(fs, pipe_format); + NOUVEAU_ERR("unknown texture format %s\n", fs); + return 0; +} + + +static void +nv04_fragtex_build(struct nv04_context *nv04, int unit) +{ + struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; + struct pipe_texture *pt = &nv04mt->base; + + switch (pt->target) { + case PIPE_TEXTURE_2D: + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return; + } + + nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER + | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER + | nv04_fragtex_format(pt->format) + | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) + | ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) + | ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) + | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE + | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE + ; +} + + +void +nv04_fragtex_bind(struct nv04_context *nv04) +{ + nv04_fragtex_build(nv04, 0); +} + diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c new file mode 100644 index 00000000000..ab2d2386bee --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -0,0 +1,138 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static void +nv04_miptree_layout(struct nv04_miptree *nv04mt) +{ + struct pipe_texture *pt = &nv04mt->base; + uint width = pt->width[0], height = pt->height[0]; + uint offset = 0; + int nr_faces, l, f; + + nr_faces = 1; + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + nv04mt->level[l].pitch = pt->width[0] * pt->block.size; + nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63; + + nv04mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + nv04mt->level[l].image_offset[f] = offset; + offset += nv04mt->level[l].pitch * pt->height[l]; + } + } + + nv04mt->total_size = offset; +} + +static struct pipe_texture * +nv04_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv04_miptree *mt; + + mt = MALLOC(sizeof(struct nv04_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = screen; + + nv04_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->total_size); + if (!mt->buffer) { + free(mt); + return NULL; + } + + return &mt->base; +} + +static void +nv04_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_texture *mt = *pt; + + *pt = NULL; + if (--mt->refcount <= 0) { + struct nv04_miptree *nv04mt = (struct nv04_miptree *)mt; + int l; + + pipe_buffer_reference(ws, &nv04mt->buffer, NULL); + for (l = 0; l <= mt->last_level; l++) { + if (nv04mt->level[l].image_offset) + free(nv04mt->level[l].image_offset); + } + free(nv04mt); + } +} + +static struct pipe_surface * +nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct pipe_winsys *ws = pscreen->winsys; + struct nv04_miptree *nv04mt = (struct nv04_miptree *)pt; + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + pipe_buffer_reference(ws, &ps->buffer, nv04mt->buffer); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv04mt->level[level].pitch; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv04mt->level[level].image_offset[face]; + } else { + ps->offset = nv04mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv04_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ +} + +void +nv04_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv04_miptree_create; + pscreen->texture_release = nv04_miptree_release; + pscreen->get_tex_surface = nv04_miptree_surface_new; + pscreen->tex_surface_release = nv04_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c new file mode 100644 index 00000000000..daf28e81f56 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -0,0 +1,312 @@ + +// XXX this has to go somewhere else +#define NONINC_METHOD 0x40000000 + + +#include "draw/draw_vbuf.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" + +#include "nv04_context.h" +#include "nv04_state.h" + +#define VERTEX_SIZE 40 +#define VERTEX_BUFFER_SIZE (4096*VERTEX_SIZE) // 4096 vertices of 40 bytes each + +/** + * Primitive renderer for nv04. + */ +struct nv04_vbuf_render { + struct vbuf_render base; + + struct nv04_context *nv04; + + /** Vertex buffer */ + unsigned char* buffer; + + /** Vertex size in bytes */ + unsigned vertex_size; + + /** Current primitive */ + unsigned prim; +}; + + +/** + * Basically a cast wrapper. + */ +static INLINE struct nv04_vbuf_render * +nv04_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct nv04_vbuf_render *)render; +} + + +static const struct vertex_info * +nv04_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + struct nv04_context *nv04 = nv04_render->nv04; + return &nv04->vertex_info; +} + + +static void * +nv04_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + nv04_render->buffer = (unsigned char*) malloc(VERTEX_BUFFER_SIZE); + assert(!nv04_render->buffer); + + return nv04_render->buffer; +} + + +static boolean +nv04_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + if (prim <= PIPE_PRIM_LINE_STRIP) + return FALSE; + + nv04_render->prim = prim; + return TRUE; +} + +static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) +{ + BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); + OUT_RINGp(buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(buffer + VERTEX_SIZE * v3,8); + OUT_RINGp(buffer + VERTEX_SIZE * v4,8); + OUT_RINGp(buffer + VERTEX_SIZE * v5,8); + OUT_RING(0xFEDCBA); +} + +static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2) +{ + BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); + OUT_RINGp(buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(buffer + VERTEX_SIZE * v2,8); + OUT_RING(0xFED); +} + +static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) +{ + BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); + OUT_RINGp(buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(buffer + VERTEX_SIZE * v3,8); + OUT_RING(0xFECEDC); +} + +static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices) +{ + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i; + + for( i=0; i< nr_indices-5; i+=6) + nv04_2triangles(nv04, + buffer, + indices[i+0], + indices[i+1], + indices[i+2], + indices[i+3], + indices[i+4], + indices[i+5] + ); + if (i != nr_indices) + { + nv04_1triangle(nv04, + buffer, + indices[i+0], + indices[i+1], + indices[i+2] + ); + i+=3; + } + if (i != nr_indices) + NOUVEAU_ERR("Houston, we have lost some vertices\n"); +} + +static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ + const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC}; + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i,j; + + for(i = 0; i<nr_indices; i+=14) + { + int numvert = MIN2(16, nr_indices - i); + int numtri = numvert - 2; + if (numvert<3) + break; + + BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); + for(j = 0; j<numvert; j++) + OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); + + BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0) | NONINC_METHOD, (numtri+1)/2 ); + for(j = 0; j<numtri/2; j++ ) + OUT_RING(striptbl[j]); + if (numtri%2) + OUT_RING(striptbl[numtri/2]&0xFFF); + } +} + +static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ + const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0}; + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i,j; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); + OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); + + for(i = 1; i<nr_indices; i+=14) + { + int numvert=MIN2(15, nr_indices - i); + int numtri=numvert-2; + if (numvert < 3) + break; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + + for(j=0;j<numvert;j++) + OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0) | NONINC_METHOD, (numtri+1)/2); + for(j = 0; j<numtri/2; j++) + OUT_RING(fantbl[j]); + if (numtri%2) + OUT_RING(fantbl[numtri/2]&0xFFF); + } +} + +static void nv04_vbuf_render_quads_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i; + + for(i = 0; i < nr_indices; i += 4) + nv04_1quad(nv04, + buffer, + indices[i+0], + indices[i+1], + indices[i+2], + indices[i+3] + ); +} + + +static void +nv04_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + // emit the indices + switch( nv04_render->prim ) + { + case PIPE_PRIM_TRIANGLES: + nv04_vbuf_render_triangles_elts(nv04_render, indices, nr_indices); + break; + case PIPE_PRIM_QUAD_STRIP: + case PIPE_PRIM_TRIANGLE_STRIP: + nv04_vbuf_render_tri_strip_elts(nv04_render, indices, nr_indices); + break; + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + nv04_vbuf_render_tri_fan_elts(nv04_render, indices, nr_indices); + break; + case PIPE_PRIM_QUADS: + nv04_vbuf_render_quads_elts(nv04_render, indices, nr_indices); + break; + default: + NOUVEAU_ERR("You have to implement primitive %d, young padawan\n", nv04_render->prim); + break; + } +} + + +static void +nv04_vbuf_render_release_vertices( struct vbuf_render *render, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + free(nv04_render->buffer); + nv04_render->buffer = NULL; +} + + +static void +nv04_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + FREE(nv04_render); +} + + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +nv04_vbuf_render_create( struct nv04_context *nv04 ) +{ + struct nv04_vbuf_render *nv04_render = CALLOC_STRUCT(nv04_vbuf_render); + + nv04_render->nv04 = nv04; + + nv04_render->base.max_vertex_buffer_bytes = VERTEX_BUFFER_SIZE; + nv04_render->base.max_indices = 65536; + nv04_render->base.get_vertex_info = nv04_vbuf_render_get_vertex_info; + nv04_render->base.allocate_vertices = nv04_vbuf_render_allocate_vertices; + nv04_render->base.set_primitive = nv04_vbuf_render_set_primitive; + nv04_render->base.draw = nv04_vbuf_render_draw; + nv04_render->base.release_vertices = nv04_vbuf_render_release_vertices; + nv04_render->base.destroy = nv04_vbuf_render_destroy; + + return &nv04_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = nv04_vbuf_render_create(nv04); + if(!render) + return NULL; + + stage = draw_vbuf_stage( nv04->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + + return stage; +} diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c new file mode 100644 index 00000000000..9f34117b8c8 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -0,0 +1,216 @@ +#include "pipe/p_screen.h" +#include "pipe/p_util.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static const char * +nv04_screen_get_name(struct pipe_screen *screen) +{ + struct nv04_screen *nv04screen = nv04_screen(screen); + struct nouveau_device *dev = nv04screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv04_screen_get_vendor(struct pipe_screen *screen) +{ + return "nouveau"; +} + +static int +nv04_screen_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 1; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 0; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 0; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv04_screen_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 0.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 0.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 0.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv04_screen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, uint type) +{ + switch (type) { + case PIPE_SURFACE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + break; + case PIPE_TEXTURE: + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + return TRUE; + default: + break; + } + break; + default: + assert(0); + }; + + return FALSE; +} + +static void * +nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + + map = ws->buffer_map(ws, surface->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + + ws->buffer_unmap(ws, surface->buffer); +} + +static void +nv04_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv04_screen *screen = nv04_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->fahrenheit); + + FREE(pscreen); +} + +struct pipe_screen * +nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv04_screen *screen = CALLOC_STRUCT(nv04_screen); + unsigned fahrenheit_class = 0, sub3d_class = 0; + unsigned chipset = nvws->channel->device->chipset; + int ret; + + if (!screen) + return NULL; + screen->nvws = nvws; + + if (chipset>=0x20) { + fahrenheit_class = 0; + sub3d_class = 0; + } else if (chipset>=0x10) { + fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE; + sub3d_class = NV10_CONTEXT_SURFACES_3D; + } else { + fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE; + sub3d_class = NV04_CONTEXT_SURFACES_3D; + } + + if (!fahrenheit_class) { + NOUVEAU_ERR("Unknown nv04 chipset: nv%02x\n", chipset); + return NULL; + } + + /* 3D object */ + ret = nvws->grobj_alloc(nvws, fahrenheit_class, &screen->fahrenheit); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return NULL; + } + + /* 3D surface object */ + ret = nvws->grobj_alloc(nvws, sub3d_class, &screen->context_surfaces_3d); + if (ret) { + NOUVEAU_ERR("Error creating 3D surface object: %d\n", ret); + return NULL; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv04_screen_destroy(&screen->pipe); + return NULL; + } + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv04_screen_destroy; + + screen->pipe.get_name = nv04_screen_get_name; + screen->pipe.get_vendor = nv04_screen_get_vendor; + screen->pipe.get_param = nv04_screen_get_param; + screen->pipe.get_paramf = nv04_screen_get_paramf; + + screen->pipe.is_format_supported = nv04_screen_is_format_supported; + + screen->pipe.surface_map = nv04_surface_map; + screen->pipe.surface_unmap = nv04_surface_unmap; + + nv04_screen_init_miptree_functions(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv04/nv04_screen.h b/src/gallium/drivers/nv04/nv04_screen.h new file mode 100644 index 00000000000..99a49cdf7a9 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_screen.h @@ -0,0 +1,25 @@ +#ifndef __NV04_SCREEN_H__ +#define __NV04_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv04_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + unsigned chipset; + + /* HW graphics objects */ + struct nouveau_grobj *fahrenheit; + struct nouveau_grobj *context_surfaces_3d; + struct nouveau_notifier *sync; + +}; + +static INLINE struct nv04_screen * +nv04_screen(struct pipe_screen *screen) +{ + return (struct nv04_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c new file mode 100644 index 00000000000..39d7e8b42d0 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -0,0 +1,494 @@ +#include "draw/draw_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + + +#include "nv04_context.h" +#include "nv04_state.h" + +static void * +nv04_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv04_blend_state *cb; + + cb = malloc(sizeof(struct nv04_blend_state)); + + cb->b_enable = cso->blend_enable ? 1 : 0; + cb->b_src = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | + (nvgl_blend_func(cso->rgb_src_factor))); + cb->b_dst = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | + (nvgl_blend_func(cso->rgb_dst_factor))); + + + return (void *)cb; +} + +static void +nv04_blend_state_bind(struct pipe_context *pipe, void *blend) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->blend = (struct nv04_blend_state*)blend; + + nv04->dirty |= NV04_NEW_BLEND; +} + +static void +nv04_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + } + return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; +} + +static void * +nv04_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + + struct nv04_sampler_state *ss; + uint32_t filter = 0; + + ss = malloc(sizeof(struct nv04_sampler_state)); + + ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | + (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); + + if (cso->max_anisotropy > 1.0) { + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + ss->filter = filter; + + return (void *)ss; +} + +static void +nv04_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv04_context *nv04 = nv04_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv04->sampler[unit] = sampler[unit]; + nv04->dirty_samplers |= (1 << unit); + } +} + +static void +nv04_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void +nv04_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv04_context *nv04 = nv04_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv04->tex_miptree[unit] = (struct nv04_miptree *)miptree[unit]; + nv04->dirty_samplers |= (1 << unit); + } +} + +static void * +nv04_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv04_rasterizer_state *rs; + + /*XXX: ignored: + * scissor + * points/lines (no hw support, emulated with tris in gallium) + */ + rs = malloc(sizeof(struct nv04_rasterizer_state)); + + rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; + + return (void *)rs; +} + +static void +nv04_rasterizer_state_bind(struct pipe_context *pipe, void *rast) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->rast = (struct nv04_rasterizer_state*)rast; + + draw_set_rasterizer_state(nv04->draw, (nv04->rast ? nv04->rast->templ : NULL)); + + nv04->dirty |= NV04_NEW_RAST | NV04_NEW_BLEND; +} + +static void +nv04_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static INLINE uint32_t nv04_compare_func(uint32_t f) +{ + switch ( f ) { + case PIPE_FUNC_NEVER: return 1; + case PIPE_FUNC_LESS: return 2; + case PIPE_FUNC_EQUAL: return 3; + case PIPE_FUNC_LEQUAL: return 4; + case PIPE_FUNC_GREATER: return 5; + case PIPE_FUNC_NOTEQUAL: return 6; + case PIPE_FUNC_GEQUAL: return 7; + case PIPE_FUNC_ALWAYS: return 8; + } + NOUVEAU_MSG("Unable to find the function\n"); + return 0; +} + +static void * +nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv04_depth_stencil_alpha_state *hw; + + hw = malloc(sizeof(struct nv04_depth_stencil_alpha_state)); + + hw->control = float_to_ubyte(cso->alpha.ref); + hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); + hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0; + hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN; + hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0; + hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); + hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module + hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; + hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; + hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0; + hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format + + return (void *)hw; +} + +static void +nv04_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->dsa = hwcso; + nv04->dirty |= NV04_NEW_CONTROL; +} + +static void +nv04_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv04_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + return draw_create_vertex_shader(nv04->draw, templ); +} + +static void +nv04_vp_state_bind(struct pipe_context *pipe, void *shader) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_bind_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader); + + nv04->dirty |= NV04_NEW_VERTPROG; +} + +static void +nv04_vp_state_delete(struct pipe_context *pipe, void *shader) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_delete_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader); +} + +static void * +nv04_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv04_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv04_fragment_program)); + fp->pipe = cso; + + return (void *)fp; +} + +static void +nv04_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_fragment_program *fp = hwcso; + + nv04->fragprog.current = fp; + nv04->dirty |= NV04_NEW_FRAGPROG; +} + +static void +nv04_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_fragment_program *fp = hwcso; + + nv04_fragprog_destroy(nv04, fp); + free(fp); +} + +static void +nv04_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ +} + +static void +nv04_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + if (shader == PIPE_SHADER_VERTEX) { + nv04->vertprog.constant_buf = buf->buffer; + nv04->dirty |= NV04_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv04->fragprog.constant_buf = buf->buffer; + nv04->dirty |= NV04_NEW_FRAGPROG; + } +} + +static void +nv04_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct pipe_surface *rt, *zeta; + uint32_t rt_format, w, h; + int colour_format = 0, zeta_format = 0; + + w = fb->cbufs[0]->width; + h = fb->cbufs[0]->height; + colour_format = fb->cbufs[0]->format; + rt = fb->cbufs[0]; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format = 0x108; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format = 0x103; + break; + default: + assert(0); + } + + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); + OUT_RING(rt_format); + + /* FIXME pitches have to be aligned ! */ + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(rt->stride|(zeta->stride<<16)); + OUT_RELOCl(rt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + if (fb->zsbuf) { + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } +} + +static void +nv04_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv04_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ +/* struct nv04_context *nv04 = nv04_context(pipe); + + // XXX + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); + OUT_RING (((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void +nv04_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *viewport) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->viewport = *viewport; + + draw_set_viewport_state(nv04->draw, &nv04->viewport); +} + +static void +nv04_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_flush(nv04->draw); + + memcpy(nv04->vertex_buffer, buffers, count * sizeof(buffers[0])); + nv04->num_vertex_buffers = count; + + draw_set_vertex_buffers(nv04->draw, count, buffers); +} + +static void +nv04_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *elements) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_flush(nv04->draw); + + nv04->num_vertex_elements = count; + draw_set_vertex_elements(nv04->draw, count, elements); +} + +void +nv04_init_state_functions(struct nv04_context *nv04) +{ + nv04->pipe.create_blend_state = nv04_blend_state_create; + nv04->pipe.bind_blend_state = nv04_blend_state_bind; + nv04->pipe.delete_blend_state = nv04_blend_state_delete; + + nv04->pipe.create_sampler_state = nv04_sampler_state_create; + nv04->pipe.bind_sampler_states = nv04_sampler_state_bind; + nv04->pipe.delete_sampler_state = nv04_sampler_state_delete; + nv04->pipe.set_sampler_textures = nv04_set_sampler_texture; + + nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create; + nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind; + nv04->pipe.delete_rasterizer_state = nv04_rasterizer_state_delete; + + nv04->pipe.create_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_create; + nv04->pipe.bind_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_bind; + nv04->pipe.delete_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_delete; + + nv04->pipe.create_vs_state = nv04_vp_state_create; + nv04->pipe.bind_vs_state = nv04_vp_state_bind; + nv04->pipe.delete_vs_state = nv04_vp_state_delete; + + nv04->pipe.create_fs_state = nv04_fp_state_create; + nv04->pipe.bind_fs_state = nv04_fp_state_bind; + nv04->pipe.delete_fs_state = nv04_fp_state_delete; + + nv04->pipe.set_blend_color = nv04_set_blend_color; + nv04->pipe.set_clip_state = nv04_set_clip_state; + nv04->pipe.set_constant_buffer = nv04_set_constant_buffer; + nv04->pipe.set_framebuffer_state = nv04_set_framebuffer_state; + nv04->pipe.set_polygon_stipple = nv04_set_polygon_stipple; + nv04->pipe.set_scissor_state = nv04_set_scissor_state; + nv04->pipe.set_viewport_state = nv04_set_viewport_state; + + nv04->pipe.set_vertex_buffers = nv04_set_vertex_buffers; + nv04->pipe.set_vertex_elements = nv04_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h new file mode 100644 index 00000000000..7487819c3ab --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state.h @@ -0,0 +1,71 @@ +#ifndef __NV04_STATE_H__ +#define __NV04_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" + +struct nv04_blend_state { + uint32_t b_enable; + uint32_t b_src; + uint32_t b_dst; +}; + +struct nv04_fragtex_state { + uint32_t format; +}; + +struct nv04_sampler_state { + uint32_t filter; + uint32_t format; +}; + +struct nv04_depth_stencil_alpha_state { + uint32_t control; +}; + +struct nv04_rasterizer_state { + uint32_t blend; + + const struct pipe_rasterizer_state *templ; +}; + +struct nv04_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +struct nv04_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv04_fragment_program { + const struct pipe_shader_state *pipe; + struct tgsi_shader_info info; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv04_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; +}; + + + +#endif diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c new file mode 100644 index 00000000000..0ad40a092ea --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -0,0 +1,156 @@ +#include "nv04_context.h" +#include "nv04_state.h" + +static void nv04_vertex_layout(struct pipe_context* pipe) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_fragment_program *fp = nv04->fragprog.current; + uint32_t src = 0; + int i; + struct vertex_info vinfo; + + memset(&vinfo, 0, sizeof(vinfo)); + + for (i = 0; i < fp->info.num_inputs; i++) { + switch (i) { + case TGSI_SEMANTIC_POSITION: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + case TGSI_SEMANTIC_COLOR: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + default: + case TGSI_SEMANTIC_GENERIC: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + case TGSI_SEMANTIC_FOG: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + } + } + draw_compute_vertex_size(&vinfo); +} + +static uint32_t nv04_blend_func(uint32_t f) +{ + switch ( f ) { + case PIPE_BLENDFACTOR_ZERO: return 0x1; + case PIPE_BLENDFACTOR_ONE: return 0x2; + case PIPE_BLENDFACTOR_SRC_COLOR: return 0x3; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: return 0x4; + case PIPE_BLENDFACTOR_SRC_ALPHA: return 0x5; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return 0x6; + case PIPE_BLENDFACTOR_DST_ALPHA: return 0x7; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: return 0x8; + case PIPE_BLENDFACTOR_DST_COLOR: return 0x9; + case PIPE_BLENDFACTOR_INV_DST_COLOR: return 0xA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return 0xB; + } + NOUVEAU_MSG("Unable to find the blend function 0x%x\n",f); + return 0; +} + +static void nv04_emit_control(struct nv04_context* nv04) +{ + uint32_t control = nv04->dsa->control; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(control); +} + +static void nv04_emit_blend(struct nv04_context* nv04) +{ + uint32_t blend; + + blend=0x4; // texture MODULATE_ALPHA + blend|=0x20; // alpha is MSB + blend|=(2<<6); // flat shading + blend|=(1<<8); // persp correct + blend|=(0<<16); // no fog + blend|=(nv04->blend->b_enable<<20); + blend|=(nv04_blend_func(nv04->blend->b_src)<<24); + blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + OUT_RING(blend); +} + +static void nv04_emit_sampler(struct nv04_context *nv04, int unit) +{ + struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; + struct pipe_texture *pt = &nv04mt->base; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING(nv04->sampler[unit]->filter); +} + +void +nv04_emit_hw_state(struct nv04_context *nv04) +{ + int i; + + if (nv04->dirty & NV04_NEW_VERTPROG) { + //nv04_vertprog_bind(nv04, nv04->vertprog.current); + nv04->dirty &= ~NV04_NEW_VERTPROG; + } + + if (nv04->dirty & NV04_NEW_FRAGPROG) { + nv04_fragprog_bind(nv04, nv04->fragprog.current); + /*XXX: clear NV04_NEW_FRAGPROG if no new program uploaded */ + nv04->dirty_samplers |= (1<<10); + nv04->dirty_samplers = 0; + } + + if (nv04->dirty & NV04_NEW_CONTROL) { + nv04->dirty &= ~NV04_NEW_CONTROL; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(nv04->dsa->control); + } + + if (nv04->dirty & NV04_NEW_BLEND) { + nv04->dirty &= ~NV04_NEW_BLEND; + + nv04_emit_blend(nv04); + } + + if (nv04->dirty & NV04_NEW_SAMPLER) { + nv04->dirty &= ~NV04_NEW_SAMPLER; + + nv04_emit_sampler(nv04, 0); + } + + if (nv04->dirty & NV04_NEW_VIEWPORT) { + nv04->dirty &= ~NV04_NEW_VIEWPORT; +// nv04_state_emit_viewport(nv04); + } + + /* Emit relocs for every referenced buffer. + * This is to ensure the bufmgr has an accurate idea of how + * the buffer is used. This isn't very efficient, but we don't + * seem to take a significant performance hit. Will be improved + * at some point. Vertex arrays are emitted by nv04_vbo.c + */ + + /* Render target */ +/* BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(rt->stride|(zeta->stride<<16)); + OUT_RELOCl(rt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + if (fb->zsbuf) { + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + }*/ + + /* Texture images */ + for (i = 0; i < 1; i++) { + if (!(nv04->fp_samplers & (1 << i))) + continue; + struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + } +} + diff --git a/src/gallium/drivers/nv04/nv04_surface.c b/src/gallium/drivers/nv04/nv04_surface.c new file mode 100644 index 00000000000..b13ebf9f9b5 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_surface.c @@ -0,0 +1,65 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nv04_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/p_tile.h" + +static void +nv04_surface_copy(struct pipe_context *pipe, unsigned do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nouveau_winsys *nvws = nv04->nvws; + + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); +} + +static void +nv04_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nouveau_winsys *nvws = nv04->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +void +nv04_init_surface_functions(struct nv04_context *nv04) +{ + nv04->pipe.surface_copy = nv04_surface_copy; + nv04->pipe.surface_fill = nv04_surface_fill; +} diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c new file mode 100644 index 00000000000..fbfe0cf406d --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -0,0 +1,72 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "nv04_context.h" +#include "nv04_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv04_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct nv04_context *nv04 = nv04_context( pipe ); + struct draw_context *draw = nv04->draw; + unsigned i; + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv04->vertex_buffer[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + nv04->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + /* draw! */ + draw_arrays(nv04->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv04->vertex_buffer[i].buffer) { + pipe->winsys->buffer_unmap(pipe->winsys, nv04->vertex_buffer[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (indexBuffer) { + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + return TRUE; +} + +boolean nv04_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return nv04_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + -- cgit v1.2.3 From 97646f10459dd743d9bfd091aec27b28b24222f2 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Tue, 8 Jul 2008 01:30:48 +0200 Subject: nv04: use BEGIN_RING_NI --- src/gallium/drivers/nv04/nv04_prim_vbuf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index daf28e81f56..80a4dbc311c 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -1,8 +1,4 @@ -// XXX this has to go somewhere else -#define NONINC_METHOD 0x40000000 - - #include "draw/draw_vbuf.h" #include "pipe/p_debug.h" #include "pipe/p_util.h" @@ -160,7 +156,7 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con for(j = 0; j<numvert; j++) OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); - BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0) | NONINC_METHOD, (numtri+1)/2 ); + BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 ); for(j = 0; j<numtri/2; j++ ) OUT_RING(striptbl[j]); if (numtri%2) @@ -190,7 +186,7 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const for(j=0;j<numvert;j++) OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0) | NONINC_METHOD, (numtri+1)/2); + BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2); for(j = 0; j<numtri/2; j++) OUT_RING(fantbl[j]); if (numtri%2) -- cgit v1.2.3 From 205101dafb0042ebeeab3dd50f549d8a3d3c08ce Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Tue, 8 Jul 2008 01:34:36 +0200 Subject: nv10: there are no 3D textures. --- src/gallium/drivers/nv10/nv10_miptree.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index f1486a35df5..9a68df29250 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -11,7 +11,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) { struct pipe_texture *pt = &nv10mt->base; boolean swizzled = FALSE; - uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint width = pt->width[0], height = pt->height[0]; uint offset = 0; int nr_faces, l, f; @@ -24,7 +24,6 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) for (l = 0; l <= pt->last_level; l++) { pt->width[l] = width; pt->height[l] = height; - pt->depth[l] = depth; pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); @@ -39,7 +38,6 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt) width = MAX2(1, width >> 1); height = MAX2(1, height >> 1); - depth = MAX2(1, depth >> 1); } -- cgit v1.2.3 From 06d87b44cffef9e335b9a0e70fd2aec61fcd171b Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Tue, 8 Jul 2008 12:51:29 +1000 Subject: nv50: make use of nouveau drm 0.0.11 to get 3d going --- src/gallium/drivers/nouveau/nouveau_bo.h | 2 + src/gallium/drivers/nouveau/nouveau_stateobj.h | 3 + src/gallium/drivers/nv50/nv50_state_validate.c | 6 +- src/gallium/winsys/dri/nouveau/nouveau_bo.c | 16 +++ src/gallium/winsys/dri/nouveau/nouveau_drmif.h | 1 + src/gallium/winsys/dri/nouveau/nouveau_screen.c | 2 +- .../winsys/dri/nouveau/nouveau_winsys_pipe.c | 12 ++ src/gallium/winsys/dri/nouveau/nv50_surface.c | 158 +++++++++++---------- 8 files changed, 120 insertions(+), 80 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_bo.h b/src/gallium/drivers/nouveau/nouveau_bo.h index 18020e9c652..65b138283c4 100644 --- a/src/gallium/drivers/nouveau/nouveau_bo.h +++ b/src/gallium/drivers/nouveau/nouveau_bo.h @@ -35,6 +35,8 @@ #define NOUVEAU_BO_HIGH (1 << 7) #define NOUVEAU_BO_OR (1 << 8) #define NOUVEAU_BO_LOCAL (1 << 9) +#define NOUVEAU_BO_TILED (1 << 10) +#define NOUVEAU_BO_ZTILE (1 << 11) #define NOUVEAU_BO_DUMMY (1 << 31) struct nouveau_bo { diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index 78b27380702..998ec2d4ad4 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -136,6 +136,9 @@ so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) struct nouveau_pushbuf *pb = nvws->channel->pushbuf; unsigned i; + if (!so) + return; + i = so->cur_reloc << 1; if (nvws->channel->pushbuf->remaining < i) nvws->push_flush(nvws, i, NULL); diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index cf5a071a8d6..88ef685bb1f 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -44,7 +44,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, 0xe6); break; } - so_data(so, 0x00000040); + so_data(so, 0x00000000); so_data(so, 0x00000000); so_method(so, tesla, 0x1224, 1); @@ -82,7 +82,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, 0x16); break; } - so_data(so, 0x00000040); + so_data(so, 0x00000000); so_data(so, 0x00000000); so_method(so, tesla, 0x1538, 1); @@ -265,7 +265,7 @@ scissor_uptodate: so_data (so, 0x2a712488); so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0xd0c05000); + so_data (so, 0xd0005000); so_data (so, 0x00300000); so_data (so, mt->base.width[0]); so_data (so, (mt->base.depth[0] << 16) | diff --git a/src/gallium/winsys/dri/nouveau/nouveau_bo.c b/src/gallium/winsys/dri/nouveau/nouveau_bo.c index 57f5b7d1ae1..b5942994d94 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_bo.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_bo.c @@ -182,6 +182,13 @@ nouveau_bo_new(struct nouveau_device *dev, uint32_t flags, int align, nvbo->drm.alignment = align; nvbo->refcount = 1; + if (flags & NOUVEAU_BO_TILED) { + nvbo->tiled = 1; + if (flags & NOUVEAU_BO_ZTILE) + nvbo->tiled |= 2; + flags &= ~NOUVEAU_BO_TILED; + } + ret = nouveau_bo_set_status(&nvbo->base, flags); if (ret) { free(nvbo); @@ -332,6 +339,12 @@ nouveau_bo_set_status(struct nouveau_bo *bo, uint32_t flags) else if (flags & NOUVEAU_BO_GART) new_flags |= (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI); + + if (nvbo->tiled && flags) { + new_flags |= NOUVEAU_MEM_TILE; + if (nvbo->tiled & 2) + new_flags |= NOUVEAU_MEM_TILE_ZETA; + } if (new_flags) { ret = nouveau_mem_alloc(bo->device, bo->size, @@ -347,9 +360,12 @@ nouveau_bo_set_status(struct nouveau_bo *bo, uint32_t flags) /* Copy old -> new */ /*XXX: use M2MF */ if (nvbo->sysmem || nvbo->map) { + struct nouveau_pushbuf_bo *pbo = nvbo->pending; + nvbo->pending = NULL; nouveau_bo_map(bo, NOUVEAU_BO_RD); memcpy(new_map, bo->map, bo->size); nouveau_bo_unmap(bo); + nvbo->pending = pbo; } /* Free old memory */ diff --git a/src/gallium/winsys/dri/nouveau/nouveau_drmif.h b/src/gallium/winsys/dri/nouveau/nouveau_drmif.h index 5829f649cd5..dcd6a5eb0a4 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_drmif.h +++ b/src/gallium/winsys/dri/nouveau/nouveau_drmif.h @@ -259,6 +259,7 @@ struct nouveau_bo_priv { uint64_t offset; uint64_t flags; + int tiled; }; #define nouveau_bo(n) ((struct nouveau_bo_priv *)(n)) diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.c b/src/gallium/winsys/dri/nouveau/nouveau_screen.c index b15ee7509cc..df1fe7e69b4 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_screen.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_screen.c @@ -13,7 +13,7 @@ #include "nouveau_screen.h" #include "nouveau_swapbuffers.h" -#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 10 +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 11 #error nouveau_drm.h version does not match expected version #endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c index ba5dd2eea4b..765ea36f881 100644 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c +++ b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c @@ -76,6 +76,18 @@ nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, if (usage & NOUVEAU_BUFFER_USAGE_TEXTURE) flags |= NOUVEAU_BO_GART; flags |= NOUVEAU_BO_VRAM; + + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + flags |= NOUVEAU_BO_TILED; + if (usage & NOUVEAU_BUFFER_USAGE_ZETA) + flags |= NOUVEAU_BO_ZTILE; + break; + default: + break; + } } if (usage & PIPE_BUFFER_USAGE_VERTEX) { diff --git a/src/gallium/winsys/dri/nouveau/nv50_surface.c b/src/gallium/winsys/dri/nouveau/nv50_surface.c index cf76d76cf52..c8ab7f690f3 100644 --- a/src/gallium/winsys/dri/nouveau/nv50_surface.c +++ b/src/gallium/winsys/dri/nouveau/nv50_surface.c @@ -22,52 +22,70 @@ nv50_format(enum pipe_format format) } static int -nv50_surface_copy_prep(struct nouveau_context *nv, - struct pipe_surface *dst, struct pipe_surface *src) +nv50_surface_set(struct nouveau_context *nv, struct pipe_surface *surf, int dst) { struct nouveau_channel *chan = nv->nvc->channel; struct nouveau_grobj *eng2d = nv->nvc->Nv2D; - int surf_format; - - assert(src->format == dst->format); + struct nouveau_bo *bo = nouveau_buffer(surf->buffer)->bo; + int surf_format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; + int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); + + surf_format = nv50_format(surf->format); + if (surf_format < 0) + return 1; + + if (!nouveau_bo(bo)->tiled) { + BEGIN_RING(chan, eng2d, mthd, 2); + OUT_RING (chan, surf_format); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, mthd + 0x14, 5); + OUT_RING (chan, surf->stride); + OUT_RING (chan, surf->width); + OUT_RING (chan, surf->height); + OUT_RELOCh(chan, bo, surf->offset, flags); + OUT_RELOCl(chan, bo, surf->offset, flags); + } else { + BEGIN_RING(chan, eng2d, mthd, 5); + OUT_RING (chan, surf_format); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, eng2d, mthd + 0x18, 4); + OUT_RING (chan, surf->width); + OUT_RING (chan, surf->height); + OUT_RELOCh(chan, bo, surf->offset, flags); + OUT_RELOCl(chan, bo, surf->offset, flags); + } + +#if 0 + if (dst) { + BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, surf->width); + OUT_RING (chan, surf->height); + } +#endif + + return 0; +} - surf_format = nv50_format(dst->format); - assert(surf_format >= 0); +static int +nv50_surface_copy_prep(struct nouveau_context *nv, + struct pipe_surface *dst, struct pipe_surface *src) +{ + int ret; - BEGIN_RING(chan, eng2d, NV50_2D_DMA_IN_MEMORY0, 2); - OUT_RELOCo(chan, nouveau_buffer(src->buffer)->bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + assert(src->format == dst->format); - BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2); - OUT_RING (chan, surf_format); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 5); - OUT_RING (chan, dst->stride); - OUT_RING (chan, dst->width); - OUT_RING (chan, dst->height); - OUT_RELOCh(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, dst->width); - OUT_RING (chan, dst->height); + ret = nv50_surface_set(nv, dst, 1); + if (ret) + return ret; - BEGIN_RING(chan, eng2d, NV50_2D_SRC_FORMAT, 2); - OUT_RING (chan, surf_format); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng2d, NV50_2D_SRC_PITCH, 5); - OUT_RING (chan, src->stride); - OUT_RING (chan, src->width); - OUT_RING (chan, src->height); - OUT_RELOCh(chan, nouveau_buffer(src->buffer)->bo, src->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCl(chan, nouveau_buffer(src->buffer)->bo, src->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + ret = nv50_surface_set(nv, src, 0); + if (ret) + return ret; return 0; } @@ -79,17 +97,19 @@ nv50_surface_copy(struct nouveau_context *nv, unsigned dx, unsigned dy, struct nouveau_channel *chan = nv->nvc->channel; struct nouveau_grobj *eng2d = nv->nvc->Nv2D; - BEGIN_RING(chan, eng2d, 0x0110, 1); + BEGIN_RING(chan, eng2d, 0x088c, 1); OUT_RING (chan, 0); - BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 12); + BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 4); OUT_RING (chan, dx); OUT_RING (chan, dy); OUT_RING (chan, w); OUT_RING (chan, h); + BEGIN_RING(chan, eng2d, 0x08c0, 4); OUT_RING (chan, 0); OUT_RING (chan, 1); OUT_RING (chan, 0); OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, 0x08d0, 4); OUT_RING (chan, 0); OUT_RING (chan, sx); OUT_RING (chan, 0); @@ -109,35 +129,15 @@ nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, { struct nouveau_channel *chan = nv->nvc->channel; struct nouveau_grobj *eng2d = nv->nvc->Nv2D; - int surf_format, rect_format; - - surf_format = nv50_format(dst->format); - if (surf_format < 0) - return 1; + int rect_format, ret; rect_format = nv50_format(dst->format); if (rect_format < 0) return 1; - BEGIN_RING(chan, eng2d, NV50_2D_DMA_IN_MEMORY1, 1); - OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2); - OUT_RING (chan, surf_format); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 5); - OUT_RING (chan, dst->stride); - OUT_RING (chan, dst->width); - OUT_RING (chan, dst->height); - OUT_RELOCh(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, dst->width); - OUT_RING (chan, dst->height); + ret = nv50_surface_set(nv, dst, 1); + if (ret) + return ret; BEGIN_RING(chan, eng2d, 0x0580, 3); OUT_RING (chan, 4); @@ -151,27 +151,33 @@ nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, OUT_RING (chan, dy + h); FIRE_RING(chan); - return 0; } int nouveau_surface_channel_create_nv50(struct nouveau_channel_context *nvc) { + struct nouveau_channel *chan = nvc->channel; + struct nouveau_grobj *eng2d = NULL; int ret; - ret = nouveau_grobj_alloc(nvc->channel, nvc->next_handle++, NV50_2D, - &nvc->Nv2D); + ret = nouveau_grobj_alloc(chan, nvc->next_handle++, NV50_2D, &eng2d); if (ret) return ret; - BIND_RING (nvc->channel, nvc->Nv2D, nvc->next_subchannel++); - BEGIN_RING(nvc->channel, nvc->Nv2D, NV50_2D_DMA_NOTIFY, 1); - OUT_RING (nvc->channel, nvc->sync_notifier->handle); - BEGIN_RING(nvc->channel, nvc->Nv2D, NV50_2D_DMA_IN_MEMORY0, 2); - OUT_RING (nvc->channel, nvc->channel->vram->handle); - OUT_RING (nvc->channel, nvc->channel->vram->handle); - BEGIN_RING(nvc->channel, nvc->Nv2D, NV50_2D_OPERATION, 1); - OUT_RING (nvc->channel, NV50_2D_OPERATION_SRCCOPY); + nvc->Nv2D = eng2d; + + BIND_RING (chan, eng2d, nvc->next_subchannel++); + BEGIN_RING(chan, eng2d, NV50_2D_DMA_NOTIFY, 4); + OUT_RING (chan, nvc->sync_notifier->handle); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); + BEGIN_RING(chan, eng2d, NV50_2D_OPERATION, 1); + OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY); + BEGIN_RING(chan, eng2d, 0x0290, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, eng2d, 0x0888, 1); + OUT_RING (chan, 1); return 0; } -- cgit v1.2.3 From 28268f7b22905974a2cc3c099b1a842d20a7712a Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Tue, 8 Jul 2008 16:38:27 +0200 Subject: nv30: use native instructions. --- src/gallium/drivers/nv30/nv30_fragprog.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 54d6bea55f7..740d3ab430d 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -510,9 +510,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; // case TGSI_OPCODE_LIT: case TGSI_OPCODE_LRP: - tmp = temp(fpc); - arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); - arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); break; case TGSI_OPCODE_MAD: arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); @@ -530,13 +528,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: - tmp = temp(fpc); - arith(fpc, 0, LG2, tmp, MASK_X, - swz(src[0], X, X, X, X), none, none); - arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(fpc, sat, EX2, dst, mask, - swz(tmp, X, X, X, X), none, none); + arith(fpc, sat, POW, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_RCP: arith(fpc, sat, RCP, dst, mask, src[0], none, none); @@ -545,20 +537,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, assert(0); break; case TGSI_OPCODE_RFL: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); - arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); - arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, - swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); - arith(fpc, sat, MAD, dst, mask, - swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_RSQ: - tmp = temp(fpc); - arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, - abs(swz(src[0], X, X, X, X)), none, none); - arith(fpc, sat, EX2, dst, mask, - neg(swz(tmp, X, X, X, X)), none, none); + arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: if (mask & MASK_X) { -- cgit v1.2.3 From 511693d00c5f8641375efd16e08289586d62e562 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Tue, 8 Jul 2008 20:06:57 +0200 Subject: nv04: use FREE/MALLOC like a good gallium boy should. --- src/gallium/drivers/nv04/nv04_miptree.c | 6 +++--- src/gallium/drivers/nv04/nv04_prim_vbuf.c | 2 +- src/gallium/drivers/nv04/nv04_state.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index ab2d2386bee..97f679731ea 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -62,7 +62,7 @@ nv04_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, mt->total_size); if (!mt->buffer) { - free(mt); + FREE(mt); return NULL; } @@ -83,9 +83,9 @@ nv04_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) pipe_buffer_reference(ws, &nv04mt->buffer, NULL); for (l = 0; l <= mt->last_level; l++) { if (nv04mt->level[l].image_offset) - free(nv04mt->level[l].image_offset); + FREE(nv04mt->level[l].image_offset); } - free(nv04mt); + FREE(nv04mt); } } diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index 80a4dbc311c..d3963d1f59d 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -57,7 +57,7 @@ nv04_vbuf_render_allocate_vertices( struct vbuf_render *render, { struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); - nv04_render->buffer = (unsigned char*) malloc(VERTEX_BUFFER_SIZE); + nv04_render->buffer = (unsigned char*) MALLOC(VERTEX_BUFFER_SIZE); assert(!nv04_render->buffer); return nv04_render->buffer; diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index 39d7e8b42d0..d618465a201 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -14,7 +14,7 @@ nv04_blend_state_create(struct pipe_context *pipe, { struct nv04_blend_state *cb; - cb = malloc(sizeof(struct nv04_blend_state)); + cb = MALLOC(sizeof(struct nv04_blend_state)); cb->b_enable = cso->blend_enable ? 1 : 0; cb->b_src = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | @@ -81,7 +81,7 @@ nv04_sampler_state_create(struct pipe_context *pipe, struct nv04_sampler_state *ss; uint32_t filter = 0; - ss = malloc(sizeof(struct nv04_sampler_state)); + ss = MALLOC(sizeof(struct nv04_sampler_state)); ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); @@ -178,7 +178,7 @@ nv04_rasterizer_state_create(struct pipe_context *pipe, * scissor * points/lines (no hw support, emulated with tris in gallium) */ - rs = malloc(sizeof(struct nv04_rasterizer_state)); + rs = MALLOC(sizeof(struct nv04_rasterizer_state)); rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; @@ -225,7 +225,7 @@ nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv04_depth_stencil_alpha_state *hw; - hw = malloc(sizeof(struct nv04_depth_stencil_alpha_state)); + hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state)); hw->control = float_to_ubyte(cso->alpha.ref); hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); -- cgit v1.2.3 From a62a738f3df054ef802f9ea7bc778d468fd0a4ba Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Tue, 8 Jul 2008 20:26:04 +0200 Subject: nv10: use the gallium alloc/free wrappers. --- src/gallium/drivers/nv10/nv10_state.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 11664fae2af..9b8b7801cdf 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -14,7 +14,7 @@ nv10_blend_state_create(struct pipe_context *pipe, { struct nv10_blend_state *cb; - cb = malloc(sizeof(struct nv10_blend_state)); + cb = MALLOC(sizeof(struct nv10_blend_state)); cb->b_enable = cso->blend_enable ? 1 : 0; cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | @@ -88,7 +88,7 @@ nv10_sampler_state_create(struct pipe_context *pipe, struct nv10_sampler_state *ps; uint32_t filter = 0; - ps = malloc(sizeof(struct nv10_sampler_state)); + ps = MALLOC(sizeof(struct nv10_sampler_state)); ps->wrap = ((wrap_mode(cso->wrap_s) << NV10TCL_TX_FORMAT_WRAP_S_SHIFT) | (wrap_mode(cso->wrap_t) << NV10TCL_TX_FORMAT_WRAP_T_SHIFT)); @@ -249,7 +249,7 @@ nv10_rasterizer_state_create(struct pipe_context *pipe, * multisample * offset_units / offset_scale */ - rs = malloc(sizeof(struct nv10_rasterizer_state)); + rs = MALLOC(sizeof(struct nv10_rasterizer_state)); rs->templ = cso; @@ -335,7 +335,7 @@ nv10_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv10_depth_stencil_alpha_state *hw; - hw = malloc(sizeof(struct nv10_depth_stencil_alpha_state)); + hw = MALLOC(sizeof(struct nv10_depth_stencil_alpha_state)); hw->depth.func = nvgl_comparison_op(cso->depth.func); hw->depth.write_enable = cso->depth.writemask ? 1 : 0; -- cgit v1.2.3 From 456550293a50d72c02ed5ec930c12899e7458a6f Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Wed, 9 Jul 2008 21:51:22 +0200 Subject: nv30: update nouveau_class.h --- src/gallium/drivers/nouveau/nouveau_class.h | 61 ++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 3c29fa0d1be..4ec5062709e 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -3759,8 +3759,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_COLOR1_PITCH 0x0000021c #define NV34TCL_RT_ENABLE 0x00000220 #define NV34TCL_RT_ENABLE_MRT (1 << 4) -#define NV34TCL_RT_ENABLE_COLOR3 (1 << 3) -#define NV34TCL_RT_ENABLE_COLOR2 (1 << 2) #define NV34TCL_RT_ENABLE_COLOR1 (1 << 1) #define NV34TCL_RT_ENABLE_COLOR0 (1 << 0) #define NV34TCL_LMA_DEPTH_PITCH 0x0000022c @@ -3876,6 +3874,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000 #define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 #define NV34TCL_BLEND_COLOR 0x0000031c +#define NV34TCL_BLEND_COLOR_B_SHIFT 0 +#define NV34TCL_BLEND_COLOR_B_MASK 0x000000ff +#define NV34TCL_BLEND_COLOR_G_SHIFT 8 +#define NV34TCL_BLEND_COLOR_G_MASK 0x0000ff00 +#define NV34TCL_BLEND_COLOR_R_SHIFT 16 +#define NV34TCL_BLEND_COLOR_R_MASK 0x00ff0000 +#define NV34TCL_BLEND_COLOR_A_SHIFT 24 +#define NV34TCL_BLEND_COLOR_A_MASK 0xff000000 #define NV34TCL_BLEND_EQUATION 0x00000320 #define NV34TCL_BLEND_EQUATION_FUNC_ADD 0x00008006 #define NV34TCL_BLEND_EQUATION_MIN 0x00008007 @@ -4338,6 +4344,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_FRONT_FACE_CCW 0x00000901 #define NV34TCL_POLYGON_SMOOTH_ENABLE 0x00001838 #define NV34TCL_CULL_FACE_ENABLE 0x0000183c +#define NV34TCL_TX_PALETTE_OFFSET(x) (0x00001840+((x)*4)) +#define NV34TCL_TX_PALETTE_OFFSET__SIZE 0x00000004 #define NV34TCL_VTX_ATTR_2F_X(x) (0x00001880+((x)*8)) #define NV34TCL_VTX_ATTR_2F_X__SIZE 0x00000010 #define NV34TCL_VTX_ATTR_2F_Y(x) (0x00001884+((x)*8)) @@ -4376,7 +4384,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_TX_FORMAT__SIZE 0x00000004 #define NV34TCL_TX_FORMAT_DMA0 (1 << 0) #define NV34TCL_TX_FORMAT_DMA1 (1 << 1) -#define NV34TCL_TX_FORMAT_CUBE_MAP (1 << 2) +#define NV34TCL_TX_FORMAT_CUBIC (1 << 2) +#define NV34TCL_TX_FORMAT_NO_BORDER (1 << 3) #define NV34TCL_TX_FORMAT_DIMS_SHIFT 4 #define NV34TCL_TX_FORMAT_DIMS_MASK 0x000000f0 #define NV34TCL_TX_FORMAT_DIMS_1D 0x00000010 @@ -4389,6 +4398,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000200 #define NV34TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000300 #define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000400 +#define NV34TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000500 #define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000600 #define NV34TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000700 #define NV34TCL_TX_FORMAT_FORMAT_INDEX8 0x00000b00 @@ -4396,22 +4406,26 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_TX_FORMAT_FORMAT_DXT3 0x00000e00 #define NV34TCL_TX_FORMAT_FORMAT_DXT5 0x00000f00 #define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000 +#define NV34TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00001100 #define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200 #define NV34TCL_TX_FORMAT_FORMAT_L8_RECT 0x00001300 #define NV34TCL_TX_FORMAT_FORMAT_A8L8 0x00001a00 #define NV34TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00001b00 -#define NV34TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00 #define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00 +#define NV34TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00 #define NV34TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00002000 #define NV34TCL_TX_FORMAT_FORMAT_A16 0x00003200 +#define NV34TCL_TX_FORMAT_FORMAT_HILO16 0x00003300 #define NV34TCL_TX_FORMAT_FORMAT_A16_RECT 0x00003500 +#define NV34TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00003600 +#define NV34TCL_TX_FORMAT_FORMAT_HILO8 0x00004400 +#define NV34TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00004500 +#define NV34TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00004600 +#define NV34TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00004700 #define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00 #define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00 #define NV34TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00 -#define NV34TCL_TX_FORMAT_NPOT (1 << 12) -#define NV34TCL_TX_FORMAT_RECT (1 << 14) -#define NV34TCL_TX_FORMAT_MIPMAP_LEVELS_SHIFT 16 -#define NV34TCL_TX_FORMAT_MIPMAP_LEVELS_MASK 0x000f0000 +#define NV34TCL_TX_FORMAT_MIPMAP (1 << 19) #define NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 20 #define NV34TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x00f00000 #define NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 24 @@ -4428,12 +4442,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER 0x00000004 #define NV34TCL_TX_WRAP_S_CLAMP 0x00000005 #define NV34TCL_TX_WRAP_T_SHIFT 8 -#define NV34TCL_TX_WRAP_T_MASK 0x0000ff00 +#define NV34TCL_TX_WRAP_T_MASK 0x00000f00 #define NV34TCL_TX_WRAP_T_REPEAT 0x00000100 #define NV34TCL_TX_WRAP_T_MIRRORED_REPEAT 0x00000200 #define NV34TCL_TX_WRAP_T_CLAMP_TO_EDGE 0x00000300 #define NV34TCL_TX_WRAP_T_CLAMP_TO_BORDER 0x00000400 #define NV34TCL_TX_WRAP_T_CLAMP 0x00000500 +#define NV34TCL_TX_WRAP_EXPAND_NORMAL_SHIFT 12 +#define NV34TCL_TX_WRAP_EXPAND_NORMAL_MASK 0x0000f000 #define NV34TCL_TX_WRAP_R_SHIFT 16 #define NV34TCL_TX_WRAP_R_MASK 0x00ff0000 #define NV34TCL_TX_WRAP_R_REPEAT 0x00010000 @@ -4441,8 +4457,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_TX_WRAP_R_CLAMP_TO_EDGE 0x00030000 #define NV34TCL_TX_WRAP_R_CLAMP_TO_BORDER 0x00040000 #define NV34TCL_TX_WRAP_R_CLAMP 0x00050000 +#define NV34TCL_TX_WRAP_RCOMP_SHIFT 28 +#define NV34TCL_TX_WRAP_RCOMP_MASK 0xf0000000 +#define NV34TCL_TX_WRAP_RCOMP_NEVER 0x00000000 +#define NV34TCL_TX_WRAP_RCOMP_GREATER 0x10000000 +#define NV34TCL_TX_WRAP_RCOMP_EQUAL 0x20000000 +#define NV34TCL_TX_WRAP_RCOMP_GEQUAL 0x30000000 +#define NV34TCL_TX_WRAP_RCOMP_LESS 0x40000000 +#define NV34TCL_TX_WRAP_RCOMP_NOTEQUAL 0x50000000 +#define NV34TCL_TX_WRAP_RCOMP_LEQUAL 0x60000000 +#define NV34TCL_TX_WRAP_RCOMP_ALWAYS 0x70000000 #define NV34TCL_TX_ENABLE(x) (0x00001a0c+((x)*32)) #define NV34TCL_TX_ENABLE__SIZE 0x00000004 +#define NV34TCL_TX_ENABLE_ANISO_SHIFT 4 +#define NV34TCL_TX_ENABLE_ANISO_MASK 0x00000030 +#define NV34TCL_TX_ENABLE_ANISO_NONE 0x00000000 +#define NV34TCL_TX_ENABLE_ANISO_2X 0x00000010 +#define NV34TCL_TX_ENABLE_ANISO_4X 0x00000020 +#define NV34TCL_TX_ENABLE_ANISO_8X 0x00000030 +#define NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT 14 +#define NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_MASK 0x0003c000 +#define NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT 26 +#define NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_MASK 0x3c000000 #define NV34TCL_TX_ENABLE_ENABLE (1 << 30) #define NV34TCL_TX_SWIZZLE(x) (0x00001a10+((x)*32)) #define NV34TCL_TX_SWIZZLE__SIZE 0x00000004 @@ -4494,6 +4530,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_TX_SWIZZLE_RECT_PITCH_MASK 0xffff0000 #define NV34TCL_TX_FILTER(x) (0x00001a14+((x)*32)) #define NV34TCL_TX_FILTER__SIZE 0x00000004 +#define NV34TCL_TX_FILTER_LOD_BIAS_SHIFT 8 +#define NV34TCL_TX_FILTER_LOD_BIAS_MASK 0x00000f00 #define NV34TCL_TX_FILTER_MINIFY_SHIFT 16 #define NV34TCL_TX_FILTER_MINIFY_MASK 0x000f0000 #define NV34TCL_TX_FILTER_MINIFY_NEAREST 0x00010000 @@ -4539,6 +4577,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_SHIFT 0 #define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_MASK 0x0000000f #define NV34TCL_MULTISAMPLE_CONTROL 0x00001d7c +#define NV34TCL_MULTISAMPLE_CONTROL_ENABLE (1 << 0) +#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_COVERAGE (1 << 4) +#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_ONE (1 << 8) +#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE_SHIFT 16 +#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE_MASK 0xffff0000 #define NV34TCL_CLEAR_DEPTH_VALUE 0x00001d8c #define NV34TCL_CLEAR_COLOR_VALUE 0x00001d90 #define NV34TCL_CLEAR_COLOR_VALUE_B_SHIFT 0 -- cgit v1.2.3 From 7a838ef411d2f4716bdcbcad9f593541c43a2ce7 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Wed, 9 Jul 2008 22:03:59 +0200 Subject: nv30: Update defines from nouveau_class.h --- src/gallium/drivers/nv30/nv30_state.c | 46 ++++++++++++++--------------------- 1 file changed, 18 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 97becd2be37..72b9515eb1a 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -120,38 +120,28 @@ nv30_sampler_state_create(struct pipe_context *pipe, ps = MALLOC(sizeof(struct nv30_sampler_state)); ps->fmt = 0; + /* TODO: Not all RECTs formats have this bit set, bits 15-8 of format + are the tx format to use. We should store normalized coord flag + in sampler state structure, and set appropriate format in + nvxx_fragtex_build() + */ if (!cso->normalized_coords) - ps->fmt |= NV34TCL_TX_FORMAT_RECT; + ps->fmt |= (1<<14) /*NV34TCL_TX_FORMAT_RECT*/; ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); ps->en = 0; + + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; + } else if (cso->max_anisotropy >= 2.0) { - /* no idea, binary driver sets it, works without it.. meh.. */ - ps->wrap |= (1 << 5); - -/* if (cso->max_anisotropy >= 16.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_16X; - } else - if (cso->max_anisotropy >= 12.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_12X; - } else - if (cso->max_anisotropy >= 10.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_10X; - } else - if (cso->max_anisotropy >= 8.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 6.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_6X; - } else - if (cso->max_anisotropy >= 4.0) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; - } else { - ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; - }*/ + ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; } switch (cso->mag_img_filter) { @@ -198,7 +188,7 @@ nv30_sampler_state_create(struct pipe_context *pipe, ps->filt = filter; - /*{ + { float limit; limit = CLAMP(cso->lod_bias, -16.0, 15.0); @@ -209,9 +199,9 @@ nv30_sampler_state_create(struct pipe_context *pipe, limit = CLAMP(cso->min_lod, 0.0, 15.0); ps->en |= (int)(limit * 256.0) << 19; - }*/ + } -/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { switch (cso->compare_func) { case PIPE_FUNC_NEVER: ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; @@ -240,7 +230,7 @@ nv30_sampler_state_create(struct pipe_context *pipe, default: break; } - }*/ + } ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | (float_to_ubyte(cso->border_color[0]) << 16) | -- cgit v1.2.3 From 2fed964a27f208f69edc0b2d9a2d0b0717184ee5 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Wed, 9 Jul 2008 22:13:01 +0200 Subject: nv30: Update defines from nouveau_class.h --- src/gallium/drivers/nv30/nv30_fragtex.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 47f3ff047db..92b3f939297 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -87,6 +87,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) struct nv30_texture_format *tf; uint32_t txf, txs, txp; int swizzled = 0; /*XXX: implement in region code? */ + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; tf = nv30_fragtex_format(pt->format); if (!tf || !tf->defined) { @@ -99,20 +100,20 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txf |= log2i(pt->width[0]) << 20; txf |= log2i(pt->height[0]) << 24; txf |= log2i(pt->depth[0]) << 28; - txf |= 8; + txf |= NV34TCL_TX_FORMAT_NO_BORDER; switch (pt->target) { -/* case PIPE_TEXTURE_CUBE: - txf |= NV34TCL_TEX_FORMAT_CUBIC;*/ + case PIPE_TEXTURE_CUBE: + txf |= NV34TCL_TX_FORMAT_CUBIC; /* fall-through */ case PIPE_TEXTURE_2D: - txf |= (2<<4); + txf |= NV34TCL_TX_FORMAT_DIMS_2D; break; case PIPE_TEXTURE_3D: - txf |= (3<<4); + txf |= NV34TCL_TX_FORMAT_DIMS_3D; break; case PIPE_TEXTURE_1D: - txf |= (1<<4); + txf |= NV34TCL_TX_FORMAT_DIMS_1D; break; default: NOUVEAU_ERR("Unknown target %d\n", pt->target); @@ -122,13 +123,13 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txs = tf->swizzle; BEGIN_RING(rankine, NV34TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv30mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv30mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RELOCl(nv30mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW); + OUT_RELOCd(nv30mt->buffer,txf, tex_flags | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ + OUT_RING (NV34TCL_TX_ENABLE_ENABLE | ps->en); OUT_RING (txs); OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING ((pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pt->height[0]); OUT_RING (ps->bcol); } -- cgit v1.2.3 From 225863aeb5f2dfe4980ae5887f5623ecb05e9ced Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Wed, 9 Jul 2008 23:23:39 +0200 Subject: nv30: min/max lod are used for mipmap, there is just enable bit in tx_format --- src/gallium/drivers/nv30/nv30_fragtex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 92b3f939297..f253087c884 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -96,11 +96,11 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) } txf = tf->format << 8; - txf |= (pt->last_level + 1) << 16; + txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); txf |= log2i(pt->width[0]) << 20; txf |= log2i(pt->height[0]) << 24; txf |= log2i(pt->depth[0]) << 28; - txf |= NV34TCL_TX_FORMAT_NO_BORDER; + txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; switch (pt->target) { case PIPE_TEXTURE_CUBE: -- cgit v1.2.3 From 861629d1fd4a1d256c913470c33d9522e83d615d Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 11 Jul 2008 00:15:01 +1000 Subject: nv50: more "abuse" by using libc malloc etc.. --- src/gallium/drivers/nv50/nv50_context.c | 2 +- src/gallium/drivers/nv50/nv50_draw.c | 2 +- src/gallium/drivers/nv50/nv50_program.c | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index a225c4bf728..2494784a94e 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -21,7 +21,7 @@ nv50_destroy(struct pipe_context *pipe) struct nv50_context *nv50 = (struct nv50_context *)pipe; draw_destroy(nv50->draw); - free(nv50); + FREE(nv50); } diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c index d185d999502..2b0c0384a37 100644 --- a/src/gallium/drivers/nv50/nv50_draw.c +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -46,7 +46,7 @@ nv50_render_reset_stipple_counter(struct draw_stage *stage) static void nv50_render_destroy(struct draw_stage *stage) { - free(stage); + FREE(stage); } struct draw_stage * diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index cd6967b3668..ff81719f03e 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -180,8 +180,8 @@ kill_temp_temp(struct nv50_pc *pc) static int ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) { - pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * - sizeof(float)); + pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)), + (pc->immd_nr + 1) * 4 * sizeof(float)); pc->immd_buf[(pc->immd_nr * 4) + 0] = x; pc->immd_buf[(pc->immd_nr * 4) + 1] = y; pc->immd_buf[(pc->immd_nr * 4) + 2] = z; @@ -1276,7 +1276,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) } if (pc->temp_nr) { - pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); + pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); if (!pc->temp) goto out_err; @@ -1293,7 +1293,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) struct nv50_reg *iv = NULL; int aid = 0; - pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); + pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); if (!pc->attr) goto out_err; @@ -1339,7 +1339,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->result_nr) { int rid = 0; - pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); + pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); if (!pc->result) goto out_err; @@ -1360,7 +1360,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->param_nr) { int rid = 0; - pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); + pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); if (!pc->param) goto out_err; @@ -1376,7 +1376,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->immd_nr) { int rid = pc->param_nr * 4; - pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); + pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); if (!pc->immd) goto out_err; -- cgit v1.2.3 From add89c78455f04654c3706d46e3d3e6b92b73b71 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 11 Jul 2008 00:21:42 +1000 Subject: nouveau: update to latest object header --- src/gallium/drivers/nouveau/nouveau_class.h | 52 +++++++++++++++++++++-------- src/gallium/drivers/nv50/nv50_screen.c | 12 +++---- 2 files changed, 44 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 4ec5062709e..18d8d3677db 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -1798,6 +1798,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000100 #define NV10TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000180 #define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000200 +#define NV10TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000280 #define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000300 #define NV10TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000380 #define NV10TCL_TX_FORMAT_FORMAT_INDEX8 0x00000580 @@ -1805,15 +1806,22 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV10TCL_TX_FORMAT_FORMAT_DXT3 0x00000700 #define NV10TCL_TX_FORMAT_FORMAT_DXT5 0x00000780 #define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00000800 +#define NV10TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00000880 #define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00000900 #define NV10TCL_TX_FORMAT_FORMAT_L8_RECT 0x00000980 #define NV10TCL_TX_FORMAT_FORMAT_A8L8 0x00000d00 #define NV10TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00000d80 -#define NV10TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00000f00 #define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00000e80 +#define NV10TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00000f00 #define NV10TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00001000 #define NV10TCL_TX_FORMAT_FORMAT_A16 0x00001900 +#define NV10TCL_TX_FORMAT_FORMAT_HILO16 0x00001980 #define NV10TCL_TX_FORMAT_FORMAT_A16_RECT 0x00001a80 +#define NV10TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00001b00 +#define NV10TCL_TX_FORMAT_FORMAT_HILO8 0x00002200 +#define NV10TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00002280 +#define NV10TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00002300 +#define NV10TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00002380 #define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00002500 #define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00002580 #define NV10TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00002600 @@ -5486,18 +5494,23 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50TCL_NOP 0x00000100 #define NV50TCL_NOTIFY 0x00000104 #define NV50TCL_DMA_NOTIFY 0x00000180 -#define NV50TCL_DMA_IN_MEMORY0(x) (0x00000184+((x)*4)) -#define NV50TCL_DMA_IN_MEMORY0__SIZE 0x0000000b -#define NV50TCL_DMA_IN_MEMORY1(x) (0x000001c0+((x)*4)) -#define NV50TCL_DMA_IN_MEMORY1__SIZE 0x00000008 +#define NV50TCL_DMA_UNK0(x) (0x00000184+((x)*4)) +#define NV50TCL_DMA_UNK0__SIZE 0x0000000b +#define NV50TCL_DMA_UNK1(x) (0x000001c0+((x)*4)) +#define NV50TCL_DMA_UNK1__SIZE 0x00000008 #define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32)) #define NV50TCL_RT_ADDRESS_HIGH__SIZE 0x00000008 #define NV50TCL_RT_ADDRESS_LOW(x) (0x00000204+((x)*32)) #define NV50TCL_RT_ADDRESS_LOW__SIZE 0x00000008 #define NV50TCL_RT_FORMAT(x) (0x00000208+((x)*32)) #define NV50TCL_RT_FORMAT__SIZE 0x00000008 -#define NV50TCL_RT_UNK3(x) (0x0000020c+((x)*32)) -#define NV50TCL_RT_UNK3__SIZE 0x00000008 +#define NV50TCL_RT_FORMAT_32BPP 0x000000cf +#define NV50TCL_RT_FORMAT_24BPP 0x000000e6 +#define NV50TCL_RT_FORMAT_16BPP 0x000000e8 +#define NV50TCL_RT_FORMAT_8BPP 0x000000f3 +#define NV50TCL_RT_FORMAT_15BPP 0x000000f8 +#define NV50TCL_RT_TILE_UNK(x) (0x0000020c+((x)*32)) +#define NV50TCL_RT_TILE_UNK__SIZE 0x00000008 #define NV50TCL_RT_UNK4(x) (0x00000210+((x)*32)) #define NV50TCL_RT_UNK4__SIZE 0x00000008 #define NV50TCL_VTX_ATTR_1F(x) (0x00000300+((x)*4)) @@ -5602,9 +5615,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50TCL_SCISSOR_VERT_T_MASK 0x0000ffff #define NV50TCL_SCISSOR_VERT_B_SHIFT 16 #define NV50TCL_SCISSOR_VERT_B_MASK 0xffff0000 -#define NV50TCL_VP_UPLOAD_CONST_ID 0x00000f00 -#define NV50TCL_VP_UPLOAD_CONST(x) (0x00000f04+((x)*4)) -#define NV50TCL_VP_UPLOAD_CONST__SIZE 0x00000010 +#define NV50TCL_CB_ADDR 0x00000f00 +#define NV50TCL_CB_ADDR_ID_SHIFT 8 +#define NV50TCL_CB_ADDR_ID_MASK 0xffffff00 +#define NV50TCL_CB_ADDR_BUFFER_SHIFT 0 +#define NV50TCL_CB_ADDR_BUFFER_MASK 0x000000ff +#define NV50TCL_CB_DATA(x) (0x00000f04+((x)*4)) +#define NV50TCL_CB_DATA__SIZE 0x00000010 #define NV50TCL_STENCIL_FRONT_FUNC_REF 0x00000f54 #define NV50TCL_STENCIL_FRONT_MASK 0x00000f58 #define NV50TCL_STENCIL_FRONT_FUNC_MASK 0x00000f5c @@ -5626,6 +5643,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50TCL_RT_HORIZ__SIZE 0x00000008 #define NV50TCL_RT_VERT(x) (0x00001244+((x)*8)) #define NV50TCL_RT_VERT__SIZE 0x00000008 +#define NV50TCL_CB_DEF_ADDRESS_HIGH 0x00001280 +#define NV50TCL_CB_DEF_ADDRESS_LOW 0x00001284 +#define NV50TCL_CB_DEF_SET 0x00001288 +#define NV50TCL_CB_DEF_SET_SIZE_SHIFT 0 +#define NV50TCL_CB_DEF_SET_SIZE_MASK 0x0000ffff +#define NV50TCL_CB_DEF_SET_BUFFER_SHIFT 16 +#define NV50TCL_CB_DEF_SET_BUFFER_MASK 0xffff0000 #define NV50TCL_DEPTH_TEST_ENABLE 0x000012cc #define NV50TCL_SHADE_MODEL 0x000012d4 #define NV50TCL_SHADE_MODEL_FLAT 0x00001d00 @@ -5779,12 +5803,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV50TCL_GP_START_ID 0x00001410 #define NV50TCL_FP_START_ID 0x00001414 #define NV50TCL_POINT_SIZE 0x00001518 -#define NV50TCL_TEX_CB0_ADDRESS_HIGH 0x0000155c -#define NV50TCL_TEX_CB0_ADDRESS_LOW 0x00001560 +#define NV50TCL_TSC_ADDRESS_HIGH 0x0000155c +#define NV50TCL_TSC_ADDRESS_LOW 0x00001560 #define NV50TCL_POLYGON_OFFSET_FACTOR 0x0000156c #define NV50TCL_LINE_SMOOTH_ENABLE 0x00001570 -#define NV50TCL_TEX_CB1_ADDRESS_HIGH 0x00001574 -#define NV50TCL_TEX_CB1_ADDRESS_LOW 0x00001578 +#define NV50TCL_TIC_ADDRESS_HIGH 0x00001574 +#define NV50TCL_TIC_ADDRESS_LOW 0x00001578 #define NV50TCL_STENCIL_FRONT_ENABLE 0x00001594 #define NV50TCL_STENCIL_FRONT_OP_FAIL 0x00001598 #define NV50TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 5b4c5f96ac3..e4c09a99d8b 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -189,13 +189,13 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 1); so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); - so_method(so, screen->tesla, NV50TCL_DMA_IN_MEMORY0(0), - NV50TCL_DMA_IN_MEMORY0__SIZE); - for (i = 0; i < NV50TCL_DMA_IN_MEMORY0__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), + NV50TCL_DMA_UNK0__SIZE); + for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) so_data(so, nvws->channel->vram->handle); - so_method(so, screen->tesla, NV50TCL_DMA_IN_MEMORY1(0), - NV50TCL_DMA_IN_MEMORY1__SIZE); - for (i = 0; i < NV50TCL_DMA_IN_MEMORY1__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), + NV50TCL_DMA_UNK1__SIZE); + for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) so_data(so, nvws->channel->vram->handle); so_method(so, screen->tesla, 0x121c, 1); so_data (so, 1); -- cgit v1.2.3 From 7a81ffa1154e6efd09fe91a944514a6f56369b4a Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 11 Jul 2008 01:53:28 +1000 Subject: nv50: get clear-scissor working --- src/gallium/drivers/nv50/nv50_context.h | 1 + src/gallium/drivers/nv50/nv50_state_validate.c | 44 ++++++++++++++++++++------ 2 files changed, 36 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 4ea01009fb6..9b77fc1225d 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -86,6 +86,7 @@ struct nv50_state { struct nouveau_stateobj *scissor; unsigned scissor_enabled; struct nouveau_stateobj *viewport; + unsigned viewport_bypass; struct nouveau_stateobj *tsc_upload; struct nouveau_stateobj *tic_upload; struct nouveau_stateobj *vertprog; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 88ef685bb1f..bd1f048f878 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -228,17 +228,43 @@ nv50_state_validate(struct nv50_context *nv50) scissor_uptodate: if (nv50->dirty & NV50_NEW_VIEWPORT) { - so = so_new(8, 0); - so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); - so_data (so, fui(nv50->viewport.translate[0])); - so_data (so, fui(nv50->viewport.translate[1])); - so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3); - so_data (so, fui(nv50->viewport.scale[0])); - so_data (so, fui(-nv50->viewport.scale[1])); - so_data (so, fui(nv50->viewport.scale[2])); + unsigned bypass; + + if (!nv50->rasterizer->pipe.bypass_clipping) + bypass = 0; + else + bypass = 1; + + if (nv50->state.viewport && + (bypass || !(nv50->dirty & NV50_NEW_VIEWPORT)) && + nv50->state.viewport_bypass == bypass) + goto viewport_uptodate; + nv50->state.viewport_bypass = bypass; + + so = so_new(12, 0); + if (!bypass) { + so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); + so_data (so, fui(nv50->viewport.translate[0])); + so_data (so, fui(nv50->viewport.translate[1])); + so_data (so, fui(nv50->viewport.translate[2])); + so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3); + so_data (so, fui(nv50->viewport.scale[0])); + so_data (so, fui(-nv50->viewport.scale[1])); + so_data (so, fui(nv50->viewport.scale[2])); + so_method(so, tesla, 0x192c, 1); + so_data (so, 1); + so_method(so, tesla, 0x0f90, 1); + so_data (so, 0); + } else { + so_method(so, tesla, 0x192c, 1); + so_data (so, 0); + so_method(so, tesla, 0x0f90, 1); + so_data (so, 1); + } + so_ref(so, &nv50->state.viewport); } +viewport_uptodate: if (nv50->dirty & NV50_NEW_SAMPLER) { int i; -- cgit v1.2.3 From 17e95de85250540c8e2448f138d81b7b055be511 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 11 Jul 2008 02:01:23 +1000 Subject: nv50: enable GART usage for vertex buffers AKA "I can haz fast b0rk3d glxgears!!!" --- src/gallium/drivers/nv50/nv50_screen.c | 1 + src/gallium/drivers/nv50/nv50_vbo.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index e4c09a99d8b..9f1fba6e42d 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -90,6 +90,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; case NOUVEAU_CAP_HW_VTXBUF: + return 1; case NOUVEAU_CAP_HW_IDXBUF: return 0; default: diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index c16bfe3a937..0318b9b5503 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -204,11 +204,11 @@ nv50_vbo_validate(struct nv50_context *nv50) so_method(vtxbuf, tesla, 0x900 + (i * 16), 3); so_data (vtxbuf, 0x20000000 | vb->pitch); so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + - ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + - ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); } so_ref (vtxfmt, &nv50->state.vtxfmt); -- cgit v1.2.3 From 857a3294a959015bf893241199f7fd7f7882a6ab Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 11 Jul 2008 20:44:39 +1000 Subject: nv50: add license headers to .c files --- src/gallium/drivers/nv50/nv50_clear.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_context.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_draw.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_miptree.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_program.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_query.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_screen.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_state.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_state_validate.c | 22 ++++++++++++ src/gallium/drivers/nv50/nv50_surface.c | 46 +++++++++++--------------- src/gallium/drivers/nv50/nv50_vbo.c | 22 ++++++++++++ 11 files changed, 240 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c index f35087b37ef..fbc6cd09cee 100644 --- a/src/gallium/drivers/nv50/nv50_clear.c +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 2494784a94e..07987c7d02d 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c index 2b0c0384a37..4fd81bd27a1 100644 --- a/src/gallium/drivers/nv50/nv50_draw.c +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "draw/draw_pipe.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index f936a1f0e20..8b61ca2a1fa 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index ff81719f03e..2668acf2999 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index d8c3491c2c5..26bd90ccc5e 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_context.h" #include "nv50_context.h" diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 9f1fba6e42d..ec6d34e6845 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_screen.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 743de089d6f..5c364c4ddff 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index bd1f048f878..5c75a242cc6 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "nv50_context.h" #include "nouveau/nouveau_stateobj.h" diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index acd7b501ef4..6229b636260 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -1,30 +1,24 @@ - -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * +/* + * Copyright 2008 Ben Skeggs + * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ #include "nv50_context.h" #include "pipe/p_defines.h" diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 0318b9b5503..8e420ad172e 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -1,3 +1,25 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "pipe/p_context.h" #include "pipe/p_state.h" #include "pipe/p_util.h" -- cgit v1.2.3 From 62100692b8ad7fa868743c4698dac109beceaf7f Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 11 Jul 2008 20:49:06 +1000 Subject: nv50: split tic construction out into own file --- src/gallium/drivers/nv50/Makefile | 1 + src/gallium/drivers/nv50/nv50_context.h | 3 + src/gallium/drivers/nv50/nv50_state_validate.c | 27 +----- src/gallium/drivers/nv50/nv50_tex.c | 57 ++++++++++++ src/gallium/drivers/nv50/nv50_texture.h | 124 +++++++++++++++++++++++++ 5 files changed, 187 insertions(+), 25 deletions(-) create mode 100644 src/gallium/drivers/nv50/nv50_tex.c create mode 100644 src/gallium/drivers/nv50/nv50_texture.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile index 2cc6e9de287..be30400c039 100644 --- a/src/gallium/drivers/nv50/Makefile +++ b/src/gallium/drivers/nv50/Makefile @@ -14,6 +14,7 @@ DRIVER_SOURCES = \ nv50_state.c \ nv50_state_validate.c \ nv50_surface.c \ + nv50_tex.c \ nv50_vbo.c C_SOURCES = \ diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 9b77fc1225d..cb51fb02ebd 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -164,4 +164,7 @@ extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program /* nv50_state_validate.c */ extern boolean nv50_state_validate(struct nv50_context *nv50); +/* nv50_tex.c */ +extern void nv50_tex_validate(struct nv50_context *); + #endif diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 5c75a242cc6..8f9ee05acc8 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -300,31 +300,8 @@ viewport_uptodate: so_ref(so, &nv50->state.tsc_upload); } - if (nv50->dirty & NV50_NEW_TEXTURE) { - int i; - - so = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2); - so_method(so, tesla, 0x0f00, 1); - so_data (so, NV50_CB_TIC); - so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8); - for (i = 0; i < nv50->miptree_nr; i++) { - struct nv50_miptree *mt = nv50->miptree[i]; - - so_data (so, 0x2a712488); - so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0xd0005000); - so_data (so, 0x00300000); - so_data (so, mt->base.width[0]); - so_data (so, (mt->base.depth[0] << 16) | - mt->base.height[0]); - so_data (so, 0x03000000); - so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_HIGH, 0, 0); - } - - so_ref(so, &nv50->state.tic_upload); - } + if (nv50->dirty & NV50_NEW_TEXTURE) + nv50_tex_validate(nv50); if (nv50->dirty & NV50_NEW_ARRAYS) nv50_vbo_validate(nv50); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c new file mode 100644 index 00000000000..581a9e2ffb6 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -0,0 +1,57 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nv50_context.h" +#include "nv50_texture.h" + +#include "nouveau/nouveau_stateobj.h" + +void +nv50_tex_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + int i; + + so = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2); + so_method(so, tesla, 0x0f00, 1); + so_data (so, NV50_CB_TIC); + so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8); + for (i = 0; i < nv50->miptree_nr; i++) { + struct nv50_miptree *mt = nv50->miptree[i]; + + so_data (so, 0x2a712488); + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0xd0005000); + so_data (so, 0x00300000); + so_data (so, mt->base.width[0]); + so_data (so, (mt->base.depth[0] << 16) | + mt->base.height[0]); + so_data (so, 0x03000000); + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_HIGH, 0, 0); + } + + so_ref(so, &nv50->state.tic_upload); +} + diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h new file mode 100644 index 00000000000..c8fb282f21f --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -0,0 +1,124 @@ +#ifndef __NV50_TEXTURE_H__ +#define __NV50_TEXTURE_H__ + +/* It'd be really nice to have these in nouveau_class.h generated by + * renouveau like the rest of the object header - but not sure it can + * handle non-object stuff nicely - need to look into it. + */ + +/* Texture image control block */ +#define NV50TIC_0_0_MAPA_MASK 0x38000000 +#define NV50TIC_0_0_MAPA_ZERO 0x00000000 +#define NV50TIC_0_0_MAPA_C0 0x10000000 +#define NV50TIC_0_0_MAPA_C1 0x18000000 +#define NV50TIC_0_0_MAPA_C2 0x20000000 +#define NV50TIC_0_0_MAPA_C3 0x28000000 +#define NV50TIC_0_0_MAPA_ONE 0x38000000 +#define NV50TIC_0_0_MAPR_MASK 0x07000000 +#define NV50TIC_0_0_MAPR_ZERO 0x00000000 +#define NV50TIC_0_0_MAPR_C0 0x02000000 +#define NV50TIC_0_0_MAPR_C1 0x03000000 +#define NV50TIC_0_0_MAPR_C2 0x04000000 +#define NV50TIC_0_0_MAPR_C3 0x05000000 +#define NV50TIC_0_0_MAPR_ONE 0x07000000 +#define NV50TIC_0_0_MAPG_MASK 0x00e00000 +#define NV50TIC_0_0_MAPG_ZERO 0x00000000 +#define NV50TIC_0_0_MAPG_C0 0x00400000 +#define NV50TIC_0_0_MAPG_C1 0x00600000 +#define NV50TIC_0_0_MAPG_C2 0x00800000 +#define NV50TIC_0_0_MAPG_C3 0x00a00000 +#define NV50TIC_0_0_MAPG_ONE 0x00e00000 +#define NV50TIC_0_0_MAPB_MASK 0x001c0000 +#define NV50TIC_0_0_MAPB_ZERO 0x00000000 +#define NV50TIC_0_0_MAPB_C0 0x00080000 +#define NV50TIC_0_0_MAPB_C1 0x000c0000 +#define NV50TIC_0_0_MAPB_C2 0x00100000 +#define NV50TIC_0_0_MAPB_C3 0x00140000 +#define NV50TIC_0_0_MAPB_ONE 0x001c0000 +#define NV50TIC_0_0_TYPEA_MASK 0x00038000 +#define NV50TIC_0_0_TYPEA_UNORM 0x00010000 +#define NV50TIC_0_0_TYPER_MASK 0x00007000 +#define NV50TIC_0_0_TYPER_UNORM 0x00002000 +#define NV50TIC_0_0_TYPEG_MASK 0x00000e00 +#define NV50TIC_0_0_TYPEG_UNORM 0x00000400 +#define NV50TIC_0_0_TYPEB_MASK 0x000001c0 +#define NV50TIC_0_0_TYPEB_UNORM 0x00000080 +#define NV50TIC_0_0_FMT_MASK 0x0000003c +#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 +#define NV50TIC_0_0_FMT_5_6_5 0x00000015 +#define NV50TIC_0_0_FMT_8_8 0x00000018 +#define NV50TIC_0_0_FMT_8 0x0000001d + +#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff +#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0 + +#define NV50TIC_0_2_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_3_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_4_WIDTH_MASK 0x0000ffff +#define NV50TIC_0_4_WIDTH_SHIFT 0 + +#define NV50TIC_0_5_DEPTH_MASK 0xffff0000 +#define NV50TIC_0_5_DEPTH_SHIFT 16 +#define NV50TIC_0_5_HEIGHT_MASK 0x0000ffff +#define NV50TIC_0_5_HEIGHT_SHIFT 0 + +#define NV50TIC_0_6_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_7_OFFSET_HIGH_MASK 0xffffffff +#define NV50TIC_0_7_OFFSET_HIGH_SHIFT 0 + +/* Texture sampler control block */ +#define NV50TSC_1_0_WRAPS_MASK 0x00000007 +#define NV50TSC_1_0_WRAPS_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPS_MIRROR_REPEAT 0x00000001 +#define NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE 0x00000002 +#define NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER 0x00000003 +#define NV50TSC_1_0_WRAPS_CLAMP 0x00000004 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE 0x00000005 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER 0x00000006 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP 0x00000007 +#define NV50TSC_1_0_WRAPT_MASK 0x00000038 +#define NV50TSC_1_0_WRAPT_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPT_MIRROR_REPEAT 0x00000008 +#define NV50TSC_1_0_WRAPT_CLAMP_TO_EDGE 0x00000010 +#define NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER 0x00000018 +#define NV50TSC_1_0_WRAPT_CLAMP 0x00000020 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_EDGE 0x00000028 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_BORDER 0x00000030 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP 0x00000038 +#define NV50TSC_1_0_WRAPR_MASK 0x000001c0 +#define NV50TSC_1_0_WRAPR_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPR_MIRROR_REPEAT 0x00000040 +#define NV50TSC_1_0_WRAPR_CLAMP_TO_EDGE 0x00000080 +#define NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER 0x000000c0 +#define NV50TSC_1_0_WRAPR_CLAMP 0x00000100 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0 + +#define NV50TSC_1_1_MAGF_MASK 0x00000003 +#define NV50TSC_1_1_MAGF_NEAREST 0x00000001 +#define NV50TSC_1_1_MAGF_LINEAR 0x00000002 +#define NV50TSC_1_1_MINF_MASK 0x00000030 +#define NV50TSC_1_1_MINF_NEAREST 0x00000010 +#define NV50TSC_1_1_MINF_LINEAR 0x00000020 +#define NV50TSC_1_1_MIPF_MASK 0x000000c0 +#define NV50TSC_1_1_MIPF_NONE 0x00000040 +#define NV50TSC_1_1_MIPF_NEAREST 0x00000080 +#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0 + +#define NV50TSC_1_2_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_4_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_5_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_6_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_7_UNKNOWN_MASK 0xffffffff + +#endif -- cgit v1.2.3 From 7c745de74997e859d7e2640092bda9ad900e28a9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 11 Jul 2008 21:19:41 +1000 Subject: nv50: add some texture formats --- src/gallium/drivers/nv50/nv50_program.c | 2 + src/gallium/drivers/nv50/nv50_screen.c | 6 +++ src/gallium/drivers/nv50/nv50_tex.c | 93 ++++++++++++++++++++++++++++----- src/gallium/drivers/nv50/nv50_texture.h | 2 + 4 files changed, 90 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2668acf2999..0d3ddb8a596 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1701,6 +1701,8 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) if (p->buffer) pipe_buffer_reference(ws, &p->buffer, NULL); + nv50->screen->nvws->res_free(&p->data); + p->translated = 0; } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ec6d34e6845..bba5ca4d4e5 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -51,7 +51,13 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, case PIPE_TEXTURE: switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: return TRUE; default: break; diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 581a9e2ffb6..fde3c97c059 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -25,6 +25,81 @@ #include "nouveau/nouveau_stateobj.h" +static int +nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt) +{ + switch (mt->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8_8_8); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_1_5_5_5); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_4_4_4_4); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_5_6_5); + break; + case PIPE_FORMAT_L8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_A8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_I8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_A8L8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8); + break; + default: + return 1; + } + + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0xd0005000); + so_data (so, 0x00300000); + so_data (so, mt->base.width[0]); + so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]); + so_data (so, 0x03000000); + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); + + return 0; +} + void nv50_tex_validate(struct nv50_context *nv50) { @@ -37,19 +112,11 @@ nv50_tex_validate(struct nv50_context *nv50) so_data (so, NV50_CB_TIC); so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8); for (i = 0; i < nv50->miptree_nr; i++) { - struct nv50_miptree *mt = nv50->miptree[i]; - - so_data (so, 0x2a712488); - so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0xd0005000); - so_data (so, 0x00300000); - so_data (so, mt->base.width[0]); - so_data (so, (mt->base.depth[0] << 16) | - mt->base.height[0]); - so_data (so, 0x03000000); - so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_HIGH, 0, 0); + if (nv50_tex_construct(so, nv50->miptree[i])) { + NOUVEAU_ERR("failed tex validate\n"); + so_ref(NULL, &so); + return; + } } so_ref(so, &nv50->state.tic_upload); diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h index c8fb282f21f..6861d67b4d5 100644 --- a/src/gallium/drivers/nv50/nv50_texture.h +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -45,6 +45,8 @@ #define NV50TIC_0_0_TYPEB_UNORM 0x00000080 #define NV50TIC_0_0_FMT_MASK 0x0000003c #define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 +#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012 +#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 #define NV50TIC_0_0_FMT_5_6_5 0x00000015 #define NV50TIC_0_0_FMT_8_8 0x00000018 #define NV50TIC_0_0_FMT_8 0x0000001d -- cgit v1.2.3 From 866a68dbd1ec9c3a6100de6d50890ac5068247dc Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 11 Jul 2008 22:01:29 +1000 Subject: nv50: demagic tex filter / wrap mode --- src/gallium/drivers/nv50/nv50_state.c | 65 +++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 5c364c4ddff..c552a0b0aa0 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "nv50_context.h" +#include "nv50_texture.h" #include "nouveau/nouveau_stateobj.h" @@ -104,14 +105,74 @@ nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) FREE(bso); } +static INLINE unsigned +wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return NV50TSC_1_0_WRAPS_REPEAT; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return NV50TSC_1_0_WRAPS_MIRROR_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_CLAMP: + return NV50TSC_1_0_WRAPS_CLAMP; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + return NV50TSC_1_0_WRAPS_REPEAT; + } +} static void * nv50_sampler_state_create(struct pipe_context *pipe, const struct pipe_sampler_state *cso) { unsigned *tsc = CALLOC(8, sizeof(unsigned)); - tsc[0] = 0x00024080; - tsc[1] = 0x00000062; + tsc[0] = (0x00024000 | + (wrap_mode(cso->wrap_s) << 0) | + (wrap_mode(cso->wrap_t) << 3) | + (wrap_mode(cso->wrap_r) << 6)); + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + tsc[1] |= NV50TSC_1_1_MAGF_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MINF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + tsc[1] |= NV50TSC_1_1_MINF_NEAREST; + break; + } + + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MIPF_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + tsc[1] |= NV50TSC_1_1_MIPF_NEAREST; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + tsc[1] |= NV50TSC_1_1_MIPF_NONE; + break; + } return (void *)tsc; } -- cgit v1.2.3 From 9b0add0be4a3ba7fc72779daf8361d8cd98d9f64 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 11 Jul 2008 22:45:11 +1000 Subject: nv50: quick hack to get textures untiled on map, and tiled on unmap progs/fp/tri-tex is all good now rather than all scrambled :) --- src/gallium/drivers/nv50/nv50_context.h | 11 ++++++++++ src/gallium/drivers/nv50/nv50_miptree.c | 18 +++++++++++------ src/gallium/drivers/nv50/nv50_surface.c | 36 ++++++++++++++++++++++++++++----- 3 files changed, 54 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index cb51fb02ebd..1c069f1625f 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -74,6 +74,17 @@ nv50_miptree(struct pipe_texture *pt) return (struct nv50_miptree *)pt; } +struct nv50_surface { + struct pipe_surface base; + struct pipe_buffer *untiled; +}; + +static INLINE struct nv50_surface * +nv50_surface(struct pipe_surface *pt) +{ + return (struct nv50_surface *)pt; +} + struct nv50_state { unsigned dirty; diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 8b61ca2a1fa..a02ad41885b 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -82,9 +82,14 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, { struct pipe_winsys *ws = pscreen->winsys; struct nv50_miptree *mt = nv50_miptree(pt); + struct nv50_surface *s; struct pipe_surface *ps; - ps = CALLOC_STRUCT(pipe_surface); + s = CALLOC_STRUCT(nv50_surface); + if (!s) + return NULL; + ps = &s->base; + ps->refcount = 1; ps->winsys = ws; ps->format = pt->format; @@ -109,14 +114,15 @@ nv50_miptree_surface_del(struct pipe_screen *pscreen, struct pipe_surface **psurface) { struct pipe_winsys *ws = pscreen->winsys; - struct pipe_surface *surf = *psurface; + struct pipe_surface *ps = *psurface; + struct nv50_surface *s = nv50_surface(ps); *psurface = NULL; - if (--surf->refcount <= 0) { - pipe_texture_reference(&surf->texture, NULL); - pipe_buffer_reference(ws, &surf->buffer, NULL); - FREE(surf); + if (--ps->refcount <= 0) { + pipe_texture_reference(&ps->texture, NULL); + pipe_buffer_reference(ws, &ps->buffer, NULL); + FREE(s); } } diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 6229b636260..8d3f1edcfe9 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -52,25 +52,51 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, } static void * -nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, +nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps, unsigned flags ) { + struct nouveau_winsys *nvws = nv50_screen(screen)->nvws; struct pipe_winsys *ws = screen->winsys; + struct nv50_surface *s = nv50_surface(ps); + struct nv50_surface m = *s; void *map; - map = ws->buffer_map(ws, surface->buffer, flags); + if (!s->untiled) { + s->untiled = ws->buffer_create(ws, 0, 0, ps->buffer->size); + + m.base.buffer = s->untiled; + nvws->surface_copy(nvws, &m.base, 0, 0, &s->base, 0, 0, + ps->width, ps->height); + } + + /* Map original tiled surface to disallow it being validated while + * untiled mirror is mapped. + */ + ws->buffer_map(ws, ps->buffer, flags); + + map = ws->buffer_map(ws, s->untiled, flags); if (!map) return NULL; - return map + surface->offset; + return map; } static void -nv50_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +nv50_surface_unmap(struct pipe_screen *screen, struct pipe_surface *ps) { + struct nouveau_winsys *nvws = nv50_screen(screen)->nvws; struct pipe_winsys *ws = screen->winsys; + struct nv50_surface *s = nv50_surface(ps); + struct nv50_surface m = *s; + + ws->buffer_unmap(ws, s->untiled); + ws->buffer_unmap(ws, ps->buffer); + + m.base.buffer = s->untiled; + nvws->surface_copy(nvws, &s->base, 0, 0, &m.base, 0, 0, + ps->width, ps->height); - ws->buffer_unmap(ws, surface->buffer); + pipe_buffer_reference(ws, &s->untiled, NULL); } void -- cgit v1.2.3 From 7c949fb2a2cd4e9b05efd4133e5ae8ea938934d8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Fri, 11 Jul 2008 22:48:11 +1000 Subject: nv50: obey do_flip in surface_copy() --- src/gallium/drivers/nv50/nv50_surface.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 8d3f1edcfe9..a9daeee3694 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -28,7 +28,7 @@ #include "util/p_tile.h" static void -nv50_surface_copy(struct pipe_context *pipe, unsigned flip, +nv50_surface_copy(struct pipe_context *pipe, boolean flip, struct pipe_surface *dest, unsigned destx, unsigned desty, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) @@ -36,8 +36,16 @@ nv50_surface_copy(struct pipe_context *pipe, unsigned flip, struct nv50_context *nv50 = (struct nv50_context *)pipe; struct nouveau_winsys *nvws = nv50->screen->nvws; - nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, - width, height); + if (flip) { + desty += height; + while (height--) { + nvws->surface_copy(nvws, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + } else { + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); + } } static void -- cgit v1.2.3 From 36ac2ade17a1b8f2c47d41945430983834b9ef01 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 11 Jul 2008 21:14:32 +0200 Subject: nv30: Update miptree to match latest changes --- src/gallium/drivers/nv30/nv30_miptree.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index ad0b257fe2b..a0e488c09b1 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -32,8 +32,8 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); if (swizzled) - pitch = pt->width[l]; - pitch = (pitch + 63) & ~63; + pitch = pt->nblocksx[l]; + pitch = align_int(pitch, 64); nv30mt->level[l].pitch = pitch * pt->block.size; nv30mt->level[l].image_offset = @@ -42,7 +42,6 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) width = MAX2(1, width >> 1); height = MAX2(1, height >> 1); depth = MAX2(1, depth >> 1); - } for (f = 0; f < nr_faces; f++) { @@ -111,17 +110,20 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; struct pipe_surface *ps; - ps = ws->surface_alloc(ws); + ps = CALLOC_STRUCT(pipe_surface); if (!ps) return NULL; + pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(ws, &ps->buffer, nv30mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; + ps->block = pt->block; ps->nblocksx = pt->nblocksx[level]; ps->nblocksy = pt->nblocksy[level]; - ps->block = pt->block; ps->stride = nv30mt->level[level].pitch; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; if (pt->target == PIPE_TEXTURE_CUBE) { ps->offset = nv30mt->level[level].image_offset[face]; @@ -139,6 +141,15 @@ static void nv30_miptree_surface_del(struct pipe_screen *pscreen, struct pipe_surface **psurface) { + struct pipe_surface *ps = *psurface; + + *psurface = NULL; + if (--ps->refcount > 0) + return; + + pipe_texture_reference(&ps->texture, NULL); + pipe_buffer_reference(pscreen->winsys, &ps->buffer, NULL); + FREE(ps); } void -- cgit v1.2.3 From 37a418b3b05db51aea0a1010bad8118b92e0e9d6 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 11 Jul 2008 21:31:24 +0200 Subject: nv30: split fragprog_prepare from fragprog_translate --- src/gallium/drivers/nv30/nv30_fragprog.c | 91 ++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 740d3ab430d..7278bf58869 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -1,7 +1,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_parse.h" @@ -650,30 +649,22 @@ nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, return TRUE; } -void -nv30_fragprog_translate(struct nv30_context *nv30, - struct nv30_fragment_program *fp) +static boolean +nv30_fragprog_prepare(struct nv30_fpc *fpc) { - struct tgsi_parse_context parse; - struct nv30_fpc *fpc = NULL; - - fpc = CALLOC(1, sizeof(struct nv30_fpc)); - if (!fpc) - return; - fpc->fp = fp; - fpc->high_temp = -1; - fpc->num_regs = 2; + struct tgsi_parse_context p; + /*int high_temp = -1, i;*/ - tgsi_parse_init(&parse, fp->pipe.tokens); + tgsi_parse_init(&p, fpc->fp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { + tgsi_parse_token(&p); + switch(tok->Token.Type) { case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; + fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { case TGSI_FILE_INPUT: if (!nv30_fragprog_parse_decl_attrib(fpc, fdec)) @@ -683,6 +674,12 @@ nv30_fragprog_translate(struct nv30_context *nv30, if (!nv30_fragprog_parse_decl_output(fpc, fdec)) goto out_err; break; + /*case TGSI_FILE_TEMPORARY: + if (fdec->DeclarationRange.Last > high_temp) { + high_temp = + fdec->DeclarationRange.Last; + } + break;*/ default: break; } @@ -692,17 +689,65 @@ nv30_fragprog_translate(struct nv30_context *nv30, { struct tgsi_full_immediate *imm; float vals[4]; - int i; - imm = &parse.FullToken.FullImmediate; + imm = &p.FullToken.FullImmediate; assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); - for (i = 0; i < 4; i++) - vals[i] = imm->u.ImmediateFloat32[i].Float; + vals[0] = imm->u.ImmediateFloat32[0].Float; + vals[1] = imm->u.ImmediateFloat32[1].Float; + vals[2] = imm->u.ImmediateFloat32[2].Float; + vals[3] = imm->u.ImmediateFloat32[3].Float; fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); } break; + default: + break; + } + } + tgsi_parse_free(&p); + + /*if (++high_temp) { + fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); + for (i = 0; i < high_temp; i++) + fpc->r_temp[i] = temp(fpc); + fpc->r_temps_discard = 0; + }*/ + + return TRUE; + +out_err: + /*if (fpc->r_temp) + FREE(fpc->r_temp);*/ + tgsi_parse_free(&p); + return FALSE; +} + +void +nv30_fragprog_translate(struct nv30_context *nv30, + struct nv30_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nv30_fpc *fpc = NULL; + + fpc = CALLOC(1, sizeof(struct nv30_fpc)); + if (!fpc) + return; + fpc->fp = fp; + fpc->high_temp = -1; + fpc->num_regs = 2; + + if (!nv30_fragprog_prepare(fpc)) { + FREE(fpc); + return; + } + + tgsi_parse_init(&parse, fp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: { const struct tgsi_full_instruction *finst; -- cgit v1.2.3 From b327b064058aaab1debb1af08f4b9abe1385adc6 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 11 Jul 2008 22:06:12 +0200 Subject: nv30: split fragprog_upload from fragprog_bind --- src/gallium/drivers/nv30/nv30_fragprog.c | 57 ++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 7278bf58869..6af86e4f4f9 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -783,6 +783,40 @@ out_err: FREE(fpc); } +static void +nv30_fragprog_upload(struct nv30_context *nv30, + struct nv30_fragment_program *fp) +{ + struct pipe_winsys *ws = nv30->pipe.winsys; + const uint32_t le = 1; + uint32_t *map; + int i; + + map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 + for (i = 0; i < fp->insn_len; i++) { + fflush(stdout); fflush(stderr); + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + fflush(stdout); fflush(stderr); + } +#endif + + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); + } + } + + ws->buffer_unmap(ws, fp->buffer); +} + void nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp) { @@ -818,28 +852,7 @@ nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp) if (!fp->buffer) fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); - map = ws->buffer_map(ws, fp->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - -#if 0 - for (i = 0; i < fp->insn_len; i++) { - NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); - } -#endif - - if ((*(const uint8_t *)&le)) { - for (i = 0; i < fp->insn_len; i++) { - map[i] = fp->insn[i]; - } - } else { - /* Weird swapping for big-endian chips */ - for (i = 0; i < fp->insn_len; i++) { - map[i] = ((fp->insn[i] & 0xffff) << 16) | - ((fp->insn[i] >> 16) & 0xffff); - } - } - - ws->buffer_unmap(ws, fp->buffer); + nv30_fragprog_upload(nv30, fp); fp->on_hw = TRUE; } -- cgit v1.2.3 From 5acbd0b0961089f9553adbe9b3d1341997ccb220 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 11 Jul 2008 22:42:42 +0200 Subject: nv30: Emit fragtex state using state objects --- src/gallium/drivers/nv30/nv30_context.h | 1 + src/gallium/drivers/nv30/nv30_fragtex.c | 82 +++++++++++++++++++----------- src/gallium/drivers/nv30/nv30_state_emit.c | 38 +++----------- 3 files changed, 59 insertions(+), 62 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 5404685da25..72f803c80ad 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -220,6 +220,7 @@ extern struct nv30_state_entry nv30_state_blend_colour; extern struct nv30_state_entry nv30_state_zsa; extern struct nv30_state_entry nv30_state_viewport; extern struct nv30_state_entry nv30_state_framebuffer; +extern struct nv30_state_entry nv30_state_fragtex; /* nv30_vbo.c */ extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index f253087c884..abe77b51df2 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -49,17 +49,17 @@ nv30_texture_formats[] = { _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), -// _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), + _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), // _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), // _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), -// _(RGB_DXT1 , 0x86, S1, S1, S1, ONE, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT1 , 0x86, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT3 , 0x87, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), -// _(RGBA_DXT5 , 0x88, S1, S1, S1, S1, X, Y, Z, W, 0x00, 0x00), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W), {}, }; @@ -67,6 +67,7 @@ static struct nv30_texture_format * nv30_fragtex_format(uint pipe_format) { struct nv30_texture_format *tf = nv30_texture_formats; + char fs[128]; while (tf->defined) { if (tf->pipe == pipe_format) @@ -74,26 +75,27 @@ nv30_fragtex_format(uint pipe_format) tf++; } + pf_sprint_name(fs, pipe_format); + NOUVEAU_ERR("unknown texture format %s\n", fs); return NULL; } -static void +static struct nouveau_stateobj * nv30_fragtex_build(struct nv30_context *nv30, int unit) { struct nv30_sampler_state *ps = nv30->tex_sampler[unit]; struct nv30_miptree *nv30mt = nv30->tex_miptree[unit]; struct pipe_texture *pt = &nv30mt->base; struct nv30_texture_format *tf; - uint32_t txf, txs, txp; - int swizzled = 0; /*XXX: implement in region code? */ + struct nouveau_stateobj *so; + uint32_t txf, txs /*, txp*/; + /*int swizzled = 0;*/ /*XXX: implement in region code? */ unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; tf = nv30_fragtex_format(pt->format); - if (!tf || !tf->defined) { - NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); - return; - } + if (!tf) + assert(0); txf = tf->format << 8; txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); @@ -117,35 +119,45 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) break; default: NOUVEAU_ERR("Unknown target %d\n", pt->target); - return; + return NULL; } txs = tf->swizzle; - BEGIN_RING(rankine, NV34TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv30mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW); - OUT_RELOCd(nv30mt->buffer,txf, tex_flags | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (NV34TCL_TX_ENABLE_ENABLE | ps->en); - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pt->height[0]); - OUT_RING (ps->bcol); + so = so_new(16, 2); + so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); + so_reloc (so, nv30mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, nv30mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, + NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); + so_data (so, ps->wrap); + so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en); + so_data (so, txs); + so_data (so, ps->filt | 0x2000 /*voodoo*/); + so_data (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | + pt->height[0]); + so_data (so, ps->bcol); + + return so; } -void -nv30_fragtex_bind(struct nv30_context *nv30) +static boolean +nv30_fragtex_validate(struct nv30_context *nv30) { - struct nv30_fragment_program *fp = nv30->fragprog.active; + struct nv30_fragment_program *fp = nv30->fragprog.current; + struct nv30_state *state = &nv30->state; + struct nouveau_stateobj *so; unsigned samplers, unit; - samplers = nv30->fp_samplers & ~fp->samplers; + samplers = state->fp_samplers & ~fp->samplers; while (samplers) { unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(rankine, NV34TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + so = so_new(2, 0); + so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); } samplers = nv30->dirty_samplers & fp->samplers; @@ -153,9 +165,19 @@ nv30_fragtex_bind(struct nv30_context *nv30) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - nv30_fragtex_build(nv30, unit); + so = nv30_fragtex_build(nv30, unit); + so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); } - nv30->fp_samplers = fp->samplers; + nv30->state.fp_samplers = fp->samplers; + return FALSE; } +struct nv30_state_entry nv30_state_fragtex = { + .validate = nv30_fragtex_validate, + .dirty = { + .pipe = NV30_NEW_SAMPLER | NV30_NEW_FRAGPROG, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 0ea78571972..cf10ddd4b63 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -6,6 +6,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_rasterizer, &nv30_state_scissor, &nv30_state_stipple, + &nv30_state_fragtex, &nv30_state_blend, &nv30_state_blend_colour, &nv30_state_zsa, @@ -59,6 +60,11 @@ nv30_emit_hw_state(struct nv30_context *nv30) screen->cur_pctx = nv30->pctx_id; } + if (nv30->dirty & NV30_NEW_FRAGPROG) { + nv30_fragprog_bind(nv30, nv30->fragprog.current); + /*XXX: clear NV30_NEW_FRAGPROG if no new program uploaded */ + } + for (i = 0, states = state->dirty; states; i++) { if (!(states & (1ULL << i))) continue; @@ -68,28 +74,11 @@ nv30_emit_hw_state(struct nv30_context *nv30) states &= ~(1ULL << i); } - if (nv30->dirty & NV30_NEW_FRAGPROG) { - nv30_fragprog_bind(nv30, nv30->fragprog.current); - /*XXX: clear NV30_NEW_FRAGPROG if no new program uploaded */ - } - - if (nv30->dirty_samplers || (nv30->dirty & NV30_NEW_FRAGPROG)) { - nv30_fragtex_bind(nv30); -/* - BEGIN_RING(rankine, NV34TCL_TX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(rankine, NV34TCL_TX_CACHE_CTL, 1); - OUT_RING (1);*/ - nv30->dirty &= ~NV30_NEW_FRAGPROG; - } - if (nv30->dirty & NV30_NEW_VERTPROG) { nv30_vertprog_bind(nv30, nv30->vertprog.current); nv30->dirty &= ~NV30_NEW_VERTPROG; } - nv30->dirty_samplers = 0; - so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]); for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { if (!(samplers & (1 << i))) @@ -99,21 +88,6 @@ nv30_emit_hw_state(struct nv30_context *nv30) samplers &= ~(1ULL << i); } - /* Texture images, emitted in nv30_fragtex_build */ -#if 0 - for (i = 0; i < 16; i++) { - if (!(nv30->fp_samplers & (1 << i))) - continue; - BEGIN_RING(rankine, NV34TCL_TX_OFFSET(i), 2); - OUT_RELOCl(nv30->tex[i].buffer, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv30->tex[i].buffer, nv30->tex[i].format, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | - NOUVEAU_BO_OR, NV34TCL_TX_FORMAT_DMA0, - NV34TCL_TX_FORMAT_DMA1); - } -#endif - /* Fragment program */ BEGIN_RING(rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); OUT_RELOC (nv30->fragprog.active->buffer, 0, NOUVEAU_BO_VRAM | -- cgit v1.2.3 From c7086277546d065eb94ba8dbeca1620605f167ea Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Fri, 11 Jul 2008 23:17:47 +0200 Subject: nv30: Move constant buffers out of vert/frag prog structures --- src/gallium/drivers/nv30/nv30_context.h | 6 ++---- src/gallium/drivers/nv30/nv30_fragprog.c | 12 ++++++------ src/gallium/drivers/nv30/nv30_state.c | 5 +++-- src/gallium/drivers/nv30/nv30_vertprog.c | 18 ++++++++---------- 4 files changed, 19 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 72f803c80ad..c3c8b733090 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -124,6 +124,8 @@ struct nv30_context { unsigned dirty; struct pipe_scissor_state scissor; unsigned stipple[32]; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; struct nv30_rasterizer_state *rasterizer; struct nv30_zsa_state *zsa; struct nv30_blend_state *blend; @@ -150,16 +152,12 @@ struct nv30_context { struct { struct nv30_vertex_program *active; - struct nv30_vertex_program *current; - struct pipe_buffer *constant_buf; } vertprog; struct { struct nv30_fragment_program *active; - struct nv30_fragment_program *current; - struct pipe_buffer *constant_buf; } fragprog; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 6af86e4f4f9..b560455a02f 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -820,6 +820,8 @@ nv30_fragprog_upload(struct nv30_context *nv30, void nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp) { + struct pipe_buffer *constbuf = + nv30->constbuf[PIPE_SHADER_FRAGMENT]; struct pipe_winsys *ws = nv30->pipe.winsys; int i; @@ -830,8 +832,9 @@ nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp) } if (fp->nr_consts) { - float *map = ws->buffer_map(ws, nv30->fragprog.constant_buf, - PIPE_BUFFER_USAGE_CPU_READ); + float *map; + + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { struct nv30_fragment_program_data *fpd = &fp->consts[i]; uint32_t *p = &fp->insn[fpd->offset]; @@ -842,13 +845,10 @@ nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp) memcpy(p, cb, 4 * sizeof(float)); fp->on_hw = 0; } - ws->buffer_unmap(ws, nv30->fragprog.constant_buf); + ws->buffer_unmap(ws, constbuf); } if (!fp->on_hw) { - const uint32_t le = 1; - uint32_t *map; - if (!fp->buffer) fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 72b9515eb1a..aec6fa2b155 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -589,12 +589,13 @@ nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, { struct nv30_context *nv30 = nv30_context(pipe); + nv30->constbuf[shader] = buf->buffer; + nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); + if (shader == PIPE_SHADER_VERTEX) { - nv30->vertprog.constant_buf = buf->buffer; nv30->dirty |= NV30_NEW_VERTPROG; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv30->fragprog.constant_buf = buf->buffer; nv30->dirty |= NV30_NEW_FRAGPROG; } } diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 1b34d2e4b29..90756febd28 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -135,7 +135,7 @@ emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) /* * |VVV| - * d�.�b + * d�.�b * \u/ * */ @@ -641,9 +641,12 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) { struct nouveau_winsys *nvws = nv30->nvws; struct pipe_winsys *ws = nv30->pipe.winsys; + struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; int i; + constbuf = nv30->constbuf[PIPE_SHADER_VERTEX]; + /* Translate TGSI shader into hw bytecode */ if (!vp->translated) { nv30_vertprog_translate(nv30, vp); @@ -730,8 +733,8 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) if (vp->nr_consts) { float *map = NULL; - if (nv30->vertprog.constant_buf) { - map = ws->buffer_map(ws, nv30->vertprog.constant_buf, + if (constbuf) { + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); } @@ -750,15 +753,10 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); OUT_RING (i + vp->data->start); OUT_RINGp ((uint32_t *)vpd->value, 4); -#if 0 - NOUVEAU_MSG("VP const %d: %f %f %f %f\n", - i, vpd->value[0], vpd->value[1], - vpd->value[2], vpd->value[3]); -#endif } - if (map) { - ws->buffer_unmap(ws, nv30->vertprog.constant_buf); + if (constbuf) { + ws->buffer_unmap(ws, constbuf); } } -- cgit v1.2.3 From 2419a5fe3601851989506a11b0bd4e3cfb071035 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 00:19:15 +0200 Subject: nv30: Emit vertex program using state objects --- src/gallium/drivers/nv30/nv30_context.h | 11 ++------ src/gallium/drivers/nv30/nv30_state.c | 11 ++++---- src/gallium/drivers/nv30/nv30_state.h | 2 ++ src/gallium/drivers/nv30/nv30_state_emit.c | 6 +---- src/gallium/drivers/nv30/nv30_vbo.c | 2 +- src/gallium/drivers/nv30/nv30_vertprog.c | 40 +++++++++++++++++++++++------- 6 files changed, 42 insertions(+), 30 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index c3c8b733090..5d7080a555d 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -124,6 +124,7 @@ struct nv30_context { unsigned dirty; struct pipe_scissor_state scissor; unsigned stipple[32]; + struct nv30_vertex_program *vertprog; struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; unsigned constbuf_nr[PIPE_SHADER_TYPES]; struct nv30_rasterizer_state *rasterizer; @@ -150,11 +151,6 @@ struct nv30_context { unsigned delta; } vb[16]; - struct { - struct nv30_vertex_program *active; - struct nv30_vertex_program *current; - } vertprog; - struct { struct nv30_fragment_program *active; struct nv30_fragment_program *current; @@ -188,10 +184,6 @@ extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen); extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30); /* nv30_vertprog.c */ -extern void nv30_vertprog_translate(struct nv30_context *, - struct nv30_vertex_program *); -extern void nv30_vertprog_bind(struct nv30_context *, - struct nv30_vertex_program *); extern void nv30_vertprog_destroy(struct nv30_context *, struct nv30_vertex_program *); @@ -213,6 +205,7 @@ extern void nv30_state_tex_update(struct nv30_context *nv30); extern struct nv30_state_entry nv30_state_rasterizer; extern struct nv30_state_entry nv30_state_scissor; extern struct nv30_state_entry nv30_state_stipple; +extern struct nv30_state_entry nv30_state_vertprog; extern struct nv30_state_entry nv30_state_blend; extern struct nv30_state_entry nv30_state_blend_colour; extern struct nv30_state_entry nv30_state_zsa; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index aec6fa2b155..92695ce23c0 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -499,10 +499,12 @@ static void * nv30_vp_state_create(struct pipe_context *pipe, const struct pipe_shader_state *cso) { + /*struct nv30_context *nv30 = nv30_context(pipe);*/ struct nv30_vertex_program *vp; vp = CALLOC(1, sizeof(struct nv30_vertex_program)); vp->pipe = *cso; + /*vp->draw = draw_create_vertex_shader(nv30->draw, &vp->pipe);*/ return (void *)vp; } @@ -511,14 +513,10 @@ static void nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_vertex_program *vp = hwcso; - - if (!hwcso) { - return; - } - nv30->vertprog.current = vp; + nv30->vertprog = hwcso; nv30->dirty |= NV30_NEW_VERTPROG; + /*nv30->draw_dirty |= NV30_NEW_VERTPROG;*/ } static void @@ -527,6 +525,7 @@ nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv30_context *nv30 = nv30_context(pipe); struct nv30_vertex_program *vp = hwcso; + /*draw_delete_vertex_shader(nv30->draw, vp->draw);*/ nv30_vertprog_destroy(nv30, vp); FREE(vp); } diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h index c65a937467e..a897bc50687 100644 --- a/src/gallium/drivers/nv30/nv30_state.h +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -26,6 +26,7 @@ struct nv30_vertex_program { struct pipe_shader_state pipe; boolean translated; + struct nv30_vertex_program_exec *insns; unsigned nr_insns; struct nv30_vertex_program_data *consts; @@ -39,6 +40,7 @@ struct nv30_vertex_program { uint32_t ir; uint32_t or; + struct nouveau_stateobj *so; }; struct nv30_fragment_program_data { diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index cf10ddd4b63..4ab62ddc016 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -7,6 +7,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_scissor, &nv30_state_stipple, &nv30_state_fragtex, + &nv30_state_vertprog, &nv30_state_blend, &nv30_state_blend_colour, &nv30_state_zsa, @@ -74,11 +75,6 @@ nv30_emit_hw_state(struct nv30_context *nv30) states &= ~(1ULL << i); } - if (nv30->dirty & NV30_NEW_VERTPROG) { - nv30_vertprog_bind(nv30, nv30->vertprog.current); - nv30->dirty &= ~NV30_NEW_VERTPROG; - } - so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]); for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { if (!(samplers & (1 << i))) diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index d23164eb485..8e4ee7a8742 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -100,7 +100,7 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, int attrib, static void nv30_vbo_arrays_update(struct nv30_context *nv30) { - struct nv30_vertex_program *vp = nv30->vertprog.active; + struct nv30_vertex_program *vp = nv30->vertprog; uint32_t inputs, vtxfmt[16]; int hw, num_hw = 0; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 90756febd28..65e00132055 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -563,7 +563,7 @@ nv30_vertprog_prepare(struct nv30_vpc *vpc) return TRUE; } -void +static void nv30_vertprog_translate(struct nv30_context *nv30, struct nv30_vertex_program *vp) { @@ -636,27 +636,31 @@ out_err: FREE(vpc); } -void -nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) +static boolean +nv30_vertprog_validate(struct nv30_context *nv30) { struct nouveau_winsys *nvws = nv30->nvws; struct pipe_winsys *ws = nv30->pipe.winsys; + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; int i; + vp = nv30->vertprog; constbuf = nv30->constbuf[PIPE_SHADER_VERTEX]; /* Translate TGSI shader into hw bytecode */ if (!vp->translated) { nv30_vertprog_translate(nv30, vp); if (!vp->translated) - assert(0); + return FALSE; } /* Allocate hw vtxprog exec slots */ if (!vp->exec) { struct nouveau_resource *heap = nv30->screen->vp_exec_heap; + struct nouveau_stateobj *so; uint vplen = vp->nr_insns; if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { @@ -671,6 +675,15 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) assert(0); } + so = so_new(2, 0); + so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + /* Add these, and you'll go 1/3 speed */ + /*so_method(so, rankine, NV34TCL_VP_ATTRIB_EN, 2); + so_data (so, vp->ir); + so_data (so, vp->or);*/ + so_ref(so, &vp->so); + upload_code = TRUE; } @@ -777,10 +790,12 @@ nv30_vertprog_bind(struct nv30_context *nv30, struct nv30_vertex_program *vp) } } - BEGIN_RING(rankine, NV34TCL_VP_START_FROM_ID, 1); - OUT_RING (vp->exec->start); + if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) { + so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]); + return TRUE; + } - nv30->vertprog.active = vp; + return FALSE; } void @@ -808,7 +823,14 @@ nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp) vp->data_start = 0; vp->data_start_min = 0; - /* vp->ir = vp->or = vp->clip_ctrl = 0; - so_ref(NULL, &vp->so); */ + vp->ir = vp->or = 0; + so_ref(NULL, &vp->so); } +struct nv30_state_entry nv30_state_vertprog = { + .validate = nv30_vertprog_validate, + .dirty = { + .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/, + .hw = NV30_STATE_VERTPROG, + } +}; -- cgit v1.2.3 From 58737dc87575625438d288fe2f816b6a9d2086f1 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 00:48:26 +0200 Subject: nv30: Emit fragment program using state objects --- src/gallium/drivers/nv30/nv30_context.h | 11 +---- src/gallium/drivers/nv30/nv30_fragprog.c | 65 +++++++++++++++++++++--------- src/gallium/drivers/nv30/nv30_fragtex.c | 2 +- src/gallium/drivers/nv30/nv30_state.c | 9 ++--- src/gallium/drivers/nv30/nv30_state.h | 3 ++ src/gallium/drivers/nv30/nv30_state_emit.c | 14 +------ 6 files changed, 56 insertions(+), 48 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 5d7080a555d..0c900c5598f 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -125,6 +125,7 @@ struct nv30_context { struct pipe_scissor_state scissor; unsigned stipple[32]; struct nv30_vertex_program *vertprog; + struct nv30_fragment_program *fragprog; struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; unsigned constbuf_nr[PIPE_SHADER_TYPES]; struct nv30_rasterizer_state *rasterizer; @@ -151,11 +152,6 @@ struct nv30_context { unsigned delta; } vb[16]; - struct { - struct nv30_fragment_program *active; - struct nv30_fragment_program *current; - } fragprog; - struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; }; @@ -188,10 +184,6 @@ extern void nv30_vertprog_destroy(struct nv30_context *, struct nv30_vertex_program *); /* nv30_fragprog.c */ -extern void nv30_fragprog_translate(struct nv30_context *, - struct nv30_fragment_program *); -extern void nv30_fragprog_bind(struct nv30_context *, - struct nv30_fragment_program *); extern void nv30_fragprog_destroy(struct nv30_context *, struct nv30_fragment_program *); @@ -205,6 +197,7 @@ extern void nv30_state_tex_update(struct nv30_context *nv30); extern struct nv30_state_entry nv30_state_rasterizer; extern struct nv30_state_entry nv30_state_scissor; extern struct nv30_state_entry nv30_state_stipple; +extern struct nv30_state_entry nv30_state_fragprog; extern struct nv30_state_entry nv30_state_vertprog; extern struct nv30_state_entry nv30_state_blend; extern struct nv30_state_entry nv30_state_blend_colour; diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index b560455a02f..06ed04cbfeb 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -723,7 +723,7 @@ out_err: return FALSE; } -void +static void nv30_fragprog_translate(struct nv30_context *nv30, struct nv30_fragment_program *fp) { @@ -817,23 +817,46 @@ nv30_fragprog_upload(struct nv30_context *nv30, ws->buffer_unmap(ws, fp->buffer); } -void -nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp) +static boolean +nv30_fragprog_validate(struct nv30_context *nv30) { + struct nv30_fragment_program *fp = nv30->fragprog; struct pipe_buffer *constbuf = nv30->constbuf[PIPE_SHADER_FRAGMENT]; struct pipe_winsys *ws = nv30->pipe.winsys; + struct nouveau_stateobj *so; + boolean new_consts = FALSE; int i; + if (fp->translated) + goto update_constants; + + /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/ + nv30_fragprog_translate(nv30, fp); if (!fp->translated) { - nv30_fragprog_translate(nv30, fp); - if (!fp->translated) - assert(0); + /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/ + return FALSE; } + fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); + nv30_fragprog_upload(nv30, fp); + + so = so_new(4, 1); + so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); + so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1); + so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1); + so_data (so, fp->fp_control); + /* FIXME: Add these, and you'll have big slowdown */ + /*so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); + so_data (so, fp->fp_control);*/ + so_ref(so, &fp->so); + +update_constants: if (fp->nr_consts) { float *map; - + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { struct nv30_fragment_program_data *fpd = &fp->consts[i]; @@ -843,25 +866,20 @@ nv30_fragprog_bind(struct nv30_context *nv30, struct nv30_fragment_program *fp) if (!memcmp(p, cb, 4 * sizeof(float))) continue; memcpy(p, cb, 4 * sizeof(float)); - fp->on_hw = 0; + new_consts = TRUE; } ws->buffer_unmap(ws, constbuf); - } - if (!fp->on_hw) { - if (!fp->buffer) - fp->buffer = ws->buffer_create(ws, 0x100, 0, - fp->insn_len * 4); - nv30_fragprog_upload(nv30, fp); - fp->on_hw = TRUE; + if (new_consts) + nv30_fragprog_upload(nv30, fp); } - BEGIN_RING(rankine, NV34TCL_FP_CONTROL, 1); - OUT_RING (fp->fp_control); - BEGIN_RING(rankine, NV34TCL_FP_REG_CONTROL, 1); - OUT_RING (fp->fp_reg_control); + if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) { + so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]); + return TRUE; + } - nv30->fragprog.active = fp; + return FALSE; } void @@ -872,3 +890,10 @@ nv30_fragprog_destroy(struct nv30_context *nv30, FREE(fp->insn); } +struct nv30_state_entry nv30_state_fragprog = { + .validate = nv30_fragprog_validate, + .dirty = { + .pipe = NV30_NEW_FRAGPROG, + .hw = NV30_STATE_FRAGPROG + } +}; diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index abe77b51df2..8c5e88ea1da 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -143,7 +143,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) static boolean nv30_fragtex_validate(struct nv30_context *nv30) { - struct nv30_fragment_program *fp = nv30->fragprog.current; + struct nv30_fragment_program *fp = nv30->fragprog; struct nv30_state *state = &nv30->state; struct nouveau_stateobj *so; unsigned samplers, unit; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 92695ce23c0..8dc16d361d8 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -539,6 +539,8 @@ nv30_fp_state_create(struct pipe_context *pipe, fp = CALLOC(1, sizeof(struct nv30_fragment_program)); fp->pipe = *cso; + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + return (void *)fp; } @@ -546,13 +548,8 @@ static void nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) { struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_fragment_program *fp = hwcso; - - if (!hwcso) { - return; - } - nv30->fragprog.current = fp; + nv30->fragprog = hwcso; nv30->dirty |= NV30_NEW_FRAGPROG; } diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h index a897bc50687..20c51761607 100644 --- a/src/gallium/drivers/nv30/nv30_state.h +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -2,6 +2,7 @@ #define __NV30_STATE_H__ #include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" struct nv30_sampler_state { uint32_t fmt; @@ -50,6 +51,7 @@ struct nv30_fragment_program_data { struct nv30_fragment_program { struct pipe_shader_state pipe; + struct tgsi_shader_info info; boolean translated; boolean on_hw; @@ -65,6 +67,7 @@ struct nv30_fragment_program { uint32_t fp_control; uint32_t fp_reg_control; + struct nouveau_stateobj *so; }; struct nv30_miptree { diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 4ab62ddc016..c4ccc9422be 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -6,6 +6,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_rasterizer, &nv30_state_scissor, &nv30_state_stipple, + &nv30_state_fragprog, &nv30_state_fragtex, &nv30_state_vertprog, &nv30_state_blend, @@ -61,11 +62,6 @@ nv30_emit_hw_state(struct nv30_context *nv30) screen->cur_pctx = nv30->pctx_id; } - if (nv30->dirty & NV30_NEW_FRAGPROG) { - nv30_fragprog_bind(nv30, nv30->fragprog.current); - /*XXX: clear NV30_NEW_FRAGPROG if no new program uploaded */ - } - for (i = 0, states = state->dirty; states; i++) { if (!(states & (1ULL << i))) continue; @@ -83,13 +79,7 @@ nv30_emit_hw_state(struct nv30_context *nv30) state->hw[NV30_STATE_FRAGTEX0+i]); samplers &= ~(1ULL << i); } - - /* Fragment program */ - BEGIN_RING(rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); - OUT_RELOC (nv30->fragprog.active->buffer, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, - NV34TCL_FP_ACTIVE_PROGRAM_DMA1); + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FRAGPROG]); } boolean -- cgit v1.2.3 From 740c93a08ce7f5fb43a0f4cd5a50d95459c8aa8e Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 00:48:44 +0200 Subject: nv30: Change comment about slowdown --- src/gallium/drivers/nv30/nv30_vertprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 65e00132055..be6a327dad5 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -678,7 +678,7 @@ nv30_vertprog_validate(struct nv30_context *nv30) so = so_new(2, 0); so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); - /* Add these, and you'll go 1/3 speed */ + /* FIXME: Add these, and you'll have big slowdown */ /*so_method(so, rankine, NV34TCL_VP_ATTRIB_EN, 2); so_data (so, vp->ir); so_data (so, vp->or);*/ -- cgit v1.2.3 From b23e20a386729e75492069445bb924412dc29a0c Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 10:10:16 +0200 Subject: nv30: does not have vp_attrib/result --- src/gallium/drivers/nouveau/nouveau_class.h | 2 -- src/gallium/drivers/nv30/nv30_vertprog.c | 4 ---- 2 files changed, 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 18d8d3677db..749fbf041e7 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -4643,8 +4643,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV34TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004 #define NV34TCL_UNK1f80(x) (0x00001f80+((x)*4)) #define NV34TCL_UNK1f80__SIZE 0x00000010 -#define NV34TCL_VP_ATTRIB_EN 0x00001ff0 -#define NV34TCL_VP_RESULT_EN 0x00001ff4 #define NV40_CONTEXT_SURFACES_2D 0x00003062 diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index be6a327dad5..39852ce9834 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -678,10 +678,6 @@ nv30_vertprog_validate(struct nv30_context *nv30) so = so_new(2, 0); so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); - /* FIXME: Add these, and you'll have big slowdown */ - /*so_method(so, rankine, NV34TCL_VP_ATTRIB_EN, 2); - so_data (so, vp->ir); - so_data (so, vp->or);*/ so_ref(so, &vp->so); upload_code = TRUE; -- cgit v1.2.3 From 9489de99802e635271c1ae84630fc02892af1699 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 10:13:58 +0200 Subject: nv30: was setting wrong register --- src/gallium/drivers/nv30/nv30_fragprog.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 06ed04cbfeb..59e79d66f2a 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -841,16 +841,15 @@ nv30_fragprog_validate(struct nv30_context *nv30) fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); nv30_fragprog_upload(nv30, fp); - so = so_new(4, 1); + so = so_new(6, 1); so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1); so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1); so_data (so, fp->fp_control); - /* FIXME: Add these, and you'll have big slowdown */ - /*so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); - so_data (so, fp->fp_control);*/ + so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); + so_data (so, fp->fp_reg_control); so_ref(so, &fp->so); update_constants: -- cgit v1.2.3 From 4ca346a8c0d0d4ea38705f8d3a3e5e690aa77daf Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 11:45:51 +0200 Subject: nv30: Rename state emission func --- src/gallium/drivers/nv30/nv30_context.h | 2 +- src/gallium/drivers/nv30/nv30_state_emit.c | 2 +- src/gallium/drivers/nv30/nv30_vbo.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 0c900c5598f..c41cb292017 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -192,7 +192,7 @@ extern void nv30_fragtex_bind(struct nv30_context *); /* nv30_state.c and friends */ extern boolean nv30_state_validate(struct nv30_context *nv30); -extern void nv30_emit_hw_state(struct nv30_context *nv30); +extern void nv30_state_emit(struct nv30_context *nv30); extern void nv30_state_tex_update(struct nv30_context *nv30); extern struct nv30_state_entry nv30_state_rasterizer; extern struct nv30_state_entry nv30_state_scissor; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index c4ccc9422be..1e69a6a6e26 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -46,7 +46,7 @@ nv30_state_do_validate(struct nv30_context *nv30, } void -nv30_emit_hw_state(struct nv30_context *nv30) +nv30_state_emit(struct nv30_context *nv30) { struct nv30_state *state = &nv30->state; struct nv30_screen *screen = nv30->screen; diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index 8e4ee7a8742..0f82adfee04 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -156,7 +156,7 @@ nv30_vbo_validate_state(struct nv30_context *nv30, nv30_state_validate(nv30); - nv30_emit_hw_state(nv30); + nv30_state_emit(nv30); if (nv30->dirty & NV30_NEW_ARRAYS) { nv30_vbo_arrays_update(nv30); -- cgit v1.2.3 From 3d0e18ff5fba368a66bf34d18d219bf9a2dfba90 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 11:53:10 +0200 Subject: nv30: Reorder, remove useless stuff --- src/gallium/drivers/nv30/nv30_context.h | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index c41cb292017..ef8c16bdbb5 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -114,12 +114,6 @@ struct nv30_context { /* HW state derived from pipe states */ struct nv30_state state; - struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; - struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; - unsigned dirty_samplers; - unsigned fp_samplers; - unsigned vp_samplers; - /* Context state */ unsigned dirty; struct pipe_scissor_state scissor; @@ -134,17 +128,11 @@ struct nv30_context { struct pipe_blend_color blend_colour; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; + struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; unsigned nr_textures; - - uint32_t rt_enable; - struct pipe_buffer *rt[2]; - struct pipe_buffer *zeta; - - /*struct { - struct pipe_buffer *buffer; - uint32_t format; - } tex[16];*/ + unsigned dirty_samplers; unsigned vb_enable; struct { @@ -193,7 +181,6 @@ extern void nv30_fragtex_bind(struct nv30_context *); /* nv30_state.c and friends */ extern boolean nv30_state_validate(struct nv30_context *nv30); extern void nv30_state_emit(struct nv30_context *nv30); -extern void nv30_state_tex_update(struct nv30_context *nv30); extern struct nv30_state_entry nv30_state_rasterizer; extern struct nv30_state_entry nv30_state_scissor; extern struct nv30_state_entry nv30_state_stipple; -- cgit v1.2.3 From ac44f334e3492ab68eb310cfe43ed22206a042d8 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 12:24:37 +0200 Subject: nv30: Move edgeflag stuff --- src/gallium/drivers/nv30/nv30_context.c | 7 ------- src/gallium/drivers/nv30/nv30_context.h | 1 + src/gallium/drivers/nv30/nv30_state.c | 11 +++++++++++ 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index b2d9d3f1814..eefc614e5b5 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -32,12 +32,6 @@ nv30_destroy(struct pipe_context *pipe) FREE(nv30); } - -static void -nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) -{ -} - struct pipe_context * nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -57,7 +51,6 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) nv30->pipe.winsys = ws; nv30->pipe.screen = pscreen; nv30->pipe.destroy = nv30_destroy; - nv30->pipe.set_edgeflags = nv30_set_edgeflags; nv30->pipe.draw_arrays = nv30_draw_arrays; nv30->pipe.draw_elements = nv30_draw_elements; nv30->pipe.clear = nv30_clear; diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index ef8c16bdbb5..2e7b62a69a3 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -142,6 +142,7 @@ struct nv30_context { struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + const unsigned *edgeflags; }; static INLINE struct nv30_context * diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 8dc16d361d8..4fe1def74d6 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -657,6 +657,16 @@ nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count, nv30->dirty |= NV30_NEW_ARRAYS; } +static void +nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->edgeflags = bitfield; + nv30->dirty |= NV30_NEW_ARRAYS; + /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ +} + void nv30_init_state_functions(struct nv30_context *nv30) { @@ -696,6 +706,7 @@ nv30_init_state_functions(struct nv30_context *nv30) nv30->pipe.set_scissor_state = nv30_set_scissor_state; nv30->pipe.set_viewport_state = nv30_set_viewport_state; + nv30->pipe.set_edgeflags = nv30_set_edgeflags; nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers; nv30->pipe.set_vertex_elements = nv30_set_vertex_elements; } -- cgit v1.2.3 From 12118fcd123992f48ce78629e79e9949b96cd525 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 13:16:47 +0200 Subject: nv30: Emit vertex buffer objects using state objects --- src/gallium/drivers/nv30/nv30_context.h | 14 +- src/gallium/drivers/nv30/nv30_screen.c | 2 +- src/gallium/drivers/nv30/nv30_state.c | 6 + src/gallium/drivers/nv30/nv30_state_emit.c | 8 +- src/gallium/drivers/nv30/nv30_vbo.c | 628 +++++++++++++++++------------ 5 files changed, 399 insertions(+), 259 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 2e7b62a69a3..823b34a7c3b 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -128,20 +128,17 @@ struct nv30_context { struct pipe_blend_color blend_colour; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; unsigned nr_textures; unsigned dirty_samplers; - - unsigned vb_enable; - struct { - struct pipe_buffer *buffer; - unsigned delta; - } vb[16]; - - struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + unsigned vtxelt_nr; const unsigned *edgeflags; }; @@ -193,6 +190,7 @@ extern struct nv30_state_entry nv30_state_zsa; extern struct nv30_state_entry nv30_state_viewport; extern struct nv30_state_entry nv30_state_framebuffer; extern struct nv30_state_entry nv30_state_fragtex; +extern struct nv30_state_entry nv30_state_vbo; /* nv30_vbo.c */ extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 9c576369891..de5590af146 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -57,7 +57,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) return 13; case NOUVEAU_CAP_HW_VTXBUF: case NOUVEAU_CAP_HW_IDXBUF: - return 0; + return 1; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 4fe1def74d6..4d6303ebc28 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -644,7 +644,10 @@ nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count, struct nv30_context *nv30 = nv30_context(pipe); memcpy(nv30->vtxbuf, vb, sizeof(*vb) * count); + nv30->vtxbuf_nr = count; + nv30->dirty |= NV30_NEW_ARRAYS; + /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ } static void @@ -654,7 +657,10 @@ nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count, struct nv30_context *nv30 = nv30_context(pipe); memcpy(nv30->vtxelt, ve, sizeof(*ve) * count); + nv30->vtxelt_nr = count; + nv30->dirty |= NV30_NEW_ARRAYS; + /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ } static void diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 1e69a6a6e26..40fed621b24 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -13,6 +13,7 @@ static struct nv30_state_entry *render_states[] = { &nv30_state_blend_colour, &nv30_state_zsa, &nv30_state_viewport, + &nv30_state_vbo, NULL }; @@ -39,10 +40,7 @@ nv30_state_do_validate(struct nv30_context *nv30, states++; } - -/* TODO: uncomment when finished converting nv30->dirty = 0; -*/ } void @@ -71,6 +69,8 @@ nv30_state_emit(struct nv30_context *nv30) states &= ~(1ULL << i); } + state->dirty = 0; + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]); for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { if (!(samplers & (1 << i))) @@ -80,6 +80,8 @@ nv30_state_emit(struct nv30_context *nv30) samplers &= ~(1ULL << i); } so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FRAGPROG]); + if (state->hw[NV30_STATE_VTXBUF] /*&& nv30->render_mode == HW*/) + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_VTXBUF]); } boolean diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index 0f82adfee04..d930557f6bb 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -7,45 +7,119 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +#define FORCE_SWTNL 0 static INLINE int -nv30_vbo_ncomp(uint format) +nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) { - int ncomp = 0; + char fs[128]; + + switch (pipe) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; + break; + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + *fmt = NV34TCL_VTXFMT_TYPE_UBYTE; + break; + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *fmt = NV34TCL_VTXFMT_TYPE_USHORT; + break; + default: + pf_sprint_name(fs, pipe); + NOUVEAU_ERR("Unknown format %s\n", fs); + return 1; + } - if (pf_size_x(format)) ncomp++; - if (pf_size_y(format)) ncomp++; - if (pf_size_z(format)) ncomp++; - if (pf_size_w(format)) ncomp++; + switch (pipe) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_SSCALED: + *ncomp = 1; + break; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_SSCALED: + *ncomp = 2; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R16G16B16_SSCALED: + *ncomp = 3; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *ncomp = 4; + break; + default: + pf_sprint_name(fs, pipe); + NOUVEAU_ERR("Unknown format %s\n", fs); + return 1; + } - return ncomp; + return 0; } -static INLINE int -nv30_vbo_type(uint format) +static boolean +nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib, + unsigned ib_size) { - switch (pf_type(format)) { - case PIPE_FORMAT_TYPE_FLOAT: - return NV34TCL_VTXFMT_TYPE_FLOAT; - case PIPE_FORMAT_TYPE_UNORM: - return NV34TCL_VTXFMT_TYPE_UBYTE; + struct pipe_screen *pscreen = &nv30->screen->pipe; + unsigned type; + + if (!ib) { + nv30->idxbuf = NULL; + nv30->idxbuf_format = 0xdeadbeef; + return FALSE; + } + + if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) + return FALSE; + + switch (ib_size) { + case 2: + type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; + break; default: - NOUVEAU_ERR("Unknown format 0x%08x\n", format); - return NV34TCL_VTXFMT_TYPE_FLOAT; + return FALSE; } + + if (ib != nv30->idxbuf || + type != nv30->idxbuf_format) { + nv30->dirty |= NV30_NEW_ARRAYS; + nv30->idxbuf = ib; + nv30->idxbuf_format = type; + } + + return TRUE; } static boolean -nv30_vbo_static_attrib(struct nv30_context *nv30, int attrib, - struct pipe_vertex_element *ve, +nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, + int attrib, struct pipe_vertex_element *ve, struct pipe_vertex_buffer *vb) { struct pipe_winsys *ws = nv30->pipe.winsys; - int type, ncomp; + struct nouveau_grobj *rankine = nv30->screen->rankine; + unsigned type, ncomp; void *map; - type = nv30_vbo_type(ve->src_format); - ncomp = nv30_vbo_ncomp(ve->src_format); + if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + return FALSE; map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); map += vb->buffer_offset + ve->src_offset; @@ -55,31 +129,28 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, int attrib, { float *v = map; - BEGIN_RING(rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4); switch (ncomp) { case 4: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(v[2]); - OUT_RINGf(v[3]); + so_method(so, rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + so_data (so, fui(v[3])); break; case 3: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(v[2]); - OUT_RINGf(1.0); + so_method(so, rankine, NV34TCL_VTX_ATTR_3F_X(attrib), 3); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); break; case 2: - OUT_RINGf(v[0]); - OUT_RINGf(v[1]); - OUT_RINGf(0.0); - OUT_RINGf(1.0); + so_method(so, rankine, NV34TCL_VTX_ATTR_2F_X(attrib), 2); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); break; case 1: - OUT_RINGf(v[0]); - OUT_RINGf(0.0); - OUT_RINGf(0.0); - OUT_RINGf(1.0); + so_method(so, rankine, NV34TCL_VTX_ATTR_1F(attrib), 1); + so_data (so, fui(v[0])); break; default: ws->buffer_unmap(ws, vb->buffer); @@ -97,209 +168,202 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, int attrib, return TRUE; } -static void -nv30_vbo_arrays_update(struct nv30_context *nv30) +boolean +nv30_draw_arrays(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) { - struct nv30_vertex_program *vp = nv30->vertprog; - uint32_t inputs, vtxfmt[16]; - int hw, num_hw = 0; - - nv30->vb_enable = 0; + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_channel *chan = nv30->nvws->channel; + unsigned restart = 0; - inputs = vp->ir; - for (hw = 0; hw < 16 && inputs; hw++) { - if (inputs & (1 << hw)) { - num_hw = hw; - inputs &= ~(1 << hw); - } + nv30_vbo_set_idxbuf(nv30, NULL, 0); + if (FORCE_SWTNL || !nv30_state_validate(nv30)) { + /*return nv30_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count);*/ + return FALSE; } - num_hw++; - inputs = vp->ir; - for (hw = 0; hw < num_hw; hw++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; + while (count) { + unsigned vc, nr; - if (!(inputs & (1 << hw))) { - vtxfmt[hw] = NV34TCL_VTXFMT_TYPE_FLOAT; + nv30_state_emit(nv30); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); continue; } - ve = &nv30->vtxelt[hw]; - vb = &nv30->vtxbuf[ve->vertex_buffer_index]; + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); - if (vb->pitch == 0) { - vtxfmt[hw] = NV34TCL_VTXFMT_TYPE_FLOAT; - if (nv30_vbo_static_attrib(nv30, hw, ve, vb) == TRUE) - continue; + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; } - nv30->vb_enable |= (1 << hw); - nv30->vb[hw].delta = vb->buffer_offset + ve->src_offset; - nv30->vb[hw].buffer = vb->buffer; + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; - vtxfmt[hw] = ((vb->pitch << NV34TCL_VTXFMT_STRIDE_SHIFT) | - (nv30_vbo_ncomp(ve->src_format) << - NV34TCL_VTXFMT_SIZE_SHIFT) | - nv30_vbo_type(ve->src_format)); + BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; } - BEGIN_RING(rankine, NV34TCL_VTXFMT(0), num_hw); - OUT_RINGp (vtxfmt, num_hw); + pipe->flush(pipe, 0, NULL); + return TRUE; } -static boolean -nv30_vbo_validate_state(struct nv30_context *nv30, - struct pipe_buffer *ib, unsigned ib_format) +static INLINE void +nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, + unsigned mode, unsigned start, unsigned count) { - unsigned inputs; + struct nouveau_channel *chan = nv30->nvws->channel; - nv30_state_validate(nv30); + while (count) { + uint8_t *elts = (uint8_t *)ib + start; + unsigned vc, push, restart = 0; - nv30_state_emit(nv30); + nv30_state_emit(nv30); - if (nv30->dirty & NV30_NEW_ARRAYS) { - nv30_vbo_arrays_update(nv30); - nv30->dirty &= ~NV30_NEW_ARRAYS; - } + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; - inputs = nv30->vb_enable; - while (inputs) { - unsigned a = ffs(inputs) - 1; + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); - inputs &= ~(1 << a); + if (vc & 1) { + BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } - BEGIN_RING(rankine, NV34TCL_VTXBUF_ADDRESS(a), 1); - OUT_RELOC (nv30->vb[a].buffer, nv30->vb[a].delta, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0, - NV34TCL_VTXBUF_ADDRESS_DMA1); - } + while (vc) { + unsigned i; - if (ib) { - BEGIN_RING(rankine, NV34TCL_IDXBUF_ADDRESS, 2); - OUT_RELOCl(ib, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | - NOUVEAU_BO_RD); - OUT_RELOCd(ib, ib_format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | - NOUVEAU_BO_RD | NOUVEAU_BO_OR, - 0, NV34TCL_IDXBUF_FORMAT_DMA1); - } + push = MIN2(vc, 2047 * 2); - BEGIN_RING(rankine, 0x1710, 1); - OUT_RING (0); /* vtx cache flush */ + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); - return TRUE; -} + vc -= push; + elts += push; + } -boolean -nv30_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, - unsigned count) -{ - struct nv30_context *nv30 = nv30_context(pipe); - unsigned nr; - boolean ret; + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); - ret = nv30_vbo_validate_state(nv30, NULL, 0); - if (!ret) { - NOUVEAU_ERR("state validate failed\n"); - return FALSE; + start = restart; } +} - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); - - nr = (count & 0xff); - if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); - start += nr; - } +static INLINE void +nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv30->nvws->channel; - nr = count >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; + while (count) { + uint16_t *elts = (uint16_t *)ib + start; + unsigned vc, push, restart = 0; - nr -= push; + nv30_state_emit(nv30); - BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); - while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); - start += 0x100; + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; } - } + count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); - pipe->flush(pipe, 0, NULL); - return TRUE; -} + if (vc & 1) { + BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } -static INLINE void -nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, - unsigned start, unsigned count) -{ - uint8_t *elts = (uint8_t *)ib + start; - int push, i; + while (vc) { + unsigned i; - if (count & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); - elts++; count--; - } + push = MIN2(vc, 2047 * 2); - while (count) { - push = MIN2(count, 2047 * 2); + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + vc -= push; + elts += push; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); - count -= push; - elts += push; + start = restart; } } static INLINE void -nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, - unsigned start, unsigned count) +nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, + unsigned mode, unsigned start, unsigned count) { - uint16_t *elts = (uint16_t *)ib + start; - int push, i; - - if (count & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); - elts++; count--; - } + struct nouveau_channel *chan = nv30->nvws->channel; while (count) { - push = MIN2(count, 2047 * 2); + uint32_t *elts = (uint32_t *)ib + start; + unsigned vc, push, restart = 0; - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + nv30_state_emit(nv30); - count -= push; - elts += push; - } -} + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; -static INLINE void -nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, - unsigned start, unsigned count) -{ - uint32_t *elts = (uint32_t *)ib + start; - int push; + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); - while (count) { - push = MIN2(count, 2047); + while (vc) { + push = MIN2(vc, 2047); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); + OUT_RINGp (elts, push); - count -= push; - elts += push; + vc -= push; + elts += push; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + start = restart; } } @@ -310,99 +374,82 @@ nv30_draw_elements_inline(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct pipe_winsys *ws = pipe->winsys; - boolean ret; void *map; - ret = nv30_vbo_validate_state(nv30, NULL, 0); - if (!ret) { - NOUVEAU_ERR("state validate failed\n"); - return FALSE; - } - map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { NOUVEAU_ERR("failed mapping ib\n"); return FALSE; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); - switch (ib_size) { case 1: - nv30_draw_elements_u08(nv30, map, start, count); + nv30_draw_elements_u08(nv30, map, mode, start, count); break; case 2: - nv30_draw_elements_u16(nv30, map, start, count); + nv30_draw_elements_u16(nv30, map, mode, start, count); break; case 4: - nv30_draw_elements_u32(nv30, map, start, count); + nv30_draw_elements_u32(nv30, map, mode, start, count); break; default: NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); break; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); - ws->buffer_unmap(ws, ib); - return TRUE; } static boolean nv30_draw_elements_vbo(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - unsigned nr, type; - boolean ret; - - switch (ib_size) { - case 2: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - return FALSE; - } + struct nouveau_channel *chan = nv30->nvws->channel; + unsigned restart = 0; - ret = nv30_vbo_validate_state(nv30, ib, type); - if (!ret) { - NOUVEAU_ERR("failed state validation\n"); - return FALSE; - } + while (count) { + unsigned nr, vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + nv30_state_emit(nv30); - nr = (count & 0xff); - if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); - start += nr; - } + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } - nr = count >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; - nr -= push; + nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); - while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); - start += 0x100; + BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } } - } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } return TRUE; } @@ -412,10 +459,19 @@ nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { -/* if (indexSize != 1) { - nv30_draw_elements_vbo(pipe, indexBuffer, indexSize, - mode, start, count); - } else */{ + struct nv30_context *nv30 = nv30_context(pipe); + boolean idxbuf; + + idxbuf = nv30_vbo_set_idxbuf(nv30, indexBuffer, indexSize); + if (FORCE_SWTNL || !nv30_state_validate(nv30)) { + /*return nv30_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count);*/ + return FALSE; + } + + if (idxbuf) { + nv30_draw_elements_vbo(pipe, mode, start, count); + } else { nv30_draw_elements_inline(pipe, indexBuffer, indexSize, mode, start, count); } @@ -424,4 +480,82 @@ nv30_draw_elements(struct pipe_context *pipe, return TRUE; } +static boolean +nv30_vbo_validate(struct nv30_context *nv30) +{ + struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct pipe_buffer *ib = nv30->idxbuf; + unsigned ib_format = nv30->idxbuf_format; + unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + int hw; + + if (nv30->edgeflags) { + /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ + return FALSE; + } + + vtxbuf = so_new(20, 18); + so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr); + vtxfmt = so_new(17, 0); + so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr); + for (hw = 0; hw < nv30->vtxelt_nr; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + unsigned type, ncomp; + + ve = &nv30->vtxelt[hw]; + vb = &nv30->vtxbuf[ve->vertex_buffer_index]; + + if (!vb->pitch) { + if (!sattr) + sattr = so_new(16 * 5, 0); + + if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT); + continue; + } + } + + if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ + so_ref(NULL, &vtxbuf); + so_ref(NULL, &vtxfmt); + return FALSE; + } + + so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + so_data (vtxfmt, ((vb->pitch << NV34TCL_VTXFMT_STRIDE_SHIFT) | + (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); + } + + if (ib) { + so_method(vtxbuf, rankine, NV34TCL_IDXBUF_ADDRESS, 2); + so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, + 0, NV34TCL_IDXBUF_FORMAT_DMA1); + } + + so_method(vtxbuf, rankine, 0x1710, 1); + so_data (vtxbuf, 0); + + so_ref(vtxbuf, &nv30->state.hw[NV30_STATE_VTXBUF]); + nv30->state.dirty |= (1ULL << NV30_STATE_VTXBUF); + so_ref(vtxfmt, &nv30->state.hw[NV30_STATE_VTXFMT]); + nv30->state.dirty |= (1ULL << NV30_STATE_VTXFMT); + so_ref(sattr, &nv30->state.hw[NV30_STATE_VTXATTR]); + nv30->state.dirty |= (1ULL << NV30_STATE_VTXATTR); + return FALSE; +} + +struct nv30_state_entry nv30_state_vbo = { + .validate = nv30_vbo_validate, + .dirty = { + .pipe = NV30_NEW_ARRAYS, + .hw = 0, + } +}; -- cgit v1.2.3 From a9863caefc9ed1ae098ea9033a82dbdc556edd30 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 13:59:01 +0200 Subject: nv30: do not shift texture format --- src/gallium/drivers/nv30/nv30_fragtex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 8c5e88ea1da..4242f86f76f 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -97,7 +97,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) if (!tf) assert(0); - txf = tf->format << 8; + txf = tf->format; txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); txf |= log2i(pt->width[0]) << 20; txf |= log2i(pt->height[0]) << 24; -- cgit v1.2.3 From fa167eedeba601f3281655f779331ea9f4eaa5cf Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 13:59:24 +0200 Subject: nv30: update caps --- src/gallium/drivers/nv30/nv30_screen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index de5590af146..54fc245bc63 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -32,7 +32,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return 16; case PIPE_CAP_NPOT_TEXTURES: - return 0; + return 1; case PIPE_CAP_TWO_SIDED_STENCIL: return 1; case PIPE_CAP_GLSL: @@ -75,7 +75,7 @@ nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) case PIPE_CAP_MAX_POINT_WIDTH_AA: return 64.0; case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; + return 8.0; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 4.0; default: -- cgit v1.2.3 From 1d50e26f4afc0c7cdcd843a1336a90cdfc76765b Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 12 Jul 2008 15:07:02 +0200 Subject: nv30: no npot textures --- src/gallium/drivers/nv30/nv30_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 54fc245bc63..b216a703187 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -32,7 +32,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return 16; case PIPE_CAP_NPOT_TEXTURES: - return 1; + return 0; case PIPE_CAP_TWO_SIDED_STENCIL: return 1; case PIPE_CAP_GLSL: -- cgit v1.2.3 From 36dd89c8a7f2a911e8f7f18d1edcaf982a75a438 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sun, 13 Jul 2008 22:39:58 +0900 Subject: util: Eliminate pipe from the arguments to pipe_get/put_tile_xxx functions. You don't need a pipe_context * for this, and all other necessary info is already inside pipe_surface. --- src/gallium/auxiliary/util/p_tile.c | 42 +++++++++++----------------- src/gallium/auxiliary/util/p_tile.h | 19 ++++--------- src/gallium/drivers/softpipe/sp_tile_cache.c | 16 +++++------ src/mesa/drivers/x11/xm_surface.c | 4 +-- src/mesa/state_tracker/st_cb_accum.c | 12 ++++---- src/mesa/state_tracker/st_cb_drawpixels.c | 8 +++--- src/mesa/state_tracker/st_cb_readpixels.c | 10 +++---- src/mesa/state_tracker/st_cb_texture.c | 8 +++--- 8 files changed, 51 insertions(+), 68 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 93abef98792..1a1a2d96cc6 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -45,12 +45,10 @@ * This should be usable by any hw driver that has mappable surfaces. */ void -pipe_get_tile_raw(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_raw(struct pipe_surface *ps, uint x, uint y, uint w, uint h, void *dst, int dst_stride) { - struct pipe_screen *screen = pipe->screen; const void *src; if (pipe_clip_tile(x, y, &w, &h, ps)) @@ -59,14 +57,14 @@ pipe_get_tile_raw(struct pipe_context *pipe, if (dst_stride == 0) dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; - src = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); assert(src); if(!src) return; pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y); - screen->surface_unmap(screen, ps); + pipe_surface_unmap(ps); } @@ -75,12 +73,10 @@ pipe_get_tile_raw(struct pipe_context *pipe, * This should be usable by any hw driver that has mappable surfaces. */ void -pipe_put_tile_raw(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_raw(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const void *src, int src_stride) { - struct pipe_screen *screen = pipe->screen; void *dst; if (pipe_clip_tile(x, y, &w, &h, ps)) @@ -89,14 +85,14 @@ pipe_put_tile_raw(struct pipe_context *pipe, if (src_stride == 0) src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; - dst = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); assert(dst); if(!dst) return; pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0); - screen->surface_unmap(screen, ps); + pipe_surface_unmap(ps); } @@ -686,8 +682,7 @@ ycbcr_get_tile_rgba(ushort *src, void -pipe_get_tile_rgba(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_rgba(struct pipe_surface *ps, uint x, uint y, uint w, uint h, float *p) { @@ -702,7 +697,7 @@ pipe_get_tile_rgba(struct pipe_context *pipe, if (!packed) return; - pipe_get_tile_raw(pipe, ps, x, y, w, h, packed, 0); + pipe_get_tile_raw(ps, x, y, w, h, packed, 0); switch (ps->format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -768,8 +763,7 @@ pipe_get_tile_rgba(struct pipe_context *pipe, void -pipe_put_tile_rgba(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_rgba(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const float *p) { @@ -838,7 +832,7 @@ pipe_put_tile_rgba(struct pipe_context *pipe, assert(0); } - pipe_put_tile_raw(pipe, ps, x, y, w, h, packed, 0); + pipe_put_tile_raw(ps, x, y, w, h, packed, 0); FREE(packed); } @@ -848,12 +842,10 @@ pipe_put_tile_rgba(struct pipe_context *pipe, * Get a block of Z values, converted to 32-bit range. */ void -pipe_get_tile_z(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_z(struct pipe_surface *ps, uint x, uint y, uint w, uint h, uint *z) { - struct pipe_screen *screen = pipe->screen; const uint dstStride = w; ubyte *map; uint *pDest = z; @@ -862,7 +854,7 @@ pipe_get_tile_z(struct pipe_context *pipe, if (pipe_clip_tile(x, y, &w, &h, ps)) return; - map = (ubyte *)screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); if (!map) { assert(0); return; @@ -913,17 +905,15 @@ pipe_get_tile_z(struct pipe_context *pipe, assert(0); } - screen->surface_unmap(screen, ps); + pipe_surface_unmap(ps); } void -pipe_put_tile_z(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_z(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const uint *zSrc) { - struct pipe_screen *screen = pipe->screen; const uint srcStride = w; const uint *pSrc = zSrc; ubyte *map; @@ -932,7 +922,7 @@ pipe_put_tile_z(struct pipe_context *pipe, if (pipe_clip_tile(x, y, &w, &h, ps)) return; - map = (ubyte *)screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); if (!map) { assert(0); return; @@ -980,7 +970,7 @@ pipe_put_tile_z(struct pipe_context *pipe, assert(0); } - screen->surface_unmap(screen, ps); + pipe_surface_unmap(ps); } diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h index fdc80a13b38..adfec8bcee7 100644 --- a/src/gallium/auxiliary/util/p_tile.h +++ b/src/gallium/auxiliary/util/p_tile.h @@ -30,7 +30,6 @@ #include "pipe/p_compiler.h" -struct pipe_context; struct pipe_surface; @@ -57,40 +56,34 @@ extern "C" { #endif void -pipe_get_tile_raw(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_raw(struct pipe_surface *ps, uint x, uint y, uint w, uint h, void *p, int dst_stride); void -pipe_put_tile_raw(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_raw(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const void *p, int src_stride); void -pipe_get_tile_rgba(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_rgba(struct pipe_surface *ps, uint x, uint y, uint w, uint h, float *p); void -pipe_put_tile_rgba(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_rgba(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const float *p); void -pipe_get_tile_z(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_z(struct pipe_surface *ps, uint x, uint y, uint w, uint h, uint *z); void -pipe_put_tile_z(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_z(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const uint *z); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 2d5d2b50f51..bfdaaa6b8f4 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -339,7 +339,7 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe, for (y = 0; y < h; y += TILE_SIZE) { for (x = 0; x < w; x += TILE_SIZE) { if (is_clear_flag_set(tc->clear_flags, x, y)) { - pipe_put_tile_raw(pipe, ps, + pipe_put_tile_raw(ps, x, y, TILE_SIZE, TILE_SIZE, tc->tile.data.color32, 0/*STRIDE*/); @@ -374,12 +374,12 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, struct softpipe_cached_tile *tile = tc->entries + pos; if (tile->x >= 0) { if (tc->depth_stencil) { - pipe_put_tile_raw(pipe, ps, + pipe_put_tile_raw(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(pipe, ps, + pipe_put_tile_rgba(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -431,12 +431,12 @@ sp_get_cached_tile(struct softpipe_context *softpipe, if (tile->x != -1) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { - pipe_put_tile_raw(pipe, ps, + pipe_put_tile_raw(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(pipe, ps, + pipe_put_tile_rgba(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -458,12 +458,12 @@ sp_get_cached_tile(struct softpipe_context *softpipe, else { /* get new tile data from surface */ if (tc->depth_stencil) { - pipe_get_tile_raw(pipe, ps, + pipe_get_tile_raw(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_get_tile_rgba(pipe, ps, + pipe_get_tile_rgba(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -544,7 +544,7 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, } /* get tile from the surface (view into texture) */ - pipe_get_tile_rgba(pipe, tc->tex_surf, + pipe_get_tile_rgba(tc->tex_surf, tile_x, tile_y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); tile->x = tile_x; diff --git a/src/mesa/drivers/x11/xm_surface.c b/src/mesa/drivers/x11/xm_surface.c index 81616b92d96..a3f2fe7d685 100644 --- a/src/mesa/drivers/x11/xm_surface.c +++ b/src/mesa/drivers/x11/xm_surface.c @@ -106,7 +106,7 @@ xmesa_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, } else { /* other softpipe surface */ - softpipe_get_tile_rgba(pipe, ps, x, y, w, h, p); + softpipe_get_tile_rgba(ps, x, y, w, h, p); } } @@ -142,7 +142,7 @@ xmesa_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, } else { /* other softpipe surface */ - softpipe_put_tile_rgba(pipe, ps, x, y, w, h, p); + softpipe_put_tile_rgba(ps, x, y, w, h, p); } } diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 2283905662f..c0e8c6bf33e 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -73,7 +73,7 @@ acc_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, acc_ps->block.width = 1; acc_ps->block.height = 1; - pipe_get_tile_rgba(pipe, acc_ps, x, y, w, h, p); + pipe_get_tile_rgba(acc_ps, x, y, w, h, p); acc_ps->format = f; acc_ps->block = b; @@ -97,7 +97,7 @@ acc_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, acc_ps->block.width = 1; acc_ps->block.height = 1; - pipe_put_tile_rgba(pipe, acc_ps, x, y, w, h, p); + pipe_put_tile_rgba(acc_ps, x, y, w, h, p); acc_ps->format = f; acc_ps->block = b; @@ -208,7 +208,7 @@ accum_accum(struct pipe_context *pipe, GLfloat value, colorBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(pipe, color_surf, xpos, ypos, width, height, colorBuf); + pipe_get_tile_rgba(color_surf, xpos, ypos, width, height, colorBuf); acc_get_tile_rgba(pipe, acc_surf, xpos, ypos, width, height, accBuf); for (i = 0; i < 4 * width * height; i++) { @@ -243,7 +243,7 @@ accum_load(struct pipe_context *pipe, GLfloat value, buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(pipe, color_surf, xpos, ypos, width, height, buf); + pipe_get_tile_rgba(color_surf, xpos, ypos, width, height, buf); for (i = 0; i < 4 * width * height; i++) { buf[i] = buf[i] * value; @@ -283,7 +283,7 @@ accum_return(GLcontext *ctx, GLfloat value, if (!colormask[0] || !colormask[1] || !colormask[2] || !colormask[3]) { cbuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(pipe, color_surf, xpos, ypos, width, height, cbuf); + pipe_get_tile_rgba(color_surf, xpos, ypos, width, height, cbuf); } for (i = 0; i < width * height; i++) { @@ -298,7 +298,7 @@ accum_return(GLcontext *ctx, GLfloat value, } } - pipe_put_tile_rgba(pipe, color_surf, xpos, ypos, width, height, abuf); + pipe_put_tile_rgba(color_surf, xpos, ypos, width, height, abuf); free(abuf); if (cbuf) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index a7781f3ab2a..6b0137dcf87 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -1049,16 +1049,16 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, /* alternate path using get/put_tile() */ GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(pipe, psRead, srcx, srcy, width, height, buf); - pipe_put_tile_rgba(pipe, psTex, 0, 0, width, height, buf); + pipe_get_tile_rgba(psRead, srcx, srcy, width, height, buf); + pipe_put_tile_rgba(psTex, 0, 0, width, height, buf); free(buf); } else { /* GL_DEPTH */ GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint)); - pipe_get_tile_z(pipe, psRead, srcx, srcy, width, height, buf); - pipe_put_tile_z(pipe, psTex, 0, 0, width, height, buf); + pipe_get_tile_z(psRead, srcx, srcy, width, height, buf); + pipe_put_tile_z(psTex, 0, 0, width, height, buf); free(buf); } pipe_surface_reference(&psRead, NULL); diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 09d9c29e446..eb71779cc9f 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -262,7 +262,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLuint ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / ((1 << 24) - 1); - pipe_get_tile_raw(pipe, surf, x, y, width, 1, ztemp, 0); + pipe_get_tile_raw(surf, x, y, width, 1, ztemp, 0); y += yStep; for (j = 0; j < width; j++) { zfloat[j] = (float) (scale * (ztemp[j] & 0xffffff)); @@ -276,7 +276,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, /* untested, but simple: */ assert(format == GL_DEPTH_STENCIL_EXT); for (i = 0; i < height; i++) { - pipe_get_tile_raw(pipe, surf, x, y, width, 1, dst, 0); + pipe_get_tile_raw(surf, x, y, width, 1, dst, 0); y += yStep; dst += dstStride; } @@ -287,7 +287,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLushort ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / 0xffff; - pipe_get_tile_raw(pipe, surf, x, y, width, 1, ztemp, 0); + pipe_get_tile_raw(surf, x, y, width, 1, ztemp, 0); y += yStep; for (j = 0; j < width; j++) { zfloat[j] = (float) (scale * ztemp[j]); @@ -302,7 +302,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLuint ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / 0xffffffff; - pipe_get_tile_raw(pipe, surf, x, y, width, 1, ztemp, 0); + pipe_get_tile_raw(surf, x, y, width, 1, ztemp, 0); y += yStep; for (j = 0; j < width; j++) { zfloat[j] = (float) (scale * ztemp[j]); @@ -316,7 +316,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, /* RGBA format */ /* Do a row at a time to flip image data vertically */ for (i = 0; i < height; i++) { - pipe_get_tile_rgba(pipe, surf, x, y, width, 1, df); + pipe_get_tile_rgba(surf, x, y, width, 1, df); y += yStep; df += dfStride; if (!dfStride) { diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index b9aa513d723..73f8b8f7880 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1100,25 +1100,25 @@ fallback_copy_texsubimage(GLcontext *ctx, for (row = 0; row < height; row++, srcY++, destY += yStep) { uint data[MAX_WIDTH]; - pipe_get_tile_z(pipe, src_surf, srcX, srcY, width, 1, data); + pipe_get_tile_z(src_surf, srcX, srcY, width, 1, data); if (scaleOrBias) { _mesa_scale_and_bias_depth_uint(ctx, width, data); } - pipe_put_tile_z(pipe, dest_surf, destX, destY, width, 1, data); + pipe_put_tile_z(dest_surf, destX, destY, width, 1, data); } } else { /* RGBA format */ for (row = 0; row < height; row++, srcY++, destY += yStep) { float data[4 * MAX_WIDTH]; - pipe_get_tile_rgba(pipe, src_surf, srcX, srcY, width, 1, data); + pipe_get_tile_rgba(src_surf, srcX, srcY, width, 1, data); /* XXX we're ignoring convolution for now */ if (ctx->_ImageTransferState) { _mesa_apply_rgba_transfer_ops(ctx, ctx->_ImageTransferState & ~IMAGE_CONVOLUTION_BIT, width, (GLfloat (*)[4]) data); } - pipe_put_tile_rgba(pipe, dest_surf, destX, destY, width, 1, data); + pipe_put_tile_rgba(dest_surf, destX, destY, width, 1, data); } } -- cgit v1.2.3 From 930a863c4f6f11d0fd5cf396ef76054d52c69b9f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@aurora.(none)> Date: Thu, 10 Jul 2008 20:20:46 +0200 Subject: i915: WIP swap rework --- src/gallium/drivers/i915simple/i915_texture.c | 6 +- src/gallium/winsys/dri/intel/intel_context.h | 6 + src/gallium/winsys/dri/intel/intel_swapbuffers.c | 303 ++++++++++------------- src/gallium/winsys/dri/intel/intel_swapbuffers.h | 4 +- 4 files changed, 147 insertions(+), 172 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index cf4964b26b3..8427d8271a7 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -79,7 +79,7 @@ static unsigned power_of_two(unsigned x) { unsigned value = 1; - while (value <= x) + while (value < x) value = value << 1; return value; } @@ -205,8 +205,8 @@ i945_miptree_layout_2d( struct i915_texture *tex ) unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; -#if 0 /* used for tiled display targets */ - if (pt->last_level == 0 && pt->cpp == 4) +#if 1 /* used for tiled display targets */ + if (pt->last_level == 0 /*&& pt->bpp == 4*/) if (i915_displaytarget_layout(tex)) return; #endif diff --git a/src/gallium/winsys/dri/intel/intel_context.h b/src/gallium/winsys/dri/intel/intel_context.h index ced18da1433..18b96217ac8 100644 --- a/src/gallium/winsys/dri/intel/intel_context.h +++ b/src/gallium/winsys/dri/intel/intel_context.h @@ -32,6 +32,7 @@ #include "drm.h" #include "pipe/p_debug.h" +#include "util/u_blit.h" #include "intel_screen.h" #include "i915_drm.h" @@ -82,6 +83,11 @@ struct intel_context * Configuration cache */ driOptionCache optionCache; + + /** + * Blit state for swapbuffers + */ + struct blit_state *blit; }; diff --git a/src/gallium/winsys/dri/intel/intel_swapbuffers.c b/src/gallium/winsys/dri/intel/intel_swapbuffers.c index f751f975245..6e10235e89f 100644 --- a/src/gallium/winsys/dri/intel/intel_swapbuffers.c +++ b/src/gallium/winsys/dri/intel/intel_swapbuffers.c @@ -28,16 +28,13 @@ #include "intel_screen.h" #include "intel_context.h" #include "intel_swapbuffers.h" - +#include "intel_batchbuffer.h" #include "intel_reg.h" -#include "pipe/p_context.h" #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" -#include "state_tracker/st_cb_fbo.h" - #include "intel_drm/ws_dri_bufmgr.h" -#include "intel_batchbuffer.h" + /** * Display a colorbuffer surface in an X window. @@ -49,158 +46,130 @@ */ void intelDisplaySurface(__DRIdrawablePrivate *dPriv, - struct pipe_surface *surf, - const drm_clip_rect_t *rect) + struct pipe_texture *tex, + const drm_clip_rect_t *rect) { - struct intel_screen *intelScreen = intel_screen(dPriv->driScreenPriv); - struct intel_context *intel = intelScreen->dummyContext; - - DBG(SWAP, "%s\n", __FUNCTION__); - - if (!intel) { - /* XXX this is where some kind of extra/meta context could be useful */ - return; - } - - if (intel->last_swap_fence) { - driFenceFinish(intel->last_swap_fence, DRM_FENCE_TYPE_EXE, TRUE); - driFenceUnReference(&intel->last_swap_fence); - intel->last_swap_fence = NULL; - } - intel->last_swap_fence = intel->first_swap_fence; - intel->first_swap_fence = NULL; - - /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets - * should work regardless. - */ - LOCK_HARDWARE(intel); - /* if this drawable isn't currently bound the LOCK_HARDWARE done on the - * current context (which is what intelScreenContext should return) might - * not get a contended lock and thus cliprects not updated (tests/manywin) - */ - if (intel_context(dPriv->driContextPriv) != intel) - DRI_VALIDATE_DRAWABLE_INFO(intel->driScreen, dPriv); - - - if (dPriv && dPriv->numClipRects) { - const int srcWidth = surf->width; - const int srcHeight = surf->height; - const int nbox = dPriv->numClipRects; - const drm_clip_rect_t *pbox = dPriv->pClipRects; - const int pitch = intelScreen->front.pitch / intelScreen->front.cpp; - const int cpp = intelScreen->front.cpp; - const int srcpitch = surf->stride / cpp; - int BR13, CMD; - int i; - - ASSERT(surf->buffer); - ASSERT(surf->cpp == cpp); - - DBG(SWAP, "screen pitch %d src surface pitch %d\n", - pitch, surf->stride); - - if (cpp == 2) { - BR13 = (pitch * cpp) | (0xCC << 16) | (1 << 24); - CMD = XY_SRC_COPY_BLT_CMD; - } - else { - BR13 = (pitch * cpp) | (0xCC << 16) | (1 << 24) | (1 << 25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - } - - for (i = 0; i < nbox; i++, pbox++) { - drm_clip_rect_t box; - drm_clip_rect_t sbox; - - if (pbox->x1 > pbox->x2 || - pbox->y1 > pbox->y2 || - pbox->x2 > intelScreen->front.width || - pbox->y2 > intelScreen->front.height) { - /* invalid cliprect, skip it */ - continue; - } - - box = *pbox; - - if (rect) { - /* intersect cliprect with user-provided src rect */ - drm_clip_rect_t rrect; - - rrect.x1 = dPriv->x + rect->x1; - rrect.y1 = (dPriv->h - rect->y1 - rect->y2) + dPriv->y; - rrect.x2 = rect->x2 + rrect.x1; - rrect.y2 = rect->y2 + rrect.y1; - if (rrect.x1 > box.x1) - box.x1 = rrect.x1; - if (rrect.y1 > box.y1) - box.y1 = rrect.y1; - if (rrect.x2 < box.x2) - box.x2 = rrect.x2; - if (rrect.y2 < box.y2) - box.y2 = rrect.y2; - - if (box.x1 > box.x2 || box.y1 > box.y2) - continue; - } - - /* restrict blit to size of actually rendered area */ - if (box.x2 - box.x1 > srcWidth) - box.x2 = srcWidth + box.x1; - if (box.y2 - box.y1 > srcHeight) - box.y2 = srcHeight + box.y1; - - DBG(SWAP, "box x1 x2 y1 y2 %d %d %d %d\n", - box.x1, box.x2, box.y1, box.y2); - - sbox.x1 = box.x1 - dPriv->x; - sbox.y1 = box.y1 - dPriv->y; - - assert(box.x1 < box.x2); - assert(box.y1 < box.y2); - - /* XXX this could be done with pipe->surface_copy() */ - /* XXX should have its own batch buffer */ - if (!BEGIN_BATCH(8, 2)) { - /* - * Since we share this batch buffer with a context - * we can't flush it since that risks a GPU lockup - */ - assert(0); - continue; - } - - OUT_BATCH(CMD); - OUT_BATCH(BR13); - OUT_BATCH((box.y1 << 16) | box.x1); - OUT_BATCH((box.y2 << 16) | box.x2); - - OUT_RELOC(intelScreen->front.buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, - DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE, 0); - OUT_BATCH((sbox.y1 << 16) | sbox.x1); - OUT_BATCH((srcpitch * cpp) & 0xffff); - OUT_RELOC(dri_bo(surf->buffer), - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - DRM_BO_MASK_MEM | DRM_BO_FLAG_READ, 0); - - } - - if (intel->first_swap_fence) - driFenceUnReference(&intel->first_swap_fence); - intel->first_swap_fence = intel_be_batchbuffer_flush(intel->base.batch); - } - - UNLOCK_HARDWARE(intel); - - if (intel->lastStamp != dPriv->lastStamp) { - intelUpdateWindowSize(dPriv); - intel->lastStamp = dPriv->lastStamp; - } + struct intel_screen *intelScreen = intel_screen(dPriv->driScreenPriv); + struct intel_context *intel = intelScreen->dummyContext; + + DBG(SWAP, "%s\n", __FUNCTION__); + + if (!intel) { + /* XXX this is where some kind of extra/meta context could be useful */ + return; + } else if (!intel->blit) { + intel->blit = util_create_blit(intel->st->pipe, intel->st->cso_context); + } + + if (intel->last_swap_fence) { + driFenceFinish(intel->last_swap_fence, DRM_FENCE_TYPE_EXE, TRUE); + driFenceUnReference(&intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + intel->last_swap_fence = intel->first_swap_fence; + intel->first_swap_fence = NULL; + + /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets + * should work regardless. + */ + LOCK_HARDWARE(intel); + /* if this drawable isn't currently bound the LOCK_HARDWARE done on the + * current context (which is what intelScreenContext should return) might + * not get a contended lock and thus cliprects not updated (tests/manywin) + */ + if (intel_context(dPriv->driContextPriv) != intel) + DRI_VALIDATE_DRAWABLE_INFO(intel->driScreen, dPriv); + + if (dPriv && dPriv->numClipRects) { + const int srcWidth = tex->width[0]; + const int srcHeight = tex->height[0]; + const int nbox = dPriv->numClipRects; + const drm_clip_rect_t *pbox = dPriv->pClipRects; + int i; + + assert(surf->buffer); + assert(surf->cpp == cpp); + + + for (i = 0; i < nbox; i++, pbox++) { + drm_clip_rect_t box; + drm_clip_rect_t sbox; + + if (pbox->x1 > pbox->x2 || + pbox->y1 > pbox->y2 || + pbox->x2 > intelScreen->front.width || + pbox->y2 > intelScreen->front.height) { + /* invalid cliprect, skip it */ + continue; + } + + box = *pbox; + + if (rect) { + /* intersect cliprect with user-provided src rect */ + drm_clip_rect_t rrect; + + rrect.x1 = dPriv->x + rect->x1; + rrect.y1 = (dPriv->h - rect->y1 - rect->y2) + dPriv->y; + rrect.x2 = rect->x2 + rrect.x1; + rrect.y2 = rect->y2 + rrect.y1; + if (rrect.x1 > box.x1) + box.x1 = rrect.x1; + if (rrect.y1 > box.y1) + box.y1 = rrect.y1; + if (rrect.x2 < box.x2) + box.x2 = rrect.x2; + if (rrect.y2 < box.y2) + box.y2 = rrect.y2; + + if (box.x1 > box.x2 || box.y1 > box.y2) + continue; + } + + /* restrict blit to size of actually rendered area */ + if (box.x2 - box.x1 > srcWidth) + box.x2 = srcWidth + box.x1; + if (box.y2 - box.y1 > srcHeight) + box.y2 = srcHeight + box.y1; + + DBG(SWAP, "\tdbox x1 x2 y1 y2 %d %d %d %d\n", + box.x1, box.x2, box.y1, box.y2); + + sbox.x1 = box.x1 - dPriv->x; + sbox.y1 = box.y1 - dPriv->y; + sbox.x2 = box.x2 - dPriv->x; + sbox.y2 = box.y2 - dPriv->y; + DBG(SWAP, "\tsbox x1 x2 y1 y2 %d %d %d %d\n", + sbox.x1, sbox.x2, sbox.y1, sbox.y2); + + assert(box.x1 < box.x2); + assert(box.y1 < box.y2); + + util_blit_pixels_tex(intel->blit, + tex, + sbox.x1, sbox.y1, + sbox.x2, sbox.y2, + intelScreen->front.surface, + box.x1, box.y1, + box.x2, box.y2, + 0.0, + PIPE_TEX_MIPFILTER_NEAREST); + } + + if (intel->first_swap_fence) + driFenceUnReference(&intel->first_swap_fence); + intel->first_swap_fence = intel_be_batchbuffer_flush(intel->base.batch); + } + + + UNLOCK_HARDWARE(intel); + + if (intel->lastStamp != dPriv->lastStamp) { + intelUpdateWindowSize(dPriv); + intel->lastStamp = dPriv->lastStamp; + } } - - /** * This will be called whenever the currently bound window is moved/resized. */ @@ -212,27 +181,27 @@ intelUpdateWindowSize(__DRIdrawablePrivate *dPriv) st_resize_framebuffer(intelfb->stfb, dPriv->w, dPriv->h); } - - +/** + * Called from glx code + */ void intelSwapBuffers(__DRIdrawablePrivate * dPriv) { struct intel_framebuffer *intel_fb = intel_framebuffer(dPriv); - struct pipe_surface *back_surf; + struct pipe_texture *back_tex; assert(intel_fb); assert(intel_fb->stfb); - back_surf = st_get_framebuffer_surface(intel_fb->stfb, - ST_SURFACE_BACK_LEFT); - if (back_surf) { + back_tex = st_get_framebuffer_texture(intel_fb->stfb, + ST_SURFACE_BACK_LEFT); + if (back_tex) { st_notify_swapbuffers(intel_fb->stfb); - intelDisplaySurface(dPriv, back_surf, NULL); + intelDisplaySurface(dPriv, back_tex, NULL); st_notify_swapbuffers_complete(intel_fb->stfb); } } - /** * Called via glXCopySubBufferMESA() to copy a subrect of the back * buffer to the front buffer/screen. @@ -241,14 +210,14 @@ void intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) { struct intel_framebuffer *intel_fb = intel_framebuffer(dPriv); - struct pipe_surface *back_surf; + struct pipe_texture *back_tex; assert(intel_fb); assert(intel_fb->stfb); - back_surf = st_get_framebuffer_surface(intel_fb->stfb, - ST_SURFACE_BACK_LEFT); - if (back_surf) { + back_tex = st_get_framebuffer_texture(intel_fb->stfb, + ST_SURFACE_BACK_LEFT); + if (back_tex) { drm_clip_rect_t rect; rect.x1 = x; rect.y1 = y; @@ -256,6 +225,6 @@ intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) rect.y2 = h; st_notify_swapbuffers(intel_fb->stfb); - intelDisplaySurface(dPriv, back_surf, &rect); + intelDisplaySurface(dPriv, back_tex, &rect); } } diff --git a/src/gallium/winsys/dri/intel/intel_swapbuffers.h b/src/gallium/winsys/dri/intel/intel_swapbuffers.h index 46c9bab3af2..88c479f81d2 100644 --- a/src/gallium/winsys/dri/intel/intel_swapbuffers.h +++ b/src/gallium/winsys/dri/intel/intel_swapbuffers.h @@ -29,11 +29,11 @@ #define INTEL_SWAPBUFFERS_H -struct pipe_surface; +struct pipe_texture; extern void intelDisplaySurface(__DRIdrawablePrivate * dPriv, - struct pipe_surface *surf, + struct pipe_texture *surf, const drm_clip_rect_t * rect); extern void intelSwapBuffers(__DRIdrawablePrivate * dPriv); -- cgit v1.2.3 From 16c2267d55fb14d0ffcb676540345a14ecc0f323 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@tungstengraphics.com> Date: Sun, 13 Jul 2008 18:55:38 +0200 Subject: i915: Ops should not have pushed that This reverts commit 930a863c4f6f11d0fd5cf396ef76054d52c69b9f. --- src/gallium/drivers/i915simple/i915_texture.c | 6 +- src/gallium/winsys/dri/intel/intel_context.h | 6 - src/gallium/winsys/dri/intel/intel_swapbuffers.c | 303 +++++++++++++---------- src/gallium/winsys/dri/intel/intel_swapbuffers.h | 4 +- 4 files changed, 172 insertions(+), 147 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 8427d8271a7..cf4964b26b3 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -79,7 +79,7 @@ static unsigned power_of_two(unsigned x) { unsigned value = 1; - while (value < x) + while (value <= x) value = value << 1; return value; } @@ -205,8 +205,8 @@ i945_miptree_layout_2d( struct i915_texture *tex ) unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; -#if 1 /* used for tiled display targets */ - if (pt->last_level == 0 /*&& pt->bpp == 4*/) +#if 0 /* used for tiled display targets */ + if (pt->last_level == 0 && pt->cpp == 4) if (i915_displaytarget_layout(tex)) return; #endif diff --git a/src/gallium/winsys/dri/intel/intel_context.h b/src/gallium/winsys/dri/intel/intel_context.h index 18b96217ac8..ced18da1433 100644 --- a/src/gallium/winsys/dri/intel/intel_context.h +++ b/src/gallium/winsys/dri/intel/intel_context.h @@ -32,7 +32,6 @@ #include "drm.h" #include "pipe/p_debug.h" -#include "util/u_blit.h" #include "intel_screen.h" #include "i915_drm.h" @@ -83,11 +82,6 @@ struct intel_context * Configuration cache */ driOptionCache optionCache; - - /** - * Blit state for swapbuffers - */ - struct blit_state *blit; }; diff --git a/src/gallium/winsys/dri/intel/intel_swapbuffers.c b/src/gallium/winsys/dri/intel/intel_swapbuffers.c index 6e10235e89f..f751f975245 100644 --- a/src/gallium/winsys/dri/intel/intel_swapbuffers.c +++ b/src/gallium/winsys/dri/intel/intel_swapbuffers.c @@ -28,13 +28,16 @@ #include "intel_screen.h" #include "intel_context.h" #include "intel_swapbuffers.h" -#include "intel_batchbuffer.h" + #include "intel_reg.h" +#include "pipe/p_context.h" #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" -#include "intel_drm/ws_dri_bufmgr.h" +#include "state_tracker/st_cb_fbo.h" +#include "intel_drm/ws_dri_bufmgr.h" +#include "intel_batchbuffer.h" /** * Display a colorbuffer surface in an X window. @@ -46,130 +49,158 @@ */ void intelDisplaySurface(__DRIdrawablePrivate *dPriv, - struct pipe_texture *tex, - const drm_clip_rect_t *rect) + struct pipe_surface *surf, + const drm_clip_rect_t *rect) { - struct intel_screen *intelScreen = intel_screen(dPriv->driScreenPriv); - struct intel_context *intel = intelScreen->dummyContext; - - DBG(SWAP, "%s\n", __FUNCTION__); - - if (!intel) { - /* XXX this is where some kind of extra/meta context could be useful */ - return; - } else if (!intel->blit) { - intel->blit = util_create_blit(intel->st->pipe, intel->st->cso_context); - } - - if (intel->last_swap_fence) { - driFenceFinish(intel->last_swap_fence, DRM_FENCE_TYPE_EXE, TRUE); - driFenceUnReference(&intel->last_swap_fence); - intel->last_swap_fence = NULL; - } - intel->last_swap_fence = intel->first_swap_fence; - intel->first_swap_fence = NULL; - - /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets - * should work regardless. - */ - LOCK_HARDWARE(intel); - /* if this drawable isn't currently bound the LOCK_HARDWARE done on the - * current context (which is what intelScreenContext should return) might - * not get a contended lock and thus cliprects not updated (tests/manywin) - */ - if (intel_context(dPriv->driContextPriv) != intel) - DRI_VALIDATE_DRAWABLE_INFO(intel->driScreen, dPriv); - - if (dPriv && dPriv->numClipRects) { - const int srcWidth = tex->width[0]; - const int srcHeight = tex->height[0]; - const int nbox = dPriv->numClipRects; - const drm_clip_rect_t *pbox = dPriv->pClipRects; - int i; - - assert(surf->buffer); - assert(surf->cpp == cpp); - - - for (i = 0; i < nbox; i++, pbox++) { - drm_clip_rect_t box; - drm_clip_rect_t sbox; - - if (pbox->x1 > pbox->x2 || - pbox->y1 > pbox->y2 || - pbox->x2 > intelScreen->front.width || - pbox->y2 > intelScreen->front.height) { - /* invalid cliprect, skip it */ - continue; - } - - box = *pbox; - - if (rect) { - /* intersect cliprect with user-provided src rect */ - drm_clip_rect_t rrect; - - rrect.x1 = dPriv->x + rect->x1; - rrect.y1 = (dPriv->h - rect->y1 - rect->y2) + dPriv->y; - rrect.x2 = rect->x2 + rrect.x1; - rrect.y2 = rect->y2 + rrect.y1; - if (rrect.x1 > box.x1) - box.x1 = rrect.x1; - if (rrect.y1 > box.y1) - box.y1 = rrect.y1; - if (rrect.x2 < box.x2) - box.x2 = rrect.x2; - if (rrect.y2 < box.y2) - box.y2 = rrect.y2; - - if (box.x1 > box.x2 || box.y1 > box.y2) - continue; - } - - /* restrict blit to size of actually rendered area */ - if (box.x2 - box.x1 > srcWidth) - box.x2 = srcWidth + box.x1; - if (box.y2 - box.y1 > srcHeight) - box.y2 = srcHeight + box.y1; - - DBG(SWAP, "\tdbox x1 x2 y1 y2 %d %d %d %d\n", - box.x1, box.x2, box.y1, box.y2); - - sbox.x1 = box.x1 - dPriv->x; - sbox.y1 = box.y1 - dPriv->y; - sbox.x2 = box.x2 - dPriv->x; - sbox.y2 = box.y2 - dPriv->y; - DBG(SWAP, "\tsbox x1 x2 y1 y2 %d %d %d %d\n", - sbox.x1, sbox.x2, sbox.y1, sbox.y2); - - assert(box.x1 < box.x2); - assert(box.y1 < box.y2); - - util_blit_pixels_tex(intel->blit, - tex, - sbox.x1, sbox.y1, - sbox.x2, sbox.y2, - intelScreen->front.surface, - box.x1, box.y1, - box.x2, box.y2, - 0.0, - PIPE_TEX_MIPFILTER_NEAREST); - } - - if (intel->first_swap_fence) - driFenceUnReference(&intel->first_swap_fence); - intel->first_swap_fence = intel_be_batchbuffer_flush(intel->base.batch); - } - - - UNLOCK_HARDWARE(intel); - - if (intel->lastStamp != dPriv->lastStamp) { - intelUpdateWindowSize(dPriv); - intel->lastStamp = dPriv->lastStamp; - } + struct intel_screen *intelScreen = intel_screen(dPriv->driScreenPriv); + struct intel_context *intel = intelScreen->dummyContext; + + DBG(SWAP, "%s\n", __FUNCTION__); + + if (!intel) { + /* XXX this is where some kind of extra/meta context could be useful */ + return; + } + + if (intel->last_swap_fence) { + driFenceFinish(intel->last_swap_fence, DRM_FENCE_TYPE_EXE, TRUE); + driFenceUnReference(&intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + intel->last_swap_fence = intel->first_swap_fence; + intel->first_swap_fence = NULL; + + /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets + * should work regardless. + */ + LOCK_HARDWARE(intel); + /* if this drawable isn't currently bound the LOCK_HARDWARE done on the + * current context (which is what intelScreenContext should return) might + * not get a contended lock and thus cliprects not updated (tests/manywin) + */ + if (intel_context(dPriv->driContextPriv) != intel) + DRI_VALIDATE_DRAWABLE_INFO(intel->driScreen, dPriv); + + + if (dPriv && dPriv->numClipRects) { + const int srcWidth = surf->width; + const int srcHeight = surf->height; + const int nbox = dPriv->numClipRects; + const drm_clip_rect_t *pbox = dPriv->pClipRects; + const int pitch = intelScreen->front.pitch / intelScreen->front.cpp; + const int cpp = intelScreen->front.cpp; + const int srcpitch = surf->stride / cpp; + int BR13, CMD; + int i; + + ASSERT(surf->buffer); + ASSERT(surf->cpp == cpp); + + DBG(SWAP, "screen pitch %d src surface pitch %d\n", + pitch, surf->stride); + + if (cpp == 2) { + BR13 = (pitch * cpp) | (0xCC << 16) | (1 << 24); + CMD = XY_SRC_COPY_BLT_CMD; + } + else { + BR13 = (pitch * cpp) | (0xCC << 16) | (1 << 24) | (1 << 25); + CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + } + + for (i = 0; i < nbox; i++, pbox++) { + drm_clip_rect_t box; + drm_clip_rect_t sbox; + + if (pbox->x1 > pbox->x2 || + pbox->y1 > pbox->y2 || + pbox->x2 > intelScreen->front.width || + pbox->y2 > intelScreen->front.height) { + /* invalid cliprect, skip it */ + continue; + } + + box = *pbox; + + if (rect) { + /* intersect cliprect with user-provided src rect */ + drm_clip_rect_t rrect; + + rrect.x1 = dPriv->x + rect->x1; + rrect.y1 = (dPriv->h - rect->y1 - rect->y2) + dPriv->y; + rrect.x2 = rect->x2 + rrect.x1; + rrect.y2 = rect->y2 + rrect.y1; + if (rrect.x1 > box.x1) + box.x1 = rrect.x1; + if (rrect.y1 > box.y1) + box.y1 = rrect.y1; + if (rrect.x2 < box.x2) + box.x2 = rrect.x2; + if (rrect.y2 < box.y2) + box.y2 = rrect.y2; + + if (box.x1 > box.x2 || box.y1 > box.y2) + continue; + } + + /* restrict blit to size of actually rendered area */ + if (box.x2 - box.x1 > srcWidth) + box.x2 = srcWidth + box.x1; + if (box.y2 - box.y1 > srcHeight) + box.y2 = srcHeight + box.y1; + + DBG(SWAP, "box x1 x2 y1 y2 %d %d %d %d\n", + box.x1, box.x2, box.y1, box.y2); + + sbox.x1 = box.x1 - dPriv->x; + sbox.y1 = box.y1 - dPriv->y; + + assert(box.x1 < box.x2); + assert(box.y1 < box.y2); + + /* XXX this could be done with pipe->surface_copy() */ + /* XXX should have its own batch buffer */ + if (!BEGIN_BATCH(8, 2)) { + /* + * Since we share this batch buffer with a context + * we can't flush it since that risks a GPU lockup + */ + assert(0); + continue; + } + + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((box.y1 << 16) | box.x1); + OUT_BATCH((box.y2 << 16) | box.x2); + + OUT_RELOC(intelScreen->front.buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE, 0); + OUT_BATCH((sbox.y1 << 16) | sbox.x1); + OUT_BATCH((srcpitch * cpp) & 0xffff); + OUT_RELOC(dri_bo(surf->buffer), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_BO_MASK_MEM | DRM_BO_FLAG_READ, 0); + + } + + if (intel->first_swap_fence) + driFenceUnReference(&intel->first_swap_fence); + intel->first_swap_fence = intel_be_batchbuffer_flush(intel->base.batch); + } + + UNLOCK_HARDWARE(intel); + + if (intel->lastStamp != dPriv->lastStamp) { + intelUpdateWindowSize(dPriv); + intel->lastStamp = dPriv->lastStamp; + } } + + /** * This will be called whenever the currently bound window is moved/resized. */ @@ -181,27 +212,27 @@ intelUpdateWindowSize(__DRIdrawablePrivate *dPriv) st_resize_framebuffer(intelfb->stfb, dPriv->w, dPriv->h); } -/** - * Called from glx code - */ + + void intelSwapBuffers(__DRIdrawablePrivate * dPriv) { struct intel_framebuffer *intel_fb = intel_framebuffer(dPriv); - struct pipe_texture *back_tex; + struct pipe_surface *back_surf; assert(intel_fb); assert(intel_fb->stfb); - back_tex = st_get_framebuffer_texture(intel_fb->stfb, - ST_SURFACE_BACK_LEFT); - if (back_tex) { + back_surf = st_get_framebuffer_surface(intel_fb->stfb, + ST_SURFACE_BACK_LEFT); + if (back_surf) { st_notify_swapbuffers(intel_fb->stfb); - intelDisplaySurface(dPriv, back_tex, NULL); + intelDisplaySurface(dPriv, back_surf, NULL); st_notify_swapbuffers_complete(intel_fb->stfb); } } + /** * Called via glXCopySubBufferMESA() to copy a subrect of the back * buffer to the front buffer/screen. @@ -210,14 +241,14 @@ void intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) { struct intel_framebuffer *intel_fb = intel_framebuffer(dPriv); - struct pipe_texture *back_tex; + struct pipe_surface *back_surf; assert(intel_fb); assert(intel_fb->stfb); - back_tex = st_get_framebuffer_texture(intel_fb->stfb, - ST_SURFACE_BACK_LEFT); - if (back_tex) { + back_surf = st_get_framebuffer_surface(intel_fb->stfb, + ST_SURFACE_BACK_LEFT); + if (back_surf) { drm_clip_rect_t rect; rect.x1 = x; rect.y1 = y; @@ -225,6 +256,6 @@ intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) rect.y2 = h; st_notify_swapbuffers(intel_fb->stfb); - intelDisplaySurface(dPriv, back_tex, &rect); + intelDisplaySurface(dPriv, back_surf, &rect); } } diff --git a/src/gallium/winsys/dri/intel/intel_swapbuffers.h b/src/gallium/winsys/dri/intel/intel_swapbuffers.h index 88c479f81d2..46c9bab3af2 100644 --- a/src/gallium/winsys/dri/intel/intel_swapbuffers.h +++ b/src/gallium/winsys/dri/intel/intel_swapbuffers.h @@ -29,11 +29,11 @@ #define INTEL_SWAPBUFFERS_H -struct pipe_texture; +struct pipe_surface; extern void intelDisplaySurface(__DRIdrawablePrivate * dPriv, - struct pipe_texture *surf, + struct pipe_surface *surf, const drm_clip_rect_t * rect); extern void intelSwapBuffers(__DRIdrawablePrivate * dPriv); -- cgit v1.2.3 From f006358d56e6b24c3e32665e088d5ee11877ec2c Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 15 Jul 2008 13:27:44 -0600 Subject: gallium: added some sanity check assertions --- src/gallium/drivers/softpipe/sp_setup.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index e99df9d0181..4321ca46f42 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -941,6 +941,11 @@ setup_line(struct setup_context *setup, print_vertex(setup, v1); #endif + assert(v0[0][0] < 1.0e9); + assert(v0[0][1] < 1.0e9); + assert(v1[0][0] < 1.0e9); + assert(v1[0][1] < 1.0e9); + if (setup->softpipe->no_rast) return; -- cgit v1.2.3 From 0c2c0a862c40c0ed39a9ac52344d22c8a7c8b100 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Wed, 16 Jul 2008 14:10:10 +0900 Subject: softpipe: DXT formats not really supported. --- src/gallium/drivers/softpipe/sp_screen.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index e9926bf41f9..f6193bfaf9a 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -119,11 +119,16 @@ softpipe_is_format_supported( struct pipe_screen *screen, { switch (type) { case PIPE_TEXTURE: - /* softpipe supports all texture formats */ - return TRUE; case PIPE_SURFACE: - /* softpipe supports all (off-screen) surface formats */ - return TRUE; + switch(format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return FALSE; + default: + return TRUE; + } default: assert(0); return FALSE; -- cgit v1.2.3 From 8aafc03b260ab8923f1b373f7effa75bcdb40a72 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 19 Jul 2008 12:04:37 +0900 Subject: gallium: Finer grained is_format_supported. --- src/gallium/auxiliary/util/u_blit.c | 12 ++- src/gallium/auxiliary/util/u_gen_mipmap.c | 3 +- src/gallium/drivers/cell/ppu/cell_screen.c | 24 ++--- src/gallium/drivers/i915simple/i915_screen.c | 18 ++-- src/gallium/drivers/i965simple/brw_screen.c | 5 +- src/gallium/drivers/softpipe/sp_screen.c | 26 +++--- src/gallium/include/pipe/p_defines.h | 7 +- src/gallium/include/pipe/p_screen.h | 9 +- src/gallium/state_trackers/python/gallium.i | 15 ++- src/gallium/state_trackers/python/samples/tri.py | 8 +- src/gallium/state_trackers/python/tests/texture.py | 13 ++- src/mesa/state_tracker/st_atom_pixeltransfer.c | 2 +- src/mesa/state_tracker/st_cb_bitmap.c | 3 +- src/mesa/state_tracker/st_cb_drawpixels.c | 9 +- src/mesa/state_tracker/st_cb_texture.c | 8 +- src/mesa/state_tracker/st_extensions.c | 14 ++- src/mesa/state_tracker/st_format.c | 101 +++++++++++---------- src/mesa/state_tracker/st_format.h | 2 +- src/mesa/state_tracker/st_gen_mipmap.c | 3 +- src/mesa/state_tracker/st_texture.c | 3 +- 20 files changed, 158 insertions(+), 127 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 3dc9fdd11e7..ae087df4cf7 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -307,8 +307,10 @@ util_blit_pixels(struct blit_state *ctx, dstY1 = tmp; } - assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE)); - assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE)); + assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)); + assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)); if(dst->format == src->format && (dstX1 - dstX0) == srcW && (dstY1 - dstY0) == srcH) { /* FIXME: this will most surely fail for overlapping rectangles */ @@ -319,7 +321,8 @@ util_blit_pixels(struct blit_state *ctx, return; } - assert(screen->is_format_supported(screen, dst->format, PIPE_SURFACE)); + assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); /* * XXX for now we're always creating a temporary texture. @@ -449,7 +452,8 @@ util_blit_pixels_tex(struct blit_state *ctx, t0 = srcY0 / (float)tex->height[0]; t1 = srcY1 / (float)tex->height[0]; - assert(screen->is_format_supported(screen, dst->format, PIPE_SURFACE)); + assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); /* save state (restored below) */ cso_save_blend(ctx->cso); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 5313a8008a9..4999822068a 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -858,7 +858,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, uint zslice = 0; /* check if we can render in the texture's format */ - if (!screen->is_format_supported(screen, pt->format, PIPE_SURFACE)) { + if (!screen->is_format_supported(screen, pt->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel); return; } diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 5198b51441a..cf9b68b695f 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -115,23 +115,17 @@ cell_get_paramf(struct pipe_screen *screen, int param) static boolean cell_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, uint type ) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) { - switch (type) { - case PIPE_TEXTURE: - /* cell supports most texture formats, XXX for now anyway */ - if (format == PIPE_FORMAT_DXT5_RGBA || - format == PIPE_FORMAT_R8G8B8A8_SRGB) - return FALSE; - else - return TRUE; - case PIPE_SURFACE: - /* cell supports all (off-screen) surface formats, XXX for now */ - return TRUE; - default: - assert(0); + /* cell supports most formats, XXX for now anyway */ + if (format == PIPE_FORMAT_DXT5_RGBA || + format == PIPE_FORMAT_R8G8B8A8_SRGB) return FALSE; - } + else + return TRUE; } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index ba8f183bdf0..4b1b8af7da8 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -148,7 +148,10 @@ i915_get_paramf(struct pipe_screen *screen, int param) static boolean i915_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, uint type ) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) { static const enum pipe_format tex_supported[] = { PIPE_FORMAT_R8G8B8A8_UNORM, @@ -173,17 +176,10 @@ i915_is_format_supported( struct pipe_screen *screen, const enum pipe_format *list; uint i; - switch (type) { - case PIPE_TEXTURE: - list = tex_supported; - break; - case PIPE_SURFACE: + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) list = surface_supported; - break; - default: - assert(0); - return FALSE; - } + else + list = tex_supported; for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { if (list[i] == format) diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index b700f7e4f5d..6d8f24d1c49 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -136,7 +136,10 @@ brw_get_paramf(struct pipe_screen *screen, int param) static boolean brw_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, uint type ) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) { #if 0 /* XXX: This is broken -- rewrite if still needed. */ diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index f6193bfaf9a..3f9d4b0ed31 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -115,23 +115,19 @@ softpipe_get_paramf(struct pipe_screen *screen, int param) */ static boolean softpipe_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, uint type ) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) { - switch (type) { - case PIPE_TEXTURE: - case PIPE_SURFACE: - switch(format) { - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return FALSE; - default: - return TRUE; - } - default: - assert(0); + switch(format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: return FALSE; + default: + return TRUE; } } diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index bc4d7c845a9..b1d100ef53c 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -172,11 +172,8 @@ enum pipe_texture_target { #define PIPE_TEXTURE_USAGE_DEPTH_STENCIL 0x8 #define PIPE_TEXTURE_USAGE_SAMPLER 0x10 -/** - * Surfaces, textures, etc. (others may be added) - */ -#define PIPE_TEXTURE 1 -#define PIPE_SURFACE 2 /**< user-created surfaces */ +#define PIPE_TEXTURE_GEOM_NON_SQUARE 0x1 +#define PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO 0x2 /** diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index cc8430dae16..b15affef7a5 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -77,11 +77,14 @@ struct pipe_screen { /** * Check if the given pipe_format is supported as a texture or * drawing surface. - * \param type one of PIPE_TEXTURE, PIPE_SURFACE + * \param tex_usage bitmask of PIPE_TEXTURE_USAGE_* + * \param flags bitmask of PIPE_TEXTURE_GEOM_* */ boolean (*is_format_supported)( struct pipe_screen *, - enum pipe_format format, - uint type ); + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ); /** * Create a new texture object, using the given template info. diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index c08ac87acae..8d8b762ea57 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -158,8 +158,15 @@ struct st_context { * drawing surface. * \param type one of PIPE_TEXTURE, PIPE_SURFACE */ - int is_format_supported( enum pipe_format format, unsigned type ) { - return $self->screen->is_format_supported( $self->screen, format, type); + int is_format_supported( enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) { + return $self->screen->is_format_supported( $self->screen, + format, + target, + tex_usage, + geom_flags ); } struct st_context * @@ -175,7 +182,7 @@ struct st_context { unsigned depth = 1, unsigned last_level = 0, enum pipe_texture_target target = PIPE_TEXTURE_2D, - unsigned usage = 0 + unsigned tex_usage = 0 ) { struct pipe_texture templat; memset(&templat, 0, sizeof(templat)); @@ -186,7 +193,7 @@ struct st_context { templat.depth[0] = depth; templat.last_level = last_level; templat.target = target; - templat.tex_usage = usage; + templat.tex_usage = tex_usage; return $self->screen->texture_create($self->screen, &templat); } diff --git a/src/gallium/state_trackers/python/samples/tri.py b/src/gallium/state_trackers/python/samples/tri.py index 36659229298..1271c67627d 100644 --- a/src/gallium/state_trackers/python/samples/tri.py +++ b/src/gallium/state_trackers/python/samples/tri.py @@ -140,9 +140,11 @@ def test(dev): ctx.set_clip(clip) # framebuffer - cbuf = dev.texture_create(PIPE_FORMAT_X8R8G8B8_UNORM, - width, height, - usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + cbuf = dev.texture_create( + PIPE_FORMAT_X8R8G8B8_UNORM, + width, height, + tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + ) _cbuf = cbuf.get_surface(usage = PIPE_BUFFER_USAGE_GPU_READ|PIPE_BUFFER_USAGE_GPU_WRITE) fb = Framebuffer() fb.width = width diff --git a/src/gallium/state_trackers/python/tests/texture.py b/src/gallium/state_trackers/python/tests/texture.py index 16ad78c8aa8..b2ca9f416f5 100644 --- a/src/gallium/state_trackers/python/tests/texture.py +++ b/src/gallium/state_trackers/python/tests/texture.py @@ -136,7 +136,7 @@ class TextureTest(TestCase): level = self.level zslice = self.zslice - if not dev.is_format_supported(format, PIPE_TEXTURE): + if not dev.is_format_supported(format, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER, 0): raise TestSkip ctx = self.dev.context_create() @@ -199,6 +199,7 @@ class TextureTest(TestCase): height = height, depth = depth, last_level = last_level, + tex_usage = PIPE_TEXTURE_USAGE_SAMPLER, ) expected_rgba = FloatArray(height*width*4) @@ -212,10 +213,12 @@ class TextureTest(TestCase): ctx.set_sampler_texture(0, texture) # framebuffer - cbuf_tex = dev.texture_create(PIPE_FORMAT_A8R8G8B8_UNORM, - width, - height, - usage = PIPE_TEXTURE_USAGE_RENDER_TARGET) + cbuf_tex = dev.texture_create( + PIPE_FORMAT_A8R8G8B8_UNORM, + width, + height, + tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET, + ) cbuf = cbuf_tex.get_surface(usage = PIPE_BUFFER_USAGE_GPU_WRITE|PIPE_BUFFER_USAGE_GPU_READ) fb = Framebuffer() diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index e4de875e8c9..a357b716771 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -122,7 +122,7 @@ create_color_map_texture(GLcontext *ctx) const uint texSize = 256; /* simple, and usually perfect */ /* find an RGBA texture format */ - format = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE); + format = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER); /* create texture for color map/table */ pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index de86832342c..d5696a909f7 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -746,7 +746,8 @@ st_init_bitmap(struct st_context *st) st->bitmap.rasterizer.bypass_vs = 1; /* find a usable texture format */ - if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM, PIPE_TEXTURE)) { + if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM; } else { diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 2ebfcaf82b1..db0c9fbd09c 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -995,18 +995,21 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, srcFormat = rbRead->texture->format; - if (screen->is_format_supported(screen, srcFormat, PIPE_TEXTURE)) { + if (screen->is_format_supported(screen, srcFormat, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { texFormat = srcFormat; } else { /* srcFormat can't be used as a texture format */ if (type == GL_DEPTH) { - texFormat = st_choose_format(pipe, GL_DEPTH_COMPONENT, PIPE_TEXTURE); + texFormat = st_choose_format(pipe, GL_DEPTH_COMPONENT, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_DEPTH_STENCIL); assert(texFormat != PIPE_FORMAT_NONE); /* XXX no depth texture formats??? */ } else { /* default color format */ - texFormat = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE); + texFormat = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER); assert(texFormat != PIPE_FORMAT_NONE); } } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index de782e8232e..1f94a0b9ef0 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1210,9 +1210,13 @@ do_copy_texsubimage(GLcontext *ctx, use_fallback = GL_FALSE; } else if (screen->is_format_supported(screen, strb->surface->format, - PIPE_TEXTURE) && + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, + 0) && screen->is_format_supported(screen, dest_surface->format, - PIPE_SURFACE)) { + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, + 0)) { boolean do_flip = (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP); int srcY0, srcY1; if (do_flip) { diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index d804d2b453d..cacf972a1ba 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -213,18 +213,24 @@ void st_init_extensions(struct st_context *st) } if (screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_SRGB, - PIPE_TEXTURE)) { + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { ctx->Extensions.EXT_texture_sRGB = GL_TRUE; } #if 01 if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA, - PIPE_TEXTURE)) { + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; } #endif - if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR, PIPE_TEXTURE) || - screen->is_format_supported(screen, PIPE_FORMAT_YCBCR_REV, PIPE_TEXTURE)) { + if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0) || + screen->is_format_supported(screen, PIPE_FORMAT_YCBCR_REV, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { ctx->Extensions.MESA_ycbcr_texture = GL_TRUE; } diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 50a06868dfc..b6d97ef6593 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -281,7 +281,10 @@ st_mesa_format_to_pipe_format(GLuint mesaFormat) * Find an RGBA format supported by the context/winsys. */ static enum pipe_format -default_rgba_format(struct pipe_screen *screen, uint type) +default_rgba_format(struct pipe_screen *screen, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) { static const enum pipe_format colorFormats[] = { PIPE_FORMAT_A8R8G8B8_UNORM, @@ -291,7 +294,7 @@ default_rgba_format(struct pipe_screen *screen, uint type) }; uint i; for (i = 0; i < Elements(colorFormats); i++) { - if (screen->is_format_supported( screen, colorFormats[i], type )) { + if (screen->is_format_supported( screen, colorFormats[i], target, tex_usage, geom_flags )) { return colorFormats[i]; } } @@ -303,13 +306,16 @@ default_rgba_format(struct pipe_screen *screen, uint type) * Search list of formats for first RGBA format with >8 bits/channel. */ static enum pipe_format -default_deep_rgba_format(struct pipe_screen *screen, uint type) +default_deep_rgba_format(struct pipe_screen *screen, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) { - if (screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_SNORM, type)) { + if (screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_SNORM, target, tex_usage, geom_flags)) { return PIPE_FORMAT_R16G16B16A16_SNORM; } - if (type == PIPE_TEXTURE) - return default_rgba_format(screen, type); + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + return default_rgba_format(screen, target, tex_usage, geom_flags); else return PIPE_FORMAT_NONE; } @@ -319,7 +325,10 @@ default_deep_rgba_format(struct pipe_screen *screen, uint type) * Find an Z format supported by the context/winsys. */ static enum pipe_format -default_depth_format(struct pipe_screen *screen, uint type) +default_depth_format(struct pipe_screen *screen, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) { static const enum pipe_format zFormats[] = { PIPE_FORMAT_Z16_UNORM, @@ -329,7 +338,7 @@ default_depth_format(struct pipe_screen *screen, uint type) }; uint i; for (i = 0; i < Elements(zFormats); i++) { - if (screen->is_format_supported( screen, zFormats[i], type )) { + if (screen->is_format_supported( screen, zFormats[i], target, tex_usage, geom_flags )) { return zFormats[i]; } } @@ -343,12 +352,10 @@ default_depth_format(struct pipe_screen *screen, uint type) */ enum pipe_format st_choose_format(struct pipe_context *pipe, GLint internalFormat, - uint surfType) + enum pipe_texture_target target, unsigned tex_usage) { struct pipe_screen *screen = pipe->screen; - - assert(surfType == PIPE_SURFACE || - surfType == PIPE_TEXTURE); + unsigned geom_flags = 0; switch (internalFormat) { case 4: @@ -360,38 +367,38 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_RGBA8: case GL_RGB10_A2: case GL_RGBA12: - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_RGBA16: - if (surfType == PIPE_SURFACE) - return default_deep_rgba_format( screen, surfType ); + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + return default_deep_rgba_format( screen, target, tex_usage, geom_flags ); else - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_RGBA4: case GL_RGBA2: - if (screen->is_format_supported( screen, PIPE_FORMAT_A4R4G4B4_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A4R4G4B4_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_A4R4G4B4_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_RGB5_A1: - if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_A1R5G5B5_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_RGB8: case GL_RGB10: case GL_RGB12: case GL_RGB16: - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_RGB5: case GL_RGB4: case GL_R3_G3_B2: - if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_A1R5G5B5_UNORM; - if (screen->is_format_supported( screen, PIPE_FORMAT_R5G6B5_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_R5G6B5_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_R5G6B5_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_ALPHA: case GL_ALPHA4: @@ -399,9 +406,9 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_ALPHA12: case GL_ALPHA16: case GL_COMPRESSED_ALPHA: - if (screen->is_format_supported( screen, PIPE_FORMAT_A8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_A8_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case 1: case GL_LUMINANCE: @@ -410,9 +417,9 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_LUMINANCE12: case GL_LUMINANCE16: case GL_COMPRESSED_LUMINANCE: - if (screen->is_format_supported( screen, PIPE_FORMAT_L8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_L8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_L8_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case 2: case GL_LUMINANCE_ALPHA: @@ -423,9 +430,9 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_LUMINANCE12_ALPHA12: case GL_LUMINANCE16_ALPHA16: case GL_COMPRESSED_LUMINANCE_ALPHA: - if (screen->is_format_supported( screen, PIPE_FORMAT_A8L8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A8L8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_A8L8_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_INTENSITY: case GL_INTENSITY4: @@ -433,17 +440,17 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_INTENSITY12: case GL_INTENSITY16: case GL_COMPRESSED_INTENSITY: - if (screen->is_format_supported( screen, PIPE_FORMAT_I8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_I8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_I8_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_YCBCR_MESA: if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR, - PIPE_TEXTURE)) { + target, tex_usage, geom_flags)) { return PIPE_FORMAT_YCBCR; } if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR_REV, - PIPE_TEXTURE)) { + target, tex_usage, geom_flags)) { return PIPE_FORMAT_YCBCR_REV; } return PIPE_FORMAT_NONE; @@ -472,40 +479,40 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, #endif case GL_DEPTH_COMPONENT16: - if (screen->is_format_supported( screen, PIPE_FORMAT_Z16_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z16_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_Z16_UNORM; /* fall-through */ case GL_DEPTH_COMPONENT24: - if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_S8Z24_UNORM; - if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_Z24S8_UNORM; /* fall-through */ case GL_DEPTH_COMPONENT32: - if (screen->is_format_supported( screen, PIPE_FORMAT_Z32_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z32_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_Z32_UNORM; /* fall-through */ case GL_DEPTH_COMPONENT: - return default_depth_format( screen, surfType ); + return default_depth_format( screen, target, tex_usage, geom_flags ); case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1_EXT: case GL_STENCIL_INDEX4_EXT: case GL_STENCIL_INDEX8_EXT: case GL_STENCIL_INDEX16_EXT: - if (screen->is_format_supported( screen, PIPE_FORMAT_S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_S8_UNORM; - if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_S8Z24_UNORM; - if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_Z24S8_UNORM; return PIPE_FORMAT_NONE; case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: - if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_S8Z24_UNORM; - if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_Z24S8_UNORM; return PIPE_FORMAT_NONE; @@ -521,7 +528,8 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, enum pipe_format st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) { - return st_choose_format(pipe, internalFormat, PIPE_SURFACE); + return st_choose_format(pipe, internalFormat, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET); } @@ -587,7 +595,8 @@ st_ChooseTextureFormat(GLcontext *ctx, GLint internalFormat, (void) format; (void) type; - pFormat = st_choose_format(ctx->st->pipe, internalFormat, PIPE_TEXTURE); + pFormat = st_choose_format(ctx->st->pipe, internalFormat, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER); if (pFormat == PIPE_FORMAT_NONE) return NULL; diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h index ff0fd042dba..3f5ac3201b1 100644 --- a/src/mesa/state_tracker/st_format.h +++ b/src/mesa/state_tracker/st_format.h @@ -65,7 +65,7 @@ st_mesa_format_to_pipe_format(GLuint mesaFormat); extern enum pipe_format st_choose_format(struct pipe_context *pipe, GLint internalFormat, - uint surfType); + enum pipe_texture_target target, unsigned tex_usage); extern enum pipe_format st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat); diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 2fc00df4298..6db9bc0dd58 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -86,7 +86,8 @@ st_render_mipmap(struct st_context *st, assert(target != GL_TEXTURE_3D); /* not done yet */ /* check if we can render in the texture's format */ - if (!screen->is_format_supported(screen, pt->format, PIPE_SURFACE)) { + if (!screen->is_format_supported(screen, pt->format, target, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { return FALSE; } diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 8222826e7a7..3e5054ecd2d 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -88,7 +88,8 @@ st_texture_create(struct st_context *st, _mesa_lookup_enum_by_nr(format), last_level); assert(format); - assert(screen->is_format_supported(screen, format, PIPE_TEXTURE)); + assert(screen->is_format_supported(screen, format, target, + PIPE_TEXTURE_USAGE_SAMPLER, 0)); memset(&pt, 0, sizeof(pt)); pt.target = target; -- cgit v1.2.3 From a3616067fee97f06fb52131ac13c39aeca6dc06a Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@nisroch.keine.ath.cx> Date: Mon, 21 Jul 2008 19:05:55 +1000 Subject: nv50: add NV86 and NV94 to list of "supported" chips --- src/gallium/drivers/nv50/nv50_screen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index bba5ca4d4e5..2f3cf041d1c 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -29,8 +29,8 @@ #include "nouveau/nouveau_stateobj.h" #define NV5X_GRCLASS5097_CHIPSETS 0x00000001 -#define NV8X_GRCLASS8297_CHIPSETS 0x00000010 -#define NV9X_GRCLASS8297_CHIPSETS 0x00000004 +#define NV8X_GRCLASS8297_CHIPSETS 0x00000050 +#define NV9X_GRCLASS8297_CHIPSETS 0x00000014 static boolean nv50_screen_is_format_supported(struct pipe_screen *pscreen, -- cgit v1.2.3 From 39793a262f055adf49e6bbd7b74728f744074e8e Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Wed, 23 Jul 2008 14:14:55 +1000 Subject: nouveau: is_format_supported() interface changes --- src/gallium/drivers/nv04/nv04_screen.c | 15 ++++++--------- src/gallium/drivers/nv10/nv10_screen.c | 15 ++++++--------- src/gallium/drivers/nv30/nv30_screen.c | 15 ++++++--------- src/gallium/drivers/nv40/nv40_screen.c | 15 ++++++--------- src/gallium/drivers/nv50/nv50_screen.c | 13 +++++-------- 5 files changed, 29 insertions(+), 44 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 9f34117b8c8..da09a3a5fe8 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -79,10 +79,11 @@ nv04_screen_get_paramf(struct pipe_screen *screen, int param) static boolean nv04_screen_is_format_supported(struct pipe_screen *screen, - enum pipe_format format, uint type) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) { - switch (type) { - case PIPE_SURFACE: + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: @@ -91,8 +92,7 @@ nv04_screen_is_format_supported(struct pipe_screen *screen, default: break; } - break; - case PIPE_TEXTURE: + } else { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_X8R8G8B8_UNORM: @@ -104,10 +104,7 @@ nv04_screen_is_format_supported(struct pipe_screen *screen, default: break; } - break; - default: - assert(0); - }; + } return FALSE; } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 67787d8e9ce..403f7b98cde 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -82,10 +82,11 @@ nv10_screen_get_paramf(struct pipe_screen *screen, int param) static boolean nv10_screen_is_format_supported(struct pipe_screen *screen, - enum pipe_format format, uint type) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) { - switch (type) { - case PIPE_SURFACE: + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: @@ -95,8 +96,7 @@ nv10_screen_is_format_supported(struct pipe_screen *screen, default: break; } - break; - case PIPE_TEXTURE: + } else { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: @@ -109,10 +109,7 @@ nv10_screen_is_format_supported(struct pipe_screen *screen, default: break; } - break; - default: - assert(0); - }; + } return FALSE; } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index b216a703187..0ffcd772351 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -86,10 +86,11 @@ nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) static boolean nv30_screen_surface_format_supported(struct pipe_screen *pscreen, - enum pipe_format format, uint type) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) { - switch (type) { - case PIPE_SURFACE: + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: @@ -99,8 +100,7 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, default: break; } - break; - case PIPE_TEXTURE: + } else { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: @@ -116,10 +116,7 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, default: break; } - break; - default: - assert(0); - }; + } return FALSE; } diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index ed0215b4862..3872c0a0c9b 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -91,10 +91,11 @@ nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) static boolean nv40_screen_surface_format_supported(struct pipe_screen *pscreen, - enum pipe_format format, uint type) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) { - switch (type) { - case PIPE_SURFACE: + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: @@ -104,8 +105,7 @@ nv40_screen_surface_format_supported(struct pipe_screen *pscreen, default: break; } - break; - case PIPE_TEXTURE: + } else { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: @@ -125,10 +125,7 @@ nv40_screen_surface_format_supported(struct pipe_screen *pscreen, default: break; } - break; - default: - assert(0); - }; + } return FALSE; } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 2f3cf041d1c..996b0b3e8f2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -34,10 +34,11 @@ static boolean nv50_screen_is_format_supported(struct pipe_screen *pscreen, - enum pipe_format format, uint type) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) { - switch (type) { - case PIPE_SURFACE: + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: @@ -47,8 +48,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, default: break; } - break; - case PIPE_TEXTURE: + } else { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: @@ -62,9 +62,6 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, default: break; } - break; - default: - assert(0); } return FALSE; -- cgit v1.2.3 From a8da04cb861b8f9caf3acd33f52f64621f0c15e2 Mon Sep 17 00:00:00 2001 From: Younes Manton <younes.m@gmail.com> Date: Wed, 23 Jul 2008 23:35:23 -0400 Subject: nv all: Copy shader tokens on create, free on delete. Must copy token stream on shader create, client is allowed to free their copy after creating the state object. --- src/gallium/drivers/nv04/nv04_state.c | 4 +++- src/gallium/drivers/nv04/nv04_state.h | 2 +- src/gallium/drivers/nv10/nv10_state.c | 4 +++- src/gallium/drivers/nv10/nv10_state.h | 2 +- src/gallium/drivers/nv30/nv30_state.c | 8 ++++++-- src/gallium/drivers/nv40/nv40_state.c | 8 ++++++-- src/gallium/drivers/nv50/nv50_state.c | 18 ++++++++++++------ src/gallium/state_trackers/g3dvl/vl_context.c | 20 ++++++++++---------- 8 files changed, 42 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index d618465a201..7e71dee7b5d 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -4,6 +4,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" #include "nv04_context.h" #include "nv04_state.h" @@ -291,7 +292,7 @@ nv04_fp_state_create(struct pipe_context *pipe, struct nv04_fragment_program *fp; fp = CALLOC(1, sizeof(struct nv04_fragment_program)); - fp->pipe = cso; + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); return (void *)fp; } @@ -313,6 +314,7 @@ nv04_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv04_fragment_program *fp = hwcso; nv04_fragprog_destroy(nv04, fp); + free((void*)fp->pipe.tokens); free(fp); } diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h index 7487819c3ab..39f7cd17b30 100644 --- a/src/gallium/drivers/nv04/nv04_state.h +++ b/src/gallium/drivers/nv04/nv04_state.h @@ -47,7 +47,7 @@ struct nv04_fragment_program_data { }; struct nv04_fragment_program { - const struct pipe_shader_state *pipe; + struct pipe_shader_state pipe; struct tgsi_shader_info info; boolean translated; diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 9b8b7801cdf..43b9c32f12d 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -4,6 +4,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" #include "nv10_context.h" #include "nv10_state.h" @@ -407,7 +408,7 @@ nv10_fp_state_create(struct pipe_context *pipe, struct nv10_fragment_program *fp; fp = CALLOC(1, sizeof(struct nv10_fragment_program)); - fp->pipe = cso; + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); tgsi_scan_shader(cso->tokens, &fp->info); @@ -431,6 +432,7 @@ nv10_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv10_fragment_program *fp = hwcso; nv10_fragprog_destroy(nv10, fp); + FREE((void*)fp->pipe.tokens); FREE(fp); } diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h index 3ca501d1356..f1f9a12110f 100644 --- a/src/gallium/drivers/nv10/nv10_state.h +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -79,7 +79,7 @@ struct nv10_fragment_program_data { }; struct nv10_fragment_program { - const struct pipe_shader_state *pipe; + struct pipe_shader_state pipe; struct tgsi_shader_info info; boolean translated; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 4d6303ebc28..ba02413de5c 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -3,6 +3,8 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "tgsi/util/tgsi_parse.h" + #include "nv30_context.h" #include "nv30_state.h" @@ -503,7 +505,7 @@ nv30_vp_state_create(struct pipe_context *pipe, struct nv30_vertex_program *vp; vp = CALLOC(1, sizeof(struct nv30_vertex_program)); - vp->pipe = *cso; + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); /*vp->draw = draw_create_vertex_shader(nv30->draw, &vp->pipe);*/ return (void *)vp; @@ -527,6 +529,7 @@ nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) /*draw_delete_vertex_shader(nv30->draw, vp->draw);*/ nv30_vertprog_destroy(nv30, vp); + FREE((void*)vp->pipe.tokens); FREE(vp); } @@ -537,7 +540,7 @@ nv30_fp_state_create(struct pipe_context *pipe, struct nv30_fragment_program *fp; fp = CALLOC(1, sizeof(struct nv30_fragment_program)); - fp->pipe = *cso; + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); tgsi_scan_shader(fp->pipe.tokens, &fp->info); @@ -560,6 +563,7 @@ nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv30_fragment_program *fp = hwcso; nv30_fragprog_destroy(nv30, fp); + FREE((void*)fp->pipe.tokens); FREE(fp); } diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index afcf336a65b..5d2c3ab8810 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -5,6 +5,8 @@ #include "draw/draw_context.h" +#include "tgsi/util/tgsi_parse.h" + #include "nv40_context.h" #include "nv40_state.h" @@ -516,7 +518,7 @@ nv40_vp_state_create(struct pipe_context *pipe, struct nv40_vertex_program *vp; vp = CALLOC(1, sizeof(struct nv40_vertex_program)); - vp->pipe = *cso; + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe); return (void *)vp; @@ -540,6 +542,7 @@ nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) draw_delete_vertex_shader(nv40->draw, vp->draw); nv40_vertprog_destroy(nv40, vp); + FREE((void*)vp->pipe.tokens); FREE(vp); } @@ -550,7 +553,7 @@ nv40_fp_state_create(struct pipe_context *pipe, struct nv40_fragment_program *fp; fp = CALLOC(1, sizeof(struct nv40_fragment_program)); - fp->pipe = *cso; + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); tgsi_scan_shader(fp->pipe.tokens, &fp->info); @@ -573,6 +576,7 @@ nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv40_fragment_program *fp = hwcso; nv40_fragprog_destroy(nv40, fp); + FREE((void*)fp->pipe.tokens); FREE(fp); } diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index c552a0b0aa0..731409bed4e 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -25,6 +25,8 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "tgsi/util/tgsi_parse.h" + #include "nv50_context.h" #include "nv50_texture.h" @@ -438,7 +440,7 @@ nv50_vp_state_create(struct pipe_context *pipe, { struct nv50_program *p = CALLOC_STRUCT(nv50_program); - p->pipe = *cso; + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); p->type = PIPE_SHADER_VERTEX; tgsi_scan_shader(p->pipe.tokens, &p->info); return (void *)p; @@ -457,9 +459,11 @@ static void nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) { struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; - nv50_program_destroy(nv50, hwcso); - FREE(hwcso); + nv50_program_destroy(nv50, p); + FREE((void*)p->pipe.tokens); + FREE(p); } static void * @@ -468,7 +472,7 @@ nv50_fp_state_create(struct pipe_context *pipe, { struct nv50_program *p = CALLOC_STRUCT(nv50_program); - p->pipe = *cso; + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); p->type = PIPE_SHADER_FRAGMENT; tgsi_scan_shader(p->pipe.tokens, &p->info); return (void *)p; @@ -487,9 +491,11 @@ static void nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) { struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; - nv50_program_destroy(nv50, hwcso); - FREE(hwcso); + nv50_program_destroy(nv50, p); + FREE((void*)p->pipe.tokens); + FREE(p); } static void diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 850a769376e..638900b3f47 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -358,7 +358,7 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); - //free(tokens); + free(tokens); return 0; } @@ -436,7 +436,7 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); - //free(tokens); + free(tokens); return 0; } @@ -535,7 +535,7 @@ static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); - //free(tokens); + free(tokens); return 0; } @@ -655,7 +655,7 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); - //free(tokens); + free(tokens); return 0; } @@ -777,7 +777,7 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); - //free(tokens); + free(tokens); return 0; } @@ -948,7 +948,7 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); - //free(tokens); + free(tokens); return 0; } @@ -1054,7 +1054,7 @@ static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); - //free(tokens); + free(tokens); return 0; } @@ -1176,7 +1176,7 @@ static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); - //free(tokens); + free(tokens); return 0; } @@ -1315,7 +1315,7 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); - //free(tokens); + free(tokens); return 0; } @@ -1515,7 +1515,7 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); - //free(tokens); + free(tokens); return 0; } -- cgit v1.2.3 From fd6865c7e5c3c38c273b8269ff30a1acec469b6f Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Wed, 23 Jul 2008 11:15:54 +0900 Subject: softpipe: Remove unused variables. --- src/gallium/drivers/softpipe/sp_tile_cache.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index bfdaaa6b8f4..5d10234945f 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -364,7 +364,6 @@ void sp_flush_tile_cache(struct softpipe_context *softpipe, struct softpipe_tile_cache *tc) { - struct pipe_context *pipe = &softpipe->pipe; struct pipe_surface *ps = tc->surface; int inuse = 0, pos; @@ -414,7 +413,6 @@ struct softpipe_cached_tile * sp_get_cached_tile(struct softpipe_context *softpipe, struct softpipe_tile_cache *tc, int x, int y) { - struct pipe_context *pipe = &softpipe->pipe; struct pipe_surface *ps = tc->surface; /* tile pos in framebuffer: */ -- cgit v1.2.3 From c208a2c791fa24c7c5887fc496738cbddbfafc72 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 28 Jul 2008 12:42:13 +0900 Subject: Merge tgsi/exec and tgsi/util directories. --- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 4 +- src/gallium/auxiliary/draw/draw_private.h | 4 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 8 +- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 6 +- src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 6 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 4 +- src/gallium/auxiliary/draw/draw_vs_llvm.c | 2 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 4 +- src/gallium/auxiliary/gallivm/gallivm.cpp | 4 +- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 4 +- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 10 +- src/gallium/auxiliary/tgsi/Makefile | 18 +- src/gallium/auxiliary/tgsi/SConscript | 24 +- src/gallium/auxiliary/tgsi/exec/Makefile | 2 - src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 2522 -------------------- src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 253 -- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2275 ------------------ src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 49 - src/gallium/auxiliary/tgsi/tgsi_build.c | 1324 ++++++++++ src/gallium/auxiliary/tgsi/tgsi_build.h | 332 +++ src/gallium/auxiliary/tgsi/tgsi_dump.c | 582 +++++ src/gallium/auxiliary/tgsi/tgsi_dump.h | 63 + src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 845 +++++++ src/gallium/auxiliary/tgsi/tgsi_dump_c.h | 49 + src/gallium/auxiliary/tgsi/tgsi_exec.c | 2522 ++++++++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_exec.h | 253 ++ src/gallium/auxiliary/tgsi/tgsi_iterate.c | 85 + src/gallium/auxiliary/tgsi/tgsi_iterate.h | 76 + src/gallium/auxiliary/tgsi/tgsi_parse.c | 332 +++ src/gallium/auxiliary/tgsi/tgsi_parse.h | 151 ++ src/gallium/auxiliary/tgsi/tgsi_sanity.c | 341 +++ src/gallium/auxiliary/tgsi/tgsi_sanity.h | 49 + src/gallium/auxiliary/tgsi/tgsi_scan.c | 226 ++ src/gallium/auxiliary/tgsi/tgsi_scan.h | 74 + src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2275 ++++++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_sse2.h | 49 + src/gallium/auxiliary/tgsi/tgsi_text.c | 1221 ++++++++++ src/gallium/auxiliary/tgsi/tgsi_text.h | 47 + src/gallium/auxiliary/tgsi/tgsi_transform.c | 199 ++ src/gallium/auxiliary/tgsi/tgsi_transform.h | 93 + src/gallium/auxiliary/tgsi/tgsi_util.c | 300 +++ src/gallium/auxiliary/tgsi/tgsi_util.h | 96 + src/gallium/auxiliary/tgsi/util/tgsi_build.c | 1324 ---------- src/gallium/auxiliary/tgsi/util/tgsi_build.h | 332 --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 582 ----- src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 63 - src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c | 845 ------- src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h | 49 - src/gallium/auxiliary/tgsi/util/tgsi_iterate.c | 85 - src/gallium/auxiliary/tgsi/util/tgsi_iterate.h | 76 - src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 332 --- src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 151 -- src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 341 --- src/gallium/auxiliary/tgsi/util/tgsi_sanity.h | 49 - src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 226 -- src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 74 - src/gallium/auxiliary/tgsi/util/tgsi_text.c | 1221 ---------- src/gallium/auxiliary/tgsi/util/tgsi_text.h | 47 - src/gallium/auxiliary/tgsi/util/tgsi_transform.c | 199 -- src/gallium/auxiliary/tgsi/util/tgsi_transform.h | 93 - src/gallium/auxiliary/tgsi/util/tgsi_util.c | 300 --- src/gallium/auxiliary/tgsi/util/tgsi_util.h | 96 - src/gallium/auxiliary/util/u_gen_mipmap.c | 6 +- src/gallium/auxiliary/util/u_simple_shaders.c | 6 +- src/gallium/drivers/cell/ppu/cell_context.h | 2 +- src/gallium/drivers/cell/ppu/cell_state_shader.c | 2 +- src/gallium/drivers/cell/spu/spu_exec.c | 4 +- src/gallium/drivers/cell/spu/spu_exec.h | 2 +- src/gallium/drivers/cell/spu/spu_util.c | 4 +- src/gallium/drivers/i915simple/i915_context.h | 2 +- .../drivers/i915simple/i915_fpc_translate.c | 4 +- src/gallium/drivers/i915simple/i915_state.c | 2 +- src/gallium/drivers/i965simple/brw_context.h | 2 +- src/gallium/drivers/i965simple/brw_sf.c | 2 +- src/gallium/drivers/i965simple/brw_shader_info.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 4 +- src/gallium/drivers/i965simple/brw_vs_emit.c | 2 +- src/gallium/drivers/i965simple/brw_wm_decl.c | 2 +- src/gallium/drivers/i965simple/brw_wm_glsl.c | 2 +- src/gallium/drivers/softpipe/sp_fs_exec.c | 4 +- src/gallium/drivers/softpipe/sp_fs_llvm.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 4 +- src/gallium/drivers/softpipe/sp_headers.h | 2 +- src/gallium/drivers/softpipe/sp_state.h | 2 +- src/gallium/drivers/softpipe/sp_state_fs.c | 4 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- src/gallium/state_trackers/python/gallium.i | 4 +- src/mesa/state_tracker/st_debug.c | 2 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +- src/mesa/state_tracker/st_program.c | 2 +- 93 files changed, 11680 insertions(+), 11682 deletions(-) delete mode 100644 src/gallium/auxiliary/tgsi/exec/Makefile delete mode 100644 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c delete mode 100644 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h delete mode 100755 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c delete mode 100755 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_build.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_build.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump_c.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump_c.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_exec.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_exec.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_iterate.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_iterate.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_parse.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_parse.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sanity.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sanity.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_scan.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_scan.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sse2.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sse2.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_text.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_text.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_transform.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_transform.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_util.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_util.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_build.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_build.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_iterate.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_iterate.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_parse.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_parse.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_sanity.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_scan.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_scan.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_text.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_text.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_transform.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_transform.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_util.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_util.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index af4af8ac1d4..86e4d46a200 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -38,7 +38,7 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "cso_cache/cso_context.h" #include "cso_cache/cso_cache.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 3dd7ee19fd1..991304b2c84 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -38,8 +38,8 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_transform.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" #include "draw_context.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 87fd3036493..13b44015212 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -44,8 +44,8 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_transform.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" #include "draw_context.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 1f63f943656..d3bd9baddd6 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -40,8 +40,8 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_transform.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" #include "draw_context.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 7bd1e670b4b..626a2e3e304 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -44,8 +44,8 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_scan.h" struct pipe_context; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 1f926b3e850..441877d46f0 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -31,10 +31,10 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" #include "draw_vs.h" #include "draw_vs_aos.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 8e834501a4c..eda677cc62c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -28,9 +28,9 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" #include "draw_vs.h" #include "draw_vs_aos.h" #include "draw_vertex.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index 6a54917ae38..e029b7b4bb5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -31,9 +31,9 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" #include "draw_vs.h" #include "draw_vs_aos.h" #include "draw_vertex.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 4501877efcf..e26903d8cc5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -38,8 +38,8 @@ #include "draw_context.h" #include "draw_vs.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" struct exec_vertex_shader { diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index c63bd51a106..fc03473b919 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -38,7 +38,7 @@ #include "draw_context.h" #include "draw_vs.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #ifdef MESA_LLVM diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index c3189c707db..61f0c084c38 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -45,8 +45,8 @@ #include "rtasm/rtasm_cpu.h" #include "rtasm/rtasm_x86sse.h" -#include "tgsi/exec/tgsi_sse2.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_sse2.h" +#include "tgsi/tgsi_parse.h" #define SSE_MAX_VERTICES 4 diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index 77900e342b1..29adeea47de 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -42,8 +42,8 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" #include <llvm/Module.h> #include <llvm/CallingConv.h> diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 857c190f7b6..cf5b9788375 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -43,8 +43,8 @@ #include "pipe/p_shader_tokens.h" #include "pipe/p_util.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" #include <llvm/Module.h> #include <llvm/CallingConv.h> diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 98014bdaa15..b14e2affd6f 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -10,11 +10,11 @@ #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi/util/tgsi_build.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" #include <llvm/Module.h> diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 9c4b9676511..bbeff1304d1 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -4,15 +4,15 @@ include $(TOP)/configs/current LIBNAME = tgsi C_SOURCES = \ - exec/tgsi_exec.c \ - exec/tgsi_sse2.c \ - util/tgsi_iterate.c \ - util/tgsi_build.c \ - util/tgsi_dump.c \ - util/tgsi_parse.c \ - util/tgsi_scan.c \ - util/tgsi_transform.c \ - util/tgsi_util.c + tgsi_build.c \ + tgsi_dump.c \ + tgsi_exec.c \ + tgsi_iterate.c \ + tgsi_parse.c \ + tgsi_scan.c \ + tgsi_sse2.c \ + tgsi_transform.c \ + tgsi_util.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 3bbfa1be547..03982e21943 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -3,18 +3,18 @@ Import('*') tgsi = env.ConvenienceLibrary( target = 'tgsi', source = [ - 'exec/tgsi_exec.c', - 'exec/tgsi_sse2.c', - 'util/tgsi_build.c', - 'util/tgsi_dump.c', - 'util/tgsi_dump_c.c', - 'util/tgsi_iterate.c', - 'util/tgsi_parse.c', - 'util/tgsi_sanity.c', - 'util/tgsi_scan.c', - 'util/tgsi_text.c', - 'util/tgsi_transform.c', - 'util/tgsi_util.c', + 'tgsi_build.c', + 'tgsi_dump.c', + 'tgsi_dump_c.c', + 'tgsi_exec.c', + 'tgsi_iterate.c', + 'tgsi_parse.c', + 'tgsi_sanity.c', + 'tgsi_scan.c', + 'tgsi_sse2.c', + 'tgsi_text.c', + 'tgsi_transform.c', + 'tgsi_util.c', ]) auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile deleted file mode 100644 index 451911a3545..00000000000 --- a/src/gallium/auxiliary/tgsi/exec/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -default: - cd .. ; make diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c deleted file mode 100644 index 001a4c4b152..00000000000 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ /dev/null @@ -1,2522 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * TGSI interpretor/executor. - * - * Flow control information: - * - * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) - * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special - * care since a condition may be true for some quad components but false - * for other components. - * - * We basically execute all statements (even if they're in the part of - * an IF/ELSE clause that's "not taken") and use a special mask to - * control writing to destination registers. This is the ExecMask. - * See store_dest(). - * - * The ExecMask is computed from three other masks (CondMask, LoopMask and - * ContMask) which are controlled by the flow control instructions (namely: - * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). - * - * - * Authors: - * Michal Krol - * Brian Paul - */ - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi_exec.h" - -#define TILE_TOP_LEFT 0 -#define TILE_TOP_RIGHT 1 -#define TILE_BOTTOM_LEFT 2 -#define TILE_BOTTOM_RIGHT 3 - -/* - * Shorthand locations of various utility registers (_I = Index, _C = Channel) - */ -#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I -#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C -#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I -#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C -#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I -#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C -#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I -#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C -#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I -#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C -#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I -#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C -#define TEMP_128_I TGSI_EXEC_TEMP_128_I -#define TEMP_128_C TGSI_EXEC_TEMP_128_C -#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I -#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C -#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I -#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C -#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I -#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C -#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I -#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C -#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I -#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C -#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I -#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C -#define TEMP_R0 TGSI_EXEC_TEMP_R0 - -#define FOR_EACH_CHANNEL(CHAN)\ - for (CHAN = 0; CHAN < 4; CHAN++) - -#define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) - -#define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) - -#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED2( INST, CHAN )) - - -/** The execution mask depends on the conditional mask and the loop mask */ -#define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask - - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - - - -/** - * Initialize machine state by expanding tokens to full instructions, - * allocating temporary storage, setting up constants, etc. - * After this, we can call tgsi_exec_machine_run() many times. - */ -void -tgsi_exec_machine_bind_shader( - struct tgsi_exec_machine *mach, - const struct tgsi_token *tokens, - uint numSamplers, - struct tgsi_sampler *samplers) -{ - uint k; - struct tgsi_parse_context parse; - struct tgsi_exec_labels *labels = &mach->Labels; - struct tgsi_full_instruction *instructions; - struct tgsi_full_declaration *declarations; - uint maxInstructions = 10, numInstructions = 0; - uint maxDeclarations = 10, numDeclarations = 0; - uint instno = 0; - -#if 0 - tgsi_dump(tokens, 0); -#endif - - mach->Tokens = tokens; - mach->Samplers = samplers; - - k = tgsi_parse_init (&parse, mach->Tokens); - if (k != TGSI_PARSE_OK) { - debug_printf( "Problem parsing!\n" ); - return; - } - - mach->Processor = parse.FullHeader.Processor.Processor; - mach->ImmLimit = 0; - labels->count = 0; - - declarations = (struct tgsi_full_declaration *) - MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); - - if (!declarations) { - return; - } - - instructions = (struct tgsi_full_instruction *) - MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); - - if (!instructions) { - FREE( declarations ); - return; - } - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - uint pointer = parse.Position; - uint i; - - tgsi_parse_token( &parse ); - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* save expanded declaration */ - if (numDeclarations == maxDeclarations) { - declarations = REALLOC(declarations, - maxDeclarations - * sizeof(struct tgsi_full_declaration), - (maxDeclarations + 10) - * sizeof(struct tgsi_full_declaration)); - maxDeclarations += 10; - } - memcpy(declarations + numDeclarations, - &parse.FullToken.FullDeclaration, - sizeof(declarations[0])); - numDeclarations++; - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; - assert( size % 4 == 0 ); - assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); - - for( i = 0; i < size; i++ ) { - mach->Imms[mach->ImmLimit + i / 4][i % 4] = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; - } - mach->ImmLimit += size / 4; - } - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - assert( labels->count < MAX_LABELS ); - - labels->labels[labels->count][0] = instno; - labels->labels[labels->count][1] = pointer; - labels->count++; - - /* save expanded instruction */ - if (numInstructions == maxInstructions) { - instructions = REALLOC(instructions, - maxInstructions - * sizeof(struct tgsi_full_instruction), - (maxInstructions + 10) - * sizeof(struct tgsi_full_instruction)); - maxInstructions += 10; - } - memcpy(instructions + numInstructions, - &parse.FullToken.FullInstruction, - sizeof(instructions[0])); - numInstructions++; - break; - - default: - assert( 0 ); - } - } - tgsi_parse_free (&parse); - - if (mach->Declarations) { - FREE( mach->Declarations ); - } - mach->Declarations = declarations; - mach->NumDeclarations = numDeclarations; - - if (mach->Instructions) { - FREE( mach->Instructions ); - } - mach->Instructions = instructions; - mach->NumInstructions = numInstructions; -} - - -void -tgsi_exec_machine_init( - struct tgsi_exec_machine *mach ) -{ - uint i; - - mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); - mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; - - /* Setup constants. */ - for( i = 0; i < 4; i++ ) { - mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; - mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; - mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; - mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; - mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; - mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; - mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; - mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; - mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; - mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; - } -} - - -void -tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) -{ - if (mach->Instructions) { - FREE(mach->Instructions); - mach->Instructions = NULL; - mach->NumInstructions = 0; - } - if (mach->Declarations) { - FREE(mach->Declarations); - mach->Declarations = NULL; - mach->NumDeclarations = 0; - } -} - - -static void -micro_abs( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = fabsf( src->f[0] ); - dst->f[1] = fabsf( src->f[1] ); - dst->f[2] = fabsf( src->f[2] ); - dst->f[3] = fabsf( src->f[3] ); -} - -static void -micro_add( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] + src1->f[0]; - dst->f[1] = src0->f[1] + src1->f[1]; - dst->f[2] = src0->f[2] + src1->f[2]; - dst->f[3] = src0->f[3] + src1->f[3]; -} - -static void -micro_iadd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] + src1->i[0]; - dst->i[1] = src0->i[1] + src1->i[1]; - dst->i[2] = src0->i[2] + src1->i[2]; - dst->i[3] = src0->i[3] + src1->i[3]; -} - -static void -micro_and( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] & src1->u[0]; - dst->u[1] = src0->u[1] & src1->u[1]; - dst->u[2] = src0->u[2] & src1->u[2]; - dst->u[3] = src0->u[3] & src1->u[3]; -} - -static void -micro_ceil( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = ceilf( src->f[0] ); - dst->f[1] = ceilf( src->f[1] ); - dst->f[2] = ceilf( src->f[2] ); - dst->f[3] = ceilf( src->f[3] ); -} - -static void -micro_cos( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = cosf( src->f[0] ); - dst->f[1] = cosf( src->f[1] ); - dst->f[2] = cosf( src->f[2] ); - dst->f[3] = cosf( src->f[3] ); -} - -static void -micro_ddx( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; -} - -static void -micro_ddy( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; -} - -static void -micro_div( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] / src1->f[0]; - dst->f[1] = src0->f[1] / src1->f[1]; - dst->f[2] = src0->f[2] / src1->f[2]; - dst->f[3] = src0->f[3] / src1->f[3]; -} - -static void -micro_udiv( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] / src1->u[0]; - dst->u[1] = src0->u[1] / src1->u[1]; - dst->u[2] = src0->u[2] / src1->u[2]; - dst->u[3] = src0->u[3] / src1->u[3]; -} - -static void -micro_eq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void -micro_ieq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; -} - -static void -micro_exp2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ - dst->f[0] = powf( 2.0f, src->f[0] ); - dst->f[1] = powf( 2.0f, src->f[1] ); - dst->f[2] = powf( 2.0f, src->f[2] ); - dst->f[3] = powf( 2.0f, src->f[3] ); -} - -static void -micro_f2it( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = (int) src->f[0]; - dst->i[1] = (int) src->f[1]; - dst->i[2] = (int) src->f[2]; - dst->i[3] = (int) src->f[3]; -} - -static void -micro_f2ut( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = (uint) src->f[0]; - dst->u[1] = (uint) src->f[1]; - dst->u[2] = (uint) src->f[2]; - dst->u[3] = (uint) src->f[3]; -} - -static void -micro_flr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] ); - dst->f[1] = floorf( src->f[1] ); - dst->f[2] = floorf( src->f[2] ); - dst->f[3] = floorf( src->f[3] ); -} - -static void -micro_frc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] - floorf( src->f[0] ); - dst->f[1] = src->f[1] - floorf( src->f[1] ); - dst->f[2] = src->f[2] - floorf( src->f[2] ); - dst->f[3] = src->f[3] - floorf( src->f[3] ); -} - -static void -micro_ge( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void -micro_i2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->i[0]; - dst->f[1] = (float) src->i[1]; - dst->f[2] = (float) src->i[2]; - dst->f[3] = (float) src->i[3]; -} - -static void -micro_lg2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = logf( src->f[0] ) * 1.442695f; - dst->f[1] = logf( src->f[1] ) * 1.442695f; - dst->f[2] = logf( src->f[2] ) * 1.442695f; - dst->f[3] = logf( src->f[3] ) * 1.442695f; -} - -static void -micro_le( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void -micro_lt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void -micro_ilt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; -} - -static void -micro_ult( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; -} - -static void -micro_max( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; - dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; - dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; - dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; -} - -static void -micro_imax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; -} - -static void -micro_umax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; -} - -static void -micro_min( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; - dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; - dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; - dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; -} - -static void -micro_imin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; -} - -static void -micro_umin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; -} - -static void -micro_umod( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] % src1->u[0]; - dst->u[1] = src0->u[1] % src1->u[1]; - dst->u[2] = src0->u[2] % src1->u[2]; - dst->u[3] = src0->u[3] % src1->u[3]; -} - -static void -micro_mul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] * src1->f[0]; - dst->f[1] = src0->f[1] * src1->f[1]; - dst->f[2] = src0->f[2] * src1->f[2]; - dst->f[3] = src0->f[3] * src1->f[3]; -} - -static void -micro_imul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] * src1->i[0]; - dst->i[1] = src0->i[1] * src1->i[1]; - dst->i[2] = src0->i[2] * src1->i[2]; - dst->i[3] = src0->i[3] * src1->i[3]; -} - -static void -micro_imul64( - union tgsi_exec_channel *dst0, - union tgsi_exec_channel *dst1, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst1->i[0] = src0->i[0] * src1->i[0]; - dst1->i[1] = src0->i[1] * src1->i[1]; - dst1->i[2] = src0->i[2] * src1->i[2]; - dst1->i[3] = src0->i[3] * src1->i[3]; - dst0->i[0] = 0; - dst0->i[1] = 0; - dst0->i[2] = 0; - dst0->i[3] = 0; -} - -static void -micro_umul64( - union tgsi_exec_channel *dst0, - union tgsi_exec_channel *dst1, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst1->u[0] = src0->u[0] * src1->u[0]; - dst1->u[1] = src0->u[1] * src1->u[1]; - dst1->u[2] = src0->u[2] * src1->u[2]; - dst1->u[3] = src0->u[3] * src1->u[3]; - dst0->u[0] = 0; - dst0->u[1] = 0; - dst0->u[2] = 0; - dst0->u[3] = 0; -} - -static void -micro_movc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2 ) -{ - dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; - dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; - dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; - dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; -} - -static void -micro_neg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = -src->f[0]; - dst->f[1] = -src->f[1]; - dst->f[2] = -src->f[2]; - dst->f[3] = -src->f[3]; -} - -static void -micro_ineg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = -src->i[0]; - dst->i[1] = -src->i[1]; - dst->i[2] = -src->i[2]; - dst->i[3] = -src->i[3]; -} - -static void -micro_not( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = ~src->u[0]; - dst->u[1] = ~src->u[1]; - dst->u[2] = ~src->u[2]; - dst->u[3] = ~src->u[3]; -} - -static void -micro_or( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] | src1->u[0]; - dst->u[1] = src0->u[1] | src1->u[1]; - dst->u[2] = src0->u[2] | src1->u[2]; - dst->u[3] = src0->u[3] | src1->u[3]; -} - -static void -micro_pow( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = powf( src0->f[0], src1->f[0] ); - dst->f[1] = powf( src0->f[1], src1->f[1] ); - dst->f[2] = powf( src0->f[2], src1->f[2] ); - dst->f[3] = powf( src0->f[3], src1->f[3] ); -} - -static void -micro_rnd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] + 0.5f ); - dst->f[1] = floorf( src->f[1] + 0.5f ); - dst->f[2] = floorf( src->f[2] + 0.5f ); - dst->f[3] = floorf( src->f[3] + 0.5f ); -} - -static void -micro_shl( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] << src1->i[0]; - dst->i[1] = src0->i[1] << src1->i[1]; - dst->i[2] = src0->i[2] << src1->i[2]; - dst->i[3] = src0->i[3] << src1->i[3]; -} - -static void -micro_ishr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] >> src1->i[0]; - dst->i[1] = src0->i[1] >> src1->i[1]; - dst->i[2] = src0->i[2] >> src1->i[2]; - dst->i[3] = src0->i[3] >> src1->i[3]; -} - -static void -micro_trunc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0 ) -{ - dst->f[0] = (float) (int) src0->f[0]; - dst->f[1] = (float) (int) src0->f[1]; - dst->f[2] = (float) (int) src0->f[2]; - dst->f[3] = (float) (int) src0->f[3]; -} - -static void -micro_ushr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] >> src1->u[0]; - dst->u[1] = src0->u[1] >> src1->u[1]; - dst->u[2] = src0->u[2] >> src1->u[2]; - dst->u[3] = src0->u[3] >> src1->u[3]; -} - -static void -micro_sin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sinf( src->f[0] ); - dst->f[1] = sinf( src->f[1] ); - dst->f[2] = sinf( src->f[2] ); - dst->f[3] = sinf( src->f[3] ); -} - -static void -micro_sqrt( union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sqrtf( src->f[0] ); - dst->f[1] = sqrtf( src->f[1] ); - dst->f[2] = sqrtf( src->f[2] ); - dst->f[3] = sqrtf( src->f[3] ); -} - -static void -micro_sub( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] - src1->f[0]; - dst->f[1] = src0->f[1] - src1->f[1]; - dst->f[2] = src0->f[2] - src1->f[2]; - dst->f[3] = src0->f[3] - src1->f[3]; -} - -static void -micro_u2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->u[0]; - dst->f[1] = (float) src->u[1]; - dst->f[2] = (float) src->u[2]; - dst->f[3] = (float) src->u[3]; -} - -static void -micro_xor( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] ^ src1->u[0]; - dst->u[1] = src0->u[1] ^ src1->u[1]; - dst->u[2] = src0->u[2] ^ src1->u[2]; - dst->u[3] = src0->u[3] ^ src1->u[3]; -} - -static void -fetch_src_file_channel( - const struct tgsi_exec_machine *mach, - const uint file, - const uint swizzle, - const union tgsi_exec_channel *index, - union tgsi_exec_channel *chan ) -{ - switch( swizzle ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch( file ) { - case TGSI_FILE_CONSTANT: - chan->f[0] = mach->Consts[index->i[0]][swizzle]; - chan->f[1] = mach->Consts[index->i[1]][swizzle]; - chan->f[2] = mach->Consts[index->i[2]][swizzle]; - chan->f[3] = mach->Consts[index->i[3]][swizzle]; - break; - - case TGSI_FILE_INPUT: - chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_TEMPORARY: - assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); - chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_IMMEDIATE: - assert( index->i[0] < (int) mach->ImmLimit ); - chan->f[0] = mach->Imms[index->i[0]][swizzle]; - assert( index->i[1] < (int) mach->ImmLimit ); - chan->f[1] = mach->Imms[index->i[1]][swizzle]; - assert( index->i[2] < (int) mach->ImmLimit ); - chan->f[2] = mach->Imms[index->i[2]][swizzle]; - assert( index->i[3] < (int) mach->ImmLimit ); - chan->f[3] = mach->Imms[index->i[3]][swizzle]; - break; - - case TGSI_FILE_ADDRESS: - chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_OUTPUT: - /* vertex/fragment output vars can be read too */ - chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - default: - assert( 0 ); - } - break; - - case TGSI_EXTSWIZZLE_ZERO: - *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; - break; - - case TGSI_EXTSWIZZLE_ONE: - *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; - break; - - default: - assert( 0 ); - } -} - -static void -fetch_source( - const struct tgsi_exec_machine *mach, - union tgsi_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) -{ - union tgsi_exec_channel index; - uint swizzle; - - index.i[0] = - index.i[1] = - index.i[2] = - index.i[3] = reg->SrcRegister.Index; - - if (reg->SrcRegister.Indirect) { - union tgsi_exec_channel index2; - union tgsi_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; - - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); - fetch_src_file_channel( - mach, - reg->SrcRegisterInd.File, - swizzle, - &index2, - &indir_index ); - - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; - } - - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { - case TGSI_FILE_INPUT: - index.i[0] *= 17; - index.i[1] *= 17; - index.i[2] *= 17; - index.i[3] *= 17; - break; - case TGSI_FILE_CONSTANT: - index.i[0] *= 4096; - index.i[1] *= 4096; - index.i[2] *= 4096; - index.i[3] *= 4096; - break; - default: - assert( 0 ); - } - - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; - - if (reg->SrcRegisterDim.Indirect) { - union tgsi_exec_channel index2; - union tgsi_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; - - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); - fetch_src_file_channel( - mach, - reg->SrcRegisterDimInd.File, - swizzle, - &index2, - &indir_index ); - - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; - } - } - - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - fetch_src_file_channel( - mach, - reg->SrcRegister.File, - swizzle, - &index, - chan ); - - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - micro_abs( chan, chan ); - break; - - case TGSI_UTIL_SIGN_SET: - micro_abs( chan, chan ); - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; - } - - if (reg->SrcRegisterExtMod.Complement) { - micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); - } -} - -static void -store_dest( - struct tgsi_exec_machine *mach, - const union tgsi_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) -{ - union tgsi_exec_channel *dst; - - switch( reg->DstRegister.File ) { - case TGSI_FILE_NULL: - return; - - case TGSI_FILE_OUTPUT: - dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_TEMPORARY: - assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS); - dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_ADDRESS: - dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; - break; - - default: - assert( 0 ); - return; - } - - switch (inst->Instruction.Saturate) - { - case TGSI_SAT_NONE: - if (mach->ExecMask & 0x1) - dst->i[0] = chan->i[0]; - if (mach->ExecMask & 0x2) - dst->i[1] = chan->i[1]; - if (mach->ExecMask & 0x4) - dst->i[2] = chan->i[2]; - if (mach->ExecMask & 0x8) - dst->i[3] = chan->i[3]; - break; - - case TGSI_SAT_ZERO_ONE: - /* XXX need to obey ExecMask here */ - micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); - break; - - default: - assert( 0 ); - } -} - -#define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) - -#define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) - - -/** - * Execute ARB-style KIL which is predicated by a src register. - * Kill fragment if any of the four values is less than zero. - */ -static void -exec_kilp(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - uint uniquemask; - uint chan_index; - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - union tgsi_exec_channel r[1]; - - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); - - for (chan_index = 0; chan_index < 4; chan_index++) - { - uint swizzle; - uint i; - - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle ( - &inst->FullSrcRegisters[0], - chan_index); - - /* check if the component has not been already tested */ - if (uniquemask & (1 << swizzle)) - continue; - uniquemask |= 1 << swizzle; - - FETCH(&r[0], 0, chan_index); - for (i = 0; i < 4; i++) - if (r[0].f[i] < 0.0f) - kilmask |= 1 << i; - } - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; -} - - -/* - * Fetch a texel using STR texture coordinates. - */ -static void -fetch_texel( struct tgsi_sampler *sampler, - const union tgsi_exec_channel *s, - const union tgsi_exec_channel *t, - const union tgsi_exec_channel *p, - float lodbias, /* XXX should be float[4] */ - union tgsi_exec_channel *r, - union tgsi_exec_channel *g, - union tgsi_exec_channel *b, - union tgsi_exec_channel *a ) -{ - uint j; - float rgba[NUM_CHANNELS][QUAD_SIZE]; - - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); - - for (j = 0; j < 4; j++) { - r->f[j] = rgba[0][j]; - g->f[j] = rgba[1][j]; - b->f[j] = rgba[2][j]; - a->f[j] = rgba[3][j]; - } -} - - -static void -exec_tex(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst, - boolean biasLod, - boolean projected) -{ - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; - union tgsi_exec_channel r[8]; - uint chan_index; - float lodBias; - - /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ - - switch (inst->InstructionExtTexture.Texture) { - case TGSI_TEXTURE_1D: - - FETCH(&r[0], 0, CHAN_X); - - if (projected) { - FETCH(&r[1], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[1] ); - } - - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ - break; - - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); - - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ - break; - - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); - - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, - &r[0], &r[1], &r[2], &r[3]); - break; - - default: - assert (0); - } - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } -} - - -/** - * Evaluate a constant-valued coefficient at the position of the - * current quad. - */ -static void -eval_constant_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; - } -} - -/** - * Evaluate a linear-valued coefficient at the position of the - * current quad. - */ -static void -eval_linear_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - mach->Inputs[attrib].xyzw[chan].f[0] = a0; - mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; - mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; - mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; -} - -/** - * Evaluate a perspective-valued coefficient at the position of the - * current quad. - */ -static void -eval_perspective_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - const float *w = mach->QuadPos.xyzw[3].f; - /* divide by W here */ - mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; - mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; - mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; - mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; -} - - -typedef void (* eval_coef_func)( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ); - -static void -exec_declaration( - struct tgsi_exec_machine *mach, - const struct tgsi_full_declaration *decl ) -{ - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - eval_coef_func eval; - - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; - mask = decl->Declaration.UsageMask; - - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - eval = eval_constant_coef; - break; - - case TGSI_INTERPOLATE_LINEAR: - eval = eval_linear_coef; - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - eval = eval_perspective_coef; - break; - - default: - assert( 0 ); - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - eval( mach, i, j ); - } - } - } - else { - unsigned i, j; - - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - eval( mach, i, j ); - } - } - } - } - } - } -} - -static void -exec_instruction( - struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst, - int *pc ) -{ - uint chan_index; - union tgsi_exec_channel r[8]; - - (*pc)++; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_f2it( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[1], 0, CHAN_Y ); - micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - - FETCH( &r[2], 0, CHAN_W ); - micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); - micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); - micro_pow( &r[1], &r[1], &r[2] ); - micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Z ); - } - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - micro_sqrt( &r[0], &r[0] ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXP: - FETCH( &r[0], 0, CHAN_X ); - micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ - STORE( &r[2], 0, CHAN_X ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ - STORE( &r[2], 0, CHAN_Y ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ - STORE( &r[2], 0, CHAN_Z ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_LOG: - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ - micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ - micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[0], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ - micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ - STORE( &r[0], 0, CHAN_Y ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[1], 0, CHAN_Z ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - micro_mul( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - FETCH( &r[0], 0, CHAN_Y ); - FETCH( &r[1], 1, CHAN_Y); - micro_mul( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - /* XXX use micro_min()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - /* XXX use micro_max()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); - - STORE(&r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_mul( &r[0], &r[0], &r[1] ); - FETCH( &r[1], 2, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - micro_sub( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - micro_sub( &r[1], &r[1], &r[2] ); - micro_mul( &r[0], &r[0], &r[1] ); - micro_add( &r[0], &r[0], &r[2] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_CND: - assert (0); - break; - - case TGSI_OPCODE_CND0: - assert (0); - break; - - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ - assert (0); - break; - - case TGSI_OPCODE_INDEX: - assert (0); - break; - - case TGSI_OPCODE_NEGATE: - assert (0); - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_frc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_CLAMP: - assert (0); - break; - - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_flr( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_ROUND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_rnd( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ - FETCH(&r[0], 0, CHAN_X); - - micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ - FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_pow( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ - FETCH(&r[0], 0, CHAN_Y); - FETCH(&r[1], 1, CHAN_Z); - - micro_mul( &r[2], &r[0], &r[1] ); - - FETCH(&r[3], 0, CHAN_Z); - FETCH(&r[4], 1, CHAN_Y); - - micro_mul( &r[5], &r[3], &r[4] ); - micro_sub( &r[2], &r[2], &r[5] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } - - FETCH(&r[2], 1, CHAN_X); - - micro_mul( &r[3], &r[3], &r[2] ); - - FETCH(&r[5], 0, CHAN_X); - - micro_mul( &r[1], &r[1], &r[5] ); - micro_sub( &r[3], &r[3], &r[1] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } - - micro_mul( &r[5], &r[5], &r[4] ); - micro_mul( &r[0], &r[0], &r[2] ); - micro_sub( &r[5], &r[5], &r[0] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MULTIPLYMATRIX: - assert (0); - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - - micro_abs( &r[0], &r[0] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_RCC: - assert (0); - break; - - case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 1, CHAN_W); - - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - micro_cos( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddx( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddy( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_KILP: - exec_kilp (mach, inst); - break; - - case TGSI_OPCODE_KIL: - /* for enabled ExecMask bits, set the killed bit */ - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; - break; - - case TGSI_OPCODE_PK2H: - assert (0); - break; - - case TGSI_OPCODE_PK2US: - assert (0); - break; - - case TGSI_OPCODE_PK4B: - assert (0); - break; - - case TGSI_OPCODE_PK4UB: - assert (0); - break; - - case TGSI_OPCODE_RFL: - assert (0); - break; - - case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], - &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SFL: - assert (0); - break; - - case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - micro_sin( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_STR: - assert (0); - break; - - case TGSI_OPCODE_TEX: - /* simple texture lookup */ - /* src[0] = texcoord */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, FALSE); - break; - - case TGSI_OPCODE_TXB: - /* Texture lookup with lod bias */ - /* src[0] = texcoord (src[0].w = LOD bias) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); - break; - - case TGSI_OPCODE_TXD: - /* Texture lookup with explict partial derivatives */ - /* src[0] = texcoord */ - /* src[1] = d[strq]/dx */ - /* src[2] = d[strq]/dy */ - /* src[3] = sampler unit */ - assert (0); - break; - - case TGSI_OPCODE_TXL: - /* Texture lookup with explit LOD */ - /* src[0] = texcoord (src[0].w = LOD) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); - break; - - case TGSI_OPCODE_TXP: - /* Texture lookup with projection */ - /* src[0] = texcoord (src[0].w = projection) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, TRUE); - break; - - case TGSI_OPCODE_UP2H: - assert (0); - break; - - case TGSI_OPCODE_UP2US: - assert (0); - break; - - case TGSI_OPCODE_UP4B: - assert (0); - break; - - case TGSI_OPCODE_UP4UB: - assert (0); - break; - - case TGSI_OPCODE_X2D: - assert (0); - break; - - case TGSI_OPCODE_ARA: - assert (0); - break; - - case TGSI_OPCODE_ARR: - assert (0); - break; - - case TGSI_OPCODE_BRA: - assert (0); - break; - - case TGSI_OPCODE_CAL: - /* skip the call if no execution channels are enabled */ - if (mach->ExecMask) { - /* do the call */ - - /* push the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); - mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - - /* note that PC was already incremented above */ - mach->CallStack[mach->CallStackTop++] = *pc; - *pc = inst->InstructionExtLabel.Label; - } - break; - - case TGSI_OPCODE_RET: - mach->FuncMask &= ~mach->ExecMask; - UPDATE_EXEC_MASK(mach); - - if (mach->ExecMask == 0x0) { - /* really return now (otherwise, keep executing */ - - if (mach->CallStackTop == 0) { - /* returning from main() */ - *pc = -1; - return; - } - *pc = mach->CallStack[--mach->CallStackTop]; - - /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->FuncStackTop > 0); - mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; - - UPDATE_EXEC_MASK(mach); - } - break; - - case TGSI_OPCODE_SSG: - assert (0); - break; - - case TGSI_OPCODE_CMP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_SCS: - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( &r[0], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - micro_cos( &r[1], &r[0] ); - STORE( &r[1], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - micro_sin( &r[1], &r[0] ); - STORE( &r[1], 0, CHAN_Y ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_NRM: - assert (0); - break; - - case TGSI_OPCODE_DIV: - assert( 0 ); - break; - - case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_IF: - /* push CondMask */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - FETCH( &r[0], 0, CHAN_X ); - /* update CondMask */ - if( ! r[0].u[0] ) { - mach->CondMask &= ~0x1; - } - if( ! r[0].u[1] ) { - mach->CondMask &= ~0x2; - } - if( ! r[0].u[2] ) { - mach->CondMask &= ~0x4; - } - if( ! r[0].u[3] ) { - mach->CondMask &= ~0x8; - } - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ELSE */ - break; - - case TGSI_OPCODE_ELSE: - /* invert CondMask wrt previous mask */ - { - uint prevMask; - assert(mach->CondStackTop > 0); - prevMask = mach->CondStack[mach->CondStackTop - 1]; - mach->CondMask = ~mach->CondMask & prevMask; - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ENDIF */ - } - break; - - case TGSI_OPCODE_ENDIF: - /* pop CondMask */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_END: - /* halt execution */ - *pc = -1; - break; - - case TGSI_OPCODE_REP: - assert (0); - break; - - case TGSI_OPCODE_ENDREP: - assert (0); - break; - - case TGSI_OPCODE_PUSHA: - assert (0); - break; - - case TGSI_OPCODE_POPA: - assert (0); - break; - - case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ceil( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_i2f( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_not( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_trunc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_shl( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ishr( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_and( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_or( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOD: - assert (0); - break; - - case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_xor( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SAD: - assert (0); - break; - - case TGSI_OPCODE_TXF: - assert (0); - break; - - case TGSI_OPCODE_TXQ: - assert (0); - break; - - case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; - break; - - case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; - break; - - case TGSI_OPCODE_LOOP: - /* fall-through (for now) */ - case TGSI_OPCODE_BGNLOOP2: - /* push LoopMask and ContMasks */ - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - break; - - case TGSI_OPCODE_ENDLOOP: - /* fall-through (for now at least) */ - case TGSI_OPCODE_ENDLOOP2: - /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - UPDATE_EXEC_MASK(mach); - if (mach->ExecMask) { - /* repeat loop: jump to instruction just past BGNLOOP */ - *pc = inst->InstructionExtLabel.Label + 1; - } - else { - /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop ContMask */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_CONT: - /* turn off cont channels for each enabled exec channel */ - mach->ContMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BGNSUB: - /* no-op */ - break; - - case TGSI_OPCODE_ENDSUB: - /* no-op */ - break; - - case TGSI_OPCODE_NOISE1: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE2: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE3: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE4: - assert( 0 ); - break; - - case TGSI_OPCODE_NOP: - break; - - default: - assert( 0 ); - } -} - - -/** - * Run TGSI interpreter. - * \return bitmask of "alive" quad components - */ -uint -tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) -{ - uint i; - int pc = 0; - - mach->CondMask = 0xf; - mach->LoopMask = 0xf; - mach->ContMask = 0xf; - mach->FuncMask = 0xf; - mach->ExecMask = 0xf; - - mach->CondStackTop = 0; /* temporarily subvert this assertion */ - assert(mach->CondStackTop == 0); - assert(mach->LoopStackTop == 0); - assert(mach->ContStackTop == 0); - assert(mach->CallStackTop == 0); - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; - - if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; - mach->Primitives[0] = 0; - } - - - /* execute declarations (interpolants) */ - for (i = 0; i < mach->NumDeclarations; i++) { - exec_declaration( mach, mach->Declarations+i ); - } - - /* execute instructions, until pc is set to -1 */ - while (pc != -1) { - assert(pc < (int) mach->NumInstructions); - exec_instruction( mach, mach->Instructions + pc, &pc ); - } - -#if 0 - /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ - if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { - /* - * Scale back depth component. - */ - for (i = 0; i < 4; i++) - mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; - } -#endif - - return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; -} - - diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h deleted file mode 100644 index 4f30650b07b..00000000000 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h +++ /dev/null @@ -1,253 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#if !defined TGSI_EXEC_H -#define TGSI_EXEC_H - -#include "pipe/p_compiler.h" - -#if defined __cplusplus -extern "C" { -#endif - -#define MAX_LABELS 1024 - -#define NUM_CHANNELS 4 /* R,G,B,A */ -#define QUAD_SIZE 4 /* 4 pixel/quad */ - -/** - * Registers may be treated as float, signed int or unsigned int. - */ -union tgsi_exec_channel -{ - float f[QUAD_SIZE]; - int i[QUAD_SIZE]; - unsigned u[QUAD_SIZE]; -}; - -/** - * A vector[RGBA] of channels[4 pixels] - */ -struct tgsi_exec_vector -{ - union tgsi_exec_channel xyzw[NUM_CHANNELS]; -}; - -/** - * For fragment programs, information for computing fragment input - * values from plane equation of the triangle/line. - */ -struct tgsi_interp_coef -{ - float a0[NUM_CHANNELS]; /* in an xyzw layout */ - float dadx[NUM_CHANNELS]; - float dady[NUM_CHANNELS]; -}; - - -struct softpipe_tile_cache; /**< Opaque to TGSI */ - -/** - * Information for sampling textures, which must be implemented - * by code outside the TGSI executor. - */ -struct tgsi_sampler -{ - const struct pipe_sampler_state *state; - struct pipe_texture *texture; - /** Get samples for four fragments in a quad */ - void (*get_samples)(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); - void *pipe; /*XXX temporary*/ - struct softpipe_tile_cache *cache; -}; - -/** - * For branching/calling subroutines. - */ -struct tgsi_exec_labels -{ - unsigned labels[MAX_LABELS][2]; - unsigned count; -}; - - -#define TGSI_EXEC_NUM_TEMPS 128 -#define TGSI_EXEC_NUM_TEMP_EXTRAS 6 -#define TGSI_EXEC_NUM_IMMEDIATES 256 - -/* - * Locations of various utility registers (_I = Index, _C = Channel) - */ -#define TGSI_EXEC_TEMP_00000000_I (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_00000000_C 0 - -#define TGSI_EXEC_TEMP_7FFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_7FFFFFFF_C 1 - -#define TGSI_EXEC_TEMP_80000000_I (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_80000000_C 2 - -#define TGSI_EXEC_TEMP_FFFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_FFFFFFFF_C 3 - -#define TGSI_EXEC_TEMP_ONE_I (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_ONE_C 0 - -#define TGSI_EXEC_TEMP_TWO_I (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_TWO_C 1 - -#define TGSI_EXEC_TEMP_128_I (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_128_C 2 - -#define TGSI_EXEC_TEMP_MINUS_128_I (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_MINUS_128_C 3 - -#define TGSI_EXEC_TEMP_KILMASK_I (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_KILMASK_C 0 - -#define TGSI_EXEC_TEMP_OUTPUT_I (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_OUTPUT_C 1 - -#define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_PRIMITIVE_C 2 - -#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_THREE_C 3 - -#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) -#define TGSI_EXEC_TEMP_HALF_C 0 - -#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) - -#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5) - - -#define TGSI_EXEC_MAX_COND_NESTING 20 -#define TGSI_EXEC_MAX_LOOP_NESTING 20 -#define TGSI_EXEC_MAX_CALL_NESTING 20 - -/** - * Run-time virtual machine state for executing TGSI shader. - */ -struct tgsi_exec_machine -{ - /* Total = program temporaries + internal temporaries - * + 1 padding to align to 16 bytes - */ - struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS + - TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; - - /* - * This will point to _Temps after aligning to 16B boundary. - */ - struct tgsi_exec_vector *Temps; - struct tgsi_exec_vector *Addrs; - - struct tgsi_sampler *Samplers; - - float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; - unsigned ImmLimit; - const float (*Consts)[4]; - struct tgsi_exec_vector *Inputs; - struct tgsi_exec_vector *Outputs; - const struct tgsi_token *Tokens; - unsigned Processor; - - /* GEOMETRY processor only. */ - unsigned *Primitives; - - /* FRAGMENT processor only. */ - const struct tgsi_interp_coef *InterpCoefs; - struct tgsi_exec_vector QuadPos; - - /* Conditional execution masks */ - uint CondMask; /**< For IF/ELSE/ENDIF */ - uint LoopMask; /**< For BGNLOOP/ENDLOOP */ - uint ContMask; /**< For loop CONT statements */ - uint FuncMask; /**< For function calls */ - uint ExecMask; /**< = CondMask & LoopMask */ - - /** Condition mask stack (for nested conditionals) */ - uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; - int CondStackTop; - - /** Loop mask stack (for nested loops) */ - uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int LoopStackTop; - - /** Loop continue mask stack (see comments in tgsi_exec.c) */ - uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int ContStackTop; - - /** Function execution mask stack (for executing subroutine code) */ - uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; - int FuncStackTop; - - /** Function call stack for saving/restoring the program counter */ - uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; - int CallStackTop; - - struct tgsi_full_instruction *Instructions; - uint NumInstructions; - - struct tgsi_full_declaration *Declarations; - uint NumDeclarations; - - struct tgsi_exec_labels Labels; -}; - -void -tgsi_exec_machine_init( - struct tgsi_exec_machine *mach ); - - -void -tgsi_exec_machine_bind_shader( - struct tgsi_exec_machine *mach, - const struct tgsi_token *tokens, - uint numSamplers, - struct tgsi_sampler *samplers); - -uint -tgsi_exec_machine_run( - struct tgsi_exec_machine *mach ); - - -void -tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach); - - -#if defined __cplusplus -} /* extern "C" */ -#endif - -#endif /* TGSI_EXEC_H */ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c deleted file mode 100755 index cdbdf5c8827..00000000000 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ /dev/null @@ -1,2275 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi_exec.h" -#include "tgsi_sse2.h" - -#include "rtasm/rtasm_x86sse.h" - -#ifdef PIPE_ARCH_X86 - -/* for 1/sqrt() - * - * This costs about 100fps (close to 10%) in gears: - */ -#define HIGH_PRECISION 1 - - -#define FOR_EACH_CHANNEL( CHAN )\ - for( CHAN = 0; CHAN < 4; CHAN++ ) - -#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) - -#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - if( IS_DST0_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ - FOR_EACH_CHANNEL( CHAN )\ - IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - -#define TEMP_R0 TGSI_EXEC_TEMP_R0 - -/** - * X86 utility functions. - */ - -static struct x86_reg -make_xmm( - unsigned xmm ) -{ - return x86_make_reg( - file_XMM, - (enum x86_reg_name) xmm ); -} - -/** - * X86 register mapping helpers. - */ - -static struct x86_reg -get_const_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_CX ); -} - -static struct x86_reg -get_input_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_AX ); -} - -static struct x86_reg -get_output_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_DX ); -} - -static struct x86_reg -get_temp_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_BX ); -} - -static struct x86_reg -get_coef_base( void ) -{ - return get_output_base(); -} - -static struct x86_reg -get_immediate_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_DI ); -} - - -/** - * Data access helpers. - */ - - -static struct x86_reg -get_immediate( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_immediate_base(), - (vec * 4 + chan) * 4 ); -} - -static struct x86_reg -get_const( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_const_base(), - (vec * 4 + chan) * 4 ); -} - -static struct x86_reg -get_input( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_input_base(), - (vec * 4 + chan) * 16 ); -} - -static struct x86_reg -get_output( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_output_base(), - (vec * 4 + chan) * 16 ); -} - -static struct x86_reg -get_temp( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_temp_base(), - (vec * 4 + chan) * 16 ); -} - -static struct x86_reg -get_coef( - unsigned vec, - unsigned chan, - unsigned member ) -{ - return x86_make_disp( - get_coef_base(), - ((vec * 3 + member) * 4 + chan) * 4 ); -} - - -static void -emit_ret( - struct x86_function *func ) -{ - x86_ret( func ); -} - - -/** - * Data fetch helpers. - */ - -/** - * Copy a shader constant to xmm register - * \param xmm the destination xmm register - * \param vec the src const buffer index - * \param chan src channel to fetch (X, Y, Z or W) - */ -static void -emit_const( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movss( - func, - make_xmm( xmm ), - get_const( vec, chan ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); -} - -static void -emit_immediate( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movss( - func, - make_xmm( xmm ), - get_immediate( vec, chan ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); -} - - -/** - * Copy a shader input to xmm register - * \param xmm the destination xmm register - * \param vec the src input attrib - * \param chan src channel to fetch (X, Y, Z or W) - */ -static void -emit_inputf( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movups( - func, - make_xmm( xmm ), - get_input( vec, chan ) ); -} - -/** - * Store an xmm register to a shader output - * \param xmm the source xmm register - * \param vec the dest output attrib - * \param chan src dest channel to store (X, Y, Z or W) - */ -static void -emit_output( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movups( - func, - get_output( vec, chan ), - make_xmm( xmm ) ); -} - -/** - * Copy a shader temporary to xmm register - * \param xmm the destination xmm register - * \param vec the src temp register - * \param chan src channel to fetch (X, Y, Z or W) - */ -static void -emit_tempf( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movaps( - func, - make_xmm( xmm ), - get_temp( vec, chan ) ); -} - -/** - * Load an xmm register with an input attrib coefficient (a0, dadx or dady) - * \param xmm the destination xmm register - * \param vec the src input/attribute coefficient index - * \param chan src channel to fetch (X, Y, Z or W) - * \param member 0=a0, 1=dadx, 2=dady - */ -static void -emit_coef( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan, - unsigned member ) -{ - sse_movss( - func, - make_xmm( xmm ), - get_coef( vec, chan, member ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); -} - -/** - * Data store helpers. - */ - -static void -emit_inputs( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movups( - func, - get_input( vec, chan ), - make_xmm( xmm ) ); -} - -static void -emit_temps( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movaps( - func, - get_temp( vec, chan ), - make_xmm( xmm ) ); -} - -static void -emit_addrs( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_temps( - func, - xmm, - vec + TGSI_EXEC_NUM_TEMPS, - chan ); -} - -/** - * Coefficent fetch helpers. - */ - -static void -emit_coef_a0( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_coef( - func, - xmm, - vec, - chan, - 0 ); -} - -static void -emit_coef_dadx( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_coef( - func, - xmm, - vec, - chan, - 1 ); -} - -static void -emit_coef_dady( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_coef( - func, - xmm, - vec, - chan, - 2 ); -} - -/** - * Function call helpers. - */ - -static void -emit_push_gp( - struct x86_function *func ) -{ - x86_push( - func, - x86_make_reg( file_REG32, reg_AX) ); - x86_push( - func, - x86_make_reg( file_REG32, reg_CX) ); - x86_push( - func, - x86_make_reg( file_REG32, reg_DX) ); -} - -static void -x86_pop_gp( - struct x86_function *func ) -{ - /* Restore GP registers in a reverse order. - */ - x86_pop( - func, - x86_make_reg( file_REG32, reg_DX) ); - x86_pop( - func, - x86_make_reg( file_REG32, reg_CX) ); - x86_pop( - func, - x86_make_reg( file_REG32, reg_AX) ); -} - -static void -emit_func_call_dst( - struct x86_function *func, - unsigned xmm_dst, - void (PIPE_CDECL *code)() ) -{ - sse_movaps( - func, - get_temp( TEMP_R0, 0 ), - make_xmm( xmm_dst ) ); - - emit_push_gp( - func ); - - { - struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - - x86_lea( - func, - ecx, - get_temp( TEMP_R0, 0 ) ); - - x86_push( func, ecx ); - x86_mov_reg_imm( func, ecx, (unsigned long) code ); - x86_call( func, ecx ); - x86_pop(func, ecx ); - } - - - x86_pop_gp( - func ); - - sse_movaps( - func, - make_xmm( xmm_dst ), - get_temp( TEMP_R0, 0 ) ); -} - -static void -emit_func_call_dst_src( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src, - void (PIPE_CDECL *code)() ) -{ - sse_movaps( - func, - get_temp( TEMP_R0, 1 ), - make_xmm( xmm_src ) ); - - emit_func_call_dst( - func, - xmm_dst, - code ); -} - -/** - * Low-level instruction translators. - */ - -static void -emit_abs( - struct x86_function *func, - unsigned xmm ) -{ - sse_andps( - func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_7FFFFFFF_I, - TGSI_EXEC_TEMP_7FFFFFFF_C ) ); -} - -static void -emit_add( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - sse_addps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void PIPE_CDECL -cos4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = cosf( store[X + 0] ); - store[X + 1] = cosf( store[X + 1] ); - store[X + 2] = cosf( store[X + 2] ); - store[X + 3] = cosf( store[X + 3] ); -} - -static void -emit_cos( - struct x86_function *func, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_dst, - cos4f ); -} - -static void PIPE_CDECL -ex24f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = powf( 2.0f, store[X + 0] ); - store[X + 1] = powf( 2.0f, store[X + 1] ); - store[X + 2] = powf( 2.0f, store[X + 2] ); - store[X + 3] = powf( 2.0f, store[X + 3] ); -} - -static void -emit_ex2( - struct x86_function *func, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_dst, - ex24f ); -} - -static void -emit_f2it( - struct x86_function *func, - unsigned xmm ) -{ - sse2_cvttps2dq( - func, - make_xmm( xmm ), - make_xmm( xmm ) ); -} - -static void PIPE_CDECL -flr4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = floorf( store[X + 0] ); - store[X + 1] = floorf( store[X + 1] ); - store[X + 2] = floorf( store[X + 2] ); - store[X + 3] = floorf( store[X + 3] ); -} - -static void -emit_flr( - struct x86_function *func, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_dst, - flr4f ); -} - -static void PIPE_CDECL -frc4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] -= floorf( store[X + 0] ); - store[X + 1] -= floorf( store[X + 1] ); - store[X + 2] -= floorf( store[X + 2] ); - store[X + 3] -= floorf( store[X + 3] ); -} - -static void -emit_frc( - struct x86_function *func, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_dst, - frc4f ); -} - -static void PIPE_CDECL -lg24f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = LOG2( store[X + 0] ); - store[X + 1] = LOG2( store[X + 1] ); - store[X + 2] = LOG2( store[X + 2] ); - store[X + 3] = LOG2( store[X + 3] ); -} - -static void -emit_lg2( - struct x86_function *func, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_dst, - lg24f ); -} - -static void -emit_MOV( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - sse_movups( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void -emit_mul (struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src) -{ - sse_mulps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void -emit_neg( - struct x86_function *func, - unsigned xmm ) -{ - sse_xorps( - func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_80000000_I, - TGSI_EXEC_TEMP_80000000_C ) ); -} - -static void PIPE_CDECL -pow4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = powf( store[X + 0], store[X + 4] ); - store[X + 1] = powf( store[X + 1], store[X + 5] ); - store[X + 2] = powf( store[X + 2], store[X + 6] ); - store[X + 3] = powf( store[X + 3], store[X + 7] ); -} - -static void -emit_pow( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - emit_func_call_dst_src( - func, - xmm_dst, - xmm_src, - pow4f ); -} - -static void -emit_rcp ( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - /* On Intel CPUs at least, this is only accurate to 12 bits -- not - * good enough. Need to either emit a proper divide or use the - * iterative technique described below in emit_rsqrt(). - */ - sse2_rcpps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void -emit_rsqrt( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ -#if HIGH_PRECISION - /* Although rsqrtps() and rcpps() are low precision on some/all SSE - * implementations, it is possible to improve its precision at - * fairly low cost, using a newton/raphson step, as below: - * - * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) - * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] - * - * See: http://softwarecommunity.intel.com/articles/eng/1818.htm - */ - { - struct x86_reg dst = make_xmm( xmm_dst ); - struct x86_reg src = make_xmm( xmm_src ); - struct x86_reg tmp0 = make_xmm( 2 ); - struct x86_reg tmp1 = make_xmm( 3 ); - - assert( xmm_dst != xmm_src ); - assert( xmm_dst != 2 && xmm_dst != 3 ); - assert( xmm_src != 2 && xmm_src != 3 ); - - sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); - sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); - sse_rsqrtps( func, tmp1, src ); - sse_mulps( func, src, tmp1 ); - sse_mulps( func, dst, tmp1 ); - sse_mulps( func, src, tmp1 ); - sse_subps( func, tmp0, src ); - sse_mulps( func, dst, tmp0 ); - } -#else - /* On Intel CPUs at least, this is only accurate to 12 bits -- not - * good enough. - */ - sse_rsqrtps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -#endif -} - -static void -emit_setsign( - struct x86_function *func, - unsigned xmm ) -{ - sse_orps( - func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_80000000_I, - TGSI_EXEC_TEMP_80000000_C ) ); -} - -static void PIPE_CDECL -sin4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = sinf( store[X + 0] ); - store[X + 1] = sinf( store[X + 1] ); - store[X + 2] = sinf( store[X + 2] ); - store[X + 3] = sinf( store[X + 3] ); -} - -static void -emit_sin (struct x86_function *func, - unsigned xmm_dst) -{ - emit_func_call_dst( - func, - xmm_dst, - sin4f ); -} - -static void -emit_sub( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - sse_subps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -/** - * Register fetch. - */ - -static void -emit_fetch( - struct x86_function *func, - unsigned xmm, - const struct tgsi_full_src_register *reg, - const unsigned chan_index ) -{ - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - - switch( swizzle ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch( reg->SrcRegister.File ) { - case TGSI_FILE_CONSTANT: - emit_const( - func, - xmm, - reg->SrcRegister.Index, - swizzle ); - break; - - case TGSI_FILE_IMMEDIATE: - emit_immediate( - func, - xmm, - reg->SrcRegister.Index, - swizzle ); - break; - - case TGSI_FILE_INPUT: - emit_inputf( - func, - xmm, - reg->SrcRegister.Index, - swizzle ); - break; - - case TGSI_FILE_TEMPORARY: - emit_tempf( - func, - xmm, - reg->SrcRegister.Index, - swizzle ); - break; - - default: - assert( 0 ); - } - break; - - case TGSI_EXTSWIZZLE_ZERO: - emit_tempf( - func, - xmm, - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ); - break; - - case TGSI_EXTSWIZZLE_ONE: - emit_tempf( - func, - xmm, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); - break; - - default: - assert( 0 ); - } - - switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { - case TGSI_UTIL_SIGN_CLEAR: - emit_abs( func, xmm ); - break; - - case TGSI_UTIL_SIGN_SET: - emit_setsign( func, xmm ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - emit_neg( func, xmm ); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; - } -} - -#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ - emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) - -/** - * Register store. - */ - -static void -emit_store( - struct x86_function *func, - unsigned xmm, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - unsigned chan_index ) -{ - switch( reg->DstRegister.File ) { - case TGSI_FILE_OUTPUT: - emit_output( - func, - xmm, - reg->DstRegister.Index, - chan_index ); - break; - - case TGSI_FILE_TEMPORARY: - emit_temps( - func, - xmm, - reg->DstRegister.Index, - chan_index ); - break; - - case TGSI_FILE_ADDRESS: - emit_addrs( - func, - xmm, - reg->DstRegister.Index, - chan_index ); - break; - - default: - assert( 0 ); - } - - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - - case TGSI_SAT_ZERO_ONE: - /* assert( 0 ); */ - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); - break; - } -} - -#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ - emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) - -/** - * High-level instruction translators. - */ - -static void -emit_kil( - struct x86_function *func, - const struct tgsi_full_src_register *reg ) -{ - unsigned uniquemask; - unsigned registers[4]; - unsigned nextregister = 0; - unsigned firstchan = ~0; - unsigned chan_index; - - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); - - FOR_EACH_CHANNEL( chan_index ) { - unsigned swizzle; - - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle( - reg, - chan_index ); - - /* check if the component has not been already tested */ - if( !(uniquemask & (1 << swizzle)) ) { - uniquemask |= 1 << swizzle; - - /* allocate register */ - registers[chan_index] = nextregister; - emit_fetch( - func, - nextregister, - reg, - chan_index ); - nextregister++; - - /* mark the first channel used */ - if( firstchan == ~0 ) { - firstchan = chan_index; - } - } - } - - x86_push( - func, - x86_make_reg( file_REG32, reg_AX ) ); - x86_push( - func, - x86_make_reg( file_REG32, reg_DX ) ); - - FOR_EACH_CHANNEL( chan_index ) { - if( uniquemask & (1 << chan_index) ) { - sse_cmpps( - func, - make_xmm( registers[chan_index] ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ), - cc_LessThan ); - - if( chan_index == firstchan ) { - sse_pmovmskb( - func, - x86_make_reg( file_REG32, reg_AX ), - make_xmm( registers[chan_index] ) ); - } - else { - sse_pmovmskb( - func, - x86_make_reg( file_REG32, reg_DX ), - make_xmm( registers[chan_index] ) ); - x86_or( - func, - x86_make_reg( file_REG32, reg_AX ), - x86_make_reg( file_REG32, reg_DX ) ); - } - } - } - - x86_or( - func, - get_temp( - TGSI_EXEC_TEMP_KILMASK_I, - TGSI_EXEC_TEMP_KILMASK_C ), - x86_make_reg( file_REG32, reg_AX ) ); - - x86_pop( - func, - x86_make_reg( file_REG32, reg_DX ) ); - x86_pop( - func, - x86_make_reg( file_REG32, reg_AX ) ); -} - -static void -emit_setcc( - struct x86_function *func, - struct tgsi_full_instruction *inst, - enum sse_cc cc ) -{ - unsigned chan_index; - - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - sse_cmpps( - func, - make_xmm( 0 ), - make_xmm( 1 ), - cc ); - sse_andps( - func, - make_xmm( 0 ), - get_temp( - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ) ); - STORE( func, *inst, 0, 0, chan_index ); - } -} - -static void -emit_cmp( - struct x86_function *func, - struct tgsi_full_instruction *inst ) -{ - unsigned chan_index; - - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - FETCH( func, *inst, 2, 2, chan_index ); - sse_cmpps( - func, - make_xmm( 0 ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ), - cc_LessThan ); - sse_andps( - func, - make_xmm( 1 ), - make_xmm( 0 ) ); - sse_andnps( - func, - make_xmm( 0 ), - make_xmm( 2 ) ); - sse_orps( - func, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( func, *inst, 0, 0, chan_index ); - } -} - -static int -emit_instruction( - struct x86_function *func, - struct tgsi_full_instruction *inst ) -{ - unsigned chan_index; - - switch( inst->Instruction.Opcode ) { - case TGSI_OPCODE_ARL: -#if 0 - /* XXX this isn't working properly (see glean vertProg1 test) */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_f2it( func, 0 ); - STORE( func, *inst, 0, 0, chan_index ); - } -#else - return 0; -#endif - break; - - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_LIT: - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C); - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - STORE( func, *inst, 0, 0, CHAN_X ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( func, *inst, 0, 0, CHAN_W ); - } - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( func, *inst, 0, 0, CHAN_X ); - sse_maxps( - func, - make_xmm( 0 ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ) ); - STORE( func, *inst, 0, 0, CHAN_Y ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - /* XMM[1] = SrcReg[0].yyyy */ - FETCH( func, *inst, 1, 0, CHAN_Y ); - /* XMM[1] = max(XMM[1], 0) */ - sse_maxps( - func, - make_xmm( 1 ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ) ); - /* XMM[2] = SrcReg[0].wwww */ - FETCH( func, *inst, 2, 0, CHAN_W ); - /* XMM[2] = min(XMM[2], 128.0) */ - sse_minps( - func, - make_xmm( 2 ), - get_temp( - TGSI_EXEC_TEMP_128_I, - TGSI_EXEC_TEMP_128_C ) ); - /* XMM[2] = max(XMM[2], -128.0) */ - sse_maxps( - func, - make_xmm( 2 ), - get_temp( - TGSI_EXEC_TEMP_MINUS_128_I, - TGSI_EXEC_TEMP_MINUS_128_C ) ); - emit_pow( func, 1, 2 ); - FETCH( func, *inst, 0, 0, CHAN_X ); - sse_xorps( - func, - make_xmm( 2 ), - make_xmm( 2 ) ); - sse_cmpps( - func, - make_xmm( 2 ), - make_xmm( 0 ), - cc_LessThanEqual ); - sse_andps( - func, - make_xmm( 2 ), - make_xmm( 1 ) ); - STORE( func, *inst, 2, 0, CHAN_Z ); - } - } - break; - - case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_rcp( func, 0, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_rsqrt( func, 1, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 1, 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXP: - return 0; - break; - - case TGSI_OPCODE_LOG: - return 0; - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_mul( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_add( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_mul( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Y ); - FETCH( func, *inst, 2, 1, CHAN_Y ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Z ); - FETCH( func, *inst, 2, 1, CHAN_Z ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_mul( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Y ); - FETCH( func, *inst, 2, 1, CHAN_Y ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Z ); - FETCH( func, *inst, 2, 1, CHAN_Z ); - emit_mul(func, 1, 2 ); - emit_add(func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_W ); - FETCH( func, *inst, 2, 1, CHAN_W ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DST: - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_X ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); - FETCH( func, *inst, 1, 1, CHAN_Y ); - emit_mul( func, 0, 1 ); - STORE( func, *inst, 0, 0, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - FETCH( func, *inst, 0, 0, CHAN_Z ); - STORE( func, *inst, 0, 0, CHAN_Z ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - FETCH( func, *inst, 0, 1, CHAN_W ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - sse_minps( - func, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - sse_maxps( - func, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - emit_setcc( func, inst, cc_LessThan ); - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - emit_setcc( func, inst, cc_NotLessThan ); - break; - - case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - FETCH( func, *inst, 2, 2, chan_index ); - emit_mul( func, 0, 1 ); - emit_add( func, 0, 2 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_sub( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - FETCH( func, *inst, 2, 2, chan_index ); - emit_sub( func, 1, 2 ); - emit_mul( func, 0, 1 ); - emit_add( func, 0, 2 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_CND: - return 0; - break; - - case TGSI_OPCODE_CND0: - return 0; - break; - - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ - return 0; - break; - - case TGSI_OPCODE_INDEX: - return 0; - break; - - case TGSI_OPCODE_NEGATE: - return 0; - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_frc( func, 0 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_CLAMP: - return 0; - break; - - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_flr( func, 0 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_ROUND: - return 0; - break; - - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_ex2( func, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_lg2( func, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_pow( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( func, *inst, 1, 1, CHAN_Z ); - FETCH( func, *inst, 3, 0, CHAN_Z ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); - FETCH( func, *inst, 4, 1, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_MOV( func, 2, 0 ); - emit_mul( func, 2, 1 ); - emit_MOV( func, 5, 3 ); - emit_mul( func, 5, 4 ); - emit_sub( func, 2, 5 ); - STORE( func, *inst, 2, 0, CHAN_X ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 2, 1, CHAN_X ); - FETCH( func, *inst, 5, 0, CHAN_X ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - emit_mul( func, 3, 2 ); - emit_mul( func, 1, 5 ); - emit_sub( func, 3, 1 ); - STORE( func, *inst, 3, 0, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_mul( func, 5, 4 ); - emit_mul( func, 0, 2 ); - emit_sub( func, 5, 0 ); - STORE( func, *inst, 5, 0, CHAN_Z ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MULTIPLYMATRIX: - return 0; - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_abs( func, 0) ; - - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_RCC: - return 0; - break; - - case TGSI_OPCODE_DPH: - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_mul( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Y ); - FETCH( func, *inst, 2, 1, CHAN_Y ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Z ); - FETCH( func, *inst, 2, 1, CHAN_Z ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 1, CHAN_W ); - emit_add( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_COS: - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDX: - return 0; - break; - - case TGSI_OPCODE_DDY: - return 0; - break; - - case TGSI_OPCODE_KIL: - emit_kil( func, &inst->FullSrcRegisters[0] ); - break; - - case TGSI_OPCODE_PK2H: - return 0; - break; - - case TGSI_OPCODE_PK2US: - return 0; - break; - - case TGSI_OPCODE_PK4B: - return 0; - break; - - case TGSI_OPCODE_PK4UB: - return 0; - break; - - case TGSI_OPCODE_RFL: - return 0; - break; - - case TGSI_OPCODE_SEQ: - return 0; - break; - - case TGSI_OPCODE_SFL: - return 0; - break; - - case TGSI_OPCODE_SGT: - return 0; - break; - - case TGSI_OPCODE_SIN: - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLE: - return 0; - break; - - case TGSI_OPCODE_SNE: - return 0; - break; - - case TGSI_OPCODE_STR: - return 0; - break; - - case TGSI_OPCODE_TEX: - if (0) { - /* Disable dummy texture code: - */ - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - } - else { - return 0; - } - break; - - case TGSI_OPCODE_TXD: - return 0; - break; - - case TGSI_OPCODE_UP2H: - return 0; - break; - - case TGSI_OPCODE_UP2US: - return 0; - break; - - case TGSI_OPCODE_UP4B: - return 0; - break; - - case TGSI_OPCODE_UP4UB: - return 0; - break; - - case TGSI_OPCODE_X2D: - return 0; - break; - - case TGSI_OPCODE_ARA: - return 0; - break; - - case TGSI_OPCODE_ARR: - return 0; - break; - - case TGSI_OPCODE_BRA: - return 0; - break; - - case TGSI_OPCODE_CAL: - return 0; - break; - - case TGSI_OPCODE_RET: - emit_ret( func ); - break; - - case TGSI_OPCODE_END: - break; - - case TGSI_OPCODE_SSG: - return 0; - break; - - case TGSI_OPCODE_CMP: - emit_cmp (func, inst); - break; - - case TGSI_OPCODE_SCS: - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0 ); - STORE( func, *inst, 0, 0, CHAN_X ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0 ); - STORE( func, *inst, 0, 0, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ); - STORE( func, *inst, 0, 0, CHAN_Z ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_TXB: - return 0; - break; - - case TGSI_OPCODE_NRM: - return 0; - break; - - case TGSI_OPCODE_DIV: - return 0; - break; - - case TGSI_OPCODE_DP2: - return 0; - break; - - case TGSI_OPCODE_TXL: - return 0; - break; - - case TGSI_OPCODE_BRK: - return 0; - break; - - case TGSI_OPCODE_IF: - return 0; - break; - - case TGSI_OPCODE_LOOP: - return 0; - break; - - case TGSI_OPCODE_REP: - return 0; - break; - - case TGSI_OPCODE_ELSE: - return 0; - break; - - case TGSI_OPCODE_ENDIF: - return 0; - break; - - case TGSI_OPCODE_ENDLOOP: - return 0; - break; - - case TGSI_OPCODE_ENDREP: - return 0; - break; - - case TGSI_OPCODE_PUSHA: - return 0; - break; - - case TGSI_OPCODE_POPA: - return 0; - break; - - case TGSI_OPCODE_CEIL: - return 0; - break; - - case TGSI_OPCODE_I2F: - return 0; - break; - - case TGSI_OPCODE_NOT: - return 0; - break; - - case TGSI_OPCODE_TRUNC: - return 0; - break; - - case TGSI_OPCODE_SHL: - return 0; - break; - - case TGSI_OPCODE_SHR: - return 0; - break; - - case TGSI_OPCODE_AND: - return 0; - break; - - case TGSI_OPCODE_OR: - return 0; - break; - - case TGSI_OPCODE_MOD: - return 0; - break; - - case TGSI_OPCODE_XOR: - return 0; - break; - - case TGSI_OPCODE_SAD: - return 0; - break; - - case TGSI_OPCODE_TXF: - return 0; - break; - - case TGSI_OPCODE_TXQ: - return 0; - break; - - case TGSI_OPCODE_CONT: - return 0; - break; - - case TGSI_OPCODE_EMIT: - return 0; - break; - - case TGSI_OPCODE_ENDPRIM: - return 0; - break; - - default: - return 0; - } - - return 1; -} - -static void -emit_declaration( - struct x86_function *func, - struct tgsi_full_declaration *decl ) -{ - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - unsigned i, j; - - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; - mask = decl->Declaration.UsageMask; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - emit_coef_a0( func, 0, i, j ); - emit_inputs( func, 0, i, j ); - break; - - case TGSI_INTERPOLATE_LINEAR: - emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); - emit_coef_dadx( func, 1, i, j ); - emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); - emit_coef_dady( func, 3, i, j ); - emit_mul( func, 0, 1 ); /* x * dadx */ - emit_coef_a0( func, 4, i, j ); - emit_mul( func, 2, 3 ); /* y * dady */ - emit_add( func, 0, 4 ); /* x * dadx + a0 */ - emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ - emit_inputs( func, 0, i, j ); - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); - emit_coef_dadx( func, 1, i, j ); - emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); - emit_coef_dady( func, 3, i, j ); - emit_mul( func, 0, 1 ); /* x * dadx */ - emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); - emit_coef_a0( func, 5, i, j ); - emit_rcp( func, 4, 4 ); /* 1.0 / w */ - emit_mul( func, 2, 3 ); /* y * dady */ - emit_add( func, 0, 5 ); /* x * dadx + a0 */ - emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ - emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ - emit_inputs( func, 0, i, j ); - break; - - default: - assert( 0 ); - break; - } - } - } - } - } -} - -static void aos_to_soa( struct x86_function *func, - uint arg_aos, - uint arg_soa, - uint arg_num, - uint arg_stride ) -{ - struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); - struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); - struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); - struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); - int inner_loop; - - - /* Save EBX */ - x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); - - x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); - x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); - x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); - x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); - - /* do */ - inner_loop = x86_get_label( func ); - { - x86_push( func, aos_input ); - sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); - sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, stride ); - sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); - sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, stride ); - sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); - sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, stride ); - sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); - sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); - x86_pop( func, aos_input ); - - sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); - sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); - sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); - sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); - sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); - sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); - - sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); - sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); - sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); - sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); - - /* Advance to next input */ - x86_lea( func, aos_input, x86_make_disp(aos_input, 16) ); - x86_lea( func, soa_input, x86_make_disp(soa_input, 64) ); - } - /* while --num_inputs */ - x86_dec( func, num_inputs ); - x86_jcc( func, cc_NE, inner_loop ); - - /* Restore EBX */ - x86_pop( func, aos_input ); -} - -static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) -{ - struct x86_reg soa_output; - struct x86_reg aos_output; - struct x86_reg num_outputs; - struct x86_reg temp; - int inner_loop; - - soa_output = x86_make_reg( file_REG32, reg_AX ); - aos_output = x86_make_reg( file_REG32, reg_BX ); - num_outputs = x86_make_reg( file_REG32, reg_CX ); - temp = x86_make_reg( file_REG32, reg_DX ); - - /* Save EBX */ - x86_push( func, aos_output ); - - x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); - x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); - x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); - - /* do */ - inner_loop = x86_get_label( func ); - { - sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); - sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); - sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); - sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); - - sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); - sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); - sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); - sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); - sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); - sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); - - x86_mov( func, temp, x86_fn_arg( func, stride ) ); - x86_push( func, aos_output ); - sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); - sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); - x86_add( func, aos_output, temp ); - sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); - sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); - x86_add( func, aos_output, temp ); - sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); - sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); - x86_add( func, aos_output, temp ); - sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); - sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); - x86_pop( func, aos_output ); - - /* Advance to next output */ - x86_lea( func, aos_output, x86_make_disp(aos_output, 16) ); - x86_lea( func, soa_output, x86_make_disp(soa_output, 64) ); - } - /* while --num_outputs */ - x86_dec( func, num_outputs ); - x86_jcc( func, cc_NE, inner_loop ); - - /* Restore EBX */ - x86_pop( func, aos_output ); -} - -/** - * Translate a TGSI vertex/fragment shader to SSE2 code. - * Slightly different things are done for vertex vs. fragment shaders. - * - * Note that fragment shaders are responsible for interpolating shader - * inputs. Because on x86 we have only 4 GP registers, and here we - * have 5 shader arguments (input, output, const, temp and coef), the - * code is split into two phases -- DECLARATION and INSTRUCTION phase. - * GP register holding the output argument is aliased with the coeff - * argument, as outputs are not needed in the DECLARATION phase. - * - * \param tokens the TGSI input shader - * \param func the output SSE code/function - * \param immediates buffer to place immediates, later passed to SSE func - * \param return 1 for success, 0 if translation failed - */ -unsigned -tgsi_emit_sse2( - const struct tgsi_token *tokens, - struct x86_function *func, - float (*immediates)[4], - boolean do_swizzles ) -{ - struct tgsi_parse_context parse; - boolean instruction_phase = FALSE; - unsigned ok = 1; - uint num_immediates = 0; - - func->csr = func->store; - - tgsi_parse_init( &parse, tokens ); - - /* Can't just use EDI, EBX without save/restoring them: - */ - x86_push( - func, - get_immediate_base() ); - - x86_push( - func, - get_temp_base() ); - - - /* - * Different function args for vertex/fragment shaders: - */ - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - /* DECLARATION phase, do not load output argument. */ - x86_mov( - func, - get_input_base(), - x86_fn_arg( func, 1 ) ); - /* skipping outputs argument here */ - x86_mov( - func, - get_const_base(), - x86_fn_arg( func, 3 ) ); - x86_mov( - func, - get_temp_base(), - x86_fn_arg( func, 4 ) ); - x86_mov( - func, - get_coef_base(), - x86_fn_arg( func, 5 ) ); - x86_mov( - func, - get_immediate_base(), - x86_fn_arg( func, 6 ) ); - } - else { - assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); - - if (do_swizzles) - aos_to_soa( func, - 6, /* aos_input */ - 1, /* machine->input */ - 7, /* num_inputs */ - 8 ); /* input_stride */ - - x86_mov( - func, - get_input_base(), - x86_fn_arg( func, 1 ) ); - x86_mov( - func, - get_output_base(), - x86_fn_arg( func, 2 ) ); - x86_mov( - func, - get_const_base(), - x86_fn_arg( func, 3 ) ); - x86_mov( - func, - get_temp_base(), - x86_fn_arg( func, 4 ) ); - x86_mov( - func, - get_immediate_base(), - x86_fn_arg( func, 5 ) ); - } - - while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - emit_declaration( - func, - &parse.FullToken.FullDeclaration ); - } - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - if( !instruction_phase ) { - /* INSTRUCTION phase, overwrite coeff with output. */ - instruction_phase = TRUE; - x86_mov( - func, - get_output_base(), - x86_fn_arg( func, 2 ) ); - } - } - - ok = emit_instruction( - func, - &parse.FullToken.FullInstruction ); - - if (!ok) { - debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", - parse.FullToken.FullInstruction.Instruction.Opcode, - parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? - "vertex shader" : "fragment shader"); - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* simply copy the immediate values into the next immediates[] slot */ - { - const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; - uint i; - assert(size <= 4); - assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); - for( i = 0; i < size; i++ ) { - immediates[num_immediates][i] = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; - } -#if 0 - debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", - num_immediates, - immediates[num_immediates][0], - immediates[num_immediates][1], - immediates[num_immediates][2], - immediates[num_immediates][3]); -#endif - num_immediates++; - } - break; - - default: - ok = 0; - assert( 0 ); - } - } - - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { - if (do_swizzles) - soa_to_aos( func, 9, 2, 10, 11 ); - } - - /* Can't just use EBX, EDI without save/restoring them: - */ - x86_pop( - func, - get_temp_base() ); - - x86_pop( - func, - get_immediate_base() ); - - emit_ret( func ); - - tgsi_parse_free( &parse ); - - return ok; -} - -#endif /* PIPE_ARCH_X86 */ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h deleted file mode 100755 index af838b2a25b..00000000000 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ /dev/null @@ -1,49 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_SSE2_H -#define TGSI_SSE2_H - -#if defined __cplusplus -extern "C" { -#endif - -struct tgsi_token; -struct x86_function; - -unsigned -tgsi_emit_sse2( - const struct tgsi_token *tokens, - struct x86_function *function, - float (*immediates)[4], - boolean do_swizzles ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_SSE2_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c new file mode 100644 index 00000000000..742ef14c352 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -0,0 +1,1324 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_build.h" +#include "tgsi_parse.h" + +/* + * version + */ + +struct tgsi_version +tgsi_build_version( void ) +{ + struct tgsi_version version; + + version.MajorVersion = 1; + version.MinorVersion = 1; + version.Padding = 0; + + return version; +} + +/* + * header + */ + +struct tgsi_header +tgsi_build_header( void ) +{ + struct tgsi_header header; + + header.HeaderSize = 1; + header.BodySize = 0; + + return header; +} + +static void +header_headersize_grow( struct tgsi_header *header ) +{ + assert( header->HeaderSize < 0xFF ); + assert( header->BodySize == 0 ); + + header->HeaderSize++; +} + +static void +header_bodysize_grow( struct tgsi_header *header ) +{ + assert( header->BodySize < 0xFFFFFF ); + + header->BodySize++; +} + +struct tgsi_processor +tgsi_default_processor( void ) +{ + struct tgsi_processor processor; + + processor.Processor = TGSI_PROCESSOR_FRAGMENT; + processor.Padding = 0; + + return processor; +} + +struct tgsi_processor +tgsi_build_processor( + unsigned type, + struct tgsi_header *header ) +{ + struct tgsi_processor processor; + + processor = tgsi_default_processor(); + processor.Processor = type; + + header_headersize_grow( header ); + + return processor; +} + +/* + * declaration + */ + +struct tgsi_declaration +tgsi_default_declaration( void ) +{ + struct tgsi_declaration declaration; + + declaration.Type = TGSI_TOKEN_TYPE_DECLARATION; + declaration.Size = 1; + declaration.File = TGSI_FILE_NULL; + declaration.UsageMask = TGSI_WRITEMASK_XYZW; + declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; + declaration.Semantic = 0; + declaration.Padding = 0; + declaration.Extended = 0; + + return declaration; +} + +struct tgsi_declaration +tgsi_build_declaration( + unsigned file, + unsigned usage_mask, + unsigned interpolate, + unsigned semantic, + struct tgsi_header *header ) +{ + struct tgsi_declaration declaration; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE ); + + declaration = tgsi_default_declaration(); + declaration.File = file; + declaration.UsageMask = usage_mask; + declaration.Interpolate = interpolate; + declaration.Semantic = semantic; + + header_bodysize_grow( header ); + + return declaration; +} + +static void +declaration_grow( + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + assert( declaration->Size < 0xFF ); + + declaration->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_full_declaration +tgsi_default_full_declaration( void ) +{ + struct tgsi_full_declaration full_declaration; + + full_declaration.Declaration = tgsi_default_declaration(); + full_declaration.DeclarationRange = tgsi_default_declaration_range(); + full_declaration.Semantic = tgsi_default_declaration_semantic(); + + return full_declaration; +} + +unsigned +tgsi_build_full_declaration( + const struct tgsi_full_declaration *full_decl, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0; + struct tgsi_declaration *declaration; + struct tgsi_declaration_range *dr; + + if( maxsize <= size ) + return 0; + declaration = (struct tgsi_declaration *) &tokens[size]; + size++; + + *declaration = tgsi_build_declaration( + full_decl->Declaration.File, + full_decl->Declaration.UsageMask, + full_decl->Declaration.Interpolate, + full_decl->Declaration.Semantic, + header ); + + if (maxsize <= size) + return 0; + dr = (struct tgsi_declaration_range *) &tokens[size]; + size++; + + *dr = tgsi_build_declaration_range( + full_decl->DeclarationRange.First, + full_decl->DeclarationRange.Last, + declaration, + header ); + + if( full_decl->Declaration.Semantic ) { + struct tgsi_declaration_semantic *ds; + + if( maxsize <= size ) + return 0; + ds = (struct tgsi_declaration_semantic *) &tokens[size]; + size++; + + *ds = tgsi_build_declaration_semantic( + full_decl->Semantic.SemanticName, + full_decl->Semantic.SemanticIndex, + declaration, + header ); + } + + return size; +} + +struct tgsi_declaration_range +tgsi_default_declaration_range( void ) +{ + struct tgsi_declaration_range dr; + + dr.First = 0; + dr.Last = 0; + + return dr; +} + +struct tgsi_declaration_range +tgsi_build_declaration_range( + unsigned first, + unsigned last, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_range declaration_range; + + assert( last >= first ); + assert( last <= 0xFFFF ); + + declaration_range = tgsi_default_declaration_range(); + declaration_range.First = first; + declaration_range.Last = last; + + declaration_grow( declaration, header ); + + return declaration_range; +} + +struct tgsi_declaration_semantic +tgsi_default_declaration_semantic( void ) +{ + struct tgsi_declaration_semantic ds; + + ds.SemanticName = TGSI_SEMANTIC_POSITION; + ds.SemanticIndex = 0; + ds.Padding = 0; + + return ds; +} + +struct tgsi_declaration_semantic +tgsi_build_declaration_semantic( + unsigned semantic_name, + unsigned semantic_index, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_semantic ds; + + assert( semantic_name <= TGSI_SEMANTIC_COUNT ); + assert( semantic_index <= 0xFFFF ); + + ds = tgsi_default_declaration_semantic(); + ds.SemanticName = semantic_name; + ds.SemanticIndex = semantic_index; + + declaration_grow( declaration, header ); + + return ds; +} + +/* + * immediate + */ + +struct tgsi_immediate +tgsi_default_immediate( void ) +{ + struct tgsi_immediate immediate; + + immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE; + immediate.Size = 1; + immediate.DataType = TGSI_IMM_FLOAT32; + immediate.Padding = 0; + immediate.Extended = 0; + + return immediate; +} + +struct tgsi_immediate +tgsi_build_immediate( + struct tgsi_header *header ) +{ + struct tgsi_immediate immediate; + + immediate = tgsi_default_immediate(); + + header_bodysize_grow( header ); + + return immediate; +} + +struct tgsi_full_immediate +tgsi_default_full_immediate( void ) +{ + struct tgsi_full_immediate fullimm; + + fullimm.Immediate = tgsi_default_immediate(); + fullimm.u.Pointer = (void *) 0; + + return fullimm; +} + +static void +immediate_grow( + struct tgsi_immediate *immediate, + struct tgsi_header *header ) +{ + assert( immediate->Size < 0xFF ); + + immediate->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_immediate_float32 +tgsi_build_immediate_float32( + float value, + struct tgsi_immediate *immediate, + struct tgsi_header *header ) +{ + struct tgsi_immediate_float32 immediate_float32; + + immediate_float32.Float = value; + + immediate_grow( immediate, header ); + + return immediate_float32; +} + +unsigned +tgsi_build_full_immediate( + const struct tgsi_full_immediate *full_imm, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0, i; + struct tgsi_immediate *immediate; + + if( maxsize <= size ) + return 0; + immediate = (struct tgsi_immediate *) &tokens[size]; + size++; + + *immediate = tgsi_build_immediate( header ); + + for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) { + struct tgsi_immediate_float32 *if32; + + if( maxsize <= size ) + return 0; + if32 = (struct tgsi_immediate_float32 *) &tokens[size]; + size++; + + *if32 = tgsi_build_immediate_float32( + full_imm->u.ImmediateFloat32[i].Float, + immediate, + header ); + } + + return size; +} + +/* + * instruction + */ + +struct tgsi_instruction +tgsi_default_instruction( void ) +{ + struct tgsi_instruction instruction; + + instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; + instruction.Size = 1; + instruction.Opcode = TGSI_OPCODE_MOV; + instruction.Saturate = TGSI_SAT_NONE; + instruction.NumDstRegs = 1; + instruction.NumSrcRegs = 1; + instruction.Padding = 0; + instruction.Extended = 0; + + return instruction; +} + +struct tgsi_instruction +tgsi_build_instruction( + unsigned opcode, + unsigned saturate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header ) +{ + struct tgsi_instruction instruction; + + assert (opcode <= TGSI_OPCODE_LAST); + assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE); + assert (num_dst_regs <= 3); + assert (num_src_regs <= 15); + + instruction = tgsi_default_instruction(); + instruction.Opcode = opcode; + instruction.Saturate = saturate; + instruction.NumDstRegs = num_dst_regs; + instruction.NumSrcRegs = num_src_regs; + + header_bodysize_grow( header ); + + return instruction; +} + +static void +instruction_grow( + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + assert (instruction->Size < 0xFF); + + instruction->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_full_instruction +tgsi_default_full_instruction( void ) +{ + struct tgsi_full_instruction full_instruction; + unsigned i; + + full_instruction.Instruction = tgsi_default_instruction(); + full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv(); + full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); + full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); + for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { + full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); + } + for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) { + full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register(); + } + + return full_instruction; +} + +unsigned +tgsi_build_full_instruction( + const struct tgsi_full_instruction *full_inst, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0; + unsigned i; + struct tgsi_instruction *instruction; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + instruction = (struct tgsi_instruction *) &tokens[size]; + size++; + + *instruction = tgsi_build_instruction( + full_inst->Instruction.Opcode, + full_inst->Instruction.Saturate, + full_inst->Instruction.NumDstRegs, + full_inst->Instruction.NumSrcRegs, + header ); + prev_token = (struct tgsi_token *) instruction; + + if( tgsi_compare_instruction_ext_nv( + full_inst->InstructionExtNv, + tgsi_default_instruction_ext_nv() ) ) { + struct tgsi_instruction_ext_nv *instruction_ext_nv; + + if( maxsize <= size ) + return 0; + instruction_ext_nv = + (struct tgsi_instruction_ext_nv *) &tokens[size]; + size++; + + *instruction_ext_nv = tgsi_build_instruction_ext_nv( + full_inst->InstructionExtNv.Precision, + full_inst->InstructionExtNv.CondDstIndex, + full_inst->InstructionExtNv.CondFlowIndex, + full_inst->InstructionExtNv.CondMask, + full_inst->InstructionExtNv.CondSwizzleX, + full_inst->InstructionExtNv.CondSwizzleY, + full_inst->InstructionExtNv.CondSwizzleZ, + full_inst->InstructionExtNv.CondSwizzleW, + full_inst->InstructionExtNv.CondDstUpdate, + full_inst->InstructionExtNv.CondFlowEnable, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_nv; + } + + if( tgsi_compare_instruction_ext_label( + full_inst->InstructionExtLabel, + tgsi_default_instruction_ext_label() ) ) { + struct tgsi_instruction_ext_label *instruction_ext_label; + + if( maxsize <= size ) + return 0; + instruction_ext_label = + (struct tgsi_instruction_ext_label *) &tokens[size]; + size++; + + *instruction_ext_label = tgsi_build_instruction_ext_label( + full_inst->InstructionExtLabel.Label, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_label; + } + + if( tgsi_compare_instruction_ext_texture( + full_inst->InstructionExtTexture, + tgsi_default_instruction_ext_texture() ) ) { + struct tgsi_instruction_ext_texture *instruction_ext_texture; + + if( maxsize <= size ) + return 0; + instruction_ext_texture = + (struct tgsi_instruction_ext_texture *) &tokens[size]; + size++; + + *instruction_ext_texture = tgsi_build_instruction_ext_texture( + full_inst->InstructionExtTexture.Texture, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_texture; + } + + for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { + const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; + struct tgsi_dst_register *dst_register; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + dst_register = (struct tgsi_dst_register *) &tokens[size]; + size++; + + *dst_register = tgsi_build_dst_register( + reg->DstRegister.File, + reg->DstRegister.WriteMask, + reg->DstRegister.Index, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register; + + if( tgsi_compare_dst_register_ext_concode( + reg->DstRegisterExtConcode, + tgsi_default_dst_register_ext_concode() ) ) { + struct tgsi_dst_register_ext_concode *dst_register_ext_concode; + + if( maxsize <= size ) + return 0; + dst_register_ext_concode = + (struct tgsi_dst_register_ext_concode *) &tokens[size]; + size++; + + *dst_register_ext_concode = tgsi_build_dst_register_ext_concode( + reg->DstRegisterExtConcode.CondMask, + reg->DstRegisterExtConcode.CondSwizzleX, + reg->DstRegisterExtConcode.CondSwizzleY, + reg->DstRegisterExtConcode.CondSwizzleZ, + reg->DstRegisterExtConcode.CondSwizzleW, + reg->DstRegisterExtConcode.CondSrcIndex, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register_ext_concode; + } + + if( tgsi_compare_dst_register_ext_modulate( + reg->DstRegisterExtModulate, + tgsi_default_dst_register_ext_modulate() ) ) { + struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate; + + if( maxsize <= size ) + return 0; + dst_register_ext_modulate = + (struct tgsi_dst_register_ext_modulate *) &tokens[size]; + size++; + + *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate( + reg->DstRegisterExtModulate.Modulate, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register_ext_modulate; + } + } + + for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { + const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i]; + struct tgsi_src_register *src_register; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + src_register = (struct tgsi_src_register *) &tokens[size]; + size++; + + *src_register = tgsi_build_src_register( + reg->SrcRegister.File, + reg->SrcRegister.SwizzleX, + reg->SrcRegister.SwizzleY, + reg->SrcRegister.SwizzleZ, + reg->SrcRegister.SwizzleW, + reg->SrcRegister.Negate, + reg->SrcRegister.Indirect, + reg->SrcRegister.Dimension, + reg->SrcRegister.Index, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register; + + if( tgsi_compare_src_register_ext_swz( + reg->SrcRegisterExtSwz, + tgsi_default_src_register_ext_swz() ) ) { + struct tgsi_src_register_ext_swz *src_register_ext_swz; + + /* Use of the extended swizzle requires the simple swizzle to be identity. + */ + assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X ); + assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y ); + assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z ); + assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W ); + assert( reg->SrcRegister.Negate == FALSE ); + + if( maxsize <= size ) + return 0; + src_register_ext_swz = + (struct tgsi_src_register_ext_swz *) &tokens[size]; + size++; + + *src_register_ext_swz = tgsi_build_src_register_ext_swz( + reg->SrcRegisterExtSwz.ExtSwizzleX, + reg->SrcRegisterExtSwz.ExtSwizzleY, + reg->SrcRegisterExtSwz.ExtSwizzleZ, + reg->SrcRegisterExtSwz.ExtSwizzleW, + reg->SrcRegisterExtSwz.NegateX, + reg->SrcRegisterExtSwz.NegateY, + reg->SrcRegisterExtSwz.NegateZ, + reg->SrcRegisterExtSwz.NegateW, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register_ext_swz; + } + + if( tgsi_compare_src_register_ext_mod( + reg->SrcRegisterExtMod, + tgsi_default_src_register_ext_mod() ) ) { + struct tgsi_src_register_ext_mod *src_register_ext_mod; + + if( maxsize <= size ) + return 0; + src_register_ext_mod = + (struct tgsi_src_register_ext_mod *) &tokens[size]; + size++; + + *src_register_ext_mod = tgsi_build_src_register_ext_mod( + reg->SrcRegisterExtMod.Complement, + reg->SrcRegisterExtMod.Bias, + reg->SrcRegisterExtMod.Scale2X, + reg->SrcRegisterExtMod.Absolute, + reg->SrcRegisterExtMod.Negate, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register_ext_mod; + } + + if( reg->SrcRegister.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->SrcRegisterInd.File, + reg->SrcRegisterInd.SwizzleX, + reg->SrcRegisterInd.SwizzleY, + reg->SrcRegisterInd.SwizzleZ, + reg->SrcRegisterInd.SwizzleW, + reg->SrcRegisterInd.Negate, + reg->SrcRegisterInd.Indirect, + reg->SrcRegisterInd.Dimension, + reg->SrcRegisterInd.Index, + instruction, + header ); + } + + if( reg->SrcRegister.Dimension ) { + struct tgsi_dimension *dim; + + assert( !reg->SrcRegisterDim.Dimension ); + + if( maxsize <= size ) + return 0; + dim = (struct tgsi_dimension *) &tokens[size]; + size++; + + *dim = tgsi_build_dimension( + reg->SrcRegisterDim.Indirect, + reg->SrcRegisterDim.Index, + instruction, + header ); + + if( reg->SrcRegisterDim.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->SrcRegisterDimInd.File, + reg->SrcRegisterDimInd.SwizzleX, + reg->SrcRegisterDimInd.SwizzleY, + reg->SrcRegisterDimInd.SwizzleZ, + reg->SrcRegisterDimInd.SwizzleW, + reg->SrcRegisterDimInd.Negate, + reg->SrcRegisterDimInd.Indirect, + reg->SrcRegisterDimInd.Dimension, + reg->SrcRegisterDimInd.Index, + instruction, + header ); + } + } + } + + return size; +} + +struct tgsi_instruction_ext_nv +tgsi_default_instruction_ext_nv( void ) +{ + struct tgsi_instruction_ext_nv instruction_ext_nv; + + instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV; + instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT; + instruction_ext_nv.CondDstIndex = 0; + instruction_ext_nv.CondFlowIndex = 0; + instruction_ext_nv.CondMask = TGSI_CC_TR; + instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X; + instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y; + instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z; + instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W; + instruction_ext_nv.CondDstUpdate = 0; + instruction_ext_nv.CondFlowEnable = 0; + instruction_ext_nv.Padding = 0; + instruction_ext_nv.Extended = 0; + + return instruction_ext_nv; +} + +union token_u32 +{ + unsigned u32; +}; + +unsigned +tgsi_compare_instruction_ext_nv( + struct tgsi_instruction_ext_nv a, + struct tgsi_instruction_ext_nv b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_instruction_ext_nv +tgsi_build_instruction_ext_nv( + unsigned precision, + unsigned cond_dst_index, + unsigned cond_flow_index, + unsigned cond_mask, + unsigned cond_swizzle_x, + unsigned cond_swizzle_y, + unsigned cond_swizzle_z, + unsigned cond_swizzle_w, + unsigned cond_dst_update, + unsigned cond_flow_update, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_nv instruction_ext_nv; + + instruction_ext_nv = tgsi_default_instruction_ext_nv(); + instruction_ext_nv.Precision = precision; + instruction_ext_nv.CondDstIndex = cond_dst_index; + instruction_ext_nv.CondFlowIndex = cond_flow_index; + instruction_ext_nv.CondMask = cond_mask; + instruction_ext_nv.CondSwizzleX = cond_swizzle_x; + instruction_ext_nv.CondSwizzleY = cond_swizzle_y; + instruction_ext_nv.CondSwizzleZ = cond_swizzle_z; + instruction_ext_nv.CondSwizzleW = cond_swizzle_w; + instruction_ext_nv.CondDstUpdate = cond_dst_update; + instruction_ext_nv.CondFlowEnable = cond_flow_update; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_nv; +} + +struct tgsi_instruction_ext_label +tgsi_default_instruction_ext_label( void ) +{ + struct tgsi_instruction_ext_label instruction_ext_label; + + instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; + instruction_ext_label.Label = 0; + instruction_ext_label.Padding = 0; + instruction_ext_label.Extended = 0; + + return instruction_ext_label; +} + +unsigned +tgsi_compare_instruction_ext_label( + struct tgsi_instruction_ext_label a, + struct tgsi_instruction_ext_label b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_instruction_ext_label +tgsi_build_instruction_ext_label( + unsigned label, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_label instruction_ext_label; + + instruction_ext_label = tgsi_default_instruction_ext_label(); + instruction_ext_label.Label = label; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_label; +} + +struct tgsi_instruction_ext_texture +tgsi_default_instruction_ext_texture( void ) +{ + struct tgsi_instruction_ext_texture instruction_ext_texture; + + instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; + instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN; + instruction_ext_texture.Padding = 0; + instruction_ext_texture.Extended = 0; + + return instruction_ext_texture; +} + +unsigned +tgsi_compare_instruction_ext_texture( + struct tgsi_instruction_ext_texture a, + struct tgsi_instruction_ext_texture b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_instruction_ext_texture +tgsi_build_instruction_ext_texture( + unsigned texture, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_texture instruction_ext_texture; + + instruction_ext_texture = tgsi_default_instruction_ext_texture(); + instruction_ext_texture.Texture = texture; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_texture; +} + +struct tgsi_src_register +tgsi_default_src_register( void ) +{ + struct tgsi_src_register src_register; + + src_register.File = TGSI_FILE_NULL; + src_register.SwizzleX = TGSI_SWIZZLE_X; + src_register.SwizzleY = TGSI_SWIZZLE_Y; + src_register.SwizzleZ = TGSI_SWIZZLE_Z; + src_register.SwizzleW = TGSI_SWIZZLE_W; + src_register.Negate = 0; + src_register.Indirect = 0; + src_register.Dimension = 0; + src_register.Index = 0; + src_register.Extended = 0; + + return src_register; +} + +struct tgsi_src_register +tgsi_build_src_register( + unsigned file, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + unsigned negate, + unsigned indirect, + unsigned dimension, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register src_register; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( swizzle_x <= TGSI_SWIZZLE_W ); + assert( swizzle_y <= TGSI_SWIZZLE_W ); + assert( swizzle_z <= TGSI_SWIZZLE_W ); + assert( swizzle_w <= TGSI_SWIZZLE_W ); + assert( negate <= 1 ); + assert( index >= -0x8000 && index <= 0x7FFF ); + + src_register = tgsi_default_src_register(); + src_register.File = file; + src_register.SwizzleX = swizzle_x; + src_register.SwizzleY = swizzle_y; + src_register.SwizzleZ = swizzle_z; + src_register.SwizzleW = swizzle_w; + src_register.Negate = negate; + src_register.Indirect = indirect; + src_register.Dimension = dimension; + src_register.Index = index; + + instruction_grow( instruction, header ); + + return src_register; +} + +struct tgsi_full_src_register +tgsi_default_full_src_register( void ) +{ + struct tgsi_full_src_register full_src_register; + + full_src_register.SrcRegister = tgsi_default_src_register(); + full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz(); + full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod(); + full_src_register.SrcRegisterInd = tgsi_default_src_register(); + full_src_register.SrcRegisterDim = tgsi_default_dimension(); + full_src_register.SrcRegisterDimInd = tgsi_default_src_register(); + + return full_src_register; +} + +struct tgsi_src_register_ext_swz +tgsi_default_src_register_ext_swz( void ) +{ + struct tgsi_src_register_ext_swz src_register_ext_swz; + + src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ; + src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X; + src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y; + src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z; + src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W; + src_register_ext_swz.NegateX = 0; + src_register_ext_swz.NegateY = 0; + src_register_ext_swz.NegateZ = 0; + src_register_ext_swz.NegateW = 0; + src_register_ext_swz.Padding = 0; + src_register_ext_swz.Extended = 0; + + return src_register_ext_swz; +} + +unsigned +tgsi_compare_src_register_ext_swz( + struct tgsi_src_register_ext_swz a, + struct tgsi_src_register_ext_swz b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_src_register_ext_swz +tgsi_build_src_register_ext_swz( + unsigned ext_swizzle_x, + unsigned ext_swizzle_y, + unsigned ext_swizzle_z, + unsigned ext_swizzle_w, + unsigned negate_x, + unsigned negate_y, + unsigned negate_z, + unsigned negate_w, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register_ext_swz src_register_ext_swz; + + assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE ); + assert( negate_x <= 1 ); + assert( negate_y <= 1 ); + assert( negate_z <= 1 ); + assert( negate_w <= 1 ); + + src_register_ext_swz = tgsi_default_src_register_ext_swz(); + src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; + src_register_ext_swz.ExtSwizzleY = ext_swizzle_y; + src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z; + src_register_ext_swz.ExtSwizzleW = ext_swizzle_w; + src_register_ext_swz.NegateX = negate_x; + src_register_ext_swz.NegateY = negate_y; + src_register_ext_swz.NegateZ = negate_z; + src_register_ext_swz.NegateW = negate_w; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return src_register_ext_swz; +} + +struct tgsi_src_register_ext_mod +tgsi_default_src_register_ext_mod( void ) +{ + struct tgsi_src_register_ext_mod src_register_ext_mod; + + src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; + src_register_ext_mod.Complement = 0; + src_register_ext_mod.Bias = 0; + src_register_ext_mod.Scale2X = 0; + src_register_ext_mod.Absolute = 0; + src_register_ext_mod.Negate = 0; + src_register_ext_mod.Padding = 0; + src_register_ext_mod.Extended = 0; + + return src_register_ext_mod; +} + +unsigned +tgsi_compare_src_register_ext_mod( + struct tgsi_src_register_ext_mod a, + struct tgsi_src_register_ext_mod b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_src_register_ext_mod +tgsi_build_src_register_ext_mod( + unsigned complement, + unsigned bias, + unsigned scale_2x, + unsigned absolute, + unsigned negate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register_ext_mod src_register_ext_mod; + + assert( complement <= 1 ); + assert( bias <= 1 ); + assert( scale_2x <= 1 ); + assert( absolute <= 1 ); + assert( negate <= 1 ); + + src_register_ext_mod = tgsi_default_src_register_ext_mod(); + src_register_ext_mod.Complement = complement; + src_register_ext_mod.Bias = bias; + src_register_ext_mod.Scale2X = scale_2x; + src_register_ext_mod.Absolute = absolute; + src_register_ext_mod.Negate = negate; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return src_register_ext_mod; +} + +struct tgsi_dimension +tgsi_default_dimension( void ) +{ + struct tgsi_dimension dimension; + + dimension.Indirect = 0; + dimension.Dimension = 0; + dimension.Padding = 0; + dimension.Index = 0; + dimension.Extended = 0; + + return dimension; +} + +struct tgsi_dimension +tgsi_build_dimension( + unsigned indirect, + unsigned index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dimension dimension; + + dimension = tgsi_default_dimension(); + dimension.Indirect = indirect; + dimension.Index = index; + + instruction_grow( instruction, header ); + + return dimension; +} + +struct tgsi_dst_register +tgsi_default_dst_register( void ) +{ + struct tgsi_dst_register dst_register; + + dst_register.File = TGSI_FILE_NULL; + dst_register.WriteMask = TGSI_WRITEMASK_XYZW; + dst_register.Indirect = 0; + dst_register.Dimension = 0; + dst_register.Index = 0; + dst_register.Padding = 0; + dst_register.Extended = 0; + + return dst_register; +} + +struct tgsi_dst_register +tgsi_build_dst_register( + unsigned file, + unsigned mask, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register dst_register; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( mask <= TGSI_WRITEMASK_XYZW ); + assert( index >= -32768 && index <= 32767 ); + + dst_register = tgsi_default_dst_register(); + dst_register.File = file; + dst_register.WriteMask = mask; + dst_register.Index = index; + + instruction_grow( instruction, header ); + + return dst_register; +} + +struct tgsi_full_dst_register +tgsi_default_full_dst_register( void ) +{ + struct tgsi_full_dst_register full_dst_register; + + full_dst_register.DstRegister = tgsi_default_dst_register(); + full_dst_register.DstRegisterExtConcode = + tgsi_default_dst_register_ext_concode(); + full_dst_register.DstRegisterExtModulate = + tgsi_default_dst_register_ext_modulate(); + + return full_dst_register; +} + +struct tgsi_dst_register_ext_concode +tgsi_default_dst_register_ext_concode( void ) +{ + struct tgsi_dst_register_ext_concode dst_register_ext_concode; + + dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE; + dst_register_ext_concode.CondMask = TGSI_CC_TR; + dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X; + dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y; + dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z; + dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W; + dst_register_ext_concode.CondSrcIndex = 0; + dst_register_ext_concode.Padding = 0; + dst_register_ext_concode.Extended = 0; + + return dst_register_ext_concode; +} + +unsigned +tgsi_compare_dst_register_ext_concode( + struct tgsi_dst_register_ext_concode a, + struct tgsi_dst_register_ext_concode b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_dst_register_ext_concode +tgsi_build_dst_register_ext_concode( + unsigned cc, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + int index, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register_ext_concode dst_register_ext_concode; + + assert( cc <= TGSI_CC_FL ); + assert( swizzle_x <= TGSI_SWIZZLE_W ); + assert( swizzle_y <= TGSI_SWIZZLE_W ); + assert( swizzle_z <= TGSI_SWIZZLE_W ); + assert( swizzle_w <= TGSI_SWIZZLE_W ); + assert( index >= -32768 && index <= 32767 ); + + dst_register_ext_concode = tgsi_default_dst_register_ext_concode(); + dst_register_ext_concode.CondMask = cc; + dst_register_ext_concode.CondSwizzleX = swizzle_x; + dst_register_ext_concode.CondSwizzleY = swizzle_y; + dst_register_ext_concode.CondSwizzleZ = swizzle_z; + dst_register_ext_concode.CondSwizzleW = swizzle_w; + dst_register_ext_concode.CondSrcIndex = index; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return dst_register_ext_concode; +} + +struct tgsi_dst_register_ext_modulate +tgsi_default_dst_register_ext_modulate( void ) +{ + struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; + + dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE; + dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X; + dst_register_ext_modulate.Padding = 0; + dst_register_ext_modulate.Extended = 0; + + return dst_register_ext_modulate; +} + +unsigned +tgsi_compare_dst_register_ext_modulate( + struct tgsi_dst_register_ext_modulate a, + struct tgsi_dst_register_ext_modulate b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_dst_register_ext_modulate +tgsi_build_dst_register_ext_modulate( + unsigned modulate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; + + assert( modulate <= TGSI_MODULATE_EIGHTH ); + + dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate(); + dst_register_ext_modulate.Modulate = modulate; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return dst_register_ext_modulate; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h new file mode 100644 index 00000000000..ed258302487 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -0,0 +1,332 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_BUILD_H +#define TGSI_BUILD_H + +#if defined __cplusplus +extern "C" { +#endif + +/* + * version + */ + +struct tgsi_version +tgsi_build_version( void ); + +/* + * header + */ + +struct tgsi_header +tgsi_build_header( void ); + +struct tgsi_processor +tgsi_default_processor( void ); + +struct tgsi_processor +tgsi_build_processor( + unsigned processor, + struct tgsi_header *header ); + +/* + * declaration + */ + +struct tgsi_declaration +tgsi_default_declaration( void ); + +struct tgsi_declaration +tgsi_build_declaration( + unsigned file, + unsigned usage_mask, + unsigned interpolate, + unsigned semantic, + struct tgsi_header *header ); + +struct tgsi_full_declaration +tgsi_default_full_declaration( void ); + +unsigned +tgsi_build_full_declaration( + const struct tgsi_full_declaration *full_decl, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +struct tgsi_declaration_range +tgsi_default_declaration_range( void ); + +struct tgsi_declaration_range +tgsi_build_declaration_range( + unsigned first, + unsigned last, + struct tgsi_declaration *declaration, + struct tgsi_header *header ); + +struct tgsi_declaration_semantic +tgsi_default_declaration_semantic( void ); + +struct tgsi_declaration_semantic +tgsi_build_declaration_semantic( + unsigned semantic_name, + unsigned semantic_index, + struct tgsi_declaration *declaration, + struct tgsi_header *header ); + +/* + * immediate + */ + +struct tgsi_immediate +tgsi_default_immediate( void ); + +struct tgsi_immediate +tgsi_build_immediate( + struct tgsi_header *header ); + +struct tgsi_full_immediate +tgsi_default_full_immediate( void ); + +struct tgsi_immediate_float32 +tgsi_build_immediate_float32( + float value, + struct tgsi_immediate *immediate, + struct tgsi_header *header ); + +unsigned +tgsi_build_full_immediate( + const struct tgsi_full_immediate *full_imm, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +/* + * instruction + */ + +struct tgsi_instruction +tgsi_default_instruction( void ); + +struct tgsi_instruction +tgsi_build_instruction( + unsigned opcode, + unsigned saturate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header ); + +struct tgsi_full_instruction +tgsi_default_full_instruction( void ); + +unsigned +tgsi_build_full_instruction( + const struct tgsi_full_instruction *full_inst, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +struct tgsi_instruction_ext_nv +tgsi_default_instruction_ext_nv( void ); + +unsigned +tgsi_compare_instruction_ext_nv( + struct tgsi_instruction_ext_nv a, + struct tgsi_instruction_ext_nv b ); + +struct tgsi_instruction_ext_nv +tgsi_build_instruction_ext_nv( + unsigned precision, + unsigned cond_dst_index, + unsigned cond_flow_index, + unsigned cond_mask, + unsigned cond_swizzle_x, + unsigned cond_swizzle_y, + unsigned cond_swizzle_z, + unsigned cond_swizzle_w, + unsigned cond_dst_update, + unsigned cond_flow_update, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_instruction_ext_label +tgsi_default_instruction_ext_label( void ); + +unsigned +tgsi_compare_instruction_ext_label( + struct tgsi_instruction_ext_label a, + struct tgsi_instruction_ext_label b ); + +struct tgsi_instruction_ext_label +tgsi_build_instruction_ext_label( + unsigned label, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_instruction_ext_texture +tgsi_default_instruction_ext_texture( void ); + +unsigned +tgsi_compare_instruction_ext_texture( + struct tgsi_instruction_ext_texture a, + struct tgsi_instruction_ext_texture b ); + +struct tgsi_instruction_ext_texture +tgsi_build_instruction_ext_texture( + unsigned texture, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_src_register +tgsi_default_src_register( void ); + +struct tgsi_src_register +tgsi_build_src_register( + unsigned file, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + unsigned negate, + unsigned indirect, + unsigned dimension, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_full_src_register +tgsi_default_full_src_register( void ); + +struct tgsi_src_register_ext_swz +tgsi_default_src_register_ext_swz( void ); + +unsigned +tgsi_compare_src_register_ext_swz( + struct tgsi_src_register_ext_swz a, + struct tgsi_src_register_ext_swz b ); + +struct tgsi_src_register_ext_swz +tgsi_build_src_register_ext_swz( + unsigned ext_swizzle_x, + unsigned ext_swizzle_y, + unsigned ext_swizzle_z, + unsigned ext_swizzle_w, + unsigned negate_x, + unsigned negate_y, + unsigned negate_z, + unsigned negate_w, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_src_register_ext_mod +tgsi_default_src_register_ext_mod( void ); + +unsigned +tgsi_compare_src_register_ext_mod( + struct tgsi_src_register_ext_mod a, + struct tgsi_src_register_ext_mod b ); + +struct tgsi_src_register_ext_mod +tgsi_build_src_register_ext_mod( + unsigned complement, + unsigned bias, + unsigned scale_2x, + unsigned absolute, + unsigned negate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dimension +tgsi_default_dimension( void ); + +struct tgsi_dimension +tgsi_build_dimension( + unsigned indirect, + unsigned index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dst_register +tgsi_default_dst_register( void ); + +struct tgsi_dst_register +tgsi_build_dst_register( + unsigned file, + unsigned mask, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_full_dst_register +tgsi_default_full_dst_register( void ); + +struct tgsi_dst_register_ext_concode +tgsi_default_dst_register_ext_concode( void ); + +unsigned +tgsi_compare_dst_register_ext_concode( + struct tgsi_dst_register_ext_concode a, + struct tgsi_dst_register_ext_concode b ); + +struct tgsi_dst_register_ext_concode +tgsi_build_dst_register_ext_concode( + unsigned cc, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + int index, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dst_register_ext_modulate +tgsi_default_dst_register_ext_modulate( void ); + +unsigned +tgsi_compare_dst_register_ext_modulate( + struct tgsi_dst_register_ext_modulate a, + struct tgsi_dst_register_ext_modulate b ); + +struct tgsi_dst_register_ext_modulate +tgsi_build_dst_register_ext_modulate( + unsigned modulate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_BUILD_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c new file mode 100644 index 00000000000..d2e6375212f --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -0,0 +1,582 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_dump.h" +#include "tgsi_iterate.h" + +struct dump_ctx +{ + struct tgsi_iterate_context iter; + + uint instno; +}; + +static void +dump_enum( + uint e, + const char **enums, + uint enum_count ) +{ + if (e >= enum_count) + debug_printf( "%u", e ); + else + debug_printf( "%s", enums[e] ); +} + +#define EOL() debug_printf( "\n" ) +#define TXT(S) debug_printf( "%s", S ) +#define CHR(C) debug_printf( "%c", C ) +#define UIX(I) debug_printf( "0x%x", I ) +#define UID(I) debug_printf( "%u", I ) +#define SID(I) debug_printf( "%d", I ) +#define FLT(F) debug_printf( "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) + +static const char *processor_type_names[] = +{ + "FRAG", + "VERT", + "GEOM" +}; + +static const char *file_names[] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static const char *interpolate_names[] = +{ + "CONSTANT", + "LINEAR", + "PERSPECTIVE" +}; + +static const char *semantic_names[] = +{ + "POSITION", + "COLOR", + "BCOLOR", + "FOG", + "PSIZE", + "GENERIC", + "NORMAL" +}; + +static const char *immediate_type_names[] = +{ + "FLT32" +}; + +static const char *opcode_names[TGSI_OPCODE_LAST] = +{ + "ARL", + "MOV", + "LIT", + "RCP", + "RSQ", + "EXP", + "LOG", + "MUL", + "ADD", + "DP3", + "DP4", + "DST", + "MIN", + "MAX", + "SLT", + "SGE", + "MAD", + "SUB", + "LERP", + "CND", + "CND0", + "DOT2ADD", + "INDEX", + "NEGATE", + "FRAC", + "CLAMP", + "FLOOR", + "ROUND", + "EXPBASE2", + "LOGBASE2", + "POWER", + "CROSSPRODUCT", + "MULTIPLYMATRIX", + "ABS", + "RCC", + "DPH", + "COS", + "DDX", + "DDY", + "KILP", + "PK2H", + "PK2US", + "PK4B", + "PK4UB", + "RFL", + "SEQ", + "SFL", + "SGT", + "SIN", + "SLE", + "SNE", + "STR", + "TEX", + "TXD", + "TXP", + "UP2H", + "UP2US", + "UP4B", + "UP4UB", + "X2D", + "ARA", + "ARR", + "BRA", + "CAL", + "RET", + "SSG", + "CMP", + "SCS", + "TXB", + "NRM", + "DIV", + "DP2", + "TXL", + "BRK", + "IF", + "LOOP", + "REP", + "ELSE", + "ENDIF", + "ENDLOOP", + "ENDREP", + "PUSHA", + "POPA", + "CEIL", + "I2F", + "NOT", + "TRUNC", + "SHL", + "SHR", + "AND", + "OR", + "MOD", + "XOR", + "SAD", + "TXF", + "TXQ", + "CONT", + "EMIT", + "ENDPRIM", + "BGNLOOP2", + "BGNSUB", + "ENDLOOP2", + "ENDSUB", + "NOISE1", + "NOISE2", + "NOISE3", + "NOISE4", + "NOP", + "M4X3", + "M3X4", + "M3X3", + "M3X2", + "NRM4", + "CALLNZ", + "IFC", + "BREAKC", + "KIL", + "END", + "SWZ" +}; + +static const char *swizzle_names[] = +{ + "x", + "y", + "z", + "w" +}; + +static const char *texture_names[] = +{ + "UNKNOWN", + "1D", + "2D", + "3D", + "CUBE", + "RECT", + "SHADOW1D", + "SHADOW2D", + "SHADOWRECT" +}; + +static const char *extswizzle_names[] = +{ + "x", + "y", + "z", + "w", + "0", + "1" +}; + +static const char *modulate_names[TGSI_MODULATE_COUNT] = +{ + "", + "_2X", + "_4X", + "_8X", + "_D2", + "_D4", + "_D8" +}; + +static void +_dump_register_prefix( + uint file, + uint first, + uint last ) +{ + + +} + +static void +_dump_register( + uint file, + int first, + int last ) +{ + ENM( file, file_names ); + CHR( '[' ); + SID( first ); + if (first != last) { + TXT( ".." ); + SID( last ); + } + CHR( ']' ); +} + +static void +_dump_register_ind( + uint file, + int index, + uint ind_file, + int ind_index ) +{ + ENM( file, file_names ); + CHR( '[' ); + ENM( ind_file, file_names ); + CHR( '[' ); + SID( ind_index ); + CHR( ']' ); + if (index != 0) { + if (index > 0) + CHR( '+' ); + SID( index ); + } + CHR( ']' ); +} + +static void +_dump_writemask( + uint writemask ) +{ + if (writemask != TGSI_WRITEMASK_XYZW) { + CHR( '.' ); + if (writemask & TGSI_WRITEMASK_X) + CHR( 'x' ); + if (writemask & TGSI_WRITEMASK_Y) + CHR( 'y' ); + if (writemask & TGSI_WRITEMASK_Z) + CHR( 'z' ); + if (writemask & TGSI_WRITEMASK_W) + CHR( 'w' ); + } +} + +void +tgsi_dump_declaration( + const struct tgsi_full_declaration *decl ) +{ + TXT( "\nDCL " ); + + _dump_register( + decl->Declaration.File, + decl->DeclarationRange.First, + decl->DeclarationRange.Last ); + _dump_writemask( + decl->Declaration.UsageMask ); + + if (decl->Declaration.Semantic) { + TXT( ", " ); + ENM( decl->Semantic.SemanticName, semantic_names ); + if (decl->Semantic.SemanticIndex != 0 || + decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) { + CHR( '[' ); + UID( decl->Semantic.SemanticIndex ); + CHR( ']' ); + } + } + + TXT( ", " ); + ENM( decl->Declaration.Interpolate, interpolate_names ); +} + +static boolean +iter_declaration( + struct tgsi_iterate_context *iter, + struct tgsi_full_declaration *decl ) +{ + tgsi_dump_declaration( decl ); + return TRUE; +} + +void +tgsi_dump_immediate( + const struct tgsi_full_immediate *imm ) +{ + uint i; + + TXT( "\nIMM " ); + ENM( imm->Immediate.DataType, immediate_type_names ); + + TXT( " { " ); + for (i = 0; i < imm->Immediate.Size - 1; i++) { + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + FLT( imm->u.ImmediateFloat32[i].Float ); + break; + default: + assert( 0 ); + } + + if (i < imm->Immediate.Size - 2) + TXT( ", " ); + } + TXT( " }" ); +} + +static boolean +iter_immediate( + struct tgsi_iterate_context *iter, + struct tgsi_full_immediate *imm ) +{ + tgsi_dump_immediate( imm ); + return TRUE; +} + +void +tgsi_dump_instruction( + const struct tgsi_full_instruction *inst, + uint instno ) +{ + uint i; + boolean first_reg = TRUE; + + EOL(); + UID( instno ); + CHR( ':' ); + ENM( inst->Instruction.Opcode, opcode_names ); + + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + TXT( "_SAT" ); + break; + case TGSI_SAT_MINUS_PLUS_ONE: + TXT( "_SATNV" ); + break; + default: + assert( 0 ); + } + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if (!first_reg) + CHR( ',' ); + CHR( ' ' ); + + _dump_register( + dst->DstRegister.File, + dst->DstRegister.Index, + dst->DstRegister.Index ); + ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); + _dump_writemask( dst->DstRegister.WriteMask ); + + first_reg = FALSE; + } + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + + if (!first_reg) + CHR( ',' ); + CHR( ' ' ); + + if (src->SrcRegisterExtMod.Negate) + TXT( "-(" ); + if (src->SrcRegisterExtMod.Absolute) + CHR( '|' ); + if (src->SrcRegisterExtMod.Scale2X) + TXT( "2*(" ); + if (src->SrcRegisterExtMod.Bias) + CHR( '(' ); + if (src->SrcRegisterExtMod.Complement) + TXT( "1-(" ); + if (src->SrcRegister.Negate) + CHR( '-' ); + + if (src->SrcRegister.Indirect) { + _dump_register_ind( + src->SrcRegister.File, + src->SrcRegister.Index, + src->SrcRegisterInd.File, + src->SrcRegisterInd.Index ); + } + else { + _dump_register( + src->SrcRegister.File, + src->SrcRegister.Index, + src->SrcRegister.Index ); + } + + if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { + CHR( '.' ); + ENM( src->SrcRegister.SwizzleX, swizzle_names ); + ENM( src->SrcRegister.SwizzleY, swizzle_names ); + ENM( src->SrcRegister.SwizzleZ, swizzle_names ); + ENM( src->SrcRegister.SwizzleW, swizzle_names ); + } + if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { + CHR( '.' ); + if (src->SrcRegisterExtSwz.NegateX) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateY) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateZ) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateW) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names ); + } + + if (src->SrcRegisterExtMod.Complement) + CHR( ')' ); + if (src->SrcRegisterExtMod.Bias) + TXT( ")-.5" ); + if (src->SrcRegisterExtMod.Scale2X) + CHR( ')' ); + if (src->SrcRegisterExtMod.Absolute) + CHR( '|' ); + if (src->SrcRegisterExtMod.Negate) + CHR( ')' ); + + first_reg = FALSE; + } + + if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { + TXT( ", " ); + ENM( inst->InstructionExtTexture.Texture, texture_names ); + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_CAL: + TXT( " :" ); + UID( inst->InstructionExtLabel.Label ); + break; + } +} + +static boolean +iter_instruction( + struct tgsi_iterate_context *iter, + struct tgsi_full_instruction *inst ) +{ + struct dump_ctx *ctx = (struct dump_ctx *) iter; + + tgsi_dump_instruction( inst, ctx->instno++ ); + return TRUE; +} + +static boolean +prolog( + struct tgsi_iterate_context *ctx ) +{ + EOL(); + ENM( ctx->processor.Processor, processor_type_names ); + UID( ctx->version.MajorVersion ); + CHR( '.' ); + UID( ctx->version.MinorVersion ); + return TRUE; +} + +void +tgsi_dump( + const struct tgsi_token *tokens, + uint flags ) +{ + struct dump_ctx ctx; + + /* sanity checks */ + assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 ); + assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); + + ctx.iter.prolog = prolog; + ctx.iter.iterate_instruction = iter_instruction; + ctx.iter.iterate_declaration = iter_declaration; + ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.epilog = NULL; + + ctx.instno = 0; + + tgsi_iterate_shader( tokens, &ctx.iter ); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h new file mode 100644 index 00000000000..51c230b5db4 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_DUMP_H +#define TGSI_DUMP_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +void +tgsi_dump( + const struct tgsi_token *tokens, + uint flags ); + +struct tgsi_full_immediate; +struct tgsi_full_instruction; +struct tgsi_full_declaration; + +void +tgsi_dump_immediate( + const struct tgsi_full_immediate *imm ); + +void +tgsi_dump_instruction( + const struct tgsi_full_instruction *inst, + uint instno ); + +void +tgsi_dump_declaration( + const struct tgsi_full_declaration *decl ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_DUMP_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c new file mode 100644 index 00000000000..eabd74bd6d9 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -0,0 +1,845 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "util/u_string.h" +#include "tgsi_dump_c.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" + +static void +dump_enum( + const unsigned e, + const char **enums, + const unsigned enums_count ) +{ + if (e >= enums_count) { + debug_printf( "%u", e ); + } + else { + debug_printf( "%s", enums[e] ); + } +} + +#define EOL() debug_printf( "\n" ) +#define TXT(S) debug_printf( "%s", S ) +#define CHR(C) debug_printf( "%c", C ) +#define UIX(I) debug_printf( "0x%x", I ) +#define UID(I) debug_printf( "%u", I ) +#define SID(I) debug_printf( "%d", I ) +#define FLT(F) debug_printf( "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) + +static const char *TGSI_PROCESSOR_TYPES[] = +{ + "PROCESSOR_FRAGMENT", + "PROCESSOR_VERTEX", + "PROCESSOR_GEOMETRY" +}; + +static const char *TGSI_TOKEN_TYPES[] = +{ + "TOKEN_TYPE_DECLARATION", + "TOKEN_TYPE_IMMEDIATE", + "TOKEN_TYPE_INSTRUCTION" +}; + +static const char *TGSI_FILES[] = +{ + "FILE_NULL", + "FILE_CONSTANT", + "FILE_INPUT", + "FILE_OUTPUT", + "FILE_TEMPORARY", + "FILE_SAMPLER", + "FILE_ADDRESS", + "FILE_IMMEDIATE" +}; + +static const char *TGSI_INTERPOLATES[] = +{ + "INTERPOLATE_CONSTANT", + "INTERPOLATE_LINEAR", + "INTERPOLATE_PERSPECTIVE" +}; + +static const char *TGSI_SEMANTICS[] = +{ + "SEMANTIC_POSITION", + "SEMANTIC_COLOR", + "SEMANTIC_BCOLOR", + "SEMANTIC_FOG", + "SEMANTIC_PSIZE", + "SEMANTIC_GENERIC", + "SEMANTIC_NORMAL" +}; + +static const char *TGSI_IMMS[] = +{ + "IMM_FLOAT32" +}; + +static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] = +{ + "OPCODE_ARL", + "OPCODE_MOV", + "OPCODE_LIT", + "OPCODE_RCP", + "OPCODE_RSQ", + "OPCODE_EXP", + "OPCODE_LOG", + "OPCODE_MUL", + "OPCODE_ADD", + "OPCODE_DP3", + "OPCODE_DP4", + "OPCODE_DST", + "OPCODE_MIN", + "OPCODE_MAX", + "OPCODE_SLT", + "OPCODE_SGE", + "OPCODE_MAD", + "OPCODE_SUB", + "OPCODE_LERP", + "OPCODE_CND", + "OPCODE_CND0", + "OPCODE_DOT2ADD", + "OPCODE_INDEX", + "OPCODE_NEGATE", + "OPCODE_FRAC", + "OPCODE_CLAMP", + "OPCODE_FLOOR", + "OPCODE_ROUND", + "OPCODE_EXPBASE2", + "OPCODE_LOGBASE2", + "OPCODE_POWER", + "OPCODE_CROSSPRODUCT", + "OPCODE_MULTIPLYMATRIX", + "OPCODE_ABS", + "OPCODE_RCC", + "OPCODE_DPH", + "OPCODE_COS", + "OPCODE_DDX", + "OPCODE_DDY", + "OPCODE_KILP", + "OPCODE_PK2H", + "OPCODE_PK2US", + "OPCODE_PK4B", + "OPCODE_PK4UB", + "OPCODE_RFL", + "OPCODE_SEQ", + "OPCODE_SFL", + "OPCODE_SGT", + "OPCODE_SIN", + "OPCODE_SLE", + "OPCODE_SNE", + "OPCODE_STR", + "OPCODE_TEX", + "OPCODE_TXD", + "OPCODE_TXP", + "OPCODE_UP2H", + "OPCODE_UP2US", + "OPCODE_UP4B", + "OPCODE_UP4UB", + "OPCODE_X2D", + "OPCODE_ARA", + "OPCODE_ARR", + "OPCODE_BRA", + "OPCODE_CAL", + "OPCODE_RET", + "OPCODE_SSG", + "OPCODE_CMP", + "OPCODE_SCS", + "OPCODE_TXB", + "OPCODE_NRM", + "OPCODE_DIV", + "OPCODE_DP2", + "OPCODE_TXL", + "OPCODE_BRK", + "OPCODE_IF", + "OPCODE_LOOP", + "OPCODE_REP", + "OPCODE_ELSE", + "OPCODE_ENDIF", + "OPCODE_ENDLOOP", + "OPCODE_ENDREP", + "OPCODE_PUSHA", + "OPCODE_POPA", + "OPCODE_CEIL", + "OPCODE_I2F", + "OPCODE_NOT", + "OPCODE_TRUNC", + "OPCODE_SHL", + "OPCODE_SHR", + "OPCODE_AND", + "OPCODE_OR", + "OPCODE_MOD", + "OPCODE_XOR", + "OPCODE_SAD", + "OPCODE_TXF", + "OPCODE_TXQ", + "OPCODE_CONT", + "OPCODE_EMIT", + "OPCODE_ENDPRIM", + "OPCODE_BGNLOOP2", + "OPCODE_BGNSUB", + "OPCODE_ENDLOOP2", + "OPCODE_ENDSUB", + "OPCODE_NOISE1", + "OPCODE_NOISE2", + "OPCODE_NOISE3", + "OPCODE_NOISE4", + "OPCODE_NOP", + "OPCODE_M4X3", + "OPCODE_M3X4", + "OPCODE_M3X3", + "OPCODE_M3X2", + "OPCODE_NRM4", + "OPCODE_CALLNZ", + "OPCODE_IFC", + "OPCODE_BREAKC", + "OPCODE_KIL", + "OPCODE_END" +}; + +static const char *TGSI_SATS[] = +{ + "SAT_NONE", + "SAT_ZERO_ONE", + "SAT_MINUS_PLUS_ONE" +}; + +static const char *TGSI_INSTRUCTION_EXTS[] = +{ + "INSTRUCTION_EXT_TYPE_NV", + "INSTRUCTION_EXT_TYPE_LABEL", + "INSTRUCTION_EXT_TYPE_TEXTURE" +}; + +static const char *TGSI_PRECISIONS[] = +{ + "PRECISION_DEFAULT", + "PRECISION_FLOAT32", + "PRECISION_FLOAT16", + "PRECISION_FIXED12" +}; + +static const char *TGSI_CCS[] = +{ + "CC_GT", + "CC_EQ", + "CC_LT", + "CC_UN", + "CC_GE", + "CC_LE", + "CC_NE", + "CC_TR", + "CC_FL" +}; + +static const char *TGSI_SWIZZLES[] = +{ + "SWIZZLE_X", + "SWIZZLE_Y", + "SWIZZLE_Z", + "SWIZZLE_W" +}; + +static const char *TGSI_TEXTURES[] = +{ + "TEXTURE_UNKNOWN", + "TEXTURE_1D", + "TEXTURE_2D", + "TEXTURE_3D", + "TEXTURE_CUBE", + "TEXTURE_RECT", + "TEXTURE_SHADOW1D", + "TEXTURE_SHADOW2D", + "TEXTURE_SHADOWRECT" +}; + +static const char *TGSI_SRC_REGISTER_EXTS[] = +{ + "SRC_REGISTER_EXT_TYPE_SWZ", + "SRC_REGISTER_EXT_TYPE_MOD" +}; + +static const char *TGSI_EXTSWIZZLES[] = +{ + "EXTSWIZZLE_X", + "EXTSWIZZLE_Y", + "EXTSWIZZLE_Z", + "EXTSWIZZLE_W", + "EXTSWIZZLE_ZERO", + "EXTSWIZZLE_ONE" +}; + +static const char *TGSI_WRITEMASKS[] = +{ + "0", + "WRITEMASK_X", + "WRITEMASK_Y", + "WRITEMASK_XY", + "WRITEMASK_Z", + "WRITEMASK_XZ", + "WRITEMASK_YZ", + "WRITEMASK_XYZ", + "WRITEMASK_W", + "WRITEMASK_XW", + "WRITEMASK_YW", + "WRITEMASK_XYW", + "WRITEMASK_ZW", + "WRITEMASK_XZW", + "WRITEMASK_YZW", + "WRITEMASK_XYZW" +}; + +static const char *TGSI_DST_REGISTER_EXTS[] = +{ + "DST_REGISTER_EXT_TYPE_CONDCODE", + "DST_REGISTER_EXT_TYPE_MODULATE" +}; + +static const char *TGSI_MODULATES[] = +{ + "MODULATE_1X", + "MODULATE_2X", + "MODULATE_4X", + "MODULATE_8X", + "MODULATE_HALF", + "MODULATE_QUARTER", + "MODULATE_EIGHTH" +}; + +static void +dump_declaration_verbose( + struct tgsi_full_declaration *decl, + unsigned ignored, + unsigned deflt, + struct tgsi_full_declaration *fd ) +{ + TXT( "\nFile : " ); + ENM( decl->Declaration.File, TGSI_FILES ); + if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { + TXT( "\nUsageMask : " ); + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { + CHR( 'X' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { + CHR( 'Y' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { + CHR( 'Z' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { + CHR( 'W' ); + } + } + if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { + TXT( "\nInterpolate: " ); + ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES ); + } + if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { + TXT( "\nSemantic : " ); + UID( decl->Declaration.Semantic ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Declaration.Padding ); + } + + EOL(); + TXT( "\nFirst: " ); + UID( decl->DeclarationRange.First ); + TXT( "\nLast : " ); + UID( decl->DeclarationRange.Last ); + + if( decl->Declaration.Semantic ) { + EOL(); + TXT( "\nSemanticName : " ); + ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); + TXT( "\nSemanticIndex: " ); + UID( decl->Semantic.SemanticIndex ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Semantic.Padding ); + } + } +} + +static void +dump_immediate_verbose( + struct tgsi_full_immediate *imm, + unsigned ignored ) +{ + unsigned i; + + TXT( "\nDataType : " ); + ENM( imm->Immediate.DataType, TGSI_IMMS ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( imm->Immediate.Padding ); + } + + for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + EOL(); + switch( imm->Immediate.DataType ) { + case TGSI_IMM_FLOAT32: + TXT( "\nFloat: " ); + FLT( imm->u.ImmediateFloat32[i].Float ); + break; + + default: + assert( 0 ); + } + } +} + +static void +dump_instruction_verbose( + struct tgsi_full_instruction *inst, + unsigned ignored, + unsigned deflt, + struct tgsi_full_instruction *fi ) +{ + unsigned i; + + TXT( "\nOpcode : " ); + ENM( inst->Instruction.Opcode, TGSI_OPCODES ); + if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { + TXT( "\nSaturate : " ); + ENM( inst->Instruction.Saturate, TGSI_SATS ); + } + if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) { + TXT( "\nNumDstRegs : " ); + UID( inst->Instruction.NumDstRegs ); + } + if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) { + TXT( "\nNumSrcRegs : " ); + UID( inst->Instruction.NumSrcRegs ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->Instruction.Padding ); + } + + if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { + TXT( "\nPrecision : " ); + ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS ); + } + if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) { + TXT( "\nCondDstIndex : " ); + UID( inst->InstructionExtNv.CondDstIndex ); + } + if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) { + TXT( "\nCondFlowIndex : " ); + UID( inst->InstructionExtNv.CondFlowIndex ); + } + if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) { + TXT( "\nCondMask : " ); + ENM( inst->InstructionExtNv.CondMask, TGSI_CCS ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) { + TXT( "\nCondSwizzleX : " ); + ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) { + TXT( "\nCondSwizzleY : " ); + ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) { + TXT( "\nCondSwizzleZ : " ); + ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) { + TXT( "\nCondSwizzleW : " ); + ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) { + TXT( "\nCondDstUpdate : " ); + UID( inst->InstructionExtNv.CondDstUpdate ); + } + if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) { + TXT( "\nCondFlowEnable: " ); + UID( inst->InstructionExtNv.CondFlowEnable ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtNv.Padding ); + if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) { + TXT( "\nExtended : " ); + UID( inst->InstructionExtNv.Extended ); + } + } + } + + if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { + TXT( "\nLabel : " ); + UID( inst->InstructionExtLabel.Label ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtLabel.Padding ); + if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) { + TXT( "\nExtended: " ); + UID( inst->InstructionExtLabel.Extended ); + } + } + } + + if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { + TXT( "\nTexture : " ); + ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtTexture.Padding ); + if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) { + TXT( "\nExtended: " ); + UID( inst->InstructionExtTexture.Extended ); + } + } + } + + for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; + + EOL(); + TXT( "\nFile : " ); + ENM( dst->DstRegister.File, TGSI_FILES ); + if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { + TXT( "\nWriteMask: " ); + ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); + } + if( ignored ) { + if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { + TXT( "\nIndirect : " ); + UID( dst->DstRegister.Indirect ); + } + if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { + TXT( "\nDimension: " ); + UID( dst->DstRegister.Dimension ); + } + } + if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { + TXT( "\nIndex : " ); + SID( dst->DstRegister.Index ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegister.Padding ); + if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { + TXT( "\nExtended : " ); + UID( dst->DstRegister.Extended ); + } + } + + if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { + EOL(); + TXT( "\nType : " ); + ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { + TXT( "\nCondMask : " ); + ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) { + TXT( "\nCondSwizzleX: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) { + TXT( "\nCondSwizzleY: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) { + TXT( "\nCondSwizzleZ: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) { + TXT( "\nCondSwizzleW: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) { + TXT( "\nCondSrcIndex: " ); + UID( dst->DstRegisterExtConcode.CondSrcIndex ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegisterExtConcode.Padding ); + if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) { + TXT( "\nExtended : " ); + UID( dst->DstRegisterExtConcode.Extended ); + } + } + } + + if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { + EOL(); + TXT( "\nType : " ); + ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { + TXT( "\nModulate: " ); + ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegisterExtModulate.Padding ); + if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { + TXT( "\nExtended: " ); + UID( dst->DstRegisterExtModulate.Extended ); + } + } + } + } + + for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; + + EOL(); + TXT( "\nFile : "); + ENM( src->SrcRegister.File, TGSI_FILES ); + if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { + TXT( "\nSwizzleX : " ); + ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { + TXT( "\nSwizzleY : " ); + ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { + TXT( "\nSwizzleZ : " ); + ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { + TXT( "\nSwizzleW : " ); + ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { + TXT( "\nNegate : " ); + UID( src->SrcRegister.Negate ); + } + if( ignored ) { + if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { + TXT( "\nIndirect : " ); + UID( src->SrcRegister.Indirect ); + } + if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { + TXT( "\nDimension: " ); + UID( src->SrcRegister.Dimension ); + } + } + if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { + TXT( "\nIndex : " ); + SID( src->SrcRegister.Index ); + } + if( ignored ) { + if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegister.Extended ); + } + } + + if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { + EOL(); + TXT( "\nType : " ); + ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { + TXT( "\nExtSwizzleX: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) { + TXT( "\nExtSwizzleY: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) { + TXT( "\nExtSwizzleZ: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) { + TXT( "\nExtSwizzleW: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { + TXT( "\nNegateX : " ); + UID( src->SrcRegisterExtSwz.NegateX ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) { + TXT( "\nNegateY : " ); + UID( src->SrcRegisterExtSwz.NegateY ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) { + TXT( "\nNegateZ : " ); + UID( src->SrcRegisterExtSwz.NegateZ ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) { + TXT( "\nNegateW : " ); + UID( src->SrcRegisterExtSwz.NegateW ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( src->SrcRegisterExtSwz.Padding ); + if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegisterExtSwz.Extended ); + } + } + } + + if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { + EOL(); + TXT( "\nType : " ); + ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { + TXT( "\nComplement: " ); + UID( src->SrcRegisterExtMod.Complement ); + } + if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { + TXT( "\nBias : " ); + UID( src->SrcRegisterExtMod.Bias ); + } + if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { + TXT( "\nScale2X : " ); + UID( src->SrcRegisterExtMod.Scale2X ); + } + if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { + TXT( "\nAbsolute : " ); + UID( src->SrcRegisterExtMod.Absolute ); + } + if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { + TXT( "\nNegate : " ); + UID( src->SrcRegisterExtMod.Negate ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( src->SrcRegisterExtMod.Padding ); + if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegisterExtMod.Extended ); + } + } + } + } +} + +void +tgsi_dump_c( + const struct tgsi_token *tokens, + uint flags ) +{ + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + uint ignored = flags & TGSI_DUMP_C_IGNORED; + uint deflt = flags & TGSI_DUMP_C_DEFAULT; + uint instno = 0; + + /* sanity checks */ + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); + + tgsi_parse_init( &parse, tokens ); + + TXT( "tgsi-dump begin -----------------" ); + + TXT( "\nMajorVersion: " ); + UID( parse.FullVersion.Version.MajorVersion ); + TXT( "\nMinorVersion: " ); + UID( parse.FullVersion.Version.MinorVersion ); + EOL(); + + TXT( "\nHeaderSize: " ); + UID( parse.FullHeader.Header.HeaderSize ); + TXT( "\nBodySize : " ); + UID( parse.FullHeader.Header.BodySize ); + TXT( "\nProcessor : " ); + ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); + EOL(); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + TXT( "\nType : " ); + ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); + if( ignored ) { + TXT( "\nSize : " ); + UID( parse.FullToken.Token.Size ); + if( deflt || parse.FullToken.Token.Extended ) { + TXT( "\nExtended : " ); + UID( parse.FullToken.Token.Extended ); + } + } + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + dump_declaration_verbose( + &parse.FullToken.FullDeclaration, + ignored, + deflt, + &fd ); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + dump_immediate_verbose( + &parse.FullToken.FullImmediate, + ignored ); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + dump_instruction_verbose( + &parse.FullToken.FullInstruction, + ignored, + deflt, + &fi ); + break; + + default: + assert( 0 ); + } + + EOL(); + } + + TXT( "\ntgsi-dump end -------------------\n" ); + + tgsi_parse_free( &parse ); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.h b/src/gallium/auxiliary/tgsi/tgsi_dump_c.h new file mode 100644 index 00000000000..d91cd35b3b7 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_DUMP_C_H +#define TGSI_DUMP_C_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +#define TGSI_DUMP_C_IGNORED 1 +#define TGSI_DUMP_C_DEFAULT 2 + +void +tgsi_dump_c( + const struct tgsi_token *tokens, + uint flags ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_DUMP_C_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c new file mode 100644 index 00000000000..8b430548bca --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -0,0 +1,2522 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI interpretor/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers. This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). + * + * + * Authors: + * Michal Krol + * Brian Paul + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi_exec.h" + +#define TILE_TOP_LEFT 0 +#define TILE_TOP_RIGHT 1 +#define TILE_BOTTOM_LEFT 2 +#define TILE_BOTTOM_RIGHT 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I +#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C +#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I +#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ + for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + + + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call tgsi_exec_machine_run() many times. + */ +void +tgsi_exec_machine_bind_shader( + struct tgsi_exec_machine *mach, + const struct tgsi_token *tokens, + uint numSamplers, + struct tgsi_sampler *samplers) +{ + uint k; + struct tgsi_parse_context parse; + struct tgsi_exec_labels *labels = &mach->Labels; + struct tgsi_full_instruction *instructions; + struct tgsi_full_declaration *declarations; + uint maxInstructions = 10, numInstructions = 0; + uint maxDeclarations = 10, numDeclarations = 0; + uint instno = 0; + +#if 0 + tgsi_dump(tokens, 0); +#endif + + mach->Tokens = tokens; + mach->Samplers = samplers; + + k = tgsi_parse_init (&parse, mach->Tokens); + if (k != TGSI_PARSE_OK) { + debug_printf( "Problem parsing!\n" ); + return; + } + + mach->Processor = parse.FullHeader.Processor.Processor; + mach->ImmLimit = 0; + labels->count = 0; + + declarations = (struct tgsi_full_declaration *) + MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); + + if (!declarations) { + return; + } + + instructions = (struct tgsi_full_instruction *) + MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); + + if (!instructions) { + FREE( declarations ); + return; + } + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + uint pointer = parse.Position; + uint i; + + tgsi_parse_token( &parse ); + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* save expanded declaration */ + if (numDeclarations == maxDeclarations) { + declarations = REALLOC(declarations, + maxDeclarations + * sizeof(struct tgsi_full_declaration), + (maxDeclarations + 10) + * sizeof(struct tgsi_full_declaration)); + maxDeclarations += 10; + } + memcpy(declarations + numDeclarations, + &parse.FullToken.FullDeclaration, + sizeof(declarations[0])); + numDeclarations++; + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + assert( size % 4 == 0 ); + assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); + + for( i = 0; i < size; i++ ) { + mach->Imms[mach->ImmLimit + i / 4][i % 4] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } + mach->ImmLimit += size / 4; + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + assert( labels->count < MAX_LABELS ); + + labels->labels[labels->count][0] = instno; + labels->labels[labels->count][1] = pointer; + labels->count++; + + /* save expanded instruction */ + if (numInstructions == maxInstructions) { + instructions = REALLOC(instructions, + maxInstructions + * sizeof(struct tgsi_full_instruction), + (maxInstructions + 10) + * sizeof(struct tgsi_full_instruction)); + maxInstructions += 10; + } + memcpy(instructions + numInstructions, + &parse.FullToken.FullInstruction, + sizeof(instructions[0])); + numInstructions++; + break; + + default: + assert( 0 ); + } + } + tgsi_parse_free (&parse); + + if (mach->Declarations) { + FREE( mach->Declarations ); + } + mach->Declarations = declarations; + mach->NumDeclarations = numDeclarations; + + if (mach->Instructions) { + FREE( mach->Instructions ); + } + mach->Instructions = instructions; + mach->NumInstructions = numInstructions; +} + + +void +tgsi_exec_machine_init( + struct tgsi_exec_machine *mach ) +{ + uint i; + + mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); + mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + + /* Setup constants. */ + for( i = 0; i < 4; i++ ) { + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; + mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; + mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; + } +} + + +void +tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) +{ + if (mach->Instructions) { + FREE(mach->Instructions); + mach->Instructions = NULL; + mach->NumInstructions = 0; + } + if (mach->Declarations) { + FREE(mach->Declarations); + mach->Declarations = NULL; + mach->NumDeclarations = 0; + } +} + + +static void +micro_abs( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = fabsf( src->f[0] ); + dst->f[1] = fabsf( src->f[1] ); + dst->f[2] = fabsf( src->f[2] ); + dst->f[3] = fabsf( src->f[3] ); +} + +static void +micro_add( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] + src1->f[0]; + dst->f[1] = src0->f[1] + src1->f[1]; + dst->f[2] = src0->f[2] + src1->f[2]; + dst->f[3] = src0->f[3] + src1->f[3]; +} + +static void +micro_iadd( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] + src1->i[0]; + dst->i[1] = src0->i[1] + src1->i[1]; + dst->i[2] = src0->i[2] + src1->i[2]; + dst->i[3] = src0->i[3] + src1->i[3]; +} + +static void +micro_and( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] & src1->u[0]; + dst->u[1] = src0->u[1] & src1->u[1]; + dst->u[2] = src0->u[2] & src1->u[2]; + dst->u[3] = src0->u[3] & src1->u[3]; +} + +static void +micro_ceil( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = ceilf( src->f[0] ); + dst->f[1] = ceilf( src->f[1] ); + dst->f[2] = ceilf( src->f[2] ); + dst->f[3] = ceilf( src->f[3] ); +} + +static void +micro_cos( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = cosf( src->f[0] ); + dst->f[1] = cosf( src->f[1] ); + dst->f[2] = cosf( src->f[2] ); + dst->f[3] = cosf( src->f[3] ); +} + +static void +micro_ddx( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_div( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] / src1->f[0]; + dst->f[1] = src0->f[1] / src1->f[1]; + dst->f[2] = src0->f[2] / src1->f[2]; + dst->f[3] = src0->f[3] / src1->f[3]; +} + +static void +micro_udiv( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] / src1->u[0]; + dst->u[1] = src0->u[1] / src1->u[1]; + dst->u[2] = src0->u[2] / src1->u[2]; + dst->u[3] = src0->u[3] / src1->u[3]; +} + +static void +micro_eq( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_ieq( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; + dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; + dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; + dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; +} + +static void +micro_exp2( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = powf( 2.0f, src->f[0] ); + dst->f[1] = powf( 2.0f, src->f[1] ); + dst->f[2] = powf( 2.0f, src->f[2] ); + dst->f[3] = powf( 2.0f, src->f[3] ); +} + +static void +micro_f2it( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->i[0] = (int) src->f[0]; + dst->i[1] = (int) src->f[1]; + dst->i[2] = (int) src->f[2]; + dst->i[3] = (int) src->f[3]; +} + +static void +micro_f2ut( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->u[0] = (uint) src->f[0]; + dst->u[1] = (uint) src->f[1]; + dst->u[2] = (uint) src->f[2]; + dst->u[3] = (uint) src->f[3]; +} + +static void +micro_flr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = floorf( src->f[0] ); + dst->f[1] = floorf( src->f[1] ); + dst->f[2] = floorf( src->f[2] ); + dst->f[3] = floorf( src->f[3] ); +} + +static void +micro_frc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = src->f[0] - floorf( src->f[0] ); + dst->f[1] = src->f[1] - floorf( src->f[1] ); + dst->f[2] = src->f[2] - floorf( src->f[2] ); + dst->f[3] = src->f[3] - floorf( src->f[3] ); +} + +static void +micro_ge( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_i2f( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) src->i[0]; + dst->f[1] = (float) src->i[1]; + dst->f[2] = (float) src->i[2]; + dst->f[3] = (float) src->i[3]; +} + +static void +micro_lg2( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = logf( src->f[0] ) * 1.442695f; + dst->f[1] = logf( src->f[1] ) * 1.442695f; + dst->f[2] = logf( src->f[2] ) * 1.442695f; + dst->f[3] = logf( src->f[3] ) * 1.442695f; +} + +static void +micro_le( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_lt( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_ilt( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; +} + +static void +micro_ult( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; +} + +static void +micro_max( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; +} + +static void +micro_imax( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; +} + +static void +micro_umax( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; +} + +static void +micro_min( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; +} + +static void +micro_imin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; +} + +static void +micro_umin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; +} + +static void +micro_umod( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] % src1->u[0]; + dst->u[1] = src0->u[1] % src1->u[1]; + dst->u[2] = src0->u[2] % src1->u[2]; + dst->u[3] = src0->u[3] % src1->u[3]; +} + +static void +micro_mul( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] * src1->f[0]; + dst->f[1] = src0->f[1] * src1->f[1]; + dst->f[2] = src0->f[2] * src1->f[2]; + dst->f[3] = src0->f[3] * src1->f[3]; +} + +static void +micro_imul( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] * src1->i[0]; + dst->i[1] = src0->i[1] * src1->i[1]; + dst->i[2] = src0->i[2] * src1->i[2]; + dst->i[3] = src0->i[3] * src1->i[3]; +} + +static void +micro_imul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst1->i[0] = src0->i[0] * src1->i[0]; + dst1->i[1] = src0->i[1] * src1->i[1]; + dst1->i[2] = src0->i[2] * src1->i[2]; + dst1->i[3] = src0->i[3] * src1->i[3]; + dst0->i[0] = 0; + dst0->i[1] = 0; + dst0->i[2] = 0; + dst0->i[3] = 0; +} + +static void +micro_umul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst1->u[0] = src0->u[0] * src1->u[0]; + dst1->u[1] = src0->u[1] * src1->u[1]; + dst1->u[2] = src0->u[2] * src1->u[2]; + dst1->u[3] = src0->u[3] * src1->u[3]; + dst0->u[0] = 0; + dst0->u[1] = 0; + dst0->u[2] = 0; + dst0->u[3] = 0; +} + +static void +micro_movc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2 ) +{ + dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; + dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; + dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; + dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; +} + +static void +micro_neg( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = -src->f[0]; + dst->f[1] = -src->f[1]; + dst->f[2] = -src->f[2]; + dst->f[3] = -src->f[3]; +} + +static void +micro_ineg( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_not( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_or( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] | src1->u[0]; + dst->u[1] = src0->u[1] | src1->u[1]; + dst->u[2] = src0->u[2] | src1->u[2]; + dst->u[3] = src0->u[3] | src1->u[3]; +} + +static void +micro_pow( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = powf( src0->f[0], src1->f[0] ); + dst->f[1] = powf( src0->f[1], src1->f[1] ); + dst->f[2] = powf( src0->f[2], src1->f[2] ); + dst->f[3] = powf( src0->f[3], src1->f[3] ); +} + +static void +micro_rnd( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = floorf( src->f[0] + 0.5f ); + dst->f[1] = floorf( src->f[1] + 0.5f ); + dst->f[2] = floorf( src->f[2] + 0.5f ); + dst->f[3] = floorf( src->f[3] + 0.5f ); +} + +static void +micro_shl( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] << src1->i[0]; + dst->i[1] = src0->i[1] << src1->i[1]; + dst->i[2] = src0->i[2] << src1->i[2]; + dst->i[3] = src0->i[3] << src1->i[3]; +} + +static void +micro_ishr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] >> src1->i[0]; + dst->i[1] = src0->i[1] >> src1->i[1]; + dst->i[2] = src0->i[2] >> src1->i[2]; + dst->i[3] = src0->i[3] >> src1->i[3]; +} + +static void +micro_trunc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0 ) +{ + dst->f[0] = (float) (int) src0->f[0]; + dst->f[1] = (float) (int) src0->f[1]; + dst->f[2] = (float) (int) src0->f[2]; + dst->f[3] = (float) (int) src0->f[3]; +} + +static void +micro_ushr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] >> src1->u[0]; + dst->u[1] = src0->u[1] >> src1->u[1]; + dst->u[2] = src0->u[2] >> src1->u[2]; + dst->u[3] = src0->u[3] >> src1->u[3]; +} + +static void +micro_sin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = sinf( src->f[0] ); + dst->f[1] = sinf( src->f[1] ); + dst->f[2] = sinf( src->f[2] ); + dst->f[3] = sinf( src->f[3] ); +} + +static void +micro_sqrt( union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = sqrtf( src->f[0] ); + dst->f[1] = sqrtf( src->f[1] ); + dst->f[2] = sqrtf( src->f[2] ); + dst->f[3] = sqrtf( src->f[3] ); +} + +static void +micro_sub( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] - src1->f[0]; + dst->f[1] = src0->f[1] - src1->f[1]; + dst->f[2] = src0->f[2] - src1->f[2]; + dst->f[3] = src0->f[3] - src1->f[3]; +} + +static void +micro_u2f( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) src->u[0]; + dst->f[1] = (float) src->u[1]; + dst->f[2] = (float) src->u[2]; + dst->f[3] = (float) src->u[3]; +} + +static void +micro_xor( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] ^ src1->u[0]; + dst->u[1] = src0->u[1] ^ src1->u[1]; + dst->u[2] = src0->u[2] ^ src1->u[2]; + dst->u[3] = src0->u[3] ^ src1->u[3]; +} + +static void +fetch_src_file_channel( + const struct tgsi_exec_machine *mach, + const uint file, + const uint swizzle, + const union tgsi_exec_channel *index, + union tgsi_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: + chan->f[0] = mach->Consts[index->i[0]][swizzle]; + chan->f[1] = mach->Consts[index->i[1]][swizzle]; + chan->f[2] = mach->Consts[index->i[2]][swizzle]; + chan->f[3] = mach->Consts[index->i[3]][swizzle]; + break; + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + assert( index->i[0] < (int) mach->ImmLimit ); + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + assert( index->i[1] < (int) mach->ImmLimit ); + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + assert( index->i[2] < (int) mach->ImmLimit ); + chan->f[2] = mach->Imms[index->i[2]][swizzle]; + assert( index->i[3] < (int) mach->ImmLimit ); + chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + assert( 0 ); + } +} + +static void +fetch_source( + const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index ) +{ + union tgsi_exec_channel index; + uint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.i[0] *= 17; + index.i[1] *= 17; + index.i[2] *= 17; + index.i[3] *= 17; + break; + case TGSI_FILE_CONSTANT: + index.i[0] *= 4096; + index.i[1] *= 4096; + index.i[2] *= 4096; + index.i[3] *= 4096; + break; + default: + assert( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + micro_abs( chan, chan ); + break; + + case TGSI_UTIL_SIGN_SET: + micro_abs( chan, chan ); + micro_neg( chan, chan ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + micro_neg( chan, chan ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } + + if (reg->SrcRegisterExtMod.Complement) { + micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); + } +} + +static void +store_dest( + struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index ) +{ + union tgsi_exec_channel *dst; + + switch( reg->DstRegister.File ) { + case TGSI_FILE_NULL: + return; + + case TGSI_FILE_OUTPUT: + dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS); + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + assert( 0 ); + return; + } + + switch (inst->Instruction.Saturate) + { + case TGSI_SAT_NONE: + if (mach->ExecMask & 0x1) + dst->i[0] = chan->i[0]; + if (mach->ExecMask & 0x2) + dst->i[1] = chan->i[1]; + if (mach->ExecMask & 0x4) + dst->i[2] = chan->i[2]; + if (mach->ExecMask & 0x8) + dst->i[3] = chan->i[3]; + break; + + case TGSI_SAT_ZERO_ONE: + /* XXX need to obey ExecMask here */ + micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + + default: + assert( 0 ); + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ +static void +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union tgsi_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + + +/* + * Fetch a texel using STR texture coordinates. + */ +static void +fetch_texel( struct tgsi_sampler *sampler, + const union tgsi_exec_channel *s, + const union tgsi_exec_channel *t, + const union tgsi_exec_channel *p, + float lodbias, /* XXX should be float[4] */ + union tgsi_exec_channel *r, + union tgsi_exec_channel *g, + union tgsi_exec_channel *b, + union tgsi_exec_channel *a ) +{ + uint j; + float rgba[NUM_CHANNELS][QUAD_SIZE]; + + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); + + for (j = 0; j < 4; j++) { + r->f[j] = rgba[0][j]; + g->f[j] = rgba[1][j]; + b->f[j] = rgba[2][j]; + a->f[j] = rgba[3][j]; + } +} + + +static void +exec_tex(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + boolean biasLod, + boolean projected) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + union tgsi_exec_channel r[8]; + uint chan_index; + float lodBias; + + /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + if (projected) { + FETCH(&r[1], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[1] ); + } + + if (biasLod) { + FETCH(&r[1], 0, CHAN_W); + lodBias = r[2].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (projected) { + FETCH(&r[3], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[3] ); + micro_div( &r[1], &r[1], &r[3] ); + micro_div( &r[2], &r[2], &r[3] ); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (projected) { + FETCH(&r[3], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[3] ); + micro_div( &r[1], &r[1], &r[3] ); + micro_div( &r[2], &r[2], &r[3] ); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } +} + + +/** + * Evaluate a constant-valued coefficient at the position of the + * current quad. + */ +static void +eval_constant_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +/** + * Evaluate a linear-valued coefficient at the position of the + * current quad. + */ +static void +eval_linear_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +/** + * Evaluate a perspective-valued coefficient at the position of the + * current quad. + */ +static void +eval_perspective_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* eval_coef_func)( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration( + struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl ) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + eval_coef_func eval; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; + + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; + + default: + assert( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + eval( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + eval( mach, i, j ); + } + } + } + } + } + } +} + +static void +exec_instruction( + struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + int *pc ) +{ + uint chan_index; + union tgsi_exec_channel r[8]; + + (*pc)++; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_f2it( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + + FETCH( &r[2], 0, CHAN_W ); + micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); + micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); + micro_pow( &r[1], &r[1], &r[2] ); + micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + micro_sqrt( &r[0], &r[0] ); + micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + FETCH( &r[0], 0, CHAN_X ); + micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ + STORE( &r[2], 0, CHAN_X ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ + STORE( &r[2], 0, CHAN_Y ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ + STORE( &r[2], 0, CHAN_Z ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_LOG: + FETCH( &r[0], 0, CHAN_X ); + micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ + micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ + micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[0], 0, CHAN_X ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ + micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ + STORE( &r[0], 0, CHAN_Y ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[1], 0, CHAN_Z ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_mul( &r[0], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_add( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + micro_mul( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + /* XXX use micro_min()?? */ + micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + /* XXX use micro_max()?? */ + micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); + + STORE(&r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_mul( &r[0], &r[0], &r[1] ); + FETCH( &r[1], 2, chan_index ); + micro_add( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_sub( &r[0], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + micro_sub( &r[1], &r[1], &r[2] ); + micro_mul( &r[0], &r[0], &r[1] ); + micro_add( &r[0], &r[0], &r[2] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + assert (0); + break; + + case TGSI_OPCODE_CND0: + assert (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + assert (0); + break; + + case TGSI_OPCODE_INDEX: + assert (0); + break; + + case TGSI_OPCODE_NEGATE: + assert (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_frc( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + assert (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_flr( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_rnd( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + + micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( &r[0], 0, CHAN_X ); + micro_lg2( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_pow( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + + micro_mul( &r[2], &r[0], &r[1] ); + + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + micro_mul( &r[5], &r[3], &r[4] ); + micro_sub( &r[2], &r[2], &r[5] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + + micro_mul( &r[3], &r[3], &r[2] ); + + FETCH(&r[5], 0, CHAN_X); + + micro_mul( &r[1], &r[1], &r[5] ); + micro_sub( &r[3], &r[3], &r[1] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + micro_mul( &r[5], &r[5], &r[4] ); + micro_mul( &r[0], &r[0], &r[2] ); + micro_sub( &r[5], &r[5], &r[0] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + assert (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + + micro_abs( &r[0], &r[0] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + assert (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 1, CHAN_W); + + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + micro_cos( &r[0], &r[0] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ddx( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ddy( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_KILP: + exec_kilp (mach, inst); + break; + + case TGSI_OPCODE_KIL: + /* for enabled ExecMask bits, set the killed bit */ + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + break; + + case TGSI_OPCODE_PK2H: + assert (0); + break; + + case TGSI_OPCODE_PK2US: + assert (0); + break; + + case TGSI_OPCODE_PK4B: + assert (0); + break; + + case TGSI_OPCODE_PK4UB: + assert (0); + break; + + case TGSI_OPCODE_RFL: + assert (0); + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_eq( &r[0], &r[0], &r[1], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], + &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SFL: + assert (0); + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SIN: + FETCH( &r[0], 0, CHAN_X ); + micro_sin( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SNE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_STR: + assert (0); + break; + + case TGSI_OPCODE_TEX: + /* simple texture lookup */ + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, FALSE); + break; + + case TGSI_OPCODE_TXB: + /* Texture lookup with lod bias */ + /* src[0] = texcoord (src[0].w = LOD bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXD: + /* Texture lookup with explict partial derivatives */ + /* src[0] = texcoord */ + /* src[1] = d[strq]/dx */ + /* src[2] = d[strq]/dy */ + /* src[3] = sampler unit */ + assert (0); + break; + + case TGSI_OPCODE_TXL: + /* Texture lookup with explit LOD */ + /* src[0] = texcoord (src[0].w = LOD) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection */ + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, TRUE); + break; + + case TGSI_OPCODE_UP2H: + assert (0); + break; + + case TGSI_OPCODE_UP2US: + assert (0); + break; + + case TGSI_OPCODE_UP4B: + assert (0); + break; + + case TGSI_OPCODE_UP4UB: + assert (0); + break; + + case TGSI_OPCODE_X2D: + assert (0); + break; + + case TGSI_OPCODE_ARA: + assert (0); + break; + + case TGSI_OPCODE_ARR: + assert (0); + break; + + case TGSI_OPCODE_BRA: + assert (0); + break; + + case TGSI_OPCODE_CAL: + /* skip the call if no execution channels are enabled */ + if (mach->ExecMask) { + /* do the call */ + + /* push the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + + assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop++] = *pc; + *pc = inst->InstructionExtLabel.Label; + } + break; + + case TGSI_OPCODE_RET: + mach->FuncMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); + + if (mach->ExecMask == 0x0) { + /* really return now (otherwise, keep executing */ + + if (mach->CallStackTop == 0) { + /* returning from main() */ + *pc = -1; + return; + } + *pc = mach->CallStack[--mach->CallStackTop]; + + /* pop the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + UPDATE_EXEC_MASK(mach); + } + break; + + case TGSI_OPCODE_SSG: + assert (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + micro_cos( &r[1], &r[0] ); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + micro_sin( &r[1], &r[0] ); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_NRM: + assert (0); + break; + + case TGSI_OPCODE_DIV: + assert( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_IF: + /* push CondMask */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + FETCH( &r[0], 0, CHAN_X ); + /* update CondMask */ + if( ! r[0].u[0] ) { + mach->CondMask &= ~0x1; + } + if( ! r[0].u[1] ) { + mach->CondMask &= ~0x2; + } + if( ! r[0].u[2] ) { + mach->CondMask &= ~0x4; + } + if( ! r[0].u[3] ) { + mach->CondMask &= ~0x8; + } + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ELSE */ + break; + + case TGSI_OPCODE_ELSE: + /* invert CondMask wrt previous mask */ + { + uint prevMask; + assert(mach->CondStackTop > 0); + prevMask = mach->CondStack[mach->CondStackTop - 1]; + mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ENDIF */ + } + break; + + case TGSI_OPCODE_ENDIF: + /* pop CondMask */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_END: + /* halt execution */ + *pc = -1; + break; + + case TGSI_OPCODE_REP: + assert (0); + break; + + case TGSI_OPCODE_ENDREP: + assert (0); + break; + + case TGSI_OPCODE_PUSHA: + assert (0); + break; + + case TGSI_OPCODE_POPA: + assert (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ceil( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_i2f( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_not( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_trunc( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_shl( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_ishr( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_and( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_or( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + assert (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_xor( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + assert (0); + break; + + case TGSI_OPCODE_TXF: + assert (0); + break; + + case TGSI_OPCODE_TXQ: + assert (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + case TGSI_OPCODE_LOOP: + /* fall-through (for now) */ + case TGSI_OPCODE_BGNLOOP2: + /* push LoopMask and ContMasks */ + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + break; + + case TGSI_OPCODE_ENDLOOP: + /* fall-through (for now at least) */ + case TGSI_OPCODE_ENDLOOP2: + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + UPDATE_EXEC_MASK(mach); + if (mach->ExecMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + *pc = inst->InstructionExtLabel.Label + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + } + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BRK: + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_CONT: + /* turn off cont channels for each enabled exec channel */ + mach->ContMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BGNSUB: + /* no-op */ + break; + + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + + case TGSI_OPCODE_NOISE1: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE2: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE3: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE4: + assert( 0 ); + break; + + case TGSI_OPCODE_NOP: + break; + + default: + assert( 0 ); + } +} + + +/** + * Run TGSI interpreter. + * \return bitmask of "alive" quad components + */ +uint +tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) +{ + uint i; + int pc = 0; + + mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ContMask = 0xf; + mach->FuncMask = 0xf; + mach->ExecMask = 0xf; + + mach->CondStackTop = 0; /* temporarily subvert this assertion */ + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + + /* execute declarations (interpolants) */ + for (i = 0; i < mach->NumDeclarations; i++) { + exec_declaration( mach, mach->Declarations+i ); + } + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + assert(pc < (int) mach->NumInstructions); + exec_instruction( mach, mach->Instructions + pc, &pc ); + } + +#if 0 + /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. + */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif + + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h new file mode 100644 index 00000000000..4f30650b07b --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -0,0 +1,253 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if !defined TGSI_EXEC_H +#define TGSI_EXEC_H + +#include "pipe/p_compiler.h" + +#if defined __cplusplus +extern "C" { +#endif + +#define MAX_LABELS 1024 + +#define NUM_CHANNELS 4 /* R,G,B,A */ +#define QUAD_SIZE 4 /* 4 pixel/quad */ + +/** + * Registers may be treated as float, signed int or unsigned int. + */ +union tgsi_exec_channel +{ + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; +}; + +/** + * A vector[RGBA] of channels[4 pixels] + */ +struct tgsi_exec_vector +{ + union tgsi_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct tgsi_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache; /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct tgsi_sampler +{ + const struct pipe_sampler_state *state; + struct pipe_texture *texture; + /** Get samples for four fragments in a quad */ + void (*get_samples)(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + void *pipe; /*XXX temporary*/ + struct softpipe_tile_cache *cache; +}; + +/** + * For branching/calling subroutines. + */ +struct tgsi_exec_labels +{ + unsigned labels[MAX_LABELS][2]; + unsigned count; +}; + + +#define TGSI_EXEC_NUM_TEMPS 128 +#define TGSI_EXEC_NUM_TEMP_EXTRAS 6 +#define TGSI_EXEC_NUM_IMMEDIATES 256 + +/* + * Locations of various utility registers (_I = Index, _C = Channel) + */ +#define TGSI_EXEC_TEMP_00000000_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_00000000_C 0 + +#define TGSI_EXEC_TEMP_7FFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_7FFFFFFF_C 1 + +#define TGSI_EXEC_TEMP_80000000_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_80000000_C 2 + +#define TGSI_EXEC_TEMP_FFFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_FFFFFFFF_C 3 + +#define TGSI_EXEC_TEMP_ONE_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_ONE_C 0 + +#define TGSI_EXEC_TEMP_TWO_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_TWO_C 1 + +#define TGSI_EXEC_TEMP_128_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_128_C 2 + +#define TGSI_EXEC_TEMP_MINUS_128_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_MINUS_128_C 3 + +#define TGSI_EXEC_TEMP_KILMASK_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_KILMASK_C 0 + +#define TGSI_EXEC_TEMP_OUTPUT_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_OUTPUT_C 1 + +#define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_PRIMITIVE_C 2 + +#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_THREE_C 3 + +#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) +#define TGSI_EXEC_TEMP_HALF_C 0 + +#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) + +#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5) + + +#define TGSI_EXEC_MAX_COND_NESTING 20 +#define TGSI_EXEC_MAX_LOOP_NESTING 20 +#define TGSI_EXEC_MAX_CALL_NESTING 20 + +/** + * Run-time virtual machine state for executing TGSI shader. + */ +struct tgsi_exec_machine +{ + /* Total = program temporaries + internal temporaries + * + 1 padding to align to 16 bytes + */ + struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; + + /* + * This will point to _Temps after aligning to 16B boundary. + */ + struct tgsi_exec_vector *Temps; + struct tgsi_exec_vector *Addrs; + + struct tgsi_sampler *Samplers; + + float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + unsigned ImmLimit; + const float (*Consts)[4]; + struct tgsi_exec_vector *Inputs; + struct tgsi_exec_vector *Outputs; + const struct tgsi_token *Tokens; + unsigned Processor; + + /* GEOMETRY processor only. */ + unsigned *Primitives; + + /* FRAGMENT processor only. */ + const struct tgsi_interp_coef *InterpCoefs; + struct tgsi_exec_vector QuadPos; + + /* Conditional execution masks */ + uint CondMask; /**< For IF/ELSE/ENDIF */ + uint LoopMask; /**< For BGNLOOP/ENDLOOP */ + uint ContMask; /**< For loop CONT statements */ + uint FuncMask; /**< For function calls */ + uint ExecMask; /**< = CondMask & LoopMask */ + + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; + int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; + + /** Loop continue mask stack (see comments in tgsi_exec.c) */ + uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int ContStackTop; + + /** Function execution mask stack (for executing subroutine code) */ + uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; + int FuncStackTop; + + /** Function call stack for saving/restoring the program counter */ + uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + int CallStackTop; + + struct tgsi_full_instruction *Instructions; + uint NumInstructions; + + struct tgsi_full_declaration *Declarations; + uint NumDeclarations; + + struct tgsi_exec_labels Labels; +}; + +void +tgsi_exec_machine_init( + struct tgsi_exec_machine *mach ); + + +void +tgsi_exec_machine_bind_shader( + struct tgsi_exec_machine *mach, + const struct tgsi_token *tokens, + uint numSamplers, + struct tgsi_sampler *samplers); + +uint +tgsi_exec_machine_run( + struct tgsi_exec_machine *mach ); + + +void +tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* TGSI_EXEC_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c new file mode 100644 index 00000000000..5371a88b964 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_iterate.h" + +boolean +tgsi_iterate_shader( + const struct tgsi_token *tokens, + struct tgsi_iterate_context *ctx ) +{ + struct tgsi_parse_context parse; + + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) + return FALSE; + + ctx->processor = parse.FullHeader.Processor; + ctx->version = parse.FullVersion.Version; + + if (ctx->prolog) + if (!ctx->prolog( ctx )) + goto fail; + + while (!tgsi_parse_end_of_tokens( &parse )) { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (ctx->iterate_instruction) + if (!ctx->iterate_instruction( ctx, &parse.FullToken.FullInstruction )) + goto fail; + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + if (ctx->iterate_declaration) + if (!ctx->iterate_declaration( ctx, &parse.FullToken.FullDeclaration )) + goto fail; + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + if (ctx->iterate_immediate) + if (!ctx->iterate_immediate( ctx, &parse.FullToken.FullImmediate )) + goto fail; + break; + + default: + assert( 0 ); + } + } + + if (ctx->epilog) + if (!ctx->epilog( ctx )) + goto fail; + + tgsi_parse_free( &parse ); + return TRUE; + +fail: + tgsi_parse_free( &parse ); + return FALSE; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/tgsi_iterate.h new file mode 100644 index 00000000000..ec7b85bf63d --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_ITERATE_H +#define TGSI_ITERATE_H + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_iterate_context +{ + boolean + (* prolog)( + struct tgsi_iterate_context *ctx ); + + boolean + (* iterate_instruction)( + struct tgsi_iterate_context *ctx, + struct tgsi_full_instruction *inst ); + + boolean + (* iterate_declaration)( + struct tgsi_iterate_context *ctx, + struct tgsi_full_declaration *decl ); + + boolean + (* iterate_immediate)( + struct tgsi_iterate_context *ctx, + struct tgsi_full_immediate *imm ); + + boolean + (* epilog)( + struct tgsi_iterate_context *ctx ); + + struct tgsi_processor processor; + struct tgsi_version version; +}; + +boolean +tgsi_iterate_shader( + const struct tgsi_token *tokens, + struct tgsi_iterate_context *ctx ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_ITERATE_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c new file mode 100644 index 00000000000..d16f0cdcad4 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -0,0 +1,332 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" + +void +tgsi_full_token_init( + union tgsi_full_token *full_token ) +{ + full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION; +} + +void +tgsi_full_token_free( + union tgsi_full_token *full_token ) +{ + if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) { + FREE( (void *) full_token->FullImmediate.u.Pointer ); + } +} + +unsigned +tgsi_parse_init( + struct tgsi_parse_context *ctx, + const struct tgsi_token *tokens ) +{ + ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0]; + if( ctx->FullVersion.Version.MajorVersion > 1 ) { + return TGSI_PARSE_ERROR; + } + + ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1]; + if( ctx->FullHeader.Header.HeaderSize >= 2 ) { + ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2]; + } + else { + ctx->FullHeader.Processor = tgsi_default_processor(); + } + + ctx->Tokens = tokens; + ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize; + + tgsi_full_token_init( &ctx->FullToken ); + + return TGSI_PARSE_OK; +} + +void +tgsi_parse_free( + struct tgsi_parse_context *ctx ) +{ + tgsi_full_token_free( &ctx->FullToken ); +} + +boolean +tgsi_parse_end_of_tokens( + struct tgsi_parse_context *ctx ) +{ + return ctx->Position >= + 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; +} + +static void +next_token( + struct tgsi_parse_context *ctx, + void *token ) +{ + assert( !tgsi_parse_end_of_tokens( ctx ) ); + + *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++]; +} + +void +tgsi_parse_token( + struct tgsi_parse_context *ctx ) +{ + struct tgsi_token token; + unsigned i; + + tgsi_full_token_free( &ctx->FullToken ); + tgsi_full_token_init( &ctx->FullToken ); + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; + + *decl = tgsi_default_full_declaration(); + decl->Declaration = *(struct tgsi_declaration *) &token; + + next_token( ctx, &decl->DeclarationRange ); + + if( decl->Declaration.Semantic ) { + next_token( ctx, &decl->Semantic ); + } + + break; + } + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; + + *imm = tgsi_default_full_immediate(); + imm->Immediate = *(struct tgsi_immediate *) &token; + + assert( !imm->Immediate.Extended ); + + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + imm->u.Pointer = MALLOC( + sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) ); + for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] ); + } + break; + + default: + assert( 0 ); + } + + break; + } + + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction; + unsigned extended; + + *inst = tgsi_default_full_instruction(); + inst->Instruction = *(struct tgsi_instruction *) &token; + + extended = inst->Instruction.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_INSTRUCTION_EXT_TYPE_NV: + inst->InstructionExtNv = + *(struct tgsi_instruction_ext_nv *) &token; + break; + + case TGSI_INSTRUCTION_EXT_TYPE_LABEL: + inst->InstructionExtLabel = + *(struct tgsi_instruction_ext_label *) &token; + break; + + case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: + inst->InstructionExtTexture = + *(struct tgsi_instruction_ext_texture *) &token; + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + + assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); + + for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + unsigned extended; + + next_token( ctx, &inst->FullDstRegisters[i].DstRegister ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullDstRegisters[i].DstRegister.Indirect ); + assert( !inst->FullDstRegisters[i].DstRegister.Dimension ); + + extended = inst->FullDstRegisters[i].DstRegister.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE: + inst->FullDstRegisters[i].DstRegisterExtConcode = + *(struct tgsi_dst_register_ext_concode *) &token; + break; + + case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: + inst->FullDstRegisters[i].DstRegisterExtModulate = + *(struct tgsi_dst_register_ext_modulate *) &token; + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + } + + assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); + + for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + unsigned extended; + + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister ); + + extended = inst->FullSrcRegisters[i].SrcRegister.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: + inst->FullSrcRegisters[i].SrcRegisterExtSwz = + *(struct tgsi_src_register_ext_swz *) &token; + break; + + case TGSI_SRC_REGISTER_EXT_TYPE_MOD: + inst->FullSrcRegisters[i].SrcRegisterExtMod = + *(struct tgsi_src_register_ext_mod *) &token; + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + + if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + } + + if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim ); + + /* + * No support for multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended ); + + if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + } + } + } + + break; + } + + default: + assert( 0 ); + } +} + + +unsigned +tgsi_num_tokens(const struct tgsi_token *tokens) +{ + struct tgsi_parse_context ctx; + if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) { + unsigned len = (ctx.FullHeader.Header.HeaderSize + + ctx.FullHeader.Header.BodySize + + 1); + return len; + } + return 0; +} + + +/** + * Make a new copy of a token array. + */ +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens) +{ + unsigned n = tgsi_num_tokens(tokens); + unsigned bytes = n * sizeof(struct tgsi_token); + struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes); + if (new_tokens) + memcpy(new_tokens, tokens, bytes); + return new_tokens; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h new file mode 100644 index 00000000000..054350712d8 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -0,0 +1,151 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_PARSE_H +#define TGSI_PARSE_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_full_version +{ + struct tgsi_version Version; +}; + +struct tgsi_full_header +{ + struct tgsi_header Header; + struct tgsi_processor Processor; +}; + +struct tgsi_full_dst_register +{ + struct tgsi_dst_register DstRegister; + struct tgsi_dst_register_ext_concode DstRegisterExtConcode; + struct tgsi_dst_register_ext_modulate DstRegisterExtModulate; +}; + +struct tgsi_full_src_register +{ + struct tgsi_src_register SrcRegister; + struct tgsi_src_register_ext_swz SrcRegisterExtSwz; + struct tgsi_src_register_ext_mod SrcRegisterExtMod; + struct tgsi_src_register SrcRegisterInd; + struct tgsi_dimension SrcRegisterDim; + struct tgsi_src_register SrcRegisterDimInd; +}; + +struct tgsi_full_declaration +{ + struct tgsi_declaration Declaration; + struct tgsi_declaration_range DeclarationRange; + struct tgsi_declaration_semantic Semantic; +}; + +struct tgsi_full_immediate +{ + struct tgsi_immediate Immediate; + union + { + const void *Pointer; + const struct tgsi_immediate_float32 *ImmediateFloat32; + } u; +}; + +#define TGSI_FULL_MAX_DST_REGISTERS 2 +#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */ + +struct tgsi_full_instruction +{ + struct tgsi_instruction Instruction; + struct tgsi_instruction_ext_nv InstructionExtNv; + struct tgsi_instruction_ext_label InstructionExtLabel; + struct tgsi_instruction_ext_texture InstructionExtTexture; + struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; + struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; +}; + +union tgsi_full_token +{ + struct tgsi_token Token; + struct tgsi_full_declaration FullDeclaration; + struct tgsi_full_immediate FullImmediate; + struct tgsi_full_instruction FullInstruction; +}; + +void +tgsi_full_token_init( + union tgsi_full_token *full_token ); + +void +tgsi_full_token_free( + union tgsi_full_token *full_token ); + +struct tgsi_parse_context +{ + const struct tgsi_token *Tokens; + unsigned Position; + struct tgsi_full_version FullVersion; + struct tgsi_full_header FullHeader; + union tgsi_full_token FullToken; +}; + +#define TGSI_PARSE_OK 0 +#define TGSI_PARSE_ERROR 1 + +unsigned +tgsi_parse_init( + struct tgsi_parse_context *ctx, + const struct tgsi_token *tokens ); + +void +tgsi_parse_free( + struct tgsi_parse_context *ctx ); + +boolean +tgsi_parse_end_of_tokens( + struct tgsi_parse_context *ctx ); + +void +tgsi_parse_token( + struct tgsi_parse_context *ctx ); + +unsigned +tgsi_num_tokens(const struct tgsi_token *tokens); + +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_PARSE_H */ + diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c new file mode 100644 index 00000000000..2e3ec96b5b5 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -0,0 +1,341 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_sanity.h" +#include "tgsi_iterate.h" + +#define MAX_REGISTERS 256 + +typedef uint reg_flag; + +#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) + +struct sanity_check_ctx +{ + struct tgsi_iterate_context iter; + + reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; + reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; + boolean regs_ind_used[TGSI_FILE_COUNT]; + uint num_imms; + uint num_instructions; + uint index_of_END; + + uint errors; + uint warnings; +}; + +static void +report_error( + struct sanity_check_ctx *ctx, + const char *format, + ... ) +{ + va_list args; + + debug_printf( "Error : " ); + va_start( args, format ); + _debug_vprintf( format, args ); + va_end( args ); + debug_printf( "\n" ); + ctx->errors++; +} + +static void +report_warning( + struct sanity_check_ctx *ctx, + const char *format, + ... ) +{ + va_list args; + + debug_printf( "Warning: " ); + va_start( args, format ); + _debug_vprintf( format, args ); + va_end( args ); + debug_printf( "\n" ); + ctx->warnings++; +} + +static boolean +check_file_name( + struct sanity_check_ctx *ctx, + uint file ) +{ + if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) { + report_error( ctx, "Invalid register file name" ); + return FALSE; + } + return TRUE; +} + +static boolean +is_register_declared( + struct sanity_check_ctx *ctx, + uint file, + int index ) +{ + assert( index >= 0 && index < MAX_REGISTERS ); + + return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; +} + +static boolean +is_any_register_declared( + struct sanity_check_ctx *ctx, + uint file ) +{ + uint i; + + for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++) + if (ctx->regs_decl[file][i]) + return TRUE; + return FALSE; +} + +static boolean +is_register_used( + struct sanity_check_ctx *ctx, + uint file, + int index ) +{ + assert( index < MAX_REGISTERS ); + + return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; +} + +static const char *file_names[] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static boolean +check_register_usage( + struct sanity_check_ctx *ctx, + uint file, + int index, + const char *name, + boolean indirect_access ) +{ + if (!check_file_name( ctx, file )) + return FALSE; + if (indirect_access) { + if (!is_any_register_declared( ctx, file )) + report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); + ctx->regs_ind_used[file] = TRUE; + } + else { + if (!is_register_declared( ctx, file, index )) + report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); + ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + } + return TRUE; +} + +static boolean +iter_instruction( + struct tgsi_iterate_context *iter, + struct tgsi_full_instruction *inst ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + uint i; + + /* There must be no other instructions after END. + */ + if (ctx->index_of_END != ~0) { + report_error( ctx, "Unexpected instruction after END" ); + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + ctx->index_of_END = ctx->num_instructions; + } + + /* Check destination and source registers' validity. + * Mark the registers as used. + */ + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + check_register_usage( + ctx, + inst->FullDstRegisters[i].DstRegister.File, + inst->FullDstRegisters[i].DstRegister.Index, + "destination", + FALSE ); + } + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + check_register_usage( + ctx, + inst->FullSrcRegisters[i].SrcRegister.File, + inst->FullSrcRegisters[i].SrcRegister.Index, + "source", + (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect ); + if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { + uint file; + int index; + + file = inst->FullSrcRegisters[i].SrcRegisterInd.File; + index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; + check_register_usage( + ctx, + file, + index, + "indirect", + FALSE ); + if (file != TGSI_FILE_ADDRESS || index != 0) + report_warning( ctx, "Indirect register not ADDR[0]" ); + } + } + + ctx->num_instructions++; + + return TRUE; +} + +static boolean +iter_declaration( + struct tgsi_iterate_context *iter, + struct tgsi_full_declaration *decl ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + uint file; + uint i; + + /* No declarations allowed after the first instruction. + */ + if (ctx->num_instructions > 0) + report_error( ctx, "Instruction expected but declaration found" ); + + /* Check registers' validity. + * Mark the registers as declared. + */ + file = decl->Declaration.File; + if (!check_file_name( ctx, file )) + return TRUE; + for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { + if (is_register_declared( ctx, file, i )) + report_error( ctx, "The same register declared twice" ); + ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); + } + + return TRUE; +} + +static boolean +iter_immediate( + struct tgsi_iterate_context *iter, + struct tgsi_full_immediate *imm ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + + assert( ctx->num_imms < MAX_REGISTERS ); + + /* No immediates allowed after the first instruction. + */ + if (ctx->num_instructions > 0) + report_error( ctx, "Instruction expected but immediate found" ); + + /* Mark the register as declared. + */ + ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); + ctx->num_imms++; + + /* Check data type validity. + */ + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { + report_error( ctx, "Invalid immediate data type" ); + return TRUE; + } + + return TRUE; +} + +static boolean +epilog( + struct tgsi_iterate_context *iter ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + uint file; + + /* There must be an END instruction at the end. + */ + if (ctx->index_of_END == ~0 || ctx->index_of_END != ctx->num_instructions - 1) { + report_error( ctx, "Expected END at end of instruction sequence" ); + } + + /* Check if all declared registers were used. + */ + for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { + uint i; + + for (i = 0; i < MAX_REGISTERS; i++) { + if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { + report_warning( ctx, "Register never used" ); + } + } + } + + /* Print totals, if any. + */ + if (ctx->errors || ctx->warnings) + debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings ); + + return TRUE; +} + +boolean +tgsi_sanity_check( + struct tgsi_token *tokens ) +{ + struct sanity_check_ctx ctx; + + ctx.iter.prolog = NULL; + ctx.iter.iterate_instruction = iter_instruction; + ctx.iter.iterate_declaration = iter_declaration; + ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.epilog = epilog; + + memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); + memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); + memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) ); + ctx.num_imms = 0; + ctx.num_instructions = 0; + ctx.index_of_END = ~0; + + ctx.errors = 0; + ctx.warnings = 0; + + if (!tgsi_iterate_shader( tokens, &ctx.iter )) + return FALSE; + + return ctx.errors == 0; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h new file mode 100644 index 00000000000..ca45e94c7ad --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SANITY_H +#define TGSI_SANITY_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +/* Check the given token stream for errors and common mistakes. + * Diagnostic messages are printed out to the debug output. + * Returns TRUE if there are no errors, even though there could be some warnings. + */ +boolean +tgsi_sanity_check( + struct tgsi_token *tokens ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_SANITY_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c new file mode 100644 index 00000000000..59bcf10b530 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -0,0 +1,226 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI program scan utility. + * Used to determine which registers and instructions are used by a shader. + * + * Authors: Brian Paul + */ + + +#include "tgsi_scan.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_build.h" + +#include "pipe/p_util.h" + + + +/** + */ +void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info) +{ + uint procType, i; + struct tgsi_parse_context parse; + + memset(info, 0, sizeof(*info)); + for (i = 0; i < TGSI_FILE_COUNT; i++) + info->file_max[i] = -1; + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n"); + return; + } + procType = parse.FullHeader.Processor.Processor; + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY); + + + /** + ** Loop over incoming program tokens/instructions + */ + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + info->num_tokens++; + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + + assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); + info->opcode_count[fullinst->Instruction.Opcode]++; + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + uint file = fulldecl->Declaration.File; + uint i; + for (i = fulldecl->DeclarationRange.First; + i <= fulldecl->DeclarationRange.Last; + i++) { + + /* only first 32 regs will appear in this bitfield */ + info->file_mask[file] |= (1 << i); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)i); + + if (file == TGSI_FILE_INPUT) { + info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; + info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->num_inputs++; + } + + if (file == TGSI_FILE_OUTPUT) { + info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; + info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->num_outputs++; + } + + /* special case */ + if (procType == TGSI_PROCESSOR_FRAGMENT && + file == TGSI_FILE_OUTPUT && + fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + info->writes_z = TRUE; + } + } + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + info->immediate_count++; + break; + + default: + assert( 0 ); + } + } + + assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs ); + assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs ); + + info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || + info->opcode_count[TGSI_OPCODE_KILP]); + + tgsi_parse_free (&parse); +} + + + +/** + * Check if the given shader is a "passthrough" shader consisting of only + * MOV instructions of the form: MOV OUT[n], IN[n] + * + */ +boolean +tgsi_is_passthrough_shader(const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n"); + return FALSE; + } + + /** + ** Loop over incoming program tokens/instructions + */ + while (!tgsi_parse_end_of_tokens(&parse)) { + + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst = + &parse.FullToken.FullInstruction; + const struct tgsi_full_src_register *src = + &fullinst->FullSrcRegisters[0]; + const struct tgsi_full_dst_register *dst = + &fullinst->FullDstRegisters[0]; + + /* Do a whole bunch of checks for a simple move */ + if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || + src->SrcRegister.File != TGSI_FILE_INPUT || + dst->DstRegister.File != TGSI_FILE_OUTPUT || + src->SrcRegister.Index != dst->DstRegister.Index || + + src->SrcRegister.Negate || + src->SrcRegisterExtMod.Negate || + src->SrcRegisterExtMod.Absolute || + src->SrcRegisterExtMod.Scale2X || + src->SrcRegisterExtMod.Bias || + src->SrcRegisterExtMod.Complement || + + src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || + + src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W || + + dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW) + { + tgsi_parse_free(&parse); + return FALSE; + } + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + /* fall-through */ + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* fall-through */ + default: + ; /* no-op */ + } + } + + tgsi_parse_free(&parse); + + /* if we get here, it's a pass-through shader */ + return TRUE; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h new file mode 100644 index 00000000000..5cb6efb3439 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -0,0 +1,74 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SCAN_H +#define TGSI_SCAN_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" + + +/** + * Shader summary info + */ +struct tgsi_shader_info +{ + uint num_tokens; + + /* XXX eventually remove the corresponding fields from pipe_shader_state: */ + ubyte num_inputs; + ubyte num_outputs; + ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ + ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + + uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ + uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ + int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ + + uint immediate_count; /**< number of immediates declared */ + + uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ + + boolean writes_z; /**< does fragment shader write Z value? */ + boolean uses_kill; /**< KIL or KILP instruction used? */ +}; + + +extern void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info); + + +extern boolean +tgsi_is_passthrough_shader(const struct tgsi_token *tokens); + + +#endif /* TGSI_SCAN_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c new file mode 100644 index 00000000000..0cb1f11ef20 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -0,0 +1,2275 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi_exec.h" +#include "tgsi_sse2.h" + +#include "rtasm/rtasm_x86sse.h" + +#ifdef PIPE_ARCH_X86 + +/* for 1/sqrt() + * + * This costs about 100fps (close to 10%) in gears: + */ +#define HIGH_PRECISION 1 + + +#define FOR_EACH_CHANNEL( CHAN )\ + for( CHAN = 0; CHAN < 4; CHAN++ ) + +#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + if( IS_DST0_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ + FOR_EACH_CHANNEL( CHAN )\ + IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +/** + * X86 utility functions. + */ + +static struct x86_reg +make_xmm( + unsigned xmm ) +{ + return x86_make_reg( + file_XMM, + (enum x86_reg_name) xmm ); +} + +/** + * X86 register mapping helpers. + */ + +static struct x86_reg +get_const_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_CX ); +} + +static struct x86_reg +get_input_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_AX ); +} + +static struct x86_reg +get_output_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DX ); +} + +static struct x86_reg +get_temp_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_BX ); +} + +static struct x86_reg +get_coef_base( void ) +{ + return get_output_base(); +} + +static struct x86_reg +get_immediate_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DI ); +} + + +/** + * Data access helpers. + */ + + +static struct x86_reg +get_immediate( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_immediate_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_const( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_const_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_input( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_input_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_output( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_output_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_temp( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_temp_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_coef( + unsigned vec, + unsigned chan, + unsigned member ) +{ + return x86_make_disp( + get_coef_base(), + ((vec * 3 + member) * 4 + chan) * 4 ); +} + + +static void +emit_ret( + struct x86_function *func ) +{ + x86_ret( func ); +} + + +/** + * Data fetch helpers. + */ + +/** + * Copy a shader constant to xmm register + * \param xmm the destination xmm register + * \param vec the src const buffer index + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_const( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_const( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +static void +emit_immediate( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_immediate( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + + +/** + * Copy a shader input to xmm register + * \param xmm the destination xmm register + * \param vec the src input attrib + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_inputf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + make_xmm( xmm ), + get_input( vec, chan ) ); +} + +/** + * Store an xmm register to a shader output + * \param xmm the source xmm register + * \param vec the dest output attrib + * \param chan src dest channel to store (X, Y, Z or W) + */ +static void +emit_output( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + get_output( vec, chan ), + make_xmm( xmm ) ); +} + +/** + * Copy a shader temporary to xmm register + * \param xmm the destination xmm register + * \param vec the src temp register + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_tempf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movaps( + func, + make_xmm( xmm ), + get_temp( vec, chan ) ); +} + +/** + * Load an xmm register with an input attrib coefficient (a0, dadx or dady) + * \param xmm the destination xmm register + * \param vec the src input/attribute coefficient index + * \param chan src channel to fetch (X, Y, Z or W) + * \param member 0=a0, 1=dadx, 2=dady + */ +static void +emit_coef( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan, + unsigned member ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_coef( vec, chan, member ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +/** + * Data store helpers. + */ + +static void +emit_inputs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + get_input( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_temps( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movaps( + func, + get_temp( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_addrs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_temps( + func, + xmm, + vec + TGSI_EXEC_NUM_TEMPS, + chan ); +} + +/** + * Coefficent fetch helpers. + */ + +static void +emit_coef_a0( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 0 ); +} + +static void +emit_coef_dadx( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 1 ); +} + +static void +emit_coef_dady( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 2 ); +} + +/** + * Function call helpers. + */ + +static void +emit_push_gp( + struct x86_function *func ) +{ + x86_push( + func, + x86_make_reg( file_REG32, reg_AX) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_CX) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX) ); +} + +static void +x86_pop_gp( + struct x86_function *func ) +{ + /* Restore GP registers in a reverse order. + */ + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_CX) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX) ); +} + +static void +emit_func_call_dst( + struct x86_function *func, + unsigned xmm_dst, + void (PIPE_CDECL *code)() ) +{ + sse_movaps( + func, + get_temp( TEMP_R0, 0 ), + make_xmm( xmm_dst ) ); + + emit_push_gp( + func ); + + { + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); + x86_mov_reg_imm( func, ecx, (unsigned long) code ); + x86_call( func, ecx ); + x86_pop(func, ecx ); + } + + + x86_pop_gp( + func ); + + sse_movaps( + func, + make_xmm( xmm_dst ), + get_temp( TEMP_R0, 0 ) ); +} + +static void +emit_func_call_dst_src( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src, + void (PIPE_CDECL *code)() ) +{ + sse_movaps( + func, + get_temp( TEMP_R0, 1 ), + make_xmm( xmm_src ) ); + + emit_func_call_dst( + func, + xmm_dst, + code ); +} + +/** + * Low-level instruction translators. + */ + +static void +emit_abs( + struct x86_function *func, + unsigned xmm ) +{ + sse_andps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_7FFFFFFF_I, + TGSI_EXEC_TEMP_7FFFFFFF_C ) ); +} + +static void +emit_add( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_addps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void PIPE_CDECL +cos4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = cosf( store[X + 0] ); + store[X + 1] = cosf( store[X + 1] ); + store[X + 2] = cosf( store[X + 2] ); + store[X + 3] = cosf( store[X + 3] ); +} + +static void +emit_cos( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + cos4f ); +} + +static void PIPE_CDECL +ex24f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = powf( 2.0f, store[X + 0] ); + store[X + 1] = powf( 2.0f, store[X + 1] ); + store[X + 2] = powf( 2.0f, store[X + 2] ); + store[X + 3] = powf( 2.0f, store[X + 3] ); +} + +static void +emit_ex2( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + ex24f ); +} + +static void +emit_f2it( + struct x86_function *func, + unsigned xmm ) +{ + sse2_cvttps2dq( + func, + make_xmm( xmm ), + make_xmm( xmm ) ); +} + +static void PIPE_CDECL +flr4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = floorf( store[X + 0] ); + store[X + 1] = floorf( store[X + 1] ); + store[X + 2] = floorf( store[X + 2] ); + store[X + 3] = floorf( store[X + 3] ); +} + +static void +emit_flr( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + flr4f ); +} + +static void PIPE_CDECL +frc4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] -= floorf( store[X + 0] ); + store[X + 1] -= floorf( store[X + 1] ); + store[X + 2] -= floorf( store[X + 2] ); + store[X + 3] -= floorf( store[X + 3] ); +} + +static void +emit_frc( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + frc4f ); +} + +static void PIPE_CDECL +lg24f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = LOG2( store[X + 0] ); + store[X + 1] = LOG2( store[X + 1] ); + store[X + 2] = LOG2( store[X + 2] ); + store[X + 3] = LOG2( store[X + 3] ); +} + +static void +emit_lg2( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + lg24f ); +} + +static void +emit_MOV( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_movups( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_mul (struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src) +{ + sse_mulps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_neg( + struct x86_function *func, + unsigned xmm ) +{ + sse_xorps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void PIPE_CDECL +pow4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = powf( store[X + 0], store[X + 4] ); + store[X + 1] = powf( store[X + 1], store[X + 5] ); + store[X + 2] = powf( store[X + 2], store[X + 6] ); + store[X + 3] = powf( store[X + 3], store[X + 7] ); +} + +static void +emit_pow( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_func_call_dst_src( + func, + xmm_dst, + xmm_src, + pow4f ); +} + +static void +emit_rcp ( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. Need to either emit a proper divide or use the + * iterative technique described below in emit_rsqrt(). + */ + sse2_rcpps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_rsqrt( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ +#if HIGH_PRECISION + /* Although rsqrtps() and rcpps() are low precision on some/all SSE + * implementations, it is possible to improve its precision at + * fairly low cost, using a newton/raphson step, as below: + * + * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) + * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * + * See: http://softwarecommunity.intel.com/articles/eng/1818.htm + */ + { + struct x86_reg dst = make_xmm( xmm_dst ); + struct x86_reg src = make_xmm( xmm_src ); + struct x86_reg tmp0 = make_xmm( 2 ); + struct x86_reg tmp1 = make_xmm( 3 ); + + assert( xmm_dst != xmm_src ); + assert( xmm_dst != 2 && xmm_dst != 3 ); + assert( xmm_src != 2 && xmm_src != 3 ); + + sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); + sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); + sse_rsqrtps( func, tmp1, src ); + sse_mulps( func, src, tmp1 ); + sse_mulps( func, dst, tmp1 ); + sse_mulps( func, src, tmp1 ); + sse_subps( func, tmp0, src ); + sse_mulps( func, dst, tmp0 ); + } +#else + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. + */ + sse_rsqrtps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +#endif +} + +static void +emit_setsign( + struct x86_function *func, + unsigned xmm ) +{ + sse_orps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void PIPE_CDECL +sin4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = sinf( store[X + 0] ); + store[X + 1] = sinf( store[X + 1] ); + store[X + 2] = sinf( store[X + 2] ); + store[X + 3] = sinf( store[X + 3] ); +} + +static void +emit_sin (struct x86_function *func, + unsigned xmm_dst) +{ + emit_func_call_dst( + func, + xmm_dst, + sin4f ); +} + +static void +emit_sub( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_subps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +/** + * Register fetch. + */ + +static void +emit_fetch( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_src_register *reg, + const unsigned chan_index ) +{ + unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( reg->SrcRegister.File ) { + case TGSI_FILE_CONSTANT: + emit_const( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_IMMEDIATE: + emit_immediate( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_INPUT: + emit_inputf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_TEMPORARY: + emit_tempf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + break; + + case TGSI_EXTSWIZZLE_ONE: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + break; + + default: + assert( 0 ); + } + + switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { + case TGSI_UTIL_SIGN_CLEAR: + emit_abs( func, xmm ); + break; + + case TGSI_UTIL_SIGN_SET: + emit_setsign( func, xmm ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + emit_neg( func, xmm ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } +} + +#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ + emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) + +/** + * Register store. + */ + +static void +emit_store( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + unsigned chan_index ) +{ + switch( reg->DstRegister.File ) { + case TGSI_FILE_OUTPUT: + emit_output( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + case TGSI_FILE_TEMPORARY: + emit_temps( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + case TGSI_FILE_ADDRESS: + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + default: + assert( 0 ); + } + + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + /* assert( 0 ); */ + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } +} + +#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ + emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + +/** + * High-level instruction translators. + */ + +static void +emit_kil( + struct x86_function *func, + const struct tgsi_full_src_register *reg ) +{ + unsigned uniquemask; + unsigned registers[4]; + unsigned nextregister = 0; + unsigned firstchan = ~0; + unsigned chan_index; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + FOR_EACH_CHANNEL( chan_index ) { + unsigned swizzle; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle( + reg, + chan_index ); + + /* check if the component has not been already tested */ + if( !(uniquemask & (1 << swizzle)) ) { + uniquemask |= 1 << swizzle; + + /* allocate register */ + registers[chan_index] = nextregister; + emit_fetch( + func, + nextregister, + reg, + chan_index ); + nextregister++; + + /* mark the first channel used */ + if( firstchan == ~0 ) { + firstchan = chan_index; + } + } + } + + x86_push( + func, + x86_make_reg( file_REG32, reg_AX ) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX ) ); + + FOR_EACH_CHANNEL( chan_index ) { + if( uniquemask & (1 << chan_index) ) { + sse_cmpps( + func, + make_xmm( registers[chan_index] ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + + if( chan_index == firstchan ) { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_AX ), + make_xmm( registers[chan_index] ) ); + } + else { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_DX ), + make_xmm( registers[chan_index] ) ); + x86_or( + func, + x86_make_reg( file_REG32, reg_AX ), + x86_make_reg( file_REG32, reg_DX ) ); + } + } + } + + x86_or( + func, + get_temp( + TGSI_EXEC_TEMP_KILMASK_I, + TGSI_EXEC_TEMP_KILMASK_C ), + x86_make_reg( file_REG32, reg_AX ) ); + + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX ) ); +} + +static void +emit_setcc( + struct x86_function *func, + struct tgsi_full_instruction *inst, + enum sse_cc cc ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + make_xmm( 1 ), + cc ); + sse_andps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static void +emit_cmp( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + sse_andps( + func, + make_xmm( 1 ), + make_xmm( 0 ) ); + sse_andnps( + func, + make_xmm( 0 ), + make_xmm( 2 ) ); + sse_orps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static int +emit_instruction( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + switch( inst->Instruction.Opcode ) { + case TGSI_OPCODE_ARL: +#if 0 + /* XXX this isn't working properly (see glean vertProg1 test) */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_f2it( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } +#else + return 0; +#endif + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C); + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + STORE( func, *inst, 0, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( func, *inst, 0, 0, CHAN_W ); + } + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_maxps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + /* XMM[1] = SrcReg[0].yyyy */ + FETCH( func, *inst, 1, 0, CHAN_Y ); + /* XMM[1] = max(XMM[1], 0) */ + sse_maxps( + func, + make_xmm( 1 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + /* XMM[2] = SrcReg[0].wwww */ + FETCH( func, *inst, 2, 0, CHAN_W ); + /* XMM[2] = min(XMM[2], 128.0) */ + sse_minps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_128_I, + TGSI_EXEC_TEMP_128_C ) ); + /* XMM[2] = max(XMM[2], -128.0) */ + sse_maxps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_MINUS_128_I, + TGSI_EXEC_TEMP_MINUS_128_C ) ); + emit_pow( func, 1, 2 ); + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_xorps( + func, + make_xmm( 2 ), + make_xmm( 2 ) ); + sse_cmpps( + func, + make_xmm( 2 ), + make_xmm( 0 ), + cc_LessThanEqual ); + sse_andps( + func, + make_xmm( 2 ), + make_xmm( 1 ) ); + STORE( func, *inst, 2, 0, CHAN_Z ); + } + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rcp( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rsqrt( func, 1, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 1, 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + return 0; + break; + + case TGSI_OPCODE_LOG: + return 0; + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_add( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul(func, 1, 2 ); + emit_add(func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_W ); + FETCH( func, *inst, 2, 1, CHAN_W ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 1, 1, CHAN_Y ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + FETCH( func, *inst, 0, 0, CHAN_Z ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, 1, CHAN_W ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_minps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_maxps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + emit_setcc( func, inst, cc_LessThan ); + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + emit_setcc( func, inst, cc_NotLessThan ); + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_sub( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_sub( func, 1, 2 ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CND: + return 0; + break; + + case TGSI_OPCODE_CND0: + return 0; + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + return 0; + break; + + case TGSI_OPCODE_INDEX: + return 0; + break; + + case TGSI_OPCODE_NEGATE: + return 0; + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_frc( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + return 0; + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_flr( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + return 0; + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_ex2( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_lg2( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_pow( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 1, 1, CHAN_Z ); + FETCH( func, *inst, 3, 0, CHAN_Z ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 4, 1, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_MOV( func, 2, 0 ); + emit_mul( func, 2, 1 ); + emit_MOV( func, 5, 3 ); + emit_mul( func, 5, 4 ); + emit_sub( func, 2, 5 ); + STORE( func, *inst, 2, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 2, 1, CHAN_X ); + FETCH( func, *inst, 5, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + emit_mul( func, 3, 2 ); + emit_mul( func, 1, 5 ); + emit_sub( func, 3, 1 ); + STORE( func, *inst, 3, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_mul( func, 5, 4 ); + emit_mul( func, 0, 2 ); + emit_sub( func, 5, 0 ); + STORE( func, *inst, 5, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + return 0; + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_abs( func, 0) ; + + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RCC: + return 0; + break; + + case TGSI_OPCODE_DPH: + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 1, CHAN_W ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + return 0; + break; + + case TGSI_OPCODE_DDY: + return 0; + break; + + case TGSI_OPCODE_KIL: + emit_kil( func, &inst->FullSrcRegisters[0] ); + break; + + case TGSI_OPCODE_PK2H: + return 0; + break; + + case TGSI_OPCODE_PK2US: + return 0; + break; + + case TGSI_OPCODE_PK4B: + return 0; + break; + + case TGSI_OPCODE_PK4UB: + return 0; + break; + + case TGSI_OPCODE_RFL: + return 0; + break; + + case TGSI_OPCODE_SEQ: + return 0; + break; + + case TGSI_OPCODE_SFL: + return 0; + break; + + case TGSI_OPCODE_SGT: + return 0; + break; + + case TGSI_OPCODE_SIN: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + return 0; + break; + + case TGSI_OPCODE_SNE: + return 0; + break; + + case TGSI_OPCODE_STR: + return 0; + break; + + case TGSI_OPCODE_TEX: + if (0) { + /* Disable dummy texture code: + */ + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + } + else { + return 0; + } + break; + + case TGSI_OPCODE_TXD: + return 0; + break; + + case TGSI_OPCODE_UP2H: + return 0; + break; + + case TGSI_OPCODE_UP2US: + return 0; + break; + + case TGSI_OPCODE_UP4B: + return 0; + break; + + case TGSI_OPCODE_UP4UB: + return 0; + break; + + case TGSI_OPCODE_X2D: + return 0; + break; + + case TGSI_OPCODE_ARA: + return 0; + break; + + case TGSI_OPCODE_ARR: + return 0; + break; + + case TGSI_OPCODE_BRA: + return 0; + break; + + case TGSI_OPCODE_CAL: + return 0; + break; + + case TGSI_OPCODE_RET: + emit_ret( func ); + break; + + case TGSI_OPCODE_END: + break; + + case TGSI_OPCODE_SSG: + return 0; + break; + + case TGSI_OPCODE_CMP: + emit_cmp (func, inst); + break; + + case TGSI_OPCODE_SCS: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_TXB: + return 0; + break; + + case TGSI_OPCODE_NRM: + return 0; + break; + + case TGSI_OPCODE_DIV: + return 0; + break; + + case TGSI_OPCODE_DP2: + return 0; + break; + + case TGSI_OPCODE_TXL: + return 0; + break; + + case TGSI_OPCODE_BRK: + return 0; + break; + + case TGSI_OPCODE_IF: + return 0; + break; + + case TGSI_OPCODE_LOOP: + return 0; + break; + + case TGSI_OPCODE_REP: + return 0; + break; + + case TGSI_OPCODE_ELSE: + return 0; + break; + + case TGSI_OPCODE_ENDIF: + return 0; + break; + + case TGSI_OPCODE_ENDLOOP: + return 0; + break; + + case TGSI_OPCODE_ENDREP: + return 0; + break; + + case TGSI_OPCODE_PUSHA: + return 0; + break; + + case TGSI_OPCODE_POPA: + return 0; + break; + + case TGSI_OPCODE_CEIL: + return 0; + break; + + case TGSI_OPCODE_I2F: + return 0; + break; + + case TGSI_OPCODE_NOT: + return 0; + break; + + case TGSI_OPCODE_TRUNC: + return 0; + break; + + case TGSI_OPCODE_SHL: + return 0; + break; + + case TGSI_OPCODE_SHR: + return 0; + break; + + case TGSI_OPCODE_AND: + return 0; + break; + + case TGSI_OPCODE_OR: + return 0; + break; + + case TGSI_OPCODE_MOD: + return 0; + break; + + case TGSI_OPCODE_XOR: + return 0; + break; + + case TGSI_OPCODE_SAD: + return 0; + break; + + case TGSI_OPCODE_TXF: + return 0; + break; + + case TGSI_OPCODE_TXQ: + return 0; + break; + + case TGSI_OPCODE_CONT: + return 0; + break; + + case TGSI_OPCODE_EMIT: + return 0; + break; + + case TGSI_OPCODE_ENDPRIM: + return 0; + break; + + default: + return 0; + } + + return 1; +} + +static void +emit_declaration( + struct x86_function *func, + struct tgsi_full_declaration *decl ) +{ + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned i, j; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + emit_coef_a0( func, 0, i, j ); + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_coef_a0( func, 4, i, j ); + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 4 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); + emit_coef_a0( func, 5, i, j ); + emit_rcp( func, 4, 4 ); /* 1.0 / w */ + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 5 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ + emit_inputs( func, 0, i, j ); + break; + + default: + assert( 0 ); + break; + } + } + } + } + } +} + +static void aos_to_soa( struct x86_function *func, + uint arg_aos, + uint arg_soa, + uint arg_num, + uint arg_stride ) +{ + struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); + struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); + struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); + struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); + int inner_loop; + + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); + x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); + x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); + x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + x86_push( func, aos_input ); + sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_pop( func, aos_input ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); + sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); + sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); + sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); + + sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); + sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); + sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); + sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); + + /* Advance to next input */ + x86_lea( func, aos_input, x86_make_disp(aos_input, 16) ); + x86_lea( func, soa_input, x86_make_disp(soa_input, 64) ); + } + /* while --num_inputs */ + x86_dec( func, num_inputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_input ); +} + +static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_output; + struct x86_reg aos_output; + struct x86_reg num_outputs; + struct x86_reg temp; + int inner_loop; + + soa_output = x86_make_reg( file_REG32, reg_AX ); + aos_output = x86_make_reg( file_REG32, reg_BX ); + num_outputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, aos_output ); + + x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); + x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); + x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); + sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); + sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); + sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); + sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); + sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); + sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); + + x86_mov( func, temp, x86_fn_arg( func, stride ) ); + x86_push( func, aos_output ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_pop( func, aos_output ); + + /* Advance to next output */ + x86_lea( func, aos_output, x86_make_disp(aos_output, 16) ); + x86_lea( func, soa_output, x86_make_disp(soa_output, 64) ); + } + /* while --num_outputs */ + x86_dec( func, num_outputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_output ); +} + +/** + * Translate a TGSI vertex/fragment shader to SSE2 code. + * Slightly different things are done for vertex vs. fragment shaders. + * + * Note that fragment shaders are responsible for interpolating shader + * inputs. Because on x86 we have only 4 GP registers, and here we + * have 5 shader arguments (input, output, const, temp and coef), the + * code is split into two phases -- DECLARATION and INSTRUCTION phase. + * GP register holding the output argument is aliased with the coeff + * argument, as outputs are not needed in the DECLARATION phase. + * + * \param tokens the TGSI input shader + * \param func the output SSE code/function + * \param immediates buffer to place immediates, later passed to SSE func + * \param return 1 for success, 0 if translation failed + */ +unsigned +tgsi_emit_sse2( + const struct tgsi_token *tokens, + struct x86_function *func, + float (*immediates)[4], + boolean do_swizzles ) +{ + struct tgsi_parse_context parse; + boolean instruction_phase = FALSE; + unsigned ok = 1; + uint num_immediates = 0; + + func->csr = func->store; + + tgsi_parse_init( &parse, tokens ); + + /* Can't just use EDI, EBX without save/restoring them: + */ + x86_push( + func, + get_immediate_base() ); + + x86_push( + func, + get_temp_base() ); + + + /* + * Different function args for vertex/fragment shaders: + */ + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + /* DECLARATION phase, do not load output argument. */ + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + /* skipping outputs argument here */ + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_coef_base(), + x86_fn_arg( func, 5 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 6 ) ); + } + else { + assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + + if (do_swizzles) + aos_to_soa( func, + 6, /* aos_input */ + 1, /* machine->input */ + 7, /* num_inputs */ + 8 ); /* input_stride */ + + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 5 ) ); + } + + while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration( + func, + &parse.FullToken.FullDeclaration ); + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + if( !instruction_phase ) { + /* INSTRUCTION phase, overwrite coeff with output. */ + instruction_phase = TRUE; + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + } + } + + ok = emit_instruction( + func, + &parse.FullToken.FullInstruction ); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? + "vertex shader" : "fragment shader"); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for( i = 0; i < size; i++ ) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } +#if 0 + debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", + num_immediates, + immediates[num_immediates][0], + immediates[num_immediates][1], + immediates[num_immediates][2], + immediates[num_immediates][3]); +#endif + num_immediates++; + } + break; + + default: + ok = 0; + assert( 0 ); + } + } + + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + if (do_swizzles) + soa_to_aos( func, 9, 2, 10, 11 ); + } + + /* Can't just use EBX, EDI without save/restoring them: + */ + x86_pop( + func, + get_temp_base() ); + + x86_pop( + func, + get_immediate_base() ); + + emit_ret( func ); + + tgsi_parse_free( &parse ); + + return ok; +} + +#endif /* PIPE_ARCH_X86 */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/tgsi_sse2.h new file mode 100644 index 00000000000..af838b2a25b --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SSE2_H +#define TGSI_SSE2_H + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_token; +struct x86_function; + +unsigned +tgsi_emit_sse2( + const struct tgsi_token *tokens, + struct x86_function *function, + float (*immediates)[4], + boolean do_swizzles ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_SSE2_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c new file mode 100644 index 00000000000..35cb3055bb2 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -0,0 +1,1221 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_text.h" +#include "tgsi_build.h" +#include "tgsi_parse.h" +#include "tgsi_sanity.h" +#include "tgsi_util.h" + +static boolean is_alpha_underscore( const char *cur ) +{ + return + (*cur >= 'a' && *cur <= 'z') || + (*cur >= 'A' && *cur <= 'Z') || + *cur == '_'; +} + +static boolean is_digit( const char *cur ) +{ + return *cur >= '0' && *cur <= '9'; +} + +static boolean is_digit_alpha_underscore( const char *cur ) +{ + return is_digit( cur ) || is_alpha_underscore( cur ); +} + +static boolean str_match_no_case( const char **pcur, const char *str ) +{ + const char *cur = *pcur; + + while (*str != '\0' && *str == toupper( *cur )) { + str++; + cur++; + } + if (*str == '\0') { + *pcur = cur; + return TRUE; + } + return FALSE; +} + +/* Eat zero or more whitespaces. + */ +static void eat_opt_white( const char **pcur ) +{ + while (**pcur == ' ' || **pcur == '\t' || **pcur == '\n') + (*pcur)++; +} + +/* Eat one or more whitespaces. + * Return TRUE if at least one whitespace eaten. + */ +static boolean eat_white( const char **pcur ) +{ + const char *cur = *pcur; + + eat_opt_white( pcur ); + return *pcur > cur; +} + +/* Parse unsigned integer. + * No checks for overflow. + */ +static boolean parse_uint( const char **pcur, uint *val ) +{ + const char *cur = *pcur; + + if (is_digit( cur )) { + *val = *cur++ - '0'; + while (is_digit( cur )) + *val = *val * 10 + *cur++ - '0'; + *pcur = cur; + return TRUE; + } + return FALSE; +} + +/* Parse floating point. + */ +static boolean parse_float( const char **pcur, float *val ) +{ + const char *cur = *pcur; + boolean integral_part = FALSE; + boolean fractional_part = FALSE; + + *val = (float) atof( cur ); + + if (*cur == '-' || *cur == '+') + cur++; + if (is_digit( cur )) { + cur++; + integral_part = TRUE; + while (is_digit( cur )) + cur++; + } + if (*cur == '.') { + cur++; + if (is_digit( cur )) { + cur++; + fractional_part = TRUE; + while (is_digit( cur )) + cur++; + } + } + if (!integral_part && !fractional_part) + return FALSE; + if (toupper( *cur ) == 'E') { + cur++; + if (*cur == '-' || *cur == '+') + cur++; + if (is_digit( cur )) { + cur++; + while (is_digit( cur )) + cur++; + } + else + return FALSE; + } + *pcur = cur; + return TRUE; +} + +struct translate_ctx +{ + const char *text; + const char *cur; + struct tgsi_token *tokens; + struct tgsi_token *tokens_cur; + struct tgsi_token *tokens_end; + struct tgsi_header *header; +}; + +static void report_error( struct translate_ctx *ctx, const char *msg ) +{ + debug_printf( "\nError: %s", msg ); +} + +/* Parse shader header. + * Return TRUE for one of the following headers. + * FRAG1.1 + * GEOM1.1 + * VERT1.1 + */ +static boolean parse_header( struct translate_ctx *ctx ) +{ + uint processor; + + if (str_match_no_case( &ctx->cur, "FRAG1.1" )) + processor = TGSI_PROCESSOR_FRAGMENT; + else if (str_match_no_case( &ctx->cur, "VERT1.1" )) + processor = TGSI_PROCESSOR_VERTEX; + else if (str_match_no_case( &ctx->cur, "GEOM1.1" )) + processor = TGSI_PROCESSOR_GEOMETRY; + else { + report_error( ctx, "Unknown header" ); + return FALSE; + } + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version(); + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + ctx->header = (struct tgsi_header *) ctx->tokens_cur++; + *ctx->header = tgsi_build_header(); + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header ); + + return TRUE; +} + +static boolean parse_label( struct translate_ctx *ctx, uint *val ) +{ + const char *cur = ctx->cur; + + if (parse_uint( &cur, val )) { + eat_opt_white( &cur ); + if (*cur == ':') { + cur++; + ctx->cur = cur; + return TRUE; + } + } + return FALSE; +} + +static const char *file_names[TGSI_FILE_COUNT] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static boolean +parse_file( const char **pcur, uint *file ) +{ + uint i; + + for (i = 0; i < TGSI_FILE_COUNT; i++) { + const char *cur = *pcur; + + if (str_match_no_case( &cur, file_names[i] )) { + if (!is_digit_alpha_underscore( cur )) { + *pcur = cur; + *file = i; + return TRUE; + } + } + } + return FALSE; +} + +static boolean +parse_opt_writemask( + struct translate_ctx *ctx, + uint *writemask ) +{ + const char *cur; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == '.') { + cur++; + *writemask = TGSI_WRITEMASK_NONE; + eat_opt_white( &cur ); + if (toupper( *cur ) == 'X') { + cur++; + *writemask |= TGSI_WRITEMASK_X; + } + if (toupper( *cur ) == 'Y') { + cur++; + *writemask |= TGSI_WRITEMASK_Y; + } + if (toupper( *cur ) == 'Z') { + cur++; + *writemask |= TGSI_WRITEMASK_Z; + } + if (toupper( *cur ) == 'W') { + cur++; + *writemask |= TGSI_WRITEMASK_W; + } + + if (*writemask == TGSI_WRITEMASK_NONE) { + report_error( ctx, "Writemask expected" ); + return FALSE; + } + + ctx->cur = cur; + } + else { + *writemask = TGSI_WRITEMASK_XYZW; + } + return TRUE; +} + +/* <register_file_bracket> ::= <file> `[' + */ +static boolean +parse_register_file_bracket( + struct translate_ctx *ctx, + uint *file ) +{ + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* <register_file_bracket_index> ::= <register_file_bracket> <uint> + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + return TRUE; +} + +/* Parse destination register operand. + * <register_dst> ::= <register_file_bracket_index> `]' + */ +static boolean +parse_register_dst( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + if (!parse_register_file_bracket_index( ctx, file, index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* Parse source register operand. + * <register_src> ::= <register_file_bracket_index> `]' | + * <register_file_bracket> <register_dst> `]' | + * <register_file_bracket> <register_dst> `+' <uint> `]' | + * <register_file_bracket> <register_dst> `-' <uint> `]' + */ +static boolean +parse_register_src( + struct translate_ctx *ctx, + uint *file, + int *index, + uint *ind_file, + int *ind_index ) +{ + const char *cur; + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + cur = ctx->cur; + if (parse_file( &cur, ind_file )) { + if (!parse_register_dst( ctx, ind_file, ind_index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur == '+' || *ctx->cur == '-') { + boolean negate; + + negate = *ctx->cur == '-'; + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + if (negate) + *index = -(int) uindex; + else + *index = (int) uindex; + } + else { + *index = 0; + } + } + else { + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + *ind_file = TGSI_FILE_NULL; + *ind_index = 0; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* Parse register declaration. + * <register_dcl> ::= <register_file_bracket_index> `]' | + * <register_file_bracket_index> `..' <index> `]' + */ +static boolean +parse_register_dcl( + struct translate_ctx *ctx, + uint *file, + int *first, + int *last ) +{ + if (!parse_register_file_bracket_index( ctx, file, first )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (ctx->cur[0] == '.' && ctx->cur[1] == '.') { + uint uindex; + + ctx->cur += 2; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + *last = (int) uindex; + eat_opt_white( &ctx->cur ); + } + else { + *last = *first; + } + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]' or `..'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +static const char *modulate_names[TGSI_MODULATE_COUNT] = +{ + "_1X", + "_2X", + "_4X", + "_8X", + "_D2", + "_D4", + "_D8" +}; + +static boolean +parse_dst_operand( + struct translate_ctx *ctx, + struct tgsi_full_dst_register *dst ) +{ + uint file; + int index; + uint writemask; + const char *cur; + + if (!parse_register_dst( ctx, &file, &index )) + return FALSE; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == '_') { + uint i; + + for (i = 0; i < TGSI_MODULATE_COUNT; i++) { + if (str_match_no_case( &cur, modulate_names[i] )) { + if (!is_digit_alpha_underscore( cur )) { + dst->DstRegisterExtModulate.Modulate = i; + ctx->cur = cur; + break; + } + } + } + } + + if (!parse_opt_writemask( ctx, &writemask )) + return FALSE; + + dst->DstRegister.File = file; + dst->DstRegister.Index = index; + dst->DstRegister.WriteMask = writemask; + return TRUE; +} + +static boolean +parse_optional_swizzle( + struct translate_ctx *ctx, + uint swizzle[4], + boolean *parsed_swizzle, + boolean *parsed_extswizzle ) +{ + const char *cur = ctx->cur; + + *parsed_swizzle = FALSE; + *parsed_extswizzle = FALSE; + + eat_opt_white( &cur ); + if (*cur == '.') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < 4; i++) { + if (toupper( *cur ) == 'X') + swizzle[i] = TGSI_SWIZZLE_X; + else if (toupper( *cur ) == 'Y') + swizzle[i] = TGSI_SWIZZLE_Y; + else if (toupper( *cur ) == 'Z') + swizzle[i] = TGSI_SWIZZLE_Z; + else if (toupper( *cur ) == 'W') + swizzle[i] = TGSI_SWIZZLE_W; + else { + if (*cur == '0') + swizzle[i] = TGSI_EXTSWIZZLE_ZERO; + else if (*cur == '1') + swizzle[i] = TGSI_EXTSWIZZLE_ONE; + else { + report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); + return FALSE; + } + *parsed_extswizzle = TRUE; + } + cur++; + } + *parsed_swizzle = TRUE; + ctx->cur = cur; + } + return TRUE; +} + +static boolean +parse_src_operand( + struct translate_ctx *ctx, + struct tgsi_full_src_register *src ) +{ + const char *cur; + float value; + uint file; + int index; + uint ind_file; + int ind_index; + uint swizzle[4]; + boolean parsed_swizzle; + boolean parsed_extswizzle; + + if (*ctx->cur == '-') { + cur = ctx->cur; + cur++; + eat_opt_white( &cur ); + if (*cur == '(') { + cur++; + src->SrcRegisterExtMod.Negate = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (*ctx->cur == '|') { + ctx->cur++; + eat_opt_white( &ctx->cur ); + src->SrcRegisterExtMod.Absolute = 1; + } + + if (*ctx->cur == '-') { + ctx->cur++; + eat_opt_white( &ctx->cur ); + src->SrcRegister.Negate = 1; + } + + cur = ctx->cur; + if (parse_float( &cur, &value )) { + if (value == 2.0f) { + eat_opt_white( &cur ); + if (*cur != '*') { + report_error( ctx, "Expected `*'" ); + return FALSE; + } + cur++; + if (*cur != '(') { + report_error( ctx, "Expected `('" ); + return FALSE; + } + cur++; + src->SrcRegisterExtMod.Scale2X = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (*ctx->cur == '(') { + ctx->cur++; + eat_opt_white( &ctx->cur ); + src->SrcRegisterExtMod.Bias = 1; + } + + cur = ctx->cur; + if (parse_float( &cur, &value )) { + if (value == 1.0f) { + eat_opt_white( &cur ); + if (*cur != '-') { + report_error( ctx, "Expected `-'" ); + return FALSE; + } + cur++; + if (*cur != '(') { + report_error( ctx, "Expected `('" ); + return FALSE; + } + cur++; + src->SrcRegisterExtMod.Complement = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (!parse_register_src( ctx, &file, &index, &ind_file, &ind_index )) + return FALSE; + src->SrcRegister.File = file; + src->SrcRegister.Index = index; + if (ind_file != TGSI_FILE_NULL) { + src->SrcRegister.Indirect = 1; + src->SrcRegisterInd.File = ind_file; + src->SrcRegisterInd.Index = ind_index; + } + + /* Parse optional swizzle. + */ + if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) { + if (parsed_extswizzle) { + assert( parsed_swizzle ); + + src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0]; + src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1]; + src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2]; + src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3]; + } + else if (parsed_swizzle) { + src->SrcRegister.SwizzleX = swizzle[0]; + src->SrcRegister.SwizzleY = swizzle[1]; + src->SrcRegister.SwizzleZ = swizzle[2]; + src->SrcRegister.SwizzleW = swizzle[3]; + } + } + + if (src->SrcRegisterExtMod.Complement) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Bias) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '-') { + report_error( ctx, "Expected `-'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_float( &ctx->cur, &value )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + if (value != 0.5f) { + report_error( ctx, "Expected 0.5" ); + return FALSE; + } + } + + if (src->SrcRegisterExtMod.Scale2X) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Absolute) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '|') { + report_error( ctx, "Expected `|'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Negate) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + return TRUE; +} + +struct opcode_info +{ + uint num_dst; + uint num_src; + uint is_tex; + uint is_branch; + const char *mnemonic; +}; + +static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = +{ + { 1, 1, 0, 0, "ARL" }, + { 1, 1, 0, 0, "MOV" }, + { 1, 1, 0, 0, "LIT" }, + { 1, 1, 0, 0, "RCP" }, + { 1, 1, 0, 0, "RSQ" }, + { 1, 1, 0, 0, "EXP" }, + { 1, 1, 0, 0, "LOG" }, + { 1, 2, 0, 0, "MUL" }, + { 1, 2, 0, 0, "ADD" }, + { 1, 2, 0, 0, "DP3" }, + { 1, 2, 0, 0, "DP4" }, + { 1, 2, 0, 0, "DST" }, + { 1, 2, 0, 0, "MIN" }, + { 1, 2, 0, 0, "MAX" }, + { 1, 2, 0, 0, "SLT" }, + { 1, 2, 0, 0, "SGE" }, + { 1, 3, 0, 0, "MAD" }, + { 1, 2, 0, 0, "SUB" }, + { 1, 3, 0, 0, "LERP" }, + { 1, 3, 0, 0, "CND" }, + { 1, 3, 0, 0, "CND0" }, + { 1, 3, 0, 0, "DOT2ADD" }, + { 1, 2, 0, 0, "INDEX" }, + { 1, 1, 0, 0, "NEGATE" }, + { 1, 1, 0, 0, "FRAC" }, + { 1, 3, 0, 0, "CLAMP" }, + { 1, 1, 0, 0, "FLOOR" }, + { 1, 1, 0, 0, "ROUND" }, + { 1, 1, 0, 0, "EXPBASE2" }, + { 1, 1, 0, 0, "LOGBASE2" }, + { 1, 2, 0, 0, "POWER" }, + { 1, 2, 0, 0, "CROSSPRODUCT" }, + { 1, 2, 0, 0, "MULTIPLYMATRIX" }, + { 1, 1, 0, 0, "ABS" }, + { 1, 1, 0, 0, "RCC" }, + { 1, 2, 0, 0, "DPH" }, + { 1, 1, 0, 0, "COS" }, + { 1, 1, 0, 0, "DDX" }, + { 1, 1, 0, 0, "DDY" }, + { 0, 1, 0, 0, "KILP" }, + { 1, 1, 0, 0, "PK2H" }, + { 1, 1, 0, 0, "PK2US" }, + { 1, 1, 0, 0, "PK4B" }, + { 1, 1, 0, 0, "PK4UB" }, + { 1, 2, 0, 0, "RFL" }, + { 1, 2, 0, 0, "SEQ" }, + { 1, 2, 0, 0, "SFL" }, + { 1, 2, 0, 0, "SGT" }, + { 1, 1, 0, 0, "SIN" }, + { 1, 2, 0, 0, "SLE" }, + { 1, 2, 0, 0, "SNE" }, + { 1, 2, 0, 0, "STR" }, + { 1, 2, 1, 0, "TEX" }, + { 1, 4, 1, 0, "TXD" }, + { 1, 2, 1, 0, "TXP" }, + { 1, 1, 0, 0, "UP2H" }, + { 1, 1, 0, 0, "UP2US" }, + { 1, 1, 0, 0, "UP4B" }, + { 1, 1, 0, 0, "UP4UB" }, + { 1, 3, 0, 0, "X2D" }, + { 1, 1, 0, 0, "ARA" }, + { 1, 1, 0, 0, "ARR" }, + { 0, 1, 0, 0, "BRA" }, + { 0, 0, 0, 1, "CAL" }, + { 0, 0, 0, 0, "RET" }, + { 1, 1, 0, 0, "SSG" }, + { 1, 3, 0, 0, "CMP" }, + { 1, 1, 0, 0, "SCS" }, + { 1, 2, 1, 0, "TXB" }, + { 1, 1, 0, 0, "NRM" }, + { 1, 2, 0, 0, "DIV" }, + { 1, 2, 0, 0, "DP2" }, + { 1, 2, 1, 0, "TXL" }, + { 0, 0, 0, 0, "BRK" }, + { 0, 1, 0, 1, "IF" }, + { 0, 0, 0, 0, "LOOP" }, + { 0, 1, 0, 0, "REP" }, + { 0, 0, 0, 1, "ELSE" }, + { 0, 0, 0, 0, "ENDIF" }, + { 0, 0, 0, 0, "ENDLOOP" }, + { 0, 0, 0, 0, "ENDREP" }, + { 0, 1, 0, 0, "PUSHA" }, + { 1, 0, 0, 0, "POPA" }, + { 1, 1, 0, 0, "CEIL" }, + { 1, 1, 0, 0, "I2F" }, + { 1, 1, 0, 0, "NOT" }, + { 1, 1, 0, 0, "TRUNC" }, + { 1, 2, 0, 0, "SHL" }, + { 1, 2, 0, 0, "SHR" }, + { 1, 2, 0, 0, "AND" }, + { 1, 2, 0, 0, "OR" }, + { 1, 2, 0, 0, "MOD" }, + { 1, 2, 0, 0, "XOR" }, + { 1, 3, 0, 0, "SAD" }, + { 1, 2, 1, 0, "TXF" }, + { 1, 2, 1, 0, "TXQ" }, + { 0, 0, 0, 0, "CONT" }, + { 0, 0, 0, 0, "EMIT" }, + { 0, 0, 0, 0, "ENDPRIM" }, + { 0, 0, 0, 1, "BGNLOOP2" }, + { 0, 0, 0, 0, "BGNSUB" }, + { 0, 0, 0, 1, "ENDLOOP2" }, + { 0, 0, 0, 0, "ENDSUB" }, + { 1, 1, 0, 0, "NOISE1" }, + { 1, 1, 0, 0, "NOISE2" }, + { 1, 1, 0, 0, "NOISE3" }, + { 1, 1, 0, 0, "NOISE4" }, + { 0, 0, 0, 0, "NOP" }, + { 1, 2, 0, 0, "M4X3" }, + { 1, 2, 0, 0, "M3X4" }, + { 1, 2, 0, 0, "M3X3" }, + { 1, 2, 0, 0, "M3X2" }, + { 1, 1, 0, 0, "NRM4" }, + { 0, 1, 0, 0, "CALLNZ" }, + { 0, 1, 0, 0, "IFC" }, + { 0, 1, 0, 0, "BREAKC" }, + { 0, 0, 0, 0, "KIL" }, + { 0, 0, 0, 0, "END" }, + { 1, 1, 0, 0, "SWZ" } +}; + +static const char *texture_names[TGSI_TEXTURE_COUNT] = +{ + "UNKNOWN", + "1D", + "2D", + "3D", + "CUBE", + "RECT", + "SHADOW1D", + "SHADOW2D", + "SHADOWRECT" +}; + +static boolean +parse_instruction( + struct translate_ctx *ctx, + boolean has_label ) +{ + uint i; + uint saturate = TGSI_SAT_NONE; + const struct opcode_info *info; + struct tgsi_full_instruction inst; + uint advance; + + /* Parse instruction name. + */ + eat_opt_white( &ctx->cur ); + for (i = 0; i < TGSI_OPCODE_LAST; i++) { + const char *cur = ctx->cur; + + info = &opcode_info[i]; + if (str_match_no_case( &cur, info->mnemonic )) { + if (str_match_no_case( &cur, "_SATNV" )) + saturate = TGSI_SAT_MINUS_PLUS_ONE; + else if (str_match_no_case( &cur, "_SAT" )) + saturate = TGSI_SAT_ZERO_ONE; + + if (info->num_dst + info->num_src + info->is_tex == 0) { + if (!is_digit_alpha_underscore( cur )) { + ctx->cur = cur; + break; + } + } + else if (*cur == '\0' || eat_white( &cur )) { + ctx->cur = cur; + break; + } + } + } + if (i == TGSI_OPCODE_LAST) { + if (has_label) + report_error( ctx, "Unknown opcode" ); + else + report_error( ctx, "Expected `DCL', `IMM' or a label" ); + return FALSE; + } + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = i; + inst.Instruction.Saturate = saturate; + inst.Instruction.NumDstRegs = info->num_dst; + inst.Instruction.NumSrcRegs = info->num_src; + + /* Parse instruction operands. + */ + for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) { + if (i > 0) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + + if (i < info->num_dst) { + if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] )) + return FALSE; + } + else if (i < info->num_dst + info->num_src) { + if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] )) + return FALSE; + } + else { + uint j; + + for (j = 0; j < TGSI_TEXTURE_COUNT; j++) { + if (str_match_no_case( &ctx->cur, texture_names[j] )) { + if (!is_digit_alpha_underscore( ctx->cur )) { + inst.InstructionExtTexture.Texture = j; + break; + } + } + } + if (j == TGSI_TEXTURE_COUNT) { + report_error( ctx, "Expected texture target" ); + return FALSE; + } + } + } + + if (info->is_branch) { + uint target; + + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ':') { + report_error( ctx, "Expected `:'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &target )) { + report_error( ctx, "Expected a label" ); + return FALSE; + } + inst.InstructionExtLabel.Label = target; + } + + advance = tgsi_build_full_instruction( + &inst, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + +static const char *semantic_names[TGSI_SEMANTIC_COUNT] = +{ + "POSITION", + "COLOR", + "BCOLOR", + "FOG", + "PSIZE", + "GENERIC", + "NORMAL" +}; + +static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = +{ + "CONSTANT", + "LINEAR", + "PERSPECTIVE" +}; + +static boolean parse_declaration( struct translate_ctx *ctx ) +{ + struct tgsi_full_declaration decl; + uint file; + int first; + int last; + uint writemask; + const char *cur; + uint advance; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + if (!parse_register_dcl( ctx, &file, &first, &last )) + return FALSE; + if (!parse_opt_writemask( ctx, &writemask )) + return FALSE; + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = file; + decl.Declaration.UsageMask = writemask; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == ',') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { + if (str_match_no_case( &cur, semantic_names[i] )) { + const char *cur2 = cur; + uint index; + + if (is_digit_alpha_underscore( cur )) + continue; + eat_opt_white( &cur2 ); + if (*cur2 == '[') { + cur2++; + eat_opt_white( &cur2 ); + if (!parse_uint( &cur2, &index )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + eat_opt_white( &cur2 ); + if (*cur2 != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + cur2++; + + decl.Semantic.SemanticIndex = index; + + cur = cur2; + } + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = i; + + ctx->cur = cur; + break; + } + } + } + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == ',') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < TGSI_INTERPOLATE_COUNT; i++) { + if (str_match_no_case( &cur, interpolate_names[i] )) { + if (is_digit_alpha_underscore( cur )) + continue; + decl.Declaration.Interpolate = i; + + ctx->cur = cur; + break; + } + } + if (i == TGSI_INTERPOLATE_COUNT) { + report_error( ctx, "Expected semantic or interpolate attribute" ); + return FALSE; + } + } + + advance = tgsi_build_full_declaration( + &decl, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + +static boolean parse_immediate( struct translate_ctx *ctx ) +{ + struct tgsi_full_immediate imm; + uint i; + float values[4]; + uint advance; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) { + report_error( ctx, "Expected `FLT32'" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '{') { + report_error( ctx, "Expected `{'" ); + return FALSE; + } + ctx->cur++; + for (i = 0; i < 4; i++) { + eat_opt_white( &ctx->cur ); + if (i > 0) { + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + if (!parse_float( &ctx->cur, &values[i] )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '}') { + report_error( ctx, "Expected `}'" ); + return FALSE; + } + ctx->cur++; + + imm = tgsi_default_full_immediate(); + imm.Immediate.Size += 4; + imm.Immediate.DataType = TGSI_IMM_FLOAT32; + imm.u.Pointer = values; + + advance = tgsi_build_full_immediate( + &imm, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + +static boolean translate( struct translate_ctx *ctx ) +{ + eat_opt_white( &ctx->cur ); + if (!parse_header( ctx )) + return FALSE; + + while (*ctx->cur != '\0') { + uint label_val = 0; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + + if (*ctx->cur == '\0') + break; + + if (parse_label( ctx, &label_val )) { + if (!parse_instruction( ctx, TRUE )) + return FALSE; + } + else if (str_match_no_case( &ctx->cur, "DCL" )) { + if (!parse_declaration( ctx )) + return FALSE; + } + else if (str_match_no_case( &ctx->cur, "IMM" )) { + if (!parse_immediate( ctx )) + return FALSE; + } + else if (!parse_instruction( ctx, FALSE )) { + return FALSE; + } + } + + return TRUE; +} + +boolean +tgsi_text_translate( + const char *text, + struct tgsi_token *tokens, + uint num_tokens ) +{ + struct translate_ctx ctx; + + ctx.text = text; + ctx.cur = text; + ctx.tokens = tokens; + ctx.tokens_cur = tokens; + ctx.tokens_end = tokens + num_tokens; + + if (!translate( &ctx )) + return FALSE; + + return tgsi_sanity_check( tokens ); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.h b/src/gallium/auxiliary/tgsi/tgsi_text.h new file mode 100644 index 00000000000..8eeeeef1402 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_text.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_TEXT_H +#define TGSI_TEXT_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +boolean +tgsi_text_translate( + const char *text, + struct tgsi_token *tokens, + uint num_tokens ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_TEXT_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c new file mode 100644 index 00000000000..357f77b05a6 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -0,0 +1,199 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI program transformation utility. + * + * Authors: Brian Paul + */ + + +#include "tgsi_transform.h" + + + +static void +emit_instruction(struct tgsi_transform_context *ctx, + const struct tgsi_full_instruction *inst) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_instruction(inst, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + +static void +emit_declaration(struct tgsi_transform_context *ctx, + const struct tgsi_full_declaration *decl) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_declaration(decl, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + +static void +emit_immediate(struct tgsi_transform_context *ctx, + const struct tgsi_full_immediate *imm) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_immediate(imm, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + + +/** + * Apply user-defined transformations to the input shader to produce + * the output shader. + * For example, a register search-and-replace operation could be applied + * by defining a transform_instruction() callback that examined and changed + * the instruction src/dest regs. + * + * \return number of tokens emitted + */ +int +tgsi_transform_shader(const struct tgsi_token *tokens_in, + struct tgsi_token *tokens_out, + uint max_tokens_out, + struct tgsi_transform_context *ctx) +{ + uint procType; + + /* input shader */ + struct tgsi_parse_context parse; + + /* output shader */ + struct tgsi_processor *processor; + + + /** + ** callback context init + **/ + ctx->emit_instruction = emit_instruction; + ctx->emit_declaration = emit_declaration; + ctx->emit_immediate = emit_immediate; + ctx->tokens_out = tokens_out; + ctx->max_tokens_out = max_tokens_out; + + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n"); + return -1; + } + procType = parse.FullHeader.Processor.Processor; + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY); + + + /** + ** Setup output shader + **/ + *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version(); + + ctx->header = (struct tgsi_header *) (tokens_out + 1); + *ctx->header = tgsi_build_header(); + + processor = (struct tgsi_processor *) (tokens_out + 2); + *processor = tgsi_build_processor( procType, ctx->header ); + + ctx->ti = 3; + + + /** + ** Loop over incoming program tokens/instructions + */ + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + + if (ctx->transform_instruction) + ctx->transform_instruction(ctx, fullinst); + else + ctx->emit_instruction(ctx, fullinst); + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + + if (ctx->transform_declaration) + ctx->transform_declaration(ctx, fulldecl); + else + ctx->emit_declaration(ctx, fulldecl); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *fullimm + = &parse.FullToken.FullImmediate; + + if (ctx->transform_immediate) + ctx->transform_immediate(ctx, fullimm); + else + ctx->emit_immediate(ctx, fullimm); + } + break; + + default: + assert( 0 ); + } + } + + if (ctx->epilog) { + ctx->epilog(ctx); + } + + tgsi_parse_free (&parse); + + return ctx->ti; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h new file mode 100644 index 00000000000..3da0b382711 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -0,0 +1,93 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_TRANSFORM_H +#define TGSI_TRANSFORM_H + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_build.h" + + + +/** + * Subclass this to add caller-specific data + */ +struct tgsi_transform_context +{ +/**** PUBLIC ***/ + + /** + * User-defined callbacks invoked per instruction. + */ + void (*transform_instruction)(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst); + + void (*transform_declaration)(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl); + + void (*transform_immediate)(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *imm); + + /** + * Called at end of input program to allow caller to append extra + * instructions. Return number of tokens emitted. + */ + void (*epilog)(struct tgsi_transform_context *ctx); + + +/*** PRIVATE ***/ + + /** + * These are setup by tgsi_transform_shader() and cannot be overridden. + * Meant to be called from in the above user callback functions. + */ + void (*emit_instruction)(struct tgsi_transform_context *ctx, + const struct tgsi_full_instruction *inst); + void (*emit_declaration)(struct tgsi_transform_context *ctx, + const struct tgsi_full_declaration *decl); + void (*emit_immediate)(struct tgsi_transform_context *ctx, + const struct tgsi_full_immediate *imm); + + struct tgsi_header *header; + uint max_tokens_out; + struct tgsi_token *tokens_out; + uint ti; +}; + + + +extern int +tgsi_transform_shader(const struct tgsi_token *tokens_in, + struct tgsi_token *tokens_out, + uint max_tokens_out, + struct tgsi_transform_context *ctx); + + +#endif /* TGSI_TRANSFORM_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c new file mode 100644 index 00000000000..09486e649e1 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -0,0 +1,300 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" +#include "tgsi_util.h" + +union pointer_hack +{ + void *pointer; + uint64_t uint64; +}; + +void * +tgsi_align_128bit( + void *unaligned ) +{ + union pointer_hack ph; + + ph.uint64 = 0; + ph.pointer = unaligned; + ph.uint64 = (ph.uint64 + 15) & ~15; + return ph.pointer; +} + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->SwizzleX; + case 1: + return reg->SwizzleY; + case 2: + return reg->SwizzleZ; + case 3: + return reg->SwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->ExtSwizzleX; + case 1: + return reg->ExtSwizzleY; + case 2: + return reg->ExtSwizzleZ; + case 3: + return reg->ExtSwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned swizzle; + + /* + * First, calculate the extended swizzle for a given channel. This will give + * us either a channel index into the simple swizzle or a constant 1 or 0. + */ + swizzle = tgsi_util_get_src_register_extswizzle( + ®->SrcRegisterExtSwz, + component ); + + assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + + /* + * Second, calculate the simple swizzle for the unswizzled channel index. + * Leave the constants intact, they are not affected by the simple swizzle. + */ + if( swizzle <= TGSI_SWIZZLE_W ) { + swizzle = tgsi_util_get_src_register_swizzle( + ®->SrcRegister, + swizzle ); + } + + return swizzle; +} + +void +tgsi_util_set_src_register_swizzle( + struct tgsi_src_register *reg, + unsigned swizzle, + unsigned component ) +{ + switch( component ) { + case 0: + reg->SwizzleX = swizzle; + break; + case 1: + reg->SwizzleY = swizzle; + break; + case 2: + reg->SwizzleZ = swizzle; + break; + case 3: + reg->SwizzleW = swizzle; + break; + default: + assert( 0 ); + } +} + +void +tgsi_util_set_src_register_extswizzle( + struct tgsi_src_register_ext_swz *reg, + unsigned swizzle, + unsigned component ) +{ + switch( component ) { + case 0: + reg->ExtSwizzleX = swizzle; + break; + case 1: + reg->ExtSwizzleY = swizzle; + break; + case 2: + reg->ExtSwizzleZ = swizzle; + break; + case 3: + reg->ExtSwizzleW = swizzle; + break; + default: + assert( 0 ); + } +} + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->NegateX; + case 1: + return reg->NegateY; + case 2: + return reg->NegateZ; + case 3: + return reg->NegateW; + default: + assert( 0 ); + } + return 0; +} + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ) +{ + switch( component ) { + case 0: + reg->NegateX = negate; + break; + case 1: + reg->NegateY = negate; + break; + case 2: + reg->NegateZ = negate; + break; + case 3: + reg->NegateW = negate; + break; + default: + assert( 0 ); + } +} + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned sign_mode; + + if( reg->SrcRegisterExtMod.Absolute ) { + /* Consider only the post-abs negation. */ + + if( reg->SrcRegisterExtMod.Negate ) { + sign_mode = TGSI_UTIL_SIGN_SET; + } + else { + sign_mode = TGSI_UTIL_SIGN_CLEAR; + } + } + else { + /* Accumulate the three negations. */ + + unsigned negate; + + negate = reg->SrcRegister.Negate; + if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { + negate = !negate; + } + if( reg->SrcRegisterExtMod.Negate ) { + negate = !negate; + } + + if( negate ) { + sign_mode = TGSI_UTIL_SIGN_TOGGLE; + } + else { + sign_mode = TGSI_UTIL_SIGN_KEEP; + } + } + + return sign_mode; +} + +void +tgsi_util_set_full_src_register_sign_mode( + struct tgsi_full_src_register *reg, + unsigned sign_mode ) +{ + reg->SrcRegisterExtSwz.NegateX = 0; + reg->SrcRegisterExtSwz.NegateY = 0; + reg->SrcRegisterExtSwz.NegateZ = 0; + reg->SrcRegisterExtSwz.NegateW = 0; + + switch (sign_mode) + { + case TGSI_UTIL_SIGN_CLEAR: + reg->SrcRegister.Negate = 0; + reg->SrcRegisterExtMod.Absolute = 1; + reg->SrcRegisterExtMod.Negate = 0; + break; + + case TGSI_UTIL_SIGN_SET: + reg->SrcRegister.Negate = 0; + reg->SrcRegisterExtMod.Absolute = 1; + reg->SrcRegisterExtMod.Negate = 1; + break; + + case TGSI_UTIL_SIGN_TOGGLE: + reg->SrcRegister.Negate = 1; + reg->SrcRegisterExtMod.Absolute = 0; + reg->SrcRegisterExtMod.Negate = 0; + break; + + case TGSI_UTIL_SIGN_KEEP: + reg->SrcRegister.Negate = 0; + reg->SrcRegisterExtMod.Absolute = 0; + reg->SrcRegisterExtMod.Negate = 0; + break; + + default: + assert( 0 ); + } +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h new file mode 100644 index 00000000000..7877f345587 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_UTIL_H +#define TGSI_UTIL_H + +#if defined __cplusplus +extern "C" { +#endif + +void * +tgsi_align_128bit( + void *unaligned ); + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ); + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component); + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ); + +void +tgsi_util_set_src_register_swizzle( + struct tgsi_src_register *reg, + unsigned swizzle, + unsigned component ); + +void +tgsi_util_set_src_register_extswizzle( + struct tgsi_src_register_ext_swz *reg, + unsigned swizzle, + unsigned component ); + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ); + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ); + +#define TGSI_UTIL_SIGN_CLEAR 0 /* Force positive */ +#define TGSI_UTIL_SIGN_SET 1 /* Force negative */ +#define TGSI_UTIL_SIGN_TOGGLE 2 /* Negate */ +#define TGSI_UTIL_SIGN_KEEP 3 /* No change */ + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ); + +void +tgsi_util_set_full_src_register_sign_mode( + struct tgsi_full_src_register *reg, + unsigned sign_mode ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_UTIL_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c deleted file mode 100644 index 742ef14c352..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c +++ /dev/null @@ -1,1324 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi_build.h" -#include "tgsi_parse.h" - -/* - * version - */ - -struct tgsi_version -tgsi_build_version( void ) -{ - struct tgsi_version version; - - version.MajorVersion = 1; - version.MinorVersion = 1; - version.Padding = 0; - - return version; -} - -/* - * header - */ - -struct tgsi_header -tgsi_build_header( void ) -{ - struct tgsi_header header; - - header.HeaderSize = 1; - header.BodySize = 0; - - return header; -} - -static void -header_headersize_grow( struct tgsi_header *header ) -{ - assert( header->HeaderSize < 0xFF ); - assert( header->BodySize == 0 ); - - header->HeaderSize++; -} - -static void -header_bodysize_grow( struct tgsi_header *header ) -{ - assert( header->BodySize < 0xFFFFFF ); - - header->BodySize++; -} - -struct tgsi_processor -tgsi_default_processor( void ) -{ - struct tgsi_processor processor; - - processor.Processor = TGSI_PROCESSOR_FRAGMENT; - processor.Padding = 0; - - return processor; -} - -struct tgsi_processor -tgsi_build_processor( - unsigned type, - struct tgsi_header *header ) -{ - struct tgsi_processor processor; - - processor = tgsi_default_processor(); - processor.Processor = type; - - header_headersize_grow( header ); - - return processor; -} - -/* - * declaration - */ - -struct tgsi_declaration -tgsi_default_declaration( void ) -{ - struct tgsi_declaration declaration; - - declaration.Type = TGSI_TOKEN_TYPE_DECLARATION; - declaration.Size = 1; - declaration.File = TGSI_FILE_NULL; - declaration.UsageMask = TGSI_WRITEMASK_XYZW; - declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; - declaration.Semantic = 0; - declaration.Padding = 0; - declaration.Extended = 0; - - return declaration; -} - -struct tgsi_declaration -tgsi_build_declaration( - unsigned file, - unsigned usage_mask, - unsigned interpolate, - unsigned semantic, - struct tgsi_header *header ) -{ - struct tgsi_declaration declaration; - - assert( file <= TGSI_FILE_IMMEDIATE ); - assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE ); - - declaration = tgsi_default_declaration(); - declaration.File = file; - declaration.UsageMask = usage_mask; - declaration.Interpolate = interpolate; - declaration.Semantic = semantic; - - header_bodysize_grow( header ); - - return declaration; -} - -static void -declaration_grow( - struct tgsi_declaration *declaration, - struct tgsi_header *header ) -{ - assert( declaration->Size < 0xFF ); - - declaration->Size++; - - header_bodysize_grow( header ); -} - -struct tgsi_full_declaration -tgsi_default_full_declaration( void ) -{ - struct tgsi_full_declaration full_declaration; - - full_declaration.Declaration = tgsi_default_declaration(); - full_declaration.DeclarationRange = tgsi_default_declaration_range(); - full_declaration.Semantic = tgsi_default_declaration_semantic(); - - return full_declaration; -} - -unsigned -tgsi_build_full_declaration( - const struct tgsi_full_declaration *full_decl, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ) -{ - unsigned size = 0; - struct tgsi_declaration *declaration; - struct tgsi_declaration_range *dr; - - if( maxsize <= size ) - return 0; - declaration = (struct tgsi_declaration *) &tokens[size]; - size++; - - *declaration = tgsi_build_declaration( - full_decl->Declaration.File, - full_decl->Declaration.UsageMask, - full_decl->Declaration.Interpolate, - full_decl->Declaration.Semantic, - header ); - - if (maxsize <= size) - return 0; - dr = (struct tgsi_declaration_range *) &tokens[size]; - size++; - - *dr = tgsi_build_declaration_range( - full_decl->DeclarationRange.First, - full_decl->DeclarationRange.Last, - declaration, - header ); - - if( full_decl->Declaration.Semantic ) { - struct tgsi_declaration_semantic *ds; - - if( maxsize <= size ) - return 0; - ds = (struct tgsi_declaration_semantic *) &tokens[size]; - size++; - - *ds = tgsi_build_declaration_semantic( - full_decl->Semantic.SemanticName, - full_decl->Semantic.SemanticIndex, - declaration, - header ); - } - - return size; -} - -struct tgsi_declaration_range -tgsi_default_declaration_range( void ) -{ - struct tgsi_declaration_range dr; - - dr.First = 0; - dr.Last = 0; - - return dr; -} - -struct tgsi_declaration_range -tgsi_build_declaration_range( - unsigned first, - unsigned last, - struct tgsi_declaration *declaration, - struct tgsi_header *header ) -{ - struct tgsi_declaration_range declaration_range; - - assert( last >= first ); - assert( last <= 0xFFFF ); - - declaration_range = tgsi_default_declaration_range(); - declaration_range.First = first; - declaration_range.Last = last; - - declaration_grow( declaration, header ); - - return declaration_range; -} - -struct tgsi_declaration_semantic -tgsi_default_declaration_semantic( void ) -{ - struct tgsi_declaration_semantic ds; - - ds.SemanticName = TGSI_SEMANTIC_POSITION; - ds.SemanticIndex = 0; - ds.Padding = 0; - - return ds; -} - -struct tgsi_declaration_semantic -tgsi_build_declaration_semantic( - unsigned semantic_name, - unsigned semantic_index, - struct tgsi_declaration *declaration, - struct tgsi_header *header ) -{ - struct tgsi_declaration_semantic ds; - - assert( semantic_name <= TGSI_SEMANTIC_COUNT ); - assert( semantic_index <= 0xFFFF ); - - ds = tgsi_default_declaration_semantic(); - ds.SemanticName = semantic_name; - ds.SemanticIndex = semantic_index; - - declaration_grow( declaration, header ); - - return ds; -} - -/* - * immediate - */ - -struct tgsi_immediate -tgsi_default_immediate( void ) -{ - struct tgsi_immediate immediate; - - immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE; - immediate.Size = 1; - immediate.DataType = TGSI_IMM_FLOAT32; - immediate.Padding = 0; - immediate.Extended = 0; - - return immediate; -} - -struct tgsi_immediate -tgsi_build_immediate( - struct tgsi_header *header ) -{ - struct tgsi_immediate immediate; - - immediate = tgsi_default_immediate(); - - header_bodysize_grow( header ); - - return immediate; -} - -struct tgsi_full_immediate -tgsi_default_full_immediate( void ) -{ - struct tgsi_full_immediate fullimm; - - fullimm.Immediate = tgsi_default_immediate(); - fullimm.u.Pointer = (void *) 0; - - return fullimm; -} - -static void -immediate_grow( - struct tgsi_immediate *immediate, - struct tgsi_header *header ) -{ - assert( immediate->Size < 0xFF ); - - immediate->Size++; - - header_bodysize_grow( header ); -} - -struct tgsi_immediate_float32 -tgsi_build_immediate_float32( - float value, - struct tgsi_immediate *immediate, - struct tgsi_header *header ) -{ - struct tgsi_immediate_float32 immediate_float32; - - immediate_float32.Float = value; - - immediate_grow( immediate, header ); - - return immediate_float32; -} - -unsigned -tgsi_build_full_immediate( - const struct tgsi_full_immediate *full_imm, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ) -{ - unsigned size = 0, i; - struct tgsi_immediate *immediate; - - if( maxsize <= size ) - return 0; - immediate = (struct tgsi_immediate *) &tokens[size]; - size++; - - *immediate = tgsi_build_immediate( header ); - - for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) { - struct tgsi_immediate_float32 *if32; - - if( maxsize <= size ) - return 0; - if32 = (struct tgsi_immediate_float32 *) &tokens[size]; - size++; - - *if32 = tgsi_build_immediate_float32( - full_imm->u.ImmediateFloat32[i].Float, - immediate, - header ); - } - - return size; -} - -/* - * instruction - */ - -struct tgsi_instruction -tgsi_default_instruction( void ) -{ - struct tgsi_instruction instruction; - - instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; - instruction.Size = 1; - instruction.Opcode = TGSI_OPCODE_MOV; - instruction.Saturate = TGSI_SAT_NONE; - instruction.NumDstRegs = 1; - instruction.NumSrcRegs = 1; - instruction.Padding = 0; - instruction.Extended = 0; - - return instruction; -} - -struct tgsi_instruction -tgsi_build_instruction( - unsigned opcode, - unsigned saturate, - unsigned num_dst_regs, - unsigned num_src_regs, - struct tgsi_header *header ) -{ - struct tgsi_instruction instruction; - - assert (opcode <= TGSI_OPCODE_LAST); - assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE); - assert (num_dst_regs <= 3); - assert (num_src_regs <= 15); - - instruction = tgsi_default_instruction(); - instruction.Opcode = opcode; - instruction.Saturate = saturate; - instruction.NumDstRegs = num_dst_regs; - instruction.NumSrcRegs = num_src_regs; - - header_bodysize_grow( header ); - - return instruction; -} - -static void -instruction_grow( - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - assert (instruction->Size < 0xFF); - - instruction->Size++; - - header_bodysize_grow( header ); -} - -struct tgsi_full_instruction -tgsi_default_full_instruction( void ) -{ - struct tgsi_full_instruction full_instruction; - unsigned i; - - full_instruction.Instruction = tgsi_default_instruction(); - full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv(); - full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); - full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); - for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { - full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); - } - for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) { - full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register(); - } - - return full_instruction; -} - -unsigned -tgsi_build_full_instruction( - const struct tgsi_full_instruction *full_inst, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ) -{ - unsigned size = 0; - unsigned i; - struct tgsi_instruction *instruction; - struct tgsi_token *prev_token; - - if( maxsize <= size ) - return 0; - instruction = (struct tgsi_instruction *) &tokens[size]; - size++; - - *instruction = tgsi_build_instruction( - full_inst->Instruction.Opcode, - full_inst->Instruction.Saturate, - full_inst->Instruction.NumDstRegs, - full_inst->Instruction.NumSrcRegs, - header ); - prev_token = (struct tgsi_token *) instruction; - - if( tgsi_compare_instruction_ext_nv( - full_inst->InstructionExtNv, - tgsi_default_instruction_ext_nv() ) ) { - struct tgsi_instruction_ext_nv *instruction_ext_nv; - - if( maxsize <= size ) - return 0; - instruction_ext_nv = - (struct tgsi_instruction_ext_nv *) &tokens[size]; - size++; - - *instruction_ext_nv = tgsi_build_instruction_ext_nv( - full_inst->InstructionExtNv.Precision, - full_inst->InstructionExtNv.CondDstIndex, - full_inst->InstructionExtNv.CondFlowIndex, - full_inst->InstructionExtNv.CondMask, - full_inst->InstructionExtNv.CondSwizzleX, - full_inst->InstructionExtNv.CondSwizzleY, - full_inst->InstructionExtNv.CondSwizzleZ, - full_inst->InstructionExtNv.CondSwizzleW, - full_inst->InstructionExtNv.CondDstUpdate, - full_inst->InstructionExtNv.CondFlowEnable, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) instruction_ext_nv; - } - - if( tgsi_compare_instruction_ext_label( - full_inst->InstructionExtLabel, - tgsi_default_instruction_ext_label() ) ) { - struct tgsi_instruction_ext_label *instruction_ext_label; - - if( maxsize <= size ) - return 0; - instruction_ext_label = - (struct tgsi_instruction_ext_label *) &tokens[size]; - size++; - - *instruction_ext_label = tgsi_build_instruction_ext_label( - full_inst->InstructionExtLabel.Label, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) instruction_ext_label; - } - - if( tgsi_compare_instruction_ext_texture( - full_inst->InstructionExtTexture, - tgsi_default_instruction_ext_texture() ) ) { - struct tgsi_instruction_ext_texture *instruction_ext_texture; - - if( maxsize <= size ) - return 0; - instruction_ext_texture = - (struct tgsi_instruction_ext_texture *) &tokens[size]; - size++; - - *instruction_ext_texture = tgsi_build_instruction_ext_texture( - full_inst->InstructionExtTexture.Texture, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) instruction_ext_texture; - } - - for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { - const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; - struct tgsi_dst_register *dst_register; - struct tgsi_token *prev_token; - - if( maxsize <= size ) - return 0; - dst_register = (struct tgsi_dst_register *) &tokens[size]; - size++; - - *dst_register = tgsi_build_dst_register( - reg->DstRegister.File, - reg->DstRegister.WriteMask, - reg->DstRegister.Index, - instruction, - header ); - prev_token = (struct tgsi_token *) dst_register; - - if( tgsi_compare_dst_register_ext_concode( - reg->DstRegisterExtConcode, - tgsi_default_dst_register_ext_concode() ) ) { - struct tgsi_dst_register_ext_concode *dst_register_ext_concode; - - if( maxsize <= size ) - return 0; - dst_register_ext_concode = - (struct tgsi_dst_register_ext_concode *) &tokens[size]; - size++; - - *dst_register_ext_concode = tgsi_build_dst_register_ext_concode( - reg->DstRegisterExtConcode.CondMask, - reg->DstRegisterExtConcode.CondSwizzleX, - reg->DstRegisterExtConcode.CondSwizzleY, - reg->DstRegisterExtConcode.CondSwizzleZ, - reg->DstRegisterExtConcode.CondSwizzleW, - reg->DstRegisterExtConcode.CondSrcIndex, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) dst_register_ext_concode; - } - - if( tgsi_compare_dst_register_ext_modulate( - reg->DstRegisterExtModulate, - tgsi_default_dst_register_ext_modulate() ) ) { - struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate; - - if( maxsize <= size ) - return 0; - dst_register_ext_modulate = - (struct tgsi_dst_register_ext_modulate *) &tokens[size]; - size++; - - *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate( - reg->DstRegisterExtModulate.Modulate, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) dst_register_ext_modulate; - } - } - - for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { - const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i]; - struct tgsi_src_register *src_register; - struct tgsi_token *prev_token; - - if( maxsize <= size ) - return 0; - src_register = (struct tgsi_src_register *) &tokens[size]; - size++; - - *src_register = tgsi_build_src_register( - reg->SrcRegister.File, - reg->SrcRegister.SwizzleX, - reg->SrcRegister.SwizzleY, - reg->SrcRegister.SwizzleZ, - reg->SrcRegister.SwizzleW, - reg->SrcRegister.Negate, - reg->SrcRegister.Indirect, - reg->SrcRegister.Dimension, - reg->SrcRegister.Index, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register; - - if( tgsi_compare_src_register_ext_swz( - reg->SrcRegisterExtSwz, - tgsi_default_src_register_ext_swz() ) ) { - struct tgsi_src_register_ext_swz *src_register_ext_swz; - - /* Use of the extended swizzle requires the simple swizzle to be identity. - */ - assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X ); - assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y ); - assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z ); - assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W ); - assert( reg->SrcRegister.Negate == FALSE ); - - if( maxsize <= size ) - return 0; - src_register_ext_swz = - (struct tgsi_src_register_ext_swz *) &tokens[size]; - size++; - - *src_register_ext_swz = tgsi_build_src_register_ext_swz( - reg->SrcRegisterExtSwz.ExtSwizzleX, - reg->SrcRegisterExtSwz.ExtSwizzleY, - reg->SrcRegisterExtSwz.ExtSwizzleZ, - reg->SrcRegisterExtSwz.ExtSwizzleW, - reg->SrcRegisterExtSwz.NegateX, - reg->SrcRegisterExtSwz.NegateY, - reg->SrcRegisterExtSwz.NegateZ, - reg->SrcRegisterExtSwz.NegateW, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register_ext_swz; - } - - if( tgsi_compare_src_register_ext_mod( - reg->SrcRegisterExtMod, - tgsi_default_src_register_ext_mod() ) ) { - struct tgsi_src_register_ext_mod *src_register_ext_mod; - - if( maxsize <= size ) - return 0; - src_register_ext_mod = - (struct tgsi_src_register_ext_mod *) &tokens[size]; - size++; - - *src_register_ext_mod = tgsi_build_src_register_ext_mod( - reg->SrcRegisterExtMod.Complement, - reg->SrcRegisterExtMod.Bias, - reg->SrcRegisterExtMod.Scale2X, - reg->SrcRegisterExtMod.Absolute, - reg->SrcRegisterExtMod.Negate, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register_ext_mod; - } - - if( reg->SrcRegister.Indirect ) { - struct tgsi_src_register *ind; - - if( maxsize <= size ) - return 0; - ind = (struct tgsi_src_register *) &tokens[size]; - size++; - - *ind = tgsi_build_src_register( - reg->SrcRegisterInd.File, - reg->SrcRegisterInd.SwizzleX, - reg->SrcRegisterInd.SwizzleY, - reg->SrcRegisterInd.SwizzleZ, - reg->SrcRegisterInd.SwizzleW, - reg->SrcRegisterInd.Negate, - reg->SrcRegisterInd.Indirect, - reg->SrcRegisterInd.Dimension, - reg->SrcRegisterInd.Index, - instruction, - header ); - } - - if( reg->SrcRegister.Dimension ) { - struct tgsi_dimension *dim; - - assert( !reg->SrcRegisterDim.Dimension ); - - if( maxsize <= size ) - return 0; - dim = (struct tgsi_dimension *) &tokens[size]; - size++; - - *dim = tgsi_build_dimension( - reg->SrcRegisterDim.Indirect, - reg->SrcRegisterDim.Index, - instruction, - header ); - - if( reg->SrcRegisterDim.Indirect ) { - struct tgsi_src_register *ind; - - if( maxsize <= size ) - return 0; - ind = (struct tgsi_src_register *) &tokens[size]; - size++; - - *ind = tgsi_build_src_register( - reg->SrcRegisterDimInd.File, - reg->SrcRegisterDimInd.SwizzleX, - reg->SrcRegisterDimInd.SwizzleY, - reg->SrcRegisterDimInd.SwizzleZ, - reg->SrcRegisterDimInd.SwizzleW, - reg->SrcRegisterDimInd.Negate, - reg->SrcRegisterDimInd.Indirect, - reg->SrcRegisterDimInd.Dimension, - reg->SrcRegisterDimInd.Index, - instruction, - header ); - } - } - } - - return size; -} - -struct tgsi_instruction_ext_nv -tgsi_default_instruction_ext_nv( void ) -{ - struct tgsi_instruction_ext_nv instruction_ext_nv; - - instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV; - instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT; - instruction_ext_nv.CondDstIndex = 0; - instruction_ext_nv.CondFlowIndex = 0; - instruction_ext_nv.CondMask = TGSI_CC_TR; - instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X; - instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y; - instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z; - instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W; - instruction_ext_nv.CondDstUpdate = 0; - instruction_ext_nv.CondFlowEnable = 0; - instruction_ext_nv.Padding = 0; - instruction_ext_nv.Extended = 0; - - return instruction_ext_nv; -} - -union token_u32 -{ - unsigned u32; -}; - -unsigned -tgsi_compare_instruction_ext_nv( - struct tgsi_instruction_ext_nv a, - struct tgsi_instruction_ext_nv b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_instruction_ext_nv -tgsi_build_instruction_ext_nv( - unsigned precision, - unsigned cond_dst_index, - unsigned cond_flow_index, - unsigned cond_mask, - unsigned cond_swizzle_x, - unsigned cond_swizzle_y, - unsigned cond_swizzle_z, - unsigned cond_swizzle_w, - unsigned cond_dst_update, - unsigned cond_flow_update, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_instruction_ext_nv instruction_ext_nv; - - instruction_ext_nv = tgsi_default_instruction_ext_nv(); - instruction_ext_nv.Precision = precision; - instruction_ext_nv.CondDstIndex = cond_dst_index; - instruction_ext_nv.CondFlowIndex = cond_flow_index; - instruction_ext_nv.CondMask = cond_mask; - instruction_ext_nv.CondSwizzleX = cond_swizzle_x; - instruction_ext_nv.CondSwizzleY = cond_swizzle_y; - instruction_ext_nv.CondSwizzleZ = cond_swizzle_z; - instruction_ext_nv.CondSwizzleW = cond_swizzle_w; - instruction_ext_nv.CondDstUpdate = cond_dst_update; - instruction_ext_nv.CondFlowEnable = cond_flow_update; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return instruction_ext_nv; -} - -struct tgsi_instruction_ext_label -tgsi_default_instruction_ext_label( void ) -{ - struct tgsi_instruction_ext_label instruction_ext_label; - - instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; - instruction_ext_label.Label = 0; - instruction_ext_label.Padding = 0; - instruction_ext_label.Extended = 0; - - return instruction_ext_label; -} - -unsigned -tgsi_compare_instruction_ext_label( - struct tgsi_instruction_ext_label a, - struct tgsi_instruction_ext_label b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_instruction_ext_label -tgsi_build_instruction_ext_label( - unsigned label, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_instruction_ext_label instruction_ext_label; - - instruction_ext_label = tgsi_default_instruction_ext_label(); - instruction_ext_label.Label = label; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return instruction_ext_label; -} - -struct tgsi_instruction_ext_texture -tgsi_default_instruction_ext_texture( void ) -{ - struct tgsi_instruction_ext_texture instruction_ext_texture; - - instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; - instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN; - instruction_ext_texture.Padding = 0; - instruction_ext_texture.Extended = 0; - - return instruction_ext_texture; -} - -unsigned -tgsi_compare_instruction_ext_texture( - struct tgsi_instruction_ext_texture a, - struct tgsi_instruction_ext_texture b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_instruction_ext_texture -tgsi_build_instruction_ext_texture( - unsigned texture, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_instruction_ext_texture instruction_ext_texture; - - instruction_ext_texture = tgsi_default_instruction_ext_texture(); - instruction_ext_texture.Texture = texture; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return instruction_ext_texture; -} - -struct tgsi_src_register -tgsi_default_src_register( void ) -{ - struct tgsi_src_register src_register; - - src_register.File = TGSI_FILE_NULL; - src_register.SwizzleX = TGSI_SWIZZLE_X; - src_register.SwizzleY = TGSI_SWIZZLE_Y; - src_register.SwizzleZ = TGSI_SWIZZLE_Z; - src_register.SwizzleW = TGSI_SWIZZLE_W; - src_register.Negate = 0; - src_register.Indirect = 0; - src_register.Dimension = 0; - src_register.Index = 0; - src_register.Extended = 0; - - return src_register; -} - -struct tgsi_src_register -tgsi_build_src_register( - unsigned file, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - unsigned negate, - unsigned indirect, - unsigned dimension, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register src_register; - - assert( file <= TGSI_FILE_IMMEDIATE ); - assert( swizzle_x <= TGSI_SWIZZLE_W ); - assert( swizzle_y <= TGSI_SWIZZLE_W ); - assert( swizzle_z <= TGSI_SWIZZLE_W ); - assert( swizzle_w <= TGSI_SWIZZLE_W ); - assert( negate <= 1 ); - assert( index >= -0x8000 && index <= 0x7FFF ); - - src_register = tgsi_default_src_register(); - src_register.File = file; - src_register.SwizzleX = swizzle_x; - src_register.SwizzleY = swizzle_y; - src_register.SwizzleZ = swizzle_z; - src_register.SwizzleW = swizzle_w; - src_register.Negate = negate; - src_register.Indirect = indirect; - src_register.Dimension = dimension; - src_register.Index = index; - - instruction_grow( instruction, header ); - - return src_register; -} - -struct tgsi_full_src_register -tgsi_default_full_src_register( void ) -{ - struct tgsi_full_src_register full_src_register; - - full_src_register.SrcRegister = tgsi_default_src_register(); - full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz(); - full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod(); - full_src_register.SrcRegisterInd = tgsi_default_src_register(); - full_src_register.SrcRegisterDim = tgsi_default_dimension(); - full_src_register.SrcRegisterDimInd = tgsi_default_src_register(); - - return full_src_register; -} - -struct tgsi_src_register_ext_swz -tgsi_default_src_register_ext_swz( void ) -{ - struct tgsi_src_register_ext_swz src_register_ext_swz; - - src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ; - src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X; - src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y; - src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z; - src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W; - src_register_ext_swz.NegateX = 0; - src_register_ext_swz.NegateY = 0; - src_register_ext_swz.NegateZ = 0; - src_register_ext_swz.NegateW = 0; - src_register_ext_swz.Padding = 0; - src_register_ext_swz.Extended = 0; - - return src_register_ext_swz; -} - -unsigned -tgsi_compare_src_register_ext_swz( - struct tgsi_src_register_ext_swz a, - struct tgsi_src_register_ext_swz b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_src_register_ext_swz -tgsi_build_src_register_ext_swz( - unsigned ext_swizzle_x, - unsigned ext_swizzle_y, - unsigned ext_swizzle_z, - unsigned ext_swizzle_w, - unsigned negate_x, - unsigned negate_y, - unsigned negate_z, - unsigned negate_w, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register_ext_swz src_register_ext_swz; - - assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE ); - assert( negate_x <= 1 ); - assert( negate_y <= 1 ); - assert( negate_z <= 1 ); - assert( negate_w <= 1 ); - - src_register_ext_swz = tgsi_default_src_register_ext_swz(); - src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; - src_register_ext_swz.ExtSwizzleY = ext_swizzle_y; - src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z; - src_register_ext_swz.ExtSwizzleW = ext_swizzle_w; - src_register_ext_swz.NegateX = negate_x; - src_register_ext_swz.NegateY = negate_y; - src_register_ext_swz.NegateZ = negate_z; - src_register_ext_swz.NegateW = negate_w; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return src_register_ext_swz; -} - -struct tgsi_src_register_ext_mod -tgsi_default_src_register_ext_mod( void ) -{ - struct tgsi_src_register_ext_mod src_register_ext_mod; - - src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; - src_register_ext_mod.Complement = 0; - src_register_ext_mod.Bias = 0; - src_register_ext_mod.Scale2X = 0; - src_register_ext_mod.Absolute = 0; - src_register_ext_mod.Negate = 0; - src_register_ext_mod.Padding = 0; - src_register_ext_mod.Extended = 0; - - return src_register_ext_mod; -} - -unsigned -tgsi_compare_src_register_ext_mod( - struct tgsi_src_register_ext_mod a, - struct tgsi_src_register_ext_mod b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_src_register_ext_mod -tgsi_build_src_register_ext_mod( - unsigned complement, - unsigned bias, - unsigned scale_2x, - unsigned absolute, - unsigned negate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register_ext_mod src_register_ext_mod; - - assert( complement <= 1 ); - assert( bias <= 1 ); - assert( scale_2x <= 1 ); - assert( absolute <= 1 ); - assert( negate <= 1 ); - - src_register_ext_mod = tgsi_default_src_register_ext_mod(); - src_register_ext_mod.Complement = complement; - src_register_ext_mod.Bias = bias; - src_register_ext_mod.Scale2X = scale_2x; - src_register_ext_mod.Absolute = absolute; - src_register_ext_mod.Negate = negate; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return src_register_ext_mod; -} - -struct tgsi_dimension -tgsi_default_dimension( void ) -{ - struct tgsi_dimension dimension; - - dimension.Indirect = 0; - dimension.Dimension = 0; - dimension.Padding = 0; - dimension.Index = 0; - dimension.Extended = 0; - - return dimension; -} - -struct tgsi_dimension -tgsi_build_dimension( - unsigned indirect, - unsigned index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dimension dimension; - - dimension = tgsi_default_dimension(); - dimension.Indirect = indirect; - dimension.Index = index; - - instruction_grow( instruction, header ); - - return dimension; -} - -struct tgsi_dst_register -tgsi_default_dst_register( void ) -{ - struct tgsi_dst_register dst_register; - - dst_register.File = TGSI_FILE_NULL; - dst_register.WriteMask = TGSI_WRITEMASK_XYZW; - dst_register.Indirect = 0; - dst_register.Dimension = 0; - dst_register.Index = 0; - dst_register.Padding = 0; - dst_register.Extended = 0; - - return dst_register; -} - -struct tgsi_dst_register -tgsi_build_dst_register( - unsigned file, - unsigned mask, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dst_register dst_register; - - assert( file <= TGSI_FILE_IMMEDIATE ); - assert( mask <= TGSI_WRITEMASK_XYZW ); - assert( index >= -32768 && index <= 32767 ); - - dst_register = tgsi_default_dst_register(); - dst_register.File = file; - dst_register.WriteMask = mask; - dst_register.Index = index; - - instruction_grow( instruction, header ); - - return dst_register; -} - -struct tgsi_full_dst_register -tgsi_default_full_dst_register( void ) -{ - struct tgsi_full_dst_register full_dst_register; - - full_dst_register.DstRegister = tgsi_default_dst_register(); - full_dst_register.DstRegisterExtConcode = - tgsi_default_dst_register_ext_concode(); - full_dst_register.DstRegisterExtModulate = - tgsi_default_dst_register_ext_modulate(); - - return full_dst_register; -} - -struct tgsi_dst_register_ext_concode -tgsi_default_dst_register_ext_concode( void ) -{ - struct tgsi_dst_register_ext_concode dst_register_ext_concode; - - dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE; - dst_register_ext_concode.CondMask = TGSI_CC_TR; - dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X; - dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y; - dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z; - dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W; - dst_register_ext_concode.CondSrcIndex = 0; - dst_register_ext_concode.Padding = 0; - dst_register_ext_concode.Extended = 0; - - return dst_register_ext_concode; -} - -unsigned -tgsi_compare_dst_register_ext_concode( - struct tgsi_dst_register_ext_concode a, - struct tgsi_dst_register_ext_concode b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_dst_register_ext_concode -tgsi_build_dst_register_ext_concode( - unsigned cc, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - int index, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dst_register_ext_concode dst_register_ext_concode; - - assert( cc <= TGSI_CC_FL ); - assert( swizzle_x <= TGSI_SWIZZLE_W ); - assert( swizzle_y <= TGSI_SWIZZLE_W ); - assert( swizzle_z <= TGSI_SWIZZLE_W ); - assert( swizzle_w <= TGSI_SWIZZLE_W ); - assert( index >= -32768 && index <= 32767 ); - - dst_register_ext_concode = tgsi_default_dst_register_ext_concode(); - dst_register_ext_concode.CondMask = cc; - dst_register_ext_concode.CondSwizzleX = swizzle_x; - dst_register_ext_concode.CondSwizzleY = swizzle_y; - dst_register_ext_concode.CondSwizzleZ = swizzle_z; - dst_register_ext_concode.CondSwizzleW = swizzle_w; - dst_register_ext_concode.CondSrcIndex = index; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return dst_register_ext_concode; -} - -struct tgsi_dst_register_ext_modulate -tgsi_default_dst_register_ext_modulate( void ) -{ - struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; - - dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE; - dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X; - dst_register_ext_modulate.Padding = 0; - dst_register_ext_modulate.Extended = 0; - - return dst_register_ext_modulate; -} - -unsigned -tgsi_compare_dst_register_ext_modulate( - struct tgsi_dst_register_ext_modulate a, - struct tgsi_dst_register_ext_modulate b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_dst_register_ext_modulate -tgsi_build_dst_register_ext_modulate( - unsigned modulate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; - - assert( modulate <= TGSI_MODULATE_EIGHTH ); - - dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate(); - dst_register_ext_modulate.Modulate = modulate; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return dst_register_ext_modulate; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h deleted file mode 100644 index ed258302487..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h +++ /dev/null @@ -1,332 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_BUILD_H -#define TGSI_BUILD_H - -#if defined __cplusplus -extern "C" { -#endif - -/* - * version - */ - -struct tgsi_version -tgsi_build_version( void ); - -/* - * header - */ - -struct tgsi_header -tgsi_build_header( void ); - -struct tgsi_processor -tgsi_default_processor( void ); - -struct tgsi_processor -tgsi_build_processor( - unsigned processor, - struct tgsi_header *header ); - -/* - * declaration - */ - -struct tgsi_declaration -tgsi_default_declaration( void ); - -struct tgsi_declaration -tgsi_build_declaration( - unsigned file, - unsigned usage_mask, - unsigned interpolate, - unsigned semantic, - struct tgsi_header *header ); - -struct tgsi_full_declaration -tgsi_default_full_declaration( void ); - -unsigned -tgsi_build_full_declaration( - const struct tgsi_full_declaration *full_decl, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ); - -struct tgsi_declaration_range -tgsi_default_declaration_range( void ); - -struct tgsi_declaration_range -tgsi_build_declaration_range( - unsigned first, - unsigned last, - struct tgsi_declaration *declaration, - struct tgsi_header *header ); - -struct tgsi_declaration_semantic -tgsi_default_declaration_semantic( void ); - -struct tgsi_declaration_semantic -tgsi_build_declaration_semantic( - unsigned semantic_name, - unsigned semantic_index, - struct tgsi_declaration *declaration, - struct tgsi_header *header ); - -/* - * immediate - */ - -struct tgsi_immediate -tgsi_default_immediate( void ); - -struct tgsi_immediate -tgsi_build_immediate( - struct tgsi_header *header ); - -struct tgsi_full_immediate -tgsi_default_full_immediate( void ); - -struct tgsi_immediate_float32 -tgsi_build_immediate_float32( - float value, - struct tgsi_immediate *immediate, - struct tgsi_header *header ); - -unsigned -tgsi_build_full_immediate( - const struct tgsi_full_immediate *full_imm, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ); - -/* - * instruction - */ - -struct tgsi_instruction -tgsi_default_instruction( void ); - -struct tgsi_instruction -tgsi_build_instruction( - unsigned opcode, - unsigned saturate, - unsigned num_dst_regs, - unsigned num_src_regs, - struct tgsi_header *header ); - -struct tgsi_full_instruction -tgsi_default_full_instruction( void ); - -unsigned -tgsi_build_full_instruction( - const struct tgsi_full_instruction *full_inst, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ); - -struct tgsi_instruction_ext_nv -tgsi_default_instruction_ext_nv( void ); - -unsigned -tgsi_compare_instruction_ext_nv( - struct tgsi_instruction_ext_nv a, - struct tgsi_instruction_ext_nv b ); - -struct tgsi_instruction_ext_nv -tgsi_build_instruction_ext_nv( - unsigned precision, - unsigned cond_dst_index, - unsigned cond_flow_index, - unsigned cond_mask, - unsigned cond_swizzle_x, - unsigned cond_swizzle_y, - unsigned cond_swizzle_z, - unsigned cond_swizzle_w, - unsigned cond_dst_update, - unsigned cond_flow_update, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_instruction_ext_label -tgsi_default_instruction_ext_label( void ); - -unsigned -tgsi_compare_instruction_ext_label( - struct tgsi_instruction_ext_label a, - struct tgsi_instruction_ext_label b ); - -struct tgsi_instruction_ext_label -tgsi_build_instruction_ext_label( - unsigned label, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_instruction_ext_texture -tgsi_default_instruction_ext_texture( void ); - -unsigned -tgsi_compare_instruction_ext_texture( - struct tgsi_instruction_ext_texture a, - struct tgsi_instruction_ext_texture b ); - -struct tgsi_instruction_ext_texture -tgsi_build_instruction_ext_texture( - unsigned texture, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_src_register -tgsi_default_src_register( void ); - -struct tgsi_src_register -tgsi_build_src_register( - unsigned file, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - unsigned negate, - unsigned indirect, - unsigned dimension, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_full_src_register -tgsi_default_full_src_register( void ); - -struct tgsi_src_register_ext_swz -tgsi_default_src_register_ext_swz( void ); - -unsigned -tgsi_compare_src_register_ext_swz( - struct tgsi_src_register_ext_swz a, - struct tgsi_src_register_ext_swz b ); - -struct tgsi_src_register_ext_swz -tgsi_build_src_register_ext_swz( - unsigned ext_swizzle_x, - unsigned ext_swizzle_y, - unsigned ext_swizzle_z, - unsigned ext_swizzle_w, - unsigned negate_x, - unsigned negate_y, - unsigned negate_z, - unsigned negate_w, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_src_register_ext_mod -tgsi_default_src_register_ext_mod( void ); - -unsigned -tgsi_compare_src_register_ext_mod( - struct tgsi_src_register_ext_mod a, - struct tgsi_src_register_ext_mod b ); - -struct tgsi_src_register_ext_mod -tgsi_build_src_register_ext_mod( - unsigned complement, - unsigned bias, - unsigned scale_2x, - unsigned absolute, - unsigned negate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_dimension -tgsi_default_dimension( void ); - -struct tgsi_dimension -tgsi_build_dimension( - unsigned indirect, - unsigned index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_dst_register -tgsi_default_dst_register( void ); - -struct tgsi_dst_register -tgsi_build_dst_register( - unsigned file, - unsigned mask, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_full_dst_register -tgsi_default_full_dst_register( void ); - -struct tgsi_dst_register_ext_concode -tgsi_default_dst_register_ext_concode( void ); - -unsigned -tgsi_compare_dst_register_ext_concode( - struct tgsi_dst_register_ext_concode a, - struct tgsi_dst_register_ext_concode b ); - -struct tgsi_dst_register_ext_concode -tgsi_build_dst_register_ext_concode( - unsigned cc, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - int index, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_dst_register_ext_modulate -tgsi_default_dst_register_ext_modulate( void ); - -unsigned -tgsi_compare_dst_register_ext_modulate( - struct tgsi_dst_register_ext_modulate a, - struct tgsi_dst_register_ext_modulate b ); - -struct tgsi_dst_register_ext_modulate -tgsi_build_dst_register_ext_modulate( - unsigned modulate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_BUILD_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c deleted file mode 100644 index d2e6375212f..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ /dev/null @@ -1,582 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "tgsi_dump.h" -#include "tgsi_iterate.h" - -struct dump_ctx -{ - struct tgsi_iterate_context iter; - - uint instno; -}; - -static void -dump_enum( - uint e, - const char **enums, - uint enum_count ) -{ - if (e >= enum_count) - debug_printf( "%u", e ); - else - debug_printf( "%s", enums[e] ); -} - -#define EOL() debug_printf( "\n" ) -#define TXT(S) debug_printf( "%s", S ) -#define CHR(C) debug_printf( "%c", C ) -#define UIX(I) debug_printf( "0x%x", I ) -#define UID(I) debug_printf( "%u", I ) -#define SID(I) debug_printf( "%d", I ) -#define FLT(F) debug_printf( "%10.4f", F ) -#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) - -static const char *processor_type_names[] = -{ - "FRAG", - "VERT", - "GEOM" -}; - -static const char *file_names[] = -{ - "NULL", - "CONST", - "IN", - "OUT", - "TEMP", - "SAMP", - "ADDR", - "IMM" -}; - -static const char *interpolate_names[] = -{ - "CONSTANT", - "LINEAR", - "PERSPECTIVE" -}; - -static const char *semantic_names[] = -{ - "POSITION", - "COLOR", - "BCOLOR", - "FOG", - "PSIZE", - "GENERIC", - "NORMAL" -}; - -static const char *immediate_type_names[] = -{ - "FLT32" -}; - -static const char *opcode_names[TGSI_OPCODE_LAST] = -{ - "ARL", - "MOV", - "LIT", - "RCP", - "RSQ", - "EXP", - "LOG", - "MUL", - "ADD", - "DP3", - "DP4", - "DST", - "MIN", - "MAX", - "SLT", - "SGE", - "MAD", - "SUB", - "LERP", - "CND", - "CND0", - "DOT2ADD", - "INDEX", - "NEGATE", - "FRAC", - "CLAMP", - "FLOOR", - "ROUND", - "EXPBASE2", - "LOGBASE2", - "POWER", - "CROSSPRODUCT", - "MULTIPLYMATRIX", - "ABS", - "RCC", - "DPH", - "COS", - "DDX", - "DDY", - "KILP", - "PK2H", - "PK2US", - "PK4B", - "PK4UB", - "RFL", - "SEQ", - "SFL", - "SGT", - "SIN", - "SLE", - "SNE", - "STR", - "TEX", - "TXD", - "TXP", - "UP2H", - "UP2US", - "UP4B", - "UP4UB", - "X2D", - "ARA", - "ARR", - "BRA", - "CAL", - "RET", - "SSG", - "CMP", - "SCS", - "TXB", - "NRM", - "DIV", - "DP2", - "TXL", - "BRK", - "IF", - "LOOP", - "REP", - "ELSE", - "ENDIF", - "ENDLOOP", - "ENDREP", - "PUSHA", - "POPA", - "CEIL", - "I2F", - "NOT", - "TRUNC", - "SHL", - "SHR", - "AND", - "OR", - "MOD", - "XOR", - "SAD", - "TXF", - "TXQ", - "CONT", - "EMIT", - "ENDPRIM", - "BGNLOOP2", - "BGNSUB", - "ENDLOOP2", - "ENDSUB", - "NOISE1", - "NOISE2", - "NOISE3", - "NOISE4", - "NOP", - "M4X3", - "M3X4", - "M3X3", - "M3X2", - "NRM4", - "CALLNZ", - "IFC", - "BREAKC", - "KIL", - "END", - "SWZ" -}; - -static const char *swizzle_names[] = -{ - "x", - "y", - "z", - "w" -}; - -static const char *texture_names[] = -{ - "UNKNOWN", - "1D", - "2D", - "3D", - "CUBE", - "RECT", - "SHADOW1D", - "SHADOW2D", - "SHADOWRECT" -}; - -static const char *extswizzle_names[] = -{ - "x", - "y", - "z", - "w", - "0", - "1" -}; - -static const char *modulate_names[TGSI_MODULATE_COUNT] = -{ - "", - "_2X", - "_4X", - "_8X", - "_D2", - "_D4", - "_D8" -}; - -static void -_dump_register_prefix( - uint file, - uint first, - uint last ) -{ - - -} - -static void -_dump_register( - uint file, - int first, - int last ) -{ - ENM( file, file_names ); - CHR( '[' ); - SID( first ); - if (first != last) { - TXT( ".." ); - SID( last ); - } - CHR( ']' ); -} - -static void -_dump_register_ind( - uint file, - int index, - uint ind_file, - int ind_index ) -{ - ENM( file, file_names ); - CHR( '[' ); - ENM( ind_file, file_names ); - CHR( '[' ); - SID( ind_index ); - CHR( ']' ); - if (index != 0) { - if (index > 0) - CHR( '+' ); - SID( index ); - } - CHR( ']' ); -} - -static void -_dump_writemask( - uint writemask ) -{ - if (writemask != TGSI_WRITEMASK_XYZW) { - CHR( '.' ); - if (writemask & TGSI_WRITEMASK_X) - CHR( 'x' ); - if (writemask & TGSI_WRITEMASK_Y) - CHR( 'y' ); - if (writemask & TGSI_WRITEMASK_Z) - CHR( 'z' ); - if (writemask & TGSI_WRITEMASK_W) - CHR( 'w' ); - } -} - -void -tgsi_dump_declaration( - const struct tgsi_full_declaration *decl ) -{ - TXT( "\nDCL " ); - - _dump_register( - decl->Declaration.File, - decl->DeclarationRange.First, - decl->DeclarationRange.Last ); - _dump_writemask( - decl->Declaration.UsageMask ); - - if (decl->Declaration.Semantic) { - TXT( ", " ); - ENM( decl->Semantic.SemanticName, semantic_names ); - if (decl->Semantic.SemanticIndex != 0 || - decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) { - CHR( '[' ); - UID( decl->Semantic.SemanticIndex ); - CHR( ']' ); - } - } - - TXT( ", " ); - ENM( decl->Declaration.Interpolate, interpolate_names ); -} - -static boolean -iter_declaration( - struct tgsi_iterate_context *iter, - struct tgsi_full_declaration *decl ) -{ - tgsi_dump_declaration( decl ); - return TRUE; -} - -void -tgsi_dump_immediate( - const struct tgsi_full_immediate *imm ) -{ - uint i; - - TXT( "\nIMM " ); - ENM( imm->Immediate.DataType, immediate_type_names ); - - TXT( " { " ); - for (i = 0; i < imm->Immediate.Size - 1; i++) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - FLT( imm->u.ImmediateFloat32[i].Float ); - break; - default: - assert( 0 ); - } - - if (i < imm->Immediate.Size - 2) - TXT( ", " ); - } - TXT( " }" ); -} - -static boolean -iter_immediate( - struct tgsi_iterate_context *iter, - struct tgsi_full_immediate *imm ) -{ - tgsi_dump_immediate( imm ); - return TRUE; -} - -void -tgsi_dump_instruction( - const struct tgsi_full_instruction *inst, - uint instno ) -{ - uint i; - boolean first_reg = TRUE; - - EOL(); - UID( instno ); - CHR( ':' ); - ENM( inst->Instruction.Opcode, opcode_names ); - - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - TXT( "_SAT" ); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - TXT( "_SATNV" ); - break; - default: - assert( 0 ); - } - - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - - if (!first_reg) - CHR( ',' ); - CHR( ' ' ); - - _dump_register( - dst->DstRegister.File, - dst->DstRegister.Index, - dst->DstRegister.Index ); - ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); - _dump_writemask( dst->DstRegister.WriteMask ); - - first_reg = FALSE; - } - - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - - if (!first_reg) - CHR( ',' ); - CHR( ' ' ); - - if (src->SrcRegisterExtMod.Negate) - TXT( "-(" ); - if (src->SrcRegisterExtMod.Absolute) - CHR( '|' ); - if (src->SrcRegisterExtMod.Scale2X) - TXT( "2*(" ); - if (src->SrcRegisterExtMod.Bias) - CHR( '(' ); - if (src->SrcRegisterExtMod.Complement) - TXT( "1-(" ); - if (src->SrcRegister.Negate) - CHR( '-' ); - - if (src->SrcRegister.Indirect) { - _dump_register_ind( - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegisterInd.File, - src->SrcRegisterInd.Index ); - } - else { - _dump_register( - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegister.Index ); - } - - if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { - CHR( '.' ); - ENM( src->SrcRegister.SwizzleX, swizzle_names ); - ENM( src->SrcRegister.SwizzleY, swizzle_names ); - ENM( src->SrcRegister.SwizzleZ, swizzle_names ); - ENM( src->SrcRegister.SwizzleW, swizzle_names ); - } - if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { - CHR( '.' ); - if (src->SrcRegisterExtSwz.NegateX) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateY) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateZ) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateW) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names ); - } - - if (src->SrcRegisterExtMod.Complement) - CHR( ')' ); - if (src->SrcRegisterExtMod.Bias) - TXT( ")-.5" ); - if (src->SrcRegisterExtMod.Scale2X) - CHR( ')' ); - if (src->SrcRegisterExtMod.Absolute) - CHR( '|' ); - if (src->SrcRegisterExtMod.Negate) - CHR( ')' ); - - first_reg = FALSE; - } - - if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { - TXT( ", " ); - ENM( inst->InstructionExtTexture.Texture, texture_names ); - } - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_IF: - case TGSI_OPCODE_ELSE: - case TGSI_OPCODE_BGNLOOP2: - case TGSI_OPCODE_ENDLOOP2: - case TGSI_OPCODE_CAL: - TXT( " :" ); - UID( inst->InstructionExtLabel.Label ); - break; - } -} - -static boolean -iter_instruction( - struct tgsi_iterate_context *iter, - struct tgsi_full_instruction *inst ) -{ - struct dump_ctx *ctx = (struct dump_ctx *) iter; - - tgsi_dump_instruction( inst, ctx->instno++ ); - return TRUE; -} - -static boolean -prolog( - struct tgsi_iterate_context *ctx ) -{ - EOL(); - ENM( ctx->processor.Processor, processor_type_names ); - UID( ctx->version.MajorVersion ); - CHR( '.' ); - UID( ctx->version.MinorVersion ); - return TRUE; -} - -void -tgsi_dump( - const struct tgsi_token *tokens, - uint flags ) -{ - struct dump_ctx ctx; - - /* sanity checks */ - assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 ); - assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); - - ctx.iter.prolog = prolog; - ctx.iter.iterate_instruction = iter_instruction; - ctx.iter.iterate_declaration = iter_declaration; - ctx.iter.iterate_immediate = iter_immediate; - ctx.iter.epilog = NULL; - - ctx.instno = 0; - - tgsi_iterate_shader( tokens, &ctx.iter ); -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h deleted file mode 100644 index 51c230b5db4..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ /dev/null @@ -1,63 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_DUMP_H -#define TGSI_DUMP_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -void -tgsi_dump( - const struct tgsi_token *tokens, - uint flags ); - -struct tgsi_full_immediate; -struct tgsi_full_instruction; -struct tgsi_full_declaration; - -void -tgsi_dump_immediate( - const struct tgsi_full_immediate *imm ); - -void -tgsi_dump_instruction( - const struct tgsi_full_instruction *inst, - uint instno ); - -void -tgsi_dump_declaration( - const struct tgsi_full_declaration *decl ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_DUMP_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c deleted file mode 100644 index eabd74bd6d9..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c +++ /dev/null @@ -1,845 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "util/u_string.h" -#include "tgsi_dump_c.h" -#include "tgsi_parse.h" -#include "tgsi_build.h" - -static void -dump_enum( - const unsigned e, - const char **enums, - const unsigned enums_count ) -{ - if (e >= enums_count) { - debug_printf( "%u", e ); - } - else { - debug_printf( "%s", enums[e] ); - } -} - -#define EOL() debug_printf( "\n" ) -#define TXT(S) debug_printf( "%s", S ) -#define CHR(C) debug_printf( "%c", C ) -#define UIX(I) debug_printf( "0x%x", I ) -#define UID(I) debug_printf( "%u", I ) -#define SID(I) debug_printf( "%d", I ) -#define FLT(F) debug_printf( "%10.4f", F ) -#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) - -static const char *TGSI_PROCESSOR_TYPES[] = -{ - "PROCESSOR_FRAGMENT", - "PROCESSOR_VERTEX", - "PROCESSOR_GEOMETRY" -}; - -static const char *TGSI_TOKEN_TYPES[] = -{ - "TOKEN_TYPE_DECLARATION", - "TOKEN_TYPE_IMMEDIATE", - "TOKEN_TYPE_INSTRUCTION" -}; - -static const char *TGSI_FILES[] = -{ - "FILE_NULL", - "FILE_CONSTANT", - "FILE_INPUT", - "FILE_OUTPUT", - "FILE_TEMPORARY", - "FILE_SAMPLER", - "FILE_ADDRESS", - "FILE_IMMEDIATE" -}; - -static const char *TGSI_INTERPOLATES[] = -{ - "INTERPOLATE_CONSTANT", - "INTERPOLATE_LINEAR", - "INTERPOLATE_PERSPECTIVE" -}; - -static const char *TGSI_SEMANTICS[] = -{ - "SEMANTIC_POSITION", - "SEMANTIC_COLOR", - "SEMANTIC_BCOLOR", - "SEMANTIC_FOG", - "SEMANTIC_PSIZE", - "SEMANTIC_GENERIC", - "SEMANTIC_NORMAL" -}; - -static const char *TGSI_IMMS[] = -{ - "IMM_FLOAT32" -}; - -static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] = -{ - "OPCODE_ARL", - "OPCODE_MOV", - "OPCODE_LIT", - "OPCODE_RCP", - "OPCODE_RSQ", - "OPCODE_EXP", - "OPCODE_LOG", - "OPCODE_MUL", - "OPCODE_ADD", - "OPCODE_DP3", - "OPCODE_DP4", - "OPCODE_DST", - "OPCODE_MIN", - "OPCODE_MAX", - "OPCODE_SLT", - "OPCODE_SGE", - "OPCODE_MAD", - "OPCODE_SUB", - "OPCODE_LERP", - "OPCODE_CND", - "OPCODE_CND0", - "OPCODE_DOT2ADD", - "OPCODE_INDEX", - "OPCODE_NEGATE", - "OPCODE_FRAC", - "OPCODE_CLAMP", - "OPCODE_FLOOR", - "OPCODE_ROUND", - "OPCODE_EXPBASE2", - "OPCODE_LOGBASE2", - "OPCODE_POWER", - "OPCODE_CROSSPRODUCT", - "OPCODE_MULTIPLYMATRIX", - "OPCODE_ABS", - "OPCODE_RCC", - "OPCODE_DPH", - "OPCODE_COS", - "OPCODE_DDX", - "OPCODE_DDY", - "OPCODE_KILP", - "OPCODE_PK2H", - "OPCODE_PK2US", - "OPCODE_PK4B", - "OPCODE_PK4UB", - "OPCODE_RFL", - "OPCODE_SEQ", - "OPCODE_SFL", - "OPCODE_SGT", - "OPCODE_SIN", - "OPCODE_SLE", - "OPCODE_SNE", - "OPCODE_STR", - "OPCODE_TEX", - "OPCODE_TXD", - "OPCODE_TXP", - "OPCODE_UP2H", - "OPCODE_UP2US", - "OPCODE_UP4B", - "OPCODE_UP4UB", - "OPCODE_X2D", - "OPCODE_ARA", - "OPCODE_ARR", - "OPCODE_BRA", - "OPCODE_CAL", - "OPCODE_RET", - "OPCODE_SSG", - "OPCODE_CMP", - "OPCODE_SCS", - "OPCODE_TXB", - "OPCODE_NRM", - "OPCODE_DIV", - "OPCODE_DP2", - "OPCODE_TXL", - "OPCODE_BRK", - "OPCODE_IF", - "OPCODE_LOOP", - "OPCODE_REP", - "OPCODE_ELSE", - "OPCODE_ENDIF", - "OPCODE_ENDLOOP", - "OPCODE_ENDREP", - "OPCODE_PUSHA", - "OPCODE_POPA", - "OPCODE_CEIL", - "OPCODE_I2F", - "OPCODE_NOT", - "OPCODE_TRUNC", - "OPCODE_SHL", - "OPCODE_SHR", - "OPCODE_AND", - "OPCODE_OR", - "OPCODE_MOD", - "OPCODE_XOR", - "OPCODE_SAD", - "OPCODE_TXF", - "OPCODE_TXQ", - "OPCODE_CONT", - "OPCODE_EMIT", - "OPCODE_ENDPRIM", - "OPCODE_BGNLOOP2", - "OPCODE_BGNSUB", - "OPCODE_ENDLOOP2", - "OPCODE_ENDSUB", - "OPCODE_NOISE1", - "OPCODE_NOISE2", - "OPCODE_NOISE3", - "OPCODE_NOISE4", - "OPCODE_NOP", - "OPCODE_M4X3", - "OPCODE_M3X4", - "OPCODE_M3X3", - "OPCODE_M3X2", - "OPCODE_NRM4", - "OPCODE_CALLNZ", - "OPCODE_IFC", - "OPCODE_BREAKC", - "OPCODE_KIL", - "OPCODE_END" -}; - -static const char *TGSI_SATS[] = -{ - "SAT_NONE", - "SAT_ZERO_ONE", - "SAT_MINUS_PLUS_ONE" -}; - -static const char *TGSI_INSTRUCTION_EXTS[] = -{ - "INSTRUCTION_EXT_TYPE_NV", - "INSTRUCTION_EXT_TYPE_LABEL", - "INSTRUCTION_EXT_TYPE_TEXTURE" -}; - -static const char *TGSI_PRECISIONS[] = -{ - "PRECISION_DEFAULT", - "PRECISION_FLOAT32", - "PRECISION_FLOAT16", - "PRECISION_FIXED12" -}; - -static const char *TGSI_CCS[] = -{ - "CC_GT", - "CC_EQ", - "CC_LT", - "CC_UN", - "CC_GE", - "CC_LE", - "CC_NE", - "CC_TR", - "CC_FL" -}; - -static const char *TGSI_SWIZZLES[] = -{ - "SWIZZLE_X", - "SWIZZLE_Y", - "SWIZZLE_Z", - "SWIZZLE_W" -}; - -static const char *TGSI_TEXTURES[] = -{ - "TEXTURE_UNKNOWN", - "TEXTURE_1D", - "TEXTURE_2D", - "TEXTURE_3D", - "TEXTURE_CUBE", - "TEXTURE_RECT", - "TEXTURE_SHADOW1D", - "TEXTURE_SHADOW2D", - "TEXTURE_SHADOWRECT" -}; - -static const char *TGSI_SRC_REGISTER_EXTS[] = -{ - "SRC_REGISTER_EXT_TYPE_SWZ", - "SRC_REGISTER_EXT_TYPE_MOD" -}; - -static const char *TGSI_EXTSWIZZLES[] = -{ - "EXTSWIZZLE_X", - "EXTSWIZZLE_Y", - "EXTSWIZZLE_Z", - "EXTSWIZZLE_W", - "EXTSWIZZLE_ZERO", - "EXTSWIZZLE_ONE" -}; - -static const char *TGSI_WRITEMASKS[] = -{ - "0", - "WRITEMASK_X", - "WRITEMASK_Y", - "WRITEMASK_XY", - "WRITEMASK_Z", - "WRITEMASK_XZ", - "WRITEMASK_YZ", - "WRITEMASK_XYZ", - "WRITEMASK_W", - "WRITEMASK_XW", - "WRITEMASK_YW", - "WRITEMASK_XYW", - "WRITEMASK_ZW", - "WRITEMASK_XZW", - "WRITEMASK_YZW", - "WRITEMASK_XYZW" -}; - -static const char *TGSI_DST_REGISTER_EXTS[] = -{ - "DST_REGISTER_EXT_TYPE_CONDCODE", - "DST_REGISTER_EXT_TYPE_MODULATE" -}; - -static const char *TGSI_MODULATES[] = -{ - "MODULATE_1X", - "MODULATE_2X", - "MODULATE_4X", - "MODULATE_8X", - "MODULATE_HALF", - "MODULATE_QUARTER", - "MODULATE_EIGHTH" -}; - -static void -dump_declaration_verbose( - struct tgsi_full_declaration *decl, - unsigned ignored, - unsigned deflt, - struct tgsi_full_declaration *fd ) -{ - TXT( "\nFile : " ); - ENM( decl->Declaration.File, TGSI_FILES ); - if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { - TXT( "\nUsageMask : " ); - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { - CHR( 'X' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { - CHR( 'Y' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { - CHR( 'Z' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { - CHR( 'W' ); - } - } - if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { - TXT( "\nInterpolate: " ); - ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES ); - } - if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { - TXT( "\nSemantic : " ); - UID( decl->Declaration.Semantic ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Declaration.Padding ); - } - - EOL(); - TXT( "\nFirst: " ); - UID( decl->DeclarationRange.First ); - TXT( "\nLast : " ); - UID( decl->DeclarationRange.Last ); - - if( decl->Declaration.Semantic ) { - EOL(); - TXT( "\nSemanticName : " ); - ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); - TXT( "\nSemanticIndex: " ); - UID( decl->Semantic.SemanticIndex ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Semantic.Padding ); - } - } -} - -static void -dump_immediate_verbose( - struct tgsi_full_immediate *imm, - unsigned ignored ) -{ - unsigned i; - - TXT( "\nDataType : " ); - ENM( imm->Immediate.DataType, TGSI_IMMS ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( imm->Immediate.Padding ); - } - - for( i = 0; i < imm->Immediate.Size - 1; i++ ) { - EOL(); - switch( imm->Immediate.DataType ) { - case TGSI_IMM_FLOAT32: - TXT( "\nFloat: " ); - FLT( imm->u.ImmediateFloat32[i].Float ); - break; - - default: - assert( 0 ); - } - } -} - -static void -dump_instruction_verbose( - struct tgsi_full_instruction *inst, - unsigned ignored, - unsigned deflt, - struct tgsi_full_instruction *fi ) -{ - unsigned i; - - TXT( "\nOpcode : " ); - ENM( inst->Instruction.Opcode, TGSI_OPCODES ); - if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { - TXT( "\nSaturate : " ); - ENM( inst->Instruction.Saturate, TGSI_SATS ); - } - if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) { - TXT( "\nNumDstRegs : " ); - UID( inst->Instruction.NumDstRegs ); - } - if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) { - TXT( "\nNumSrcRegs : " ); - UID( inst->Instruction.NumSrcRegs ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->Instruction.Padding ); - } - - if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { - EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { - TXT( "\nPrecision : " ); - ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS ); - } - if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) { - TXT( "\nCondDstIndex : " ); - UID( inst->InstructionExtNv.CondDstIndex ); - } - if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) { - TXT( "\nCondFlowIndex : " ); - UID( inst->InstructionExtNv.CondFlowIndex ); - } - if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) { - TXT( "\nCondMask : " ); - ENM( inst->InstructionExtNv.CondMask, TGSI_CCS ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) { - TXT( "\nCondSwizzleX : " ); - ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) { - TXT( "\nCondSwizzleY : " ); - ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) { - TXT( "\nCondSwizzleZ : " ); - ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) { - TXT( "\nCondSwizzleW : " ); - ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) { - TXT( "\nCondDstUpdate : " ); - UID( inst->InstructionExtNv.CondDstUpdate ); - } - if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) { - TXT( "\nCondFlowEnable: " ); - UID( inst->InstructionExtNv.CondFlowEnable ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->InstructionExtNv.Padding ); - if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) { - TXT( "\nExtended : " ); - UID( inst->InstructionExtNv.Extended ); - } - } - } - - if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { - EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { - TXT( "\nLabel : " ); - UID( inst->InstructionExtLabel.Label ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->InstructionExtLabel.Padding ); - if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) { - TXT( "\nExtended: " ); - UID( inst->InstructionExtLabel.Extended ); - } - } - } - - if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { - EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { - TXT( "\nTexture : " ); - ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->InstructionExtTexture.Padding ); - if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) { - TXT( "\nExtended: " ); - UID( inst->InstructionExtTexture.Extended ); - } - } - } - - for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; - - EOL(); - TXT( "\nFile : " ); - ENM( dst->DstRegister.File, TGSI_FILES ); - if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { - TXT( "\nWriteMask: " ); - ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); - } - if( ignored ) { - if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { - TXT( "\nIndirect : " ); - UID( dst->DstRegister.Indirect ); - } - if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { - TXT( "\nDimension: " ); - UID( dst->DstRegister.Dimension ); - } - } - if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { - TXT( "\nIndex : " ); - SID( dst->DstRegister.Index ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegister.Padding ); - if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { - TXT( "\nExtended : " ); - UID( dst->DstRegister.Extended ); - } - } - - if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { - EOL(); - TXT( "\nType : " ); - ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); - if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { - TXT( "\nCondMask : " ); - ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) { - TXT( "\nCondSwizzleX: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) { - TXT( "\nCondSwizzleY: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) { - TXT( "\nCondSwizzleZ: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) { - TXT( "\nCondSwizzleW: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) { - TXT( "\nCondSrcIndex: " ); - UID( dst->DstRegisterExtConcode.CondSrcIndex ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegisterExtConcode.Padding ); - if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) { - TXT( "\nExtended : " ); - UID( dst->DstRegisterExtConcode.Extended ); - } - } - } - - if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { - EOL(); - TXT( "\nType : " ); - ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); - if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { - TXT( "\nModulate: " ); - ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegisterExtModulate.Padding ); - if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { - TXT( "\nExtended: " ); - UID( dst->DstRegisterExtModulate.Extended ); - } - } - } - } - - for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; - - EOL(); - TXT( "\nFile : "); - ENM( src->SrcRegister.File, TGSI_FILES ); - if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { - TXT( "\nSwizzleX : " ); - ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { - TXT( "\nSwizzleY : " ); - ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { - TXT( "\nSwizzleZ : " ); - ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { - TXT( "\nSwizzleW : " ); - ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { - TXT( "\nNegate : " ); - UID( src->SrcRegister.Negate ); - } - if( ignored ) { - if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { - TXT( "\nIndirect : " ); - UID( src->SrcRegister.Indirect ); - } - if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { - TXT( "\nDimension: " ); - UID( src->SrcRegister.Dimension ); - } - } - if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { - TXT( "\nIndex : " ); - SID( src->SrcRegister.Index ); - } - if( ignored ) { - if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegister.Extended ); - } - } - - if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { - EOL(); - TXT( "\nType : " ); - ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { - TXT( "\nExtSwizzleX: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) { - TXT( "\nExtSwizzleY: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) { - TXT( "\nExtSwizzleZ: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) { - TXT( "\nExtSwizzleW: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { - TXT( "\nNegateX : " ); - UID( src->SrcRegisterExtSwz.NegateX ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) { - TXT( "\nNegateY : " ); - UID( src->SrcRegisterExtSwz.NegateY ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) { - TXT( "\nNegateZ : " ); - UID( src->SrcRegisterExtSwz.NegateZ ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) { - TXT( "\nNegateW : " ); - UID( src->SrcRegisterExtSwz.NegateW ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtSwz.Padding ); - if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegisterExtSwz.Extended ); - } - } - } - - if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { - EOL(); - TXT( "\nType : " ); - ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { - TXT( "\nComplement: " ); - UID( src->SrcRegisterExtMod.Complement ); - } - if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { - TXT( "\nBias : " ); - UID( src->SrcRegisterExtMod.Bias ); - } - if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { - TXT( "\nScale2X : " ); - UID( src->SrcRegisterExtMod.Scale2X ); - } - if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { - TXT( "\nAbsolute : " ); - UID( src->SrcRegisterExtMod.Absolute ); - } - if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { - TXT( "\nNegate : " ); - UID( src->SrcRegisterExtMod.Negate ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtMod.Padding ); - if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegisterExtMod.Extended ); - } - } - } - } -} - -void -tgsi_dump_c( - const struct tgsi_token *tokens, - uint flags ) -{ - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - uint ignored = flags & TGSI_DUMP_C_IGNORED; - uint deflt = flags & TGSI_DUMP_C_DEFAULT; - uint instno = 0; - - /* sanity checks */ - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); - - tgsi_parse_init( &parse, tokens ); - - TXT( "tgsi-dump begin -----------------" ); - - TXT( "\nMajorVersion: " ); - UID( parse.FullVersion.Version.MajorVersion ); - TXT( "\nMinorVersion: " ); - UID( parse.FullVersion.Version.MinorVersion ); - EOL(); - - TXT( "\nHeaderSize: " ); - UID( parse.FullHeader.Header.HeaderSize ); - TXT( "\nBodySize : " ); - UID( parse.FullHeader.Header.BodySize ); - TXT( "\nProcessor : " ); - ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); - EOL(); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( &parse ); - - TXT( "\nType : " ); - ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); - if( ignored ) { - TXT( "\nSize : " ); - UID( parse.FullToken.Token.Size ); - if( deflt || parse.FullToken.Token.Extended ) { - TXT( "\nExtended : " ); - UID( parse.FullToken.Token.Extended ); - } - } - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - dump_declaration_verbose( - &parse.FullToken.FullDeclaration, - ignored, - deflt, - &fd ); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - dump_immediate_verbose( - &parse.FullToken.FullImmediate, - ignored ); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - dump_instruction_verbose( - &parse.FullToken.FullInstruction, - ignored, - deflt, - &fi ); - break; - - default: - assert( 0 ); - } - - EOL(); - } - - TXT( "\ntgsi-dump end -------------------\n" ); - - tgsi_parse_free( &parse ); -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h deleted file mode 100644 index d91cd35b3b7..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h +++ /dev/null @@ -1,49 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_DUMP_C_H -#define TGSI_DUMP_C_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -#define TGSI_DUMP_C_IGNORED 1 -#define TGSI_DUMP_C_DEFAULT 2 - -void -tgsi_dump_c( - const struct tgsi_token *tokens, - uint flags ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_DUMP_C_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c deleted file mode 100644 index 5371a88b964..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "tgsi_iterate.h" - -boolean -tgsi_iterate_shader( - const struct tgsi_token *tokens, - struct tgsi_iterate_context *ctx ) -{ - struct tgsi_parse_context parse; - - if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) - return FALSE; - - ctx->processor = parse.FullHeader.Processor; - ctx->version = parse.FullVersion.Version; - - if (ctx->prolog) - if (!ctx->prolog( ctx )) - goto fail; - - while (!tgsi_parse_end_of_tokens( &parse )) { - tgsi_parse_token( &parse ); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (ctx->iterate_instruction) - if (!ctx->iterate_instruction( ctx, &parse.FullToken.FullInstruction )) - goto fail; - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - if (ctx->iterate_declaration) - if (!ctx->iterate_declaration( ctx, &parse.FullToken.FullDeclaration )) - goto fail; - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - if (ctx->iterate_immediate) - if (!ctx->iterate_immediate( ctx, &parse.FullToken.FullImmediate )) - goto fail; - break; - - default: - assert( 0 ); - } - } - - if (ctx->epilog) - if (!ctx->epilog( ctx )) - goto fail; - - tgsi_parse_free( &parse ); - return TRUE; - -fail: - tgsi_parse_free( &parse ); - return FALSE; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h deleted file mode 100644 index f5bebf89b82..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h +++ /dev/null @@ -1,76 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_ITERATE_H -#define TGSI_ITERATE_H - -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" - -#if defined __cplusplus -extern "C" { -#endif - -struct tgsi_iterate_context -{ - boolean - (* prolog)( - struct tgsi_iterate_context *ctx ); - - boolean - (* iterate_instruction)( - struct tgsi_iterate_context *ctx, - struct tgsi_full_instruction *inst ); - - boolean - (* iterate_declaration)( - struct tgsi_iterate_context *ctx, - struct tgsi_full_declaration *decl ); - - boolean - (* iterate_immediate)( - struct tgsi_iterate_context *ctx, - struct tgsi_full_immediate *imm ); - - boolean - (* epilog)( - struct tgsi_iterate_context *ctx ); - - struct tgsi_processor processor; - struct tgsi_version version; -}; - -boolean -tgsi_iterate_shader( - const struct tgsi_token *tokens, - struct tgsi_iterate_context *ctx ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_ITERATE_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c deleted file mode 100644 index d16f0cdcad4..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c +++ /dev/null @@ -1,332 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi_parse.h" -#include "tgsi_build.h" - -void -tgsi_full_token_init( - union tgsi_full_token *full_token ) -{ - full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION; -} - -void -tgsi_full_token_free( - union tgsi_full_token *full_token ) -{ - if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) { - FREE( (void *) full_token->FullImmediate.u.Pointer ); - } -} - -unsigned -tgsi_parse_init( - struct tgsi_parse_context *ctx, - const struct tgsi_token *tokens ) -{ - ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0]; - if( ctx->FullVersion.Version.MajorVersion > 1 ) { - return TGSI_PARSE_ERROR; - } - - ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1]; - if( ctx->FullHeader.Header.HeaderSize >= 2 ) { - ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2]; - } - else { - ctx->FullHeader.Processor = tgsi_default_processor(); - } - - ctx->Tokens = tokens; - ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize; - - tgsi_full_token_init( &ctx->FullToken ); - - return TGSI_PARSE_OK; -} - -void -tgsi_parse_free( - struct tgsi_parse_context *ctx ) -{ - tgsi_full_token_free( &ctx->FullToken ); -} - -boolean -tgsi_parse_end_of_tokens( - struct tgsi_parse_context *ctx ) -{ - return ctx->Position >= - 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; -} - -static void -next_token( - struct tgsi_parse_context *ctx, - void *token ) -{ - assert( !tgsi_parse_end_of_tokens( ctx ) ); - - *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++]; -} - -void -tgsi_parse_token( - struct tgsi_parse_context *ctx ) -{ - struct tgsi_token token; - unsigned i; - - tgsi_full_token_free( &ctx->FullToken ); - tgsi_full_token_init( &ctx->FullToken ); - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; - - *decl = tgsi_default_full_declaration(); - decl->Declaration = *(struct tgsi_declaration *) &token; - - next_token( ctx, &decl->DeclarationRange ); - - if( decl->Declaration.Semantic ) { - next_token( ctx, &decl->Semantic ); - } - - break; - } - - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; - - *imm = tgsi_default_full_immediate(); - imm->Immediate = *(struct tgsi_immediate *) &token; - - assert( !imm->Immediate.Extended ); - - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - imm->u.Pointer = MALLOC( - sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) ); - for( i = 0; i < imm->Immediate.Size - 1; i++ ) { - next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] ); - } - break; - - default: - assert( 0 ); - } - - break; - } - - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction; - unsigned extended; - - *inst = tgsi_default_full_instruction(); - inst->Instruction = *(struct tgsi_instruction *) &token; - - extended = inst->Instruction.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_INSTRUCTION_EXT_TYPE_NV: - inst->InstructionExtNv = - *(struct tgsi_instruction_ext_nv *) &token; - break; - - case TGSI_INSTRUCTION_EXT_TYPE_LABEL: - inst->InstructionExtLabel = - *(struct tgsi_instruction_ext_label *) &token; - break; - - case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: - inst->InstructionExtTexture = - *(struct tgsi_instruction_ext_texture *) &token; - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } - - assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); - - for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - unsigned extended; - - next_token( ctx, &inst->FullDstRegisters[i].DstRegister ); - - /* - * No support for indirect or multi-dimensional addressing. - */ - assert( !inst->FullDstRegisters[i].DstRegister.Indirect ); - assert( !inst->FullDstRegisters[i].DstRegister.Dimension ); - - extended = inst->FullDstRegisters[i].DstRegister.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE: - inst->FullDstRegisters[i].DstRegisterExtConcode = - *(struct tgsi_dst_register_ext_concode *) &token; - break; - - case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: - inst->FullDstRegisters[i].DstRegisterExtModulate = - *(struct tgsi_dst_register_ext_modulate *) &token; - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } - } - - assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); - - for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - unsigned extended; - - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister ); - - extended = inst->FullSrcRegisters[i].SrcRegister.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: - inst->FullSrcRegisters[i].SrcRegisterExtSwz = - *(struct tgsi_src_register_ext_swz *) &token; - break; - - case TGSI_SRC_REGISTER_EXT_TYPE_MOD: - inst->FullSrcRegisters[i].SrcRegisterExtMod = - *(struct tgsi_src_register_ext_mod *) &token; - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } - - if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd ); - - /* - * No support for indirect or multi-dimensional addressing. - */ - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); - } - - if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim ); - - /* - * No support for multi-dimensional addressing. - */ - assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended ); - - if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd ); - - /* - * No support for indirect or multi-dimensional addressing. - */ - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); - } - } - } - - break; - } - - default: - assert( 0 ); - } -} - - -unsigned -tgsi_num_tokens(const struct tgsi_token *tokens) -{ - struct tgsi_parse_context ctx; - if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) { - unsigned len = (ctx.FullHeader.Header.HeaderSize + - ctx.FullHeader.Header.BodySize + - 1); - return len; - } - return 0; -} - - -/** - * Make a new copy of a token array. - */ -struct tgsi_token * -tgsi_dup_tokens(const struct tgsi_token *tokens) -{ - unsigned n = tgsi_num_tokens(tokens); - unsigned bytes = n * sizeof(struct tgsi_token); - struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes); - if (new_tokens) - memcpy(new_tokens, tokens, bytes); - return new_tokens; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h deleted file mode 100644 index 054350712d8..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ /dev/null @@ -1,151 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_PARSE_H -#define TGSI_PARSE_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -struct tgsi_full_version -{ - struct tgsi_version Version; -}; - -struct tgsi_full_header -{ - struct tgsi_header Header; - struct tgsi_processor Processor; -}; - -struct tgsi_full_dst_register -{ - struct tgsi_dst_register DstRegister; - struct tgsi_dst_register_ext_concode DstRegisterExtConcode; - struct tgsi_dst_register_ext_modulate DstRegisterExtModulate; -}; - -struct tgsi_full_src_register -{ - struct tgsi_src_register SrcRegister; - struct tgsi_src_register_ext_swz SrcRegisterExtSwz; - struct tgsi_src_register_ext_mod SrcRegisterExtMod; - struct tgsi_src_register SrcRegisterInd; - struct tgsi_dimension SrcRegisterDim; - struct tgsi_src_register SrcRegisterDimInd; -}; - -struct tgsi_full_declaration -{ - struct tgsi_declaration Declaration; - struct tgsi_declaration_range DeclarationRange; - struct tgsi_declaration_semantic Semantic; -}; - -struct tgsi_full_immediate -{ - struct tgsi_immediate Immediate; - union - { - const void *Pointer; - const struct tgsi_immediate_float32 *ImmediateFloat32; - } u; -}; - -#define TGSI_FULL_MAX_DST_REGISTERS 2 -#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */ - -struct tgsi_full_instruction -{ - struct tgsi_instruction Instruction; - struct tgsi_instruction_ext_nv InstructionExtNv; - struct tgsi_instruction_ext_label InstructionExtLabel; - struct tgsi_instruction_ext_texture InstructionExtTexture; - struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; - struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; -}; - -union tgsi_full_token -{ - struct tgsi_token Token; - struct tgsi_full_declaration FullDeclaration; - struct tgsi_full_immediate FullImmediate; - struct tgsi_full_instruction FullInstruction; -}; - -void -tgsi_full_token_init( - union tgsi_full_token *full_token ); - -void -tgsi_full_token_free( - union tgsi_full_token *full_token ); - -struct tgsi_parse_context -{ - const struct tgsi_token *Tokens; - unsigned Position; - struct tgsi_full_version FullVersion; - struct tgsi_full_header FullHeader; - union tgsi_full_token FullToken; -}; - -#define TGSI_PARSE_OK 0 -#define TGSI_PARSE_ERROR 1 - -unsigned -tgsi_parse_init( - struct tgsi_parse_context *ctx, - const struct tgsi_token *tokens ); - -void -tgsi_parse_free( - struct tgsi_parse_context *ctx ); - -boolean -tgsi_parse_end_of_tokens( - struct tgsi_parse_context *ctx ); - -void -tgsi_parse_token( - struct tgsi_parse_context *ctx ); - -unsigned -tgsi_num_tokens(const struct tgsi_token *tokens); - -struct tgsi_token * -tgsi_dup_tokens(const struct tgsi_token *tokens); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_PARSE_H */ - diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c deleted file mode 100644 index 2e3ec96b5b5..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c +++ /dev/null @@ -1,341 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "tgsi_sanity.h" -#include "tgsi_iterate.h" - -#define MAX_REGISTERS 256 - -typedef uint reg_flag; - -#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) - -struct sanity_check_ctx -{ - struct tgsi_iterate_context iter; - - reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; - reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; - boolean regs_ind_used[TGSI_FILE_COUNT]; - uint num_imms; - uint num_instructions; - uint index_of_END; - - uint errors; - uint warnings; -}; - -static void -report_error( - struct sanity_check_ctx *ctx, - const char *format, - ... ) -{ - va_list args; - - debug_printf( "Error : " ); - va_start( args, format ); - _debug_vprintf( format, args ); - va_end( args ); - debug_printf( "\n" ); - ctx->errors++; -} - -static void -report_warning( - struct sanity_check_ctx *ctx, - const char *format, - ... ) -{ - va_list args; - - debug_printf( "Warning: " ); - va_start( args, format ); - _debug_vprintf( format, args ); - va_end( args ); - debug_printf( "\n" ); - ctx->warnings++; -} - -static boolean -check_file_name( - struct sanity_check_ctx *ctx, - uint file ) -{ - if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) { - report_error( ctx, "Invalid register file name" ); - return FALSE; - } - return TRUE; -} - -static boolean -is_register_declared( - struct sanity_check_ctx *ctx, - uint file, - int index ) -{ - assert( index >= 0 && index < MAX_REGISTERS ); - - return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; -} - -static boolean -is_any_register_declared( - struct sanity_check_ctx *ctx, - uint file ) -{ - uint i; - - for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++) - if (ctx->regs_decl[file][i]) - return TRUE; - return FALSE; -} - -static boolean -is_register_used( - struct sanity_check_ctx *ctx, - uint file, - int index ) -{ - assert( index < MAX_REGISTERS ); - - return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; -} - -static const char *file_names[] = -{ - "NULL", - "CONST", - "IN", - "OUT", - "TEMP", - "SAMP", - "ADDR", - "IMM" -}; - -static boolean -check_register_usage( - struct sanity_check_ctx *ctx, - uint file, - int index, - const char *name, - boolean indirect_access ) -{ - if (!check_file_name( ctx, file )) - return FALSE; - if (indirect_access) { - if (!is_any_register_declared( ctx, file )) - report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); - ctx->regs_ind_used[file] = TRUE; - } - else { - if (!is_register_declared( ctx, file, index )) - report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); - ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); - } - return TRUE; -} - -static boolean -iter_instruction( - struct tgsi_iterate_context *iter, - struct tgsi_full_instruction *inst ) -{ - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - uint i; - - /* There must be no other instructions after END. - */ - if (ctx->index_of_END != ~0) { - report_error( ctx, "Unexpected instruction after END" ); - } - else if (inst->Instruction.Opcode == TGSI_OPCODE_END) { - ctx->index_of_END = ctx->num_instructions; - } - - /* Check destination and source registers' validity. - * Mark the registers as used. - */ - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - check_register_usage( - ctx, - inst->FullDstRegisters[i].DstRegister.File, - inst->FullDstRegisters[i].DstRegister.Index, - "destination", - FALSE ); - } - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - check_register_usage( - ctx, - inst->FullSrcRegisters[i].SrcRegister.File, - inst->FullSrcRegisters[i].SrcRegister.Index, - "source", - (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect ); - if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { - uint file; - int index; - - file = inst->FullSrcRegisters[i].SrcRegisterInd.File; - index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; - check_register_usage( - ctx, - file, - index, - "indirect", - FALSE ); - if (file != TGSI_FILE_ADDRESS || index != 0) - report_warning( ctx, "Indirect register not ADDR[0]" ); - } - } - - ctx->num_instructions++; - - return TRUE; -} - -static boolean -iter_declaration( - struct tgsi_iterate_context *iter, - struct tgsi_full_declaration *decl ) -{ - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - uint file; - uint i; - - /* No declarations allowed after the first instruction. - */ - if (ctx->num_instructions > 0) - report_error( ctx, "Instruction expected but declaration found" ); - - /* Check registers' validity. - * Mark the registers as declared. - */ - file = decl->Declaration.File; - if (!check_file_name( ctx, file )) - return TRUE; - for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { - if (is_register_declared( ctx, file, i )) - report_error( ctx, "The same register declared twice" ); - ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); - } - - return TRUE; -} - -static boolean -iter_immediate( - struct tgsi_iterate_context *iter, - struct tgsi_full_immediate *imm ) -{ - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - - assert( ctx->num_imms < MAX_REGISTERS ); - - /* No immediates allowed after the first instruction. - */ - if (ctx->num_instructions > 0) - report_error( ctx, "Instruction expected but immediate found" ); - - /* Mark the register as declared. - */ - ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); - ctx->num_imms++; - - /* Check data type validity. - */ - if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { - report_error( ctx, "Invalid immediate data type" ); - return TRUE; - } - - return TRUE; -} - -static boolean -epilog( - struct tgsi_iterate_context *iter ) -{ - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - uint file; - - /* There must be an END instruction at the end. - */ - if (ctx->index_of_END == ~0 || ctx->index_of_END != ctx->num_instructions - 1) { - report_error( ctx, "Expected END at end of instruction sequence" ); - } - - /* Check if all declared registers were used. - */ - for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { - uint i; - - for (i = 0; i < MAX_REGISTERS; i++) { - if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { - report_warning( ctx, "Register never used" ); - } - } - } - - /* Print totals, if any. - */ - if (ctx->errors || ctx->warnings) - debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings ); - - return TRUE; -} - -boolean -tgsi_sanity_check( - struct tgsi_token *tokens ) -{ - struct sanity_check_ctx ctx; - - ctx.iter.prolog = NULL; - ctx.iter.iterate_instruction = iter_instruction; - ctx.iter.iterate_declaration = iter_declaration; - ctx.iter.iterate_immediate = iter_immediate; - ctx.iter.epilog = epilog; - - memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); - memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); - memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) ); - ctx.num_imms = 0; - ctx.num_instructions = 0; - ctx.index_of_END = ~0; - - ctx.errors = 0; - ctx.warnings = 0; - - if (!tgsi_iterate_shader( tokens, &ctx.iter )) - return FALSE; - - return ctx.errors == 0; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h deleted file mode 100644 index ca45e94c7ad..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h +++ /dev/null @@ -1,49 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_SANITY_H -#define TGSI_SANITY_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -/* Check the given token stream for errors and common mistakes. - * Diagnostic messages are printed out to the debug output. - * Returns TRUE if there are no errors, even though there could be some warnings. - */ -boolean -tgsi_sanity_check( - struct tgsi_token *tokens ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_SANITY_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c deleted file mode 100644 index 240aaaf362c..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ /dev/null @@ -1,226 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * TGSI program scan utility. - * Used to determine which registers and instructions are used by a shader. - * - * Authors: Brian Paul - */ - - -#include "tgsi_scan.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_build.h" - -#include "pipe/p_util.h" - - - -/** - */ -void -tgsi_scan_shader(const struct tgsi_token *tokens, - struct tgsi_shader_info *info) -{ - uint procType, i; - struct tgsi_parse_context parse; - - memset(info, 0, sizeof(*info)); - for (i = 0; i < TGSI_FILE_COUNT; i++) - info->file_max[i] = -1; - - /** - ** Setup to begin parsing input shader - **/ - if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) { - debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n"); - return; - } - procType = parse.FullHeader.Processor.Processor; - assert(procType == TGSI_PROCESSOR_FRAGMENT || - procType == TGSI_PROCESSOR_VERTEX || - procType == TGSI_PROCESSOR_GEOMETRY); - - - /** - ** Loop over incoming program tokens/instructions - */ - while( !tgsi_parse_end_of_tokens( &parse ) ) { - - info->num_tokens++; - - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - struct tgsi_full_instruction *fullinst - = &parse.FullToken.FullInstruction; - - assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); - info->opcode_count[fullinst->Instruction.Opcode]++; - } - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - { - struct tgsi_full_declaration *fulldecl - = &parse.FullToken.FullDeclaration; - uint file = fulldecl->Declaration.File; - uint i; - for (i = fulldecl->DeclarationRange.First; - i <= fulldecl->DeclarationRange.Last; - i++) { - - /* only first 32 regs will appear in this bitfield */ - info->file_mask[file] |= (1 << i); - info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], (int)i); - - if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; - info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; - info->num_inputs++; - } - - if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; - info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; - info->num_outputs++; - } - - /* special case */ - if (procType == TGSI_PROCESSOR_FRAGMENT && - file == TGSI_FILE_OUTPUT && - fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { - info->writes_z = TRUE; - } - } - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - info->immediate_count++; - break; - - default: - assert( 0 ); - } - } - - assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs ); - assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs ); - - info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || - info->opcode_count[TGSI_OPCODE_KILP]); - - tgsi_parse_free (&parse); -} - - - -/** - * Check if the given shader is a "passthrough" shader consisting of only - * MOV instructions of the form: MOV OUT[n], IN[n] - * - */ -boolean -tgsi_is_passthrough_shader(const struct tgsi_token *tokens) -{ - struct tgsi_parse_context parse; - - /** - ** Setup to begin parsing input shader - **/ - if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { - debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n"); - return FALSE; - } - - /** - ** Loop over incoming program tokens/instructions - */ - while (!tgsi_parse_end_of_tokens(&parse)) { - - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - struct tgsi_full_instruction *fullinst = - &parse.FullToken.FullInstruction; - const struct tgsi_full_src_register *src = - &fullinst->FullSrcRegisters[0]; - const struct tgsi_full_dst_register *dst = - &fullinst->FullDstRegisters[0]; - - /* Do a whole bunch of checks for a simple move */ - if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || - src->SrcRegister.File != TGSI_FILE_INPUT || - dst->DstRegister.File != TGSI_FILE_OUTPUT || - src->SrcRegister.Index != dst->DstRegister.Index || - - src->SrcRegister.Negate || - src->SrcRegisterExtMod.Negate || - src->SrcRegisterExtMod.Absolute || - src->SrcRegisterExtMod.Scale2X || - src->SrcRegisterExtMod.Bias || - src->SrcRegisterExtMod.Complement || - - src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || - - src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W || - - dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW) - { - tgsi_parse_free(&parse); - return FALSE; - } - } - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - /* fall-through */ - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* fall-through */ - default: - ; /* no-op */ - } - } - - tgsi_parse_free(&parse); - - /* if we get here, it's a pass-through shader */ - return TRUE; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h deleted file mode 100644 index 5cb6efb3439..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h +++ /dev/null @@ -1,74 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_SCAN_H -#define TGSI_SCAN_H - - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" - - -/** - * Shader summary info - */ -struct tgsi_shader_info -{ - uint num_tokens; - - /* XXX eventually remove the corresponding fields from pipe_shader_state: */ - ubyte num_inputs; - ubyte num_outputs; - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ - ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ - ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - - uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ - uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ - int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ - - uint immediate_count; /**< number of immediates declared */ - - uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ - - boolean writes_z; /**< does fragment shader write Z value? */ - boolean uses_kill; /**< KIL or KILP instruction used? */ -}; - - -extern void -tgsi_scan_shader(const struct tgsi_token *tokens, - struct tgsi_shader_info *info); - - -extern boolean -tgsi_is_passthrough_shader(const struct tgsi_token *tokens); - - -#endif /* TGSI_SCAN_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c deleted file mode 100644 index 35cb3055bb2..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ /dev/null @@ -1,1221 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "tgsi_text.h" -#include "tgsi_build.h" -#include "tgsi_parse.h" -#include "tgsi_sanity.h" -#include "tgsi_util.h" - -static boolean is_alpha_underscore( const char *cur ) -{ - return - (*cur >= 'a' && *cur <= 'z') || - (*cur >= 'A' && *cur <= 'Z') || - *cur == '_'; -} - -static boolean is_digit( const char *cur ) -{ - return *cur >= '0' && *cur <= '9'; -} - -static boolean is_digit_alpha_underscore( const char *cur ) -{ - return is_digit( cur ) || is_alpha_underscore( cur ); -} - -static boolean str_match_no_case( const char **pcur, const char *str ) -{ - const char *cur = *pcur; - - while (*str != '\0' && *str == toupper( *cur )) { - str++; - cur++; - } - if (*str == '\0') { - *pcur = cur; - return TRUE; - } - return FALSE; -} - -/* Eat zero or more whitespaces. - */ -static void eat_opt_white( const char **pcur ) -{ - while (**pcur == ' ' || **pcur == '\t' || **pcur == '\n') - (*pcur)++; -} - -/* Eat one or more whitespaces. - * Return TRUE if at least one whitespace eaten. - */ -static boolean eat_white( const char **pcur ) -{ - const char *cur = *pcur; - - eat_opt_white( pcur ); - return *pcur > cur; -} - -/* Parse unsigned integer. - * No checks for overflow. - */ -static boolean parse_uint( const char **pcur, uint *val ) -{ - const char *cur = *pcur; - - if (is_digit( cur )) { - *val = *cur++ - '0'; - while (is_digit( cur )) - *val = *val * 10 + *cur++ - '0'; - *pcur = cur; - return TRUE; - } - return FALSE; -} - -/* Parse floating point. - */ -static boolean parse_float( const char **pcur, float *val ) -{ - const char *cur = *pcur; - boolean integral_part = FALSE; - boolean fractional_part = FALSE; - - *val = (float) atof( cur ); - - if (*cur == '-' || *cur == '+') - cur++; - if (is_digit( cur )) { - cur++; - integral_part = TRUE; - while (is_digit( cur )) - cur++; - } - if (*cur == '.') { - cur++; - if (is_digit( cur )) { - cur++; - fractional_part = TRUE; - while (is_digit( cur )) - cur++; - } - } - if (!integral_part && !fractional_part) - return FALSE; - if (toupper( *cur ) == 'E') { - cur++; - if (*cur == '-' || *cur == '+') - cur++; - if (is_digit( cur )) { - cur++; - while (is_digit( cur )) - cur++; - } - else - return FALSE; - } - *pcur = cur; - return TRUE; -} - -struct translate_ctx -{ - const char *text; - const char *cur; - struct tgsi_token *tokens; - struct tgsi_token *tokens_cur; - struct tgsi_token *tokens_end; - struct tgsi_header *header; -}; - -static void report_error( struct translate_ctx *ctx, const char *msg ) -{ - debug_printf( "\nError: %s", msg ); -} - -/* Parse shader header. - * Return TRUE for one of the following headers. - * FRAG1.1 - * GEOM1.1 - * VERT1.1 - */ -static boolean parse_header( struct translate_ctx *ctx ) -{ - uint processor; - - if (str_match_no_case( &ctx->cur, "FRAG1.1" )) - processor = TGSI_PROCESSOR_FRAGMENT; - else if (str_match_no_case( &ctx->cur, "VERT1.1" )) - processor = TGSI_PROCESSOR_VERTEX; - else if (str_match_no_case( &ctx->cur, "GEOM1.1" )) - processor = TGSI_PROCESSOR_GEOMETRY; - else { - report_error( ctx, "Unknown header" ); - return FALSE; - } - - if (ctx->tokens_cur >= ctx->tokens_end) - return FALSE; - *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version(); - - if (ctx->tokens_cur >= ctx->tokens_end) - return FALSE; - ctx->header = (struct tgsi_header *) ctx->tokens_cur++; - *ctx->header = tgsi_build_header(); - - if (ctx->tokens_cur >= ctx->tokens_end) - return FALSE; - *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header ); - - return TRUE; -} - -static boolean parse_label( struct translate_ctx *ctx, uint *val ) -{ - const char *cur = ctx->cur; - - if (parse_uint( &cur, val )) { - eat_opt_white( &cur ); - if (*cur == ':') { - cur++; - ctx->cur = cur; - return TRUE; - } - } - return FALSE; -} - -static const char *file_names[TGSI_FILE_COUNT] = -{ - "NULL", - "CONST", - "IN", - "OUT", - "TEMP", - "SAMP", - "ADDR", - "IMM" -}; - -static boolean -parse_file( const char **pcur, uint *file ) -{ - uint i; - - for (i = 0; i < TGSI_FILE_COUNT; i++) { - const char *cur = *pcur; - - if (str_match_no_case( &cur, file_names[i] )) { - if (!is_digit_alpha_underscore( cur )) { - *pcur = cur; - *file = i; - return TRUE; - } - } - } - return FALSE; -} - -static boolean -parse_opt_writemask( - struct translate_ctx *ctx, - uint *writemask ) -{ - const char *cur; - - cur = ctx->cur; - eat_opt_white( &cur ); - if (*cur == '.') { - cur++; - *writemask = TGSI_WRITEMASK_NONE; - eat_opt_white( &cur ); - if (toupper( *cur ) == 'X') { - cur++; - *writemask |= TGSI_WRITEMASK_X; - } - if (toupper( *cur ) == 'Y') { - cur++; - *writemask |= TGSI_WRITEMASK_Y; - } - if (toupper( *cur ) == 'Z') { - cur++; - *writemask |= TGSI_WRITEMASK_Z; - } - if (toupper( *cur ) == 'W') { - cur++; - *writemask |= TGSI_WRITEMASK_W; - } - - if (*writemask == TGSI_WRITEMASK_NONE) { - report_error( ctx, "Writemask expected" ); - return FALSE; - } - - ctx->cur = cur; - } - else { - *writemask = TGSI_WRITEMASK_XYZW; - } - return TRUE; -} - -/* <register_file_bracket> ::= <file> `[' - */ -static boolean -parse_register_file_bracket( - struct translate_ctx *ctx, - uint *file ) -{ - if (!parse_file( &ctx->cur, file )) { - report_error( ctx, "Unknown register file" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '[') { - report_error( ctx, "Expected `['" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -/* <register_file_bracket_index> ::= <register_file_bracket> <uint> - */ -static boolean -parse_register_file_bracket_index( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - uint uindex; - - if (!parse_register_file_bracket( ctx, file )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - return TRUE; -} - -/* Parse destination register operand. - * <register_dst> ::= <register_file_bracket_index> `]' - */ -static boolean -parse_register_dst( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - if (!parse_register_file_bracket_index( ctx, file, index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -/* Parse source register operand. - * <register_src> ::= <register_file_bracket_index> `]' | - * <register_file_bracket> <register_dst> `]' | - * <register_file_bracket> <register_dst> `+' <uint> `]' | - * <register_file_bracket> <register_dst> `-' <uint> `]' - */ -static boolean -parse_register_src( - struct translate_ctx *ctx, - uint *file, - int *index, - uint *ind_file, - int *ind_index ) -{ - const char *cur; - uint uindex; - - if (!parse_register_file_bracket( ctx, file )) - return FALSE; - eat_opt_white( &ctx->cur ); - cur = ctx->cur; - if (parse_file( &cur, ind_file )) { - if (!parse_register_dst( ctx, ind_file, ind_index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur == '+' || *ctx->cur == '-') { - boolean negate; - - negate = *ctx->cur == '-'; - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - if (negate) - *index = -(int) uindex; - else - *index = (int) uindex; - } - else { - *index = 0; - } - } - else { - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - *ind_file = TGSI_FILE_NULL; - *ind_index = 0; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -/* Parse register declaration. - * <register_dcl> ::= <register_file_bracket_index> `]' | - * <register_file_bracket_index> `..' <index> `]' - */ -static boolean -parse_register_dcl( - struct translate_ctx *ctx, - uint *file, - int *first, - int *last ) -{ - if (!parse_register_file_bracket_index( ctx, file, first )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (ctx->cur[0] == '.' && ctx->cur[1] == '.') { - uint uindex; - - ctx->cur += 2; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal integer" ); - return FALSE; - } - *last = (int) uindex; - eat_opt_white( &ctx->cur ); - } - else { - *last = *first; - } - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]' or `..'" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -static const char *modulate_names[TGSI_MODULATE_COUNT] = -{ - "_1X", - "_2X", - "_4X", - "_8X", - "_D2", - "_D4", - "_D8" -}; - -static boolean -parse_dst_operand( - struct translate_ctx *ctx, - struct tgsi_full_dst_register *dst ) -{ - uint file; - int index; - uint writemask; - const char *cur; - - if (!parse_register_dst( ctx, &file, &index )) - return FALSE; - - cur = ctx->cur; - eat_opt_white( &cur ); - if (*cur == '_') { - uint i; - - for (i = 0; i < TGSI_MODULATE_COUNT; i++) { - if (str_match_no_case( &cur, modulate_names[i] )) { - if (!is_digit_alpha_underscore( cur )) { - dst->DstRegisterExtModulate.Modulate = i; - ctx->cur = cur; - break; - } - } - } - } - - if (!parse_opt_writemask( ctx, &writemask )) - return FALSE; - - dst->DstRegister.File = file; - dst->DstRegister.Index = index; - dst->DstRegister.WriteMask = writemask; - return TRUE; -} - -static boolean -parse_optional_swizzle( - struct translate_ctx *ctx, - uint swizzle[4], - boolean *parsed_swizzle, - boolean *parsed_extswizzle ) -{ - const char *cur = ctx->cur; - - *parsed_swizzle = FALSE; - *parsed_extswizzle = FALSE; - - eat_opt_white( &cur ); - if (*cur == '.') { - uint i; - - cur++; - eat_opt_white( &cur ); - for (i = 0; i < 4; i++) { - if (toupper( *cur ) == 'X') - swizzle[i] = TGSI_SWIZZLE_X; - else if (toupper( *cur ) == 'Y') - swizzle[i] = TGSI_SWIZZLE_Y; - else if (toupper( *cur ) == 'Z') - swizzle[i] = TGSI_SWIZZLE_Z; - else if (toupper( *cur ) == 'W') - swizzle[i] = TGSI_SWIZZLE_W; - else { - if (*cur == '0') - swizzle[i] = TGSI_EXTSWIZZLE_ZERO; - else if (*cur == '1') - swizzle[i] = TGSI_EXTSWIZZLE_ONE; - else { - report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); - return FALSE; - } - *parsed_extswizzle = TRUE; - } - cur++; - } - *parsed_swizzle = TRUE; - ctx->cur = cur; - } - return TRUE; -} - -static boolean -parse_src_operand( - struct translate_ctx *ctx, - struct tgsi_full_src_register *src ) -{ - const char *cur; - float value; - uint file; - int index; - uint ind_file; - int ind_index; - uint swizzle[4]; - boolean parsed_swizzle; - boolean parsed_extswizzle; - - if (*ctx->cur == '-') { - cur = ctx->cur; - cur++; - eat_opt_white( &cur ); - if (*cur == '(') { - cur++; - src->SrcRegisterExtMod.Negate = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } - } - - if (*ctx->cur == '|') { - ctx->cur++; - eat_opt_white( &ctx->cur ); - src->SrcRegisterExtMod.Absolute = 1; - } - - if (*ctx->cur == '-') { - ctx->cur++; - eat_opt_white( &ctx->cur ); - src->SrcRegister.Negate = 1; - } - - cur = ctx->cur; - if (parse_float( &cur, &value )) { - if (value == 2.0f) { - eat_opt_white( &cur ); - if (*cur != '*') { - report_error( ctx, "Expected `*'" ); - return FALSE; - } - cur++; - if (*cur != '(') { - report_error( ctx, "Expected `('" ); - return FALSE; - } - cur++; - src->SrcRegisterExtMod.Scale2X = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } - } - - if (*ctx->cur == '(') { - ctx->cur++; - eat_opt_white( &ctx->cur ); - src->SrcRegisterExtMod.Bias = 1; - } - - cur = ctx->cur; - if (parse_float( &cur, &value )) { - if (value == 1.0f) { - eat_opt_white( &cur ); - if (*cur != '-') { - report_error( ctx, "Expected `-'" ); - return FALSE; - } - cur++; - if (*cur != '(') { - report_error( ctx, "Expected `('" ); - return FALSE; - } - cur++; - src->SrcRegisterExtMod.Complement = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } - } - - if (!parse_register_src( ctx, &file, &index, &ind_file, &ind_index )) - return FALSE; - src->SrcRegister.File = file; - src->SrcRegister.Index = index; - if (ind_file != TGSI_FILE_NULL) { - src->SrcRegister.Indirect = 1; - src->SrcRegisterInd.File = ind_file; - src->SrcRegisterInd.Index = ind_index; - } - - /* Parse optional swizzle. - */ - if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) { - if (parsed_extswizzle) { - assert( parsed_swizzle ); - - src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0]; - src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1]; - src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2]; - src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3]; - } - else if (parsed_swizzle) { - src->SrcRegister.SwizzleX = swizzle[0]; - src->SrcRegister.SwizzleY = swizzle[1]; - src->SrcRegister.SwizzleZ = swizzle[2]; - src->SrcRegister.SwizzleW = swizzle[3]; - } - } - - if (src->SrcRegisterExtMod.Complement) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Bias) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '-') { - report_error( ctx, "Expected `-'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_float( &ctx->cur, &value )) { - report_error( ctx, "Expected literal floating point" ); - return FALSE; - } - if (value != 0.5f) { - report_error( ctx, "Expected 0.5" ); - return FALSE; - } - } - - if (src->SrcRegisterExtMod.Scale2X) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Absolute) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '|') { - report_error( ctx, "Expected `|'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Negate) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } - - return TRUE; -} - -struct opcode_info -{ - uint num_dst; - uint num_src; - uint is_tex; - uint is_branch; - const char *mnemonic; -}; - -static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = -{ - { 1, 1, 0, 0, "ARL" }, - { 1, 1, 0, 0, "MOV" }, - { 1, 1, 0, 0, "LIT" }, - { 1, 1, 0, 0, "RCP" }, - { 1, 1, 0, 0, "RSQ" }, - { 1, 1, 0, 0, "EXP" }, - { 1, 1, 0, 0, "LOG" }, - { 1, 2, 0, 0, "MUL" }, - { 1, 2, 0, 0, "ADD" }, - { 1, 2, 0, 0, "DP3" }, - { 1, 2, 0, 0, "DP4" }, - { 1, 2, 0, 0, "DST" }, - { 1, 2, 0, 0, "MIN" }, - { 1, 2, 0, 0, "MAX" }, - { 1, 2, 0, 0, "SLT" }, - { 1, 2, 0, 0, "SGE" }, - { 1, 3, 0, 0, "MAD" }, - { 1, 2, 0, 0, "SUB" }, - { 1, 3, 0, 0, "LERP" }, - { 1, 3, 0, 0, "CND" }, - { 1, 3, 0, 0, "CND0" }, - { 1, 3, 0, 0, "DOT2ADD" }, - { 1, 2, 0, 0, "INDEX" }, - { 1, 1, 0, 0, "NEGATE" }, - { 1, 1, 0, 0, "FRAC" }, - { 1, 3, 0, 0, "CLAMP" }, - { 1, 1, 0, 0, "FLOOR" }, - { 1, 1, 0, 0, "ROUND" }, - { 1, 1, 0, 0, "EXPBASE2" }, - { 1, 1, 0, 0, "LOGBASE2" }, - { 1, 2, 0, 0, "POWER" }, - { 1, 2, 0, 0, "CROSSPRODUCT" }, - { 1, 2, 0, 0, "MULTIPLYMATRIX" }, - { 1, 1, 0, 0, "ABS" }, - { 1, 1, 0, 0, "RCC" }, - { 1, 2, 0, 0, "DPH" }, - { 1, 1, 0, 0, "COS" }, - { 1, 1, 0, 0, "DDX" }, - { 1, 1, 0, 0, "DDY" }, - { 0, 1, 0, 0, "KILP" }, - { 1, 1, 0, 0, "PK2H" }, - { 1, 1, 0, 0, "PK2US" }, - { 1, 1, 0, 0, "PK4B" }, - { 1, 1, 0, 0, "PK4UB" }, - { 1, 2, 0, 0, "RFL" }, - { 1, 2, 0, 0, "SEQ" }, - { 1, 2, 0, 0, "SFL" }, - { 1, 2, 0, 0, "SGT" }, - { 1, 1, 0, 0, "SIN" }, - { 1, 2, 0, 0, "SLE" }, - { 1, 2, 0, 0, "SNE" }, - { 1, 2, 0, 0, "STR" }, - { 1, 2, 1, 0, "TEX" }, - { 1, 4, 1, 0, "TXD" }, - { 1, 2, 1, 0, "TXP" }, - { 1, 1, 0, 0, "UP2H" }, - { 1, 1, 0, 0, "UP2US" }, - { 1, 1, 0, 0, "UP4B" }, - { 1, 1, 0, 0, "UP4UB" }, - { 1, 3, 0, 0, "X2D" }, - { 1, 1, 0, 0, "ARA" }, - { 1, 1, 0, 0, "ARR" }, - { 0, 1, 0, 0, "BRA" }, - { 0, 0, 0, 1, "CAL" }, - { 0, 0, 0, 0, "RET" }, - { 1, 1, 0, 0, "SSG" }, - { 1, 3, 0, 0, "CMP" }, - { 1, 1, 0, 0, "SCS" }, - { 1, 2, 1, 0, "TXB" }, - { 1, 1, 0, 0, "NRM" }, - { 1, 2, 0, 0, "DIV" }, - { 1, 2, 0, 0, "DP2" }, - { 1, 2, 1, 0, "TXL" }, - { 0, 0, 0, 0, "BRK" }, - { 0, 1, 0, 1, "IF" }, - { 0, 0, 0, 0, "LOOP" }, - { 0, 1, 0, 0, "REP" }, - { 0, 0, 0, 1, "ELSE" }, - { 0, 0, 0, 0, "ENDIF" }, - { 0, 0, 0, 0, "ENDLOOP" }, - { 0, 0, 0, 0, "ENDREP" }, - { 0, 1, 0, 0, "PUSHA" }, - { 1, 0, 0, 0, "POPA" }, - { 1, 1, 0, 0, "CEIL" }, - { 1, 1, 0, 0, "I2F" }, - { 1, 1, 0, 0, "NOT" }, - { 1, 1, 0, 0, "TRUNC" }, - { 1, 2, 0, 0, "SHL" }, - { 1, 2, 0, 0, "SHR" }, - { 1, 2, 0, 0, "AND" }, - { 1, 2, 0, 0, "OR" }, - { 1, 2, 0, 0, "MOD" }, - { 1, 2, 0, 0, "XOR" }, - { 1, 3, 0, 0, "SAD" }, - { 1, 2, 1, 0, "TXF" }, - { 1, 2, 1, 0, "TXQ" }, - { 0, 0, 0, 0, "CONT" }, - { 0, 0, 0, 0, "EMIT" }, - { 0, 0, 0, 0, "ENDPRIM" }, - { 0, 0, 0, 1, "BGNLOOP2" }, - { 0, 0, 0, 0, "BGNSUB" }, - { 0, 0, 0, 1, "ENDLOOP2" }, - { 0, 0, 0, 0, "ENDSUB" }, - { 1, 1, 0, 0, "NOISE1" }, - { 1, 1, 0, 0, "NOISE2" }, - { 1, 1, 0, 0, "NOISE3" }, - { 1, 1, 0, 0, "NOISE4" }, - { 0, 0, 0, 0, "NOP" }, - { 1, 2, 0, 0, "M4X3" }, - { 1, 2, 0, 0, "M3X4" }, - { 1, 2, 0, 0, "M3X3" }, - { 1, 2, 0, 0, "M3X2" }, - { 1, 1, 0, 0, "NRM4" }, - { 0, 1, 0, 0, "CALLNZ" }, - { 0, 1, 0, 0, "IFC" }, - { 0, 1, 0, 0, "BREAKC" }, - { 0, 0, 0, 0, "KIL" }, - { 0, 0, 0, 0, "END" }, - { 1, 1, 0, 0, "SWZ" } -}; - -static const char *texture_names[TGSI_TEXTURE_COUNT] = -{ - "UNKNOWN", - "1D", - "2D", - "3D", - "CUBE", - "RECT", - "SHADOW1D", - "SHADOW2D", - "SHADOWRECT" -}; - -static boolean -parse_instruction( - struct translate_ctx *ctx, - boolean has_label ) -{ - uint i; - uint saturate = TGSI_SAT_NONE; - const struct opcode_info *info; - struct tgsi_full_instruction inst; - uint advance; - - /* Parse instruction name. - */ - eat_opt_white( &ctx->cur ); - for (i = 0; i < TGSI_OPCODE_LAST; i++) { - const char *cur = ctx->cur; - - info = &opcode_info[i]; - if (str_match_no_case( &cur, info->mnemonic )) { - if (str_match_no_case( &cur, "_SATNV" )) - saturate = TGSI_SAT_MINUS_PLUS_ONE; - else if (str_match_no_case( &cur, "_SAT" )) - saturate = TGSI_SAT_ZERO_ONE; - - if (info->num_dst + info->num_src + info->is_tex == 0) { - if (!is_digit_alpha_underscore( cur )) { - ctx->cur = cur; - break; - } - } - else if (*cur == '\0' || eat_white( &cur )) { - ctx->cur = cur; - break; - } - } - } - if (i == TGSI_OPCODE_LAST) { - if (has_label) - report_error( ctx, "Unknown opcode" ); - else - report_error( ctx, "Expected `DCL', `IMM' or a label" ); - return FALSE; - } - - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = i; - inst.Instruction.Saturate = saturate; - inst.Instruction.NumDstRegs = info->num_dst; - inst.Instruction.NumSrcRegs = info->num_src; - - /* Parse instruction operands. - */ - for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) { - if (i > 0) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ',') { - report_error( ctx, "Expected `,'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - } - - if (i < info->num_dst) { - if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] )) - return FALSE; - } - else if (i < info->num_dst + info->num_src) { - if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] )) - return FALSE; - } - else { - uint j; - - for (j = 0; j < TGSI_TEXTURE_COUNT; j++) { - if (str_match_no_case( &ctx->cur, texture_names[j] )) { - if (!is_digit_alpha_underscore( ctx->cur )) { - inst.InstructionExtTexture.Texture = j; - break; - } - } - } - if (j == TGSI_TEXTURE_COUNT) { - report_error( ctx, "Expected texture target" ); - return FALSE; - } - } - } - - if (info->is_branch) { - uint target; - - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ':') { - report_error( ctx, "Expected `:'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &target )) { - report_error( ctx, "Expected a label" ); - return FALSE; - } - inst.InstructionExtLabel.Label = target; - } - - advance = tgsi_build_full_instruction( - &inst, - ctx->tokens_cur, - ctx->header, - (uint) (ctx->tokens_end - ctx->tokens_cur) ); - if (advance == 0) - return FALSE; - ctx->tokens_cur += advance; - - return TRUE; -} - -static const char *semantic_names[TGSI_SEMANTIC_COUNT] = -{ - "POSITION", - "COLOR", - "BCOLOR", - "FOG", - "PSIZE", - "GENERIC", - "NORMAL" -}; - -static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = -{ - "CONSTANT", - "LINEAR", - "PERSPECTIVE" -}; - -static boolean parse_declaration( struct translate_ctx *ctx ) -{ - struct tgsi_full_declaration decl; - uint file; - int first; - int last; - uint writemask; - const char *cur; - uint advance; - - if (!eat_white( &ctx->cur )) { - report_error( ctx, "Syntax error" ); - return FALSE; - } - if (!parse_register_dcl( ctx, &file, &first, &last )) - return FALSE; - if (!parse_opt_writemask( ctx, &writemask )) - return FALSE; - - decl = tgsi_default_full_declaration(); - decl.Declaration.File = file; - decl.Declaration.UsageMask = writemask; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; - - cur = ctx->cur; - eat_opt_white( &cur ); - if (*cur == ',') { - uint i; - - cur++; - eat_opt_white( &cur ); - for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { - if (str_match_no_case( &cur, semantic_names[i] )) { - const char *cur2 = cur; - uint index; - - if (is_digit_alpha_underscore( cur )) - continue; - eat_opt_white( &cur2 ); - if (*cur2 == '[') { - cur2++; - eat_opt_white( &cur2 ); - if (!parse_uint( &cur2, &index )) { - report_error( ctx, "Expected literal integer" ); - return FALSE; - } - eat_opt_white( &cur2 ); - if (*cur2 != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - cur2++; - - decl.Semantic.SemanticIndex = index; - - cur = cur2; - } - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = i; - - ctx->cur = cur; - break; - } - } - } - - cur = ctx->cur; - eat_opt_white( &cur ); - if (*cur == ',') { - uint i; - - cur++; - eat_opt_white( &cur ); - for (i = 0; i < TGSI_INTERPOLATE_COUNT; i++) { - if (str_match_no_case( &cur, interpolate_names[i] )) { - if (is_digit_alpha_underscore( cur )) - continue; - decl.Declaration.Interpolate = i; - - ctx->cur = cur; - break; - } - } - if (i == TGSI_INTERPOLATE_COUNT) { - report_error( ctx, "Expected semantic or interpolate attribute" ); - return FALSE; - } - } - - advance = tgsi_build_full_declaration( - &decl, - ctx->tokens_cur, - ctx->header, - (uint) (ctx->tokens_end - ctx->tokens_cur) ); - if (advance == 0) - return FALSE; - ctx->tokens_cur += advance; - - return TRUE; -} - -static boolean parse_immediate( struct translate_ctx *ctx ) -{ - struct tgsi_full_immediate imm; - uint i; - float values[4]; - uint advance; - - if (!eat_white( &ctx->cur )) { - report_error( ctx, "Syntax error" ); - return FALSE; - } - if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) { - report_error( ctx, "Expected `FLT32'" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '{') { - report_error( ctx, "Expected `{'" ); - return FALSE; - } - ctx->cur++; - for (i = 0; i < 4; i++) { - eat_opt_white( &ctx->cur ); - if (i > 0) { - if (*ctx->cur != ',') { - report_error( ctx, "Expected `,'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - } - if (!parse_float( &ctx->cur, &values[i] )) { - report_error( ctx, "Expected literal floating point" ); - return FALSE; - } - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '}') { - report_error( ctx, "Expected `}'" ); - return FALSE; - } - ctx->cur++; - - imm = tgsi_default_full_immediate(); - imm.Immediate.Size += 4; - imm.Immediate.DataType = TGSI_IMM_FLOAT32; - imm.u.Pointer = values; - - advance = tgsi_build_full_immediate( - &imm, - ctx->tokens_cur, - ctx->header, - (uint) (ctx->tokens_end - ctx->tokens_cur) ); - if (advance == 0) - return FALSE; - ctx->tokens_cur += advance; - - return TRUE; -} - -static boolean translate( struct translate_ctx *ctx ) -{ - eat_opt_white( &ctx->cur ); - if (!parse_header( ctx )) - return FALSE; - - while (*ctx->cur != '\0') { - uint label_val = 0; - - if (!eat_white( &ctx->cur )) { - report_error( ctx, "Syntax error" ); - return FALSE; - } - - if (*ctx->cur == '\0') - break; - - if (parse_label( ctx, &label_val )) { - if (!parse_instruction( ctx, TRUE )) - return FALSE; - } - else if (str_match_no_case( &ctx->cur, "DCL" )) { - if (!parse_declaration( ctx )) - return FALSE; - } - else if (str_match_no_case( &ctx->cur, "IMM" )) { - if (!parse_immediate( ctx )) - return FALSE; - } - else if (!parse_instruction( ctx, FALSE )) { - return FALSE; - } - } - - return TRUE; -} - -boolean -tgsi_text_translate( - const char *text, - struct tgsi_token *tokens, - uint num_tokens ) -{ - struct translate_ctx ctx; - - ctx.text = text; - ctx.cur = text; - ctx.tokens = tokens; - ctx.tokens_cur = tokens; - ctx.tokens_end = tokens + num_tokens; - - if (!translate( &ctx )) - return FALSE; - - return tgsi_sanity_check( tokens ); -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.h b/src/gallium/auxiliary/tgsi/util/tgsi_text.h deleted file mode 100644 index 8eeeeef1402..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.h +++ /dev/null @@ -1,47 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_TEXT_H -#define TGSI_TEXT_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -boolean -tgsi_text_translate( - const char *text, - struct tgsi_token *tokens, - uint num_tokens ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_TEXT_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_transform.c b/src/gallium/auxiliary/tgsi/util/tgsi_transform.c deleted file mode 100644 index 357f77b05a6..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_transform.c +++ /dev/null @@ -1,199 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * TGSI program transformation utility. - * - * Authors: Brian Paul - */ - - -#include "tgsi_transform.h" - - - -static void -emit_instruction(struct tgsi_transform_context *ctx, - const struct tgsi_full_instruction *inst) -{ - uint ti = ctx->ti; - - ti += tgsi_build_full_instruction(inst, - ctx->tokens_out + ti, - ctx->header, - ctx->max_tokens_out - ti); - ctx->ti = ti; -} - - -static void -emit_declaration(struct tgsi_transform_context *ctx, - const struct tgsi_full_declaration *decl) -{ - uint ti = ctx->ti; - - ti += tgsi_build_full_declaration(decl, - ctx->tokens_out + ti, - ctx->header, - ctx->max_tokens_out - ti); - ctx->ti = ti; -} - - -static void -emit_immediate(struct tgsi_transform_context *ctx, - const struct tgsi_full_immediate *imm) -{ - uint ti = ctx->ti; - - ti += tgsi_build_full_immediate(imm, - ctx->tokens_out + ti, - ctx->header, - ctx->max_tokens_out - ti); - ctx->ti = ti; -} - - - -/** - * Apply user-defined transformations to the input shader to produce - * the output shader. - * For example, a register search-and-replace operation could be applied - * by defining a transform_instruction() callback that examined and changed - * the instruction src/dest regs. - * - * \return number of tokens emitted - */ -int -tgsi_transform_shader(const struct tgsi_token *tokens_in, - struct tgsi_token *tokens_out, - uint max_tokens_out, - struct tgsi_transform_context *ctx) -{ - uint procType; - - /* input shader */ - struct tgsi_parse_context parse; - - /* output shader */ - struct tgsi_processor *processor; - - - /** - ** callback context init - **/ - ctx->emit_instruction = emit_instruction; - ctx->emit_declaration = emit_declaration; - ctx->emit_immediate = emit_immediate; - ctx->tokens_out = tokens_out; - ctx->max_tokens_out = max_tokens_out; - - - /** - ** Setup to begin parsing input shader - **/ - if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) { - debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n"); - return -1; - } - procType = parse.FullHeader.Processor.Processor; - assert(procType == TGSI_PROCESSOR_FRAGMENT || - procType == TGSI_PROCESSOR_VERTEX || - procType == TGSI_PROCESSOR_GEOMETRY); - - - /** - ** Setup output shader - **/ - *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version(); - - ctx->header = (struct tgsi_header *) (tokens_out + 1); - *ctx->header = tgsi_build_header(); - - processor = (struct tgsi_processor *) (tokens_out + 2); - *processor = tgsi_build_processor( procType, ctx->header ); - - ctx->ti = 3; - - - /** - ** Loop over incoming program tokens/instructions - */ - while( !tgsi_parse_end_of_tokens( &parse ) ) { - - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - struct tgsi_full_instruction *fullinst - = &parse.FullToken.FullInstruction; - - if (ctx->transform_instruction) - ctx->transform_instruction(ctx, fullinst); - else - ctx->emit_instruction(ctx, fullinst); - } - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - { - struct tgsi_full_declaration *fulldecl - = &parse.FullToken.FullDeclaration; - - if (ctx->transform_declaration) - ctx->transform_declaration(ctx, fulldecl); - else - ctx->emit_declaration(ctx, fulldecl); - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - struct tgsi_full_immediate *fullimm - = &parse.FullToken.FullImmediate; - - if (ctx->transform_immediate) - ctx->transform_immediate(ctx, fullimm); - else - ctx->emit_immediate(ctx, fullimm); - } - break; - - default: - assert( 0 ); - } - } - - if (ctx->epilog) { - ctx->epilog(ctx); - } - - tgsi_parse_free (&parse); - - return ctx->ti; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_transform.h b/src/gallium/auxiliary/tgsi/util/tgsi_transform.h deleted file mode 100644 index fcf85d603be..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_transform.h +++ /dev/null @@ -1,93 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_TRANSFORM_H -#define TGSI_TRANSFORM_H - - -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_build.h" - - - -/** - * Subclass this to add caller-specific data - */ -struct tgsi_transform_context -{ -/**** PUBLIC ***/ - - /** - * User-defined callbacks invoked per instruction. - */ - void (*transform_instruction)(struct tgsi_transform_context *ctx, - struct tgsi_full_instruction *inst); - - void (*transform_declaration)(struct tgsi_transform_context *ctx, - struct tgsi_full_declaration *decl); - - void (*transform_immediate)(struct tgsi_transform_context *ctx, - struct tgsi_full_immediate *imm); - - /** - * Called at end of input program to allow caller to append extra - * instructions. Return number of tokens emitted. - */ - void (*epilog)(struct tgsi_transform_context *ctx); - - -/*** PRIVATE ***/ - - /** - * These are setup by tgsi_transform_shader() and cannot be overridden. - * Meant to be called from in the above user callback functions. - */ - void (*emit_instruction)(struct tgsi_transform_context *ctx, - const struct tgsi_full_instruction *inst); - void (*emit_declaration)(struct tgsi_transform_context *ctx, - const struct tgsi_full_declaration *decl); - void (*emit_immediate)(struct tgsi_transform_context *ctx, - const struct tgsi_full_immediate *imm); - - struct tgsi_header *header; - uint max_tokens_out; - struct tgsi_token *tokens_out; - uint ti; -}; - - - -extern int -tgsi_transform_shader(const struct tgsi_token *tokens_in, - struct tgsi_token *tokens_out, - uint max_tokens_out, - struct tgsi_transform_context *ctx); - - -#endif /* TGSI_TRANSFORM_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c deleted file mode 100644 index 09486e649e1..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c +++ /dev/null @@ -1,300 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi_parse.h" -#include "tgsi_build.h" -#include "tgsi_util.h" - -union pointer_hack -{ - void *pointer; - uint64_t uint64; -}; - -void * -tgsi_align_128bit( - void *unaligned ) -{ - union pointer_hack ph; - - ph.uint64 = 0; - ph.pointer = unaligned; - ph.uint64 = (ph.uint64 + 15) & ~15; - return ph.pointer; -} - -unsigned -tgsi_util_get_src_register_swizzle( - const struct tgsi_src_register *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->SwizzleX; - case 1: - return reg->SwizzleY; - case 2: - return reg->SwizzleZ; - case 3: - return reg->SwizzleW; - default: - assert( 0 ); - } - return 0; -} - -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->ExtSwizzleX; - case 1: - return reg->ExtSwizzleY; - case 2: - return reg->ExtSwizzleZ; - case 3: - return reg->ExtSwizzleW; - default: - assert( 0 ); - } - return 0; -} - -unsigned -tgsi_util_get_full_src_register_extswizzle( - const struct tgsi_full_src_register *reg, - unsigned component ) -{ - unsigned swizzle; - - /* - * First, calculate the extended swizzle for a given channel. This will give - * us either a channel index into the simple swizzle or a constant 1 or 0. - */ - swizzle = tgsi_util_get_src_register_extswizzle( - ®->SrcRegisterExtSwz, - component ); - - assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); - assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); - assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); - assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); - - /* - * Second, calculate the simple swizzle for the unswizzled channel index. - * Leave the constants intact, they are not affected by the simple swizzle. - */ - if( swizzle <= TGSI_SWIZZLE_W ) { - swizzle = tgsi_util_get_src_register_swizzle( - ®->SrcRegister, - swizzle ); - } - - return swizzle; -} - -void -tgsi_util_set_src_register_swizzle( - struct tgsi_src_register *reg, - unsigned swizzle, - unsigned component ) -{ - switch( component ) { - case 0: - reg->SwizzleX = swizzle; - break; - case 1: - reg->SwizzleY = swizzle; - break; - case 2: - reg->SwizzleZ = swizzle; - break; - case 3: - reg->SwizzleW = swizzle; - break; - default: - assert( 0 ); - } -} - -void -tgsi_util_set_src_register_extswizzle( - struct tgsi_src_register_ext_swz *reg, - unsigned swizzle, - unsigned component ) -{ - switch( component ) { - case 0: - reg->ExtSwizzleX = swizzle; - break; - case 1: - reg->ExtSwizzleY = swizzle; - break; - case 2: - reg->ExtSwizzleZ = swizzle; - break; - case 3: - reg->ExtSwizzleW = swizzle; - break; - default: - assert( 0 ); - } -} - -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->NegateX; - case 1: - return reg->NegateY; - case 2: - return reg->NegateZ; - case 3: - return reg->NegateW; - default: - assert( 0 ); - } - return 0; -} - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ) -{ - switch( component ) { - case 0: - reg->NegateX = negate; - break; - case 1: - reg->NegateY = negate; - break; - case 2: - reg->NegateZ = negate; - break; - case 3: - reg->NegateW = negate; - break; - default: - assert( 0 ); - } -} - -unsigned -tgsi_util_get_full_src_register_sign_mode( - const struct tgsi_full_src_register *reg, - unsigned component ) -{ - unsigned sign_mode; - - if( reg->SrcRegisterExtMod.Absolute ) { - /* Consider only the post-abs negation. */ - - if( reg->SrcRegisterExtMod.Negate ) { - sign_mode = TGSI_UTIL_SIGN_SET; - } - else { - sign_mode = TGSI_UTIL_SIGN_CLEAR; - } - } - else { - /* Accumulate the three negations. */ - - unsigned negate; - - negate = reg->SrcRegister.Negate; - if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { - negate = !negate; - } - if( reg->SrcRegisterExtMod.Negate ) { - negate = !negate; - } - - if( negate ) { - sign_mode = TGSI_UTIL_SIGN_TOGGLE; - } - else { - sign_mode = TGSI_UTIL_SIGN_KEEP; - } - } - - return sign_mode; -} - -void -tgsi_util_set_full_src_register_sign_mode( - struct tgsi_full_src_register *reg, - unsigned sign_mode ) -{ - reg->SrcRegisterExtSwz.NegateX = 0; - reg->SrcRegisterExtSwz.NegateY = 0; - reg->SrcRegisterExtSwz.NegateZ = 0; - reg->SrcRegisterExtSwz.NegateW = 0; - - switch (sign_mode) - { - case TGSI_UTIL_SIGN_CLEAR: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 1; - reg->SrcRegisterExtMod.Negate = 0; - break; - - case TGSI_UTIL_SIGN_SET: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 1; - reg->SrcRegisterExtMod.Negate = 1; - break; - - case TGSI_UTIL_SIGN_TOGGLE: - reg->SrcRegister.Negate = 1; - reg->SrcRegisterExtMod.Absolute = 0; - reg->SrcRegisterExtMod.Negate = 0; - break; - - case TGSI_UTIL_SIGN_KEEP: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 0; - reg->SrcRegisterExtMod.Negate = 0; - break; - - default: - assert( 0 ); - } -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.h b/src/gallium/auxiliary/tgsi/util/tgsi_util.h deleted file mode 100644 index 7877f345587..00000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.h +++ /dev/null @@ -1,96 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_UTIL_H -#define TGSI_UTIL_H - -#if defined __cplusplus -extern "C" { -#endif - -void * -tgsi_align_128bit( - void *unaligned ); - -unsigned -tgsi_util_get_src_register_swizzle( - const struct tgsi_src_register *reg, - unsigned component ); - -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component); - -unsigned -tgsi_util_get_full_src_register_extswizzle( - const struct tgsi_full_src_register *reg, - unsigned component ); - -void -tgsi_util_set_src_register_swizzle( - struct tgsi_src_register *reg, - unsigned swizzle, - unsigned component ); - -void -tgsi_util_set_src_register_extswizzle( - struct tgsi_src_register_ext_swz *reg, - unsigned swizzle, - unsigned component ); - -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ); - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ); - -#define TGSI_UTIL_SIGN_CLEAR 0 /* Force positive */ -#define TGSI_UTIL_SIGN_SET 1 /* Force negative */ -#define TGSI_UTIL_SIGN_TOGGLE 2 /* Negate */ -#define TGSI_UTIL_SIGN_KEEP 3 /* No change */ - -unsigned -tgsi_util_get_full_src_register_sign_mode( - const struct tgsi_full_src_register *reg, - unsigned component ); - -void -tgsi_util_set_full_src_register_sign_mode( - struct tgsi_full_src_register *reg, - unsigned sign_mode ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_UTIL_H */ diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 4999822068a..8713ff5d584 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -45,9 +45,9 @@ #include "util/u_gen_mipmap.h" #include "util/u_simple_shaders.h" -#include "tgsi/util/tgsi_build.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" #include "cso_cache/cso_context.h" diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 505d93d7277..c34fb6ee334 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -43,9 +43,9 @@ #include "util/u_simple_shaders.h" -#include "tgsi/util/tgsi_build.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 8df41c1d4ca..f1d1ca89a97 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -37,7 +37,7 @@ #include "cell_winsys.h" #include "cell/common.h" #include "rtasm/rtasm_ppc_spe.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct cell_vbuf_render; diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index c3a3fbd066d..f5707f2bb8b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -30,7 +30,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "draw/draw_context.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "cell_context.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 3a80df427da..96393732ed8 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -65,8 +65,8 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "spu_exec.h" #include "spu_main.h" #include "spu_vertex_shader.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 3e17c490d20..c68f78f59b6 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -29,7 +29,7 @@ #define SPU_EXEC_H #include "pipe/p_compiler.h" -#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/tgsi_exec.h" #if defined __cplusplus extern "C" { diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index ea4274a0a7b..74ab2bbd1f0 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,8 +1,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" //#include "tgsi_build.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_util.h" unsigned tgsi_util_get_src_register_swizzle( diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index c8db4f608c5..3cdabe45f9d 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -35,7 +35,7 @@ #include "draw/draw_vertex.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" #define I915_TEX_UNITS 8 diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 23cd909337e..04507ab8adc 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -34,8 +34,8 @@ #include "pipe/p_shader_tokens.h" #include "util/u_string.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index dbb33f26955..e8521b385ef 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -33,7 +33,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "i915_context.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 2cae7665f78..f00eb34f92b 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -38,7 +38,7 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" #include "brw_structs.h" #include "brw_winsys.h" diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c index 96f8fb87a3d..b82a2e143ba 100644 --- a/src/gallium/drivers/i965simple/brw_sf.c +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -36,7 +36,7 @@ #include "brw_util.h" #include "brw_sf.h" #include "brw_state.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" static void compile_sf_prog( struct brw_context *brw, diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index fb3da92421e..30f37a99d46 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -3,7 +3,7 @@ #include "brw_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" /** diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index caeeba46300..27ca32843da 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -34,8 +34,8 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" #include "brw_context.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 81423e2d7d5..34dbc0624d5 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -33,7 +33,7 @@ #include "brw_vs.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" struct brw_prog_info { unsigned num_temps; diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index bf1b4d961a7..e6f1a44817d 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index 5c905838243..6a4a5aef092 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 0b199a2193e..cc171bbc396 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -36,8 +36,8 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_parse.h" struct sp_exec_fragment_shader { struct sp_fragment_shader base; diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 6e1d9280bb4..20226da78c3 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -38,7 +38,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/exec/tgsi_sse2.h" +#include "tgsi/tgsi_sse2.h" #if 0 diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 69f7f960aa1..8b7da7c7473 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -36,8 +36,8 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/exec/tgsi_sse2.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_sse2.h" #ifdef PIPE_ARCH_X86 diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h index 3d9ede69bbd..ae2ee210fc9 100644 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -32,7 +32,7 @@ #define SP_HEADERS_H #include "pipe/p_state.h" -#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/tgsi_exec.h" #define PRIM_POINT 1 #define PRIM_LINE 2 diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 701e02b295e..476ef3dc8fb 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -32,7 +32,7 @@ #define SP_STATE_H #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" #define SP_NEW_VIEWPORT 0x1 diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 901c8f83e75..76fe6bfef9f 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -35,8 +35,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" void * diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index be0b57d9fa6..63b3b91110e 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -40,7 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/tgsi_exec.h" /* diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 8d8b762ea57..284ecb827df 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -47,8 +47,8 @@ #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" #include "util/p_tile.h" -#include "tgsi/util/tgsi_text.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_text.h" +#include "tgsi/tgsi_dump.h" #include "st_device.h" #include "st_sample.h" diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 23ecfff0aa4..c7d26ce33cf 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -31,7 +31,7 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_dump.h" #include "cso_cache/cso_cache.h" diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 9029f12056f..6565107b107 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -32,9 +32,9 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_build.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_util.h" #include "st_mesa_to_tgsi.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 5966bbadae8..c25c668329a 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -40,7 +40,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_dump.h" #include "st_context.h" #include "st_atom.h" -- cgit v1.2.3 From a55ced56760f90fe84794b23c557050403d514b2 Mon Sep 17 00:00:00 2001 From: Younes Manton <younes.m@gmail.com> Date: Sun, 3 Aug 2008 17:20:48 -0400 Subject: nv40: Support for PIPE_FORMAT_R16_SNORM. --- src/gallium/drivers/nouveau/nouveau_class.h | 1 + src/gallium/drivers/nv40/nv40_fragtex.c | 36 ++++++++++++++++------------- src/gallium/drivers/nv40/nv40_screen.c | 3 ++- 3 files changed, 23 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h index 749fbf041e7..c3d8d7539d1 100644 --- a/src/gallium/drivers/nouveau/nouveau_class.h +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -5194,6 +5194,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NV40TCL_TEX_FORMAT_FORMAT_A8L8 0x00000b00 #define NV40TCL_TEX_FORMAT_FORMAT_Z24 0x00001000 #define NV40TCL_TEX_FORMAT_FORMAT_Z16 0x00001200 +#define NV40TCL_TEX_FORMAT_FORMAT_A16 0x00001400 #define NV40TCL_TEX_FORMAT_FORMAT_HILO8 0x00001800 #define NV40TCL_TEX_FORMAT_FORMAT_RGBA16F 0x00001a00 #define NV40TCL_TEX_FORMAT_FORMAT_RGBA32F 0x00001b00 diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index c79ea8becb3..2d45c2545cd 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -1,6 +1,6 @@ #include "nv40_context.h" -#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ { \ TRUE, \ PIPE_FORMAT_##m, \ @@ -9,6 +9,8 @@ NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \ NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \ NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \ + ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) | \ + (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw)) \ } struct nv40_texture_format { @@ -16,24 +18,26 @@ struct nv40_texture_format { uint pipe; int format; int swizzle; + int sign; }; static struct nv40_texture_format nv40_texture_formats[] = { - _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), - _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), - _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), - _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), - _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), - _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), - _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), - _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), - _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), - _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), - _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), - _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), - _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), - _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W), + _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0), + _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0), + _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0), + _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), {}, }; @@ -113,7 +117,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) so_data (so, ps->wrap); so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); so_data (so, txs); - so_data (so, ps->filt | 0x2000 /*voodoo*/); + so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | pt->height[0]); so_data (so, ps->bcol); diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 3872c0a0c9b..a63a6136c52 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -110,7 +110,8 @@ nv40_screen_surface_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R16_SNORM: case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: -- cgit v1.2.3 From ea0007cc4ca077c7e3951c4fda122bd242728d70 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 6 Aug 2008 17:14:22 -0600 Subject: softpipe: add texture border color code --- src/gallium/drivers/softpipe/sp_tex_sample.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 63b3b91110e..ed150527e2d 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -601,15 +601,25 @@ get_texel(struct tgsi_sampler *sampler, unsigned face, unsigned level, int x, int y, int z, float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) { - const int tx = x % TILE_SIZE; - const int ty = y % TILE_SIZE; - const struct softpipe_cached_tile *tile - = sp_get_cached_tile_tex(sampler->pipe, sampler->cache, - x, y, z, face, level); - rgba[0][j] = tile->data.color[ty][tx][0]; - rgba[1][j] = tile->data.color[ty][tx][1]; - rgba[2][j] = tile->data.color[ty][tx][2]; - rgba[3][j] = tile->data.color[ty][tx][3]; + if (x < 0 || x >= sampler->texture->width[level] || + y < 0 || y >= sampler->texture->height[level] || + z < 0 || z >= sampler->texture->depth[level]) { + rgba[0][j] = sampler->state->border_color[0]; + rgba[1][j] = sampler->state->border_color[1]; + rgba[2][j] = sampler->state->border_color[2]; + rgba[3][j] = sampler->state->border_color[3]; + } + else { + const int tx = x % TILE_SIZE; + const int ty = y % TILE_SIZE; + const struct softpipe_cached_tile *tile + = sp_get_cached_tile_tex(sampler->pipe, sampler->cache, + x, y, z, face, level); + rgba[0][j] = tile->data.color[ty][tx][0]; + rgba[1][j] = tile->data.color[ty][tx][1]; + rgba[2][j] = tile->data.color[ty][tx][2]; + rgba[3][j] = tile->data.color[ty][tx][3]; + } } -- cgit v1.2.3 From be66a8f43172327e3cdde27281e40377cacbb121 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 6 Aug 2008 17:22:29 -0600 Subject: gallium: added PIPE_CAP_TEXTURE_MIRROR_CLAMP, PIPE_CAP_TEXTURE_MIRROR_REPEAT Check for these caps in state tracker and enable corresponding GL extensions if supported. --- src/gallium/drivers/softpipe/sp_screen.c | 4 ++++ src/gallium/include/pipe/p_defines.h | 3 +++ src/mesa/state_tracker/st_extensions.c | 10 ++++++++-- 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 3f9d4b0ed31..ceb5616b5d0 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -72,6 +72,10 @@ softpipe_get_param(struct pipe_screen *screen, int param) return PIPE_MAX_COLOR_BUFS; case PIPE_CAP_OCCLUSION_QUERY: return 1; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 1; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index b1d100ef53c..bdc6d4ef468 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -281,6 +281,9 @@ enum pipe_texture_target { #define PIPE_CAP_GUARD_BAND_TOP 21 /*< float */ #define PIPE_CAP_GUARD_BAND_RIGHT 22 /*< float */ #define PIPE_CAP_GUARD_BAND_BOTTOM 23 /*< float */ +#define PIPE_CAP_TEXTURE_MIRROR_CLAMP 24 +#define PIPE_CAP_TEXTURE_MIRROR_REPEAT 25 + #ifdef __cplusplus diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index cacf972a1ba..b7444b298cf 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -134,8 +134,6 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.ARB_texture_env_combine = GL_TRUE; ctx->Extensions.ARB_texture_env_crossbar = GL_TRUE; ctx->Extensions.ARB_texture_env_dot3 = GL_TRUE; - ctx->Extensions.ARB_texture_mirrored_repeat = GL_TRUE; /* XXX temp */ - ctx->Extensions.ARB_vertex_program = GL_TRUE; ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE; @@ -179,6 +177,14 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.ARB_shading_language_120 = GL_TRUE; } + if (screen->get_param(screen, PIPE_CAP_TEXTURE_MIRROR_REPEAT) > 0) { + ctx->Extensions.ARB_texture_mirrored_repeat = GL_TRUE; + } + + if (screen->get_param(screen, PIPE_CAP_TEXTURE_MIRROR_CLAMP) > 0) { + ctx->Extensions.EXT_texture_mirror_clamp = GL_TRUE; + } + if (screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES)) { ctx->Extensions.ARB_texture_non_power_of_two = GL_TRUE; ctx->Extensions.NV_texture_rectangle = GL_TRUE; -- cgit v1.2.3 From fda01b584715c05696a0e6768fda669ef1eb5f3b Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@localhost.localdomain> Date: Thu, 7 Aug 2008 11:26:17 +1000 Subject: nouveau: fix build --- src/gallium/drivers/nv04/nv04_fragprog.c | 4 ++-- src/gallium/drivers/nv04/nv04_state.c | 2 +- src/gallium/drivers/nv04/nv04_state.h | 2 +- src/gallium/drivers/nv10/nv10_fragprog.c | 4 ++-- src/gallium/drivers/nv10/nv10_state.c | 2 +- src/gallium/drivers/nv10/nv10_state.h | 2 +- src/gallium/drivers/nv30/nv30_fragprog.c | 4 ++-- src/gallium/drivers/nv30/nv30_state.c | 2 +- src/gallium/drivers/nv30/nv30_state.h | 2 +- src/gallium/drivers/nv30/nv30_vertprog.c | 2 +- src/gallium/drivers/nv40/nv40_fragprog.c | 4 ++-- src/gallium/drivers/nv40/nv40_state.c | 2 +- src/gallium/drivers/nv40/nv40_state.h | 2 +- src/gallium/drivers/nv40/nv40_vertprog.c | 4 ++-- src/gallium/drivers/nv50/nv50_program.c | 4 ++-- src/gallium/drivers/nv50/nv50_program.h | 2 +- src/gallium/drivers/nv50/nv50_state.c | 2 +- 17 files changed, 23 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c index 11f4360ed80..215974eec00 100644 --- a/src/gallium/drivers/nv04/nv04_fragprog.c +++ b/src/gallium/drivers/nv04/nv04_fragprog.c @@ -4,8 +4,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv04_context.h" diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index 7e71dee7b5d..668d875671f 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -4,7 +4,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv04_context.h" #include "nv04_state.h" diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h index 39f7cd17b30..399f750dbe7 100644 --- a/src/gallium/drivers/nv04/nv04_state.h +++ b/src/gallium/drivers/nv04/nv04_state.h @@ -2,7 +2,7 @@ #define __NV04_STATE_H__ #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct nv04_blend_state { uint32_t b_enable; diff --git a/src/gallium/drivers/nv10/nv10_fragprog.c b/src/gallium/drivers/nv10/nv10_fragprog.c index 2a63c8a704e..137de9d53ef 100644 --- a/src/gallium/drivers/nv10/nv10_fragprog.c +++ b/src/gallium/drivers/nv10/nv10_fragprog.c @@ -4,8 +4,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv10_context.h" diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 43b9c32f12d..f902fd54b66 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -4,7 +4,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv10_context.h" #include "nv10_state.h" diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h index f1f9a12110f..3a3fd0d4f4f 100644 --- a/src/gallium/drivers/nv10/nv10_state.h +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -2,7 +2,7 @@ #define __NV10_STATE_H__ #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct nv10_blend_state { uint32_t b_enable; diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 59e79d66f2a..68058264e39 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -3,8 +3,8 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv30_context.h" diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index ba02413de5c..8d88d6c806e 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -3,7 +3,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv30_context.h" #include "nv30_state.h" diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h index 20c51761607..e6f23bf1667 100644 --- a/src/gallium/drivers/nv30/nv30_state.h +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -2,7 +2,7 @@ #define __NV30_STATE_H__ #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct nv30_sampler_state { uint32_t fmt; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 39852ce9834..6c075cdb752 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -3,7 +3,7 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv30_context.h" #include "nv30_state.h" diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 428348c3388..a361509e8f1 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -3,8 +3,8 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv40_context.h" diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 5d2c3ab8810..63d0ecc9158 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -5,7 +5,7 @@ #include "draw/draw_context.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv40_context.h" #include "nv40_state.h" diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index 2b4225deb2e..8a9d8c8fdf6 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -2,7 +2,7 @@ #define __NV40_STATE_H__ #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct nv40_sampler_state { uint32_t fmt; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 1e486a66ef0..ff988e6a5f4 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -3,8 +3,8 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv40_context.h" #include "nv40_state.h" diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 0d3ddb8a596..d6fbdd18243 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -26,8 +26,8 @@ #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "nv50_context.h" diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index d643e8db218..78deed6a384 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -2,7 +2,7 @@ #define __NV50_PROGRAM_H__ #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct nv50_program_exec { struct nv50_program_exec *next; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 731409bed4e..4055527d9f8 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -25,7 +25,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "nv50_context.h" #include "nv50_texture.h" -- cgit v1.2.3 From ce8e846ffea8e1a11b8ae4ba05a7386e7c34cc9f Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@localhost.localdomain> Date: Thu, 7 Aug 2008 11:38:56 +1000 Subject: nv40/nv50: enable mirror wrap modes --- src/gallium/drivers/nv40/nv40_screen.c | 3 +++ src/gallium/drivers/nv50/nv50_screen.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index a63a6136c52..0e1df89ee85 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -57,6 +57,9 @@ nv40_screen_get_param(struct pipe_screen *pscreen, int param) return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; case NOUVEAU_CAP_HW_VTXBUF: return 1; case NOUVEAU_CAP_HW_IDXBUF: diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 996b0b3e8f2..ec439239298 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -114,6 +114,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; case NOUVEAU_CAP_HW_VTXBUF: return 1; case NOUVEAU_CAP_HW_IDXBUF: -- cgit v1.2.3 From d50d7a54de89e602a9951264878dfe06924e1adb Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Thu, 7 Aug 2008 16:22:34 +0200 Subject: softpipe: Silence compiler warnings on Windows. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index ed150527e2d..8e392695336 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -601,9 +601,9 @@ get_texel(struct tgsi_sampler *sampler, unsigned face, unsigned level, int x, int y, int z, float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) { - if (x < 0 || x >= sampler->texture->width[level] || - y < 0 || y >= sampler->texture->height[level] || - z < 0 || z >= sampler->texture->depth[level]) { + if (x < 0 || x >= (int) sampler->texture->width[level] || + y < 0 || y >= (int) sampler->texture->height[level] || + z < 0 || z >= (int) sampler->texture->depth[level]) { rgba[0][j] = sampler->state->border_color[0]; rgba[1][j] = sampler->state->border_color[1]; rgba[2][j] = sampler->state->border_color[2]; -- cgit v1.2.3 From 35355f7610b69dcd2fdba451db4554478fe0acaa Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Wed, 6 Aug 2008 21:36:25 +0100 Subject: trace: New pipe driver to trace incoming calls. Only pipe_screen calls traced, and only linux supported, for now. --- SConstruct | 2 +- src/gallium/drivers/trace/README | 37 ++++ src/gallium/drivers/trace/SConscript | 15 ++ src/gallium/drivers/trace/tr_context.c | 41 ++++ src/gallium/drivers/trace/tr_context.h | 58 +++++ src/gallium/drivers/trace/tr_dump.c | 329 ++++++++++++++++++++++++++++ src/gallium/drivers/trace/tr_dump.h | 102 +++++++++ src/gallium/drivers/trace/tr_screen.c | 385 +++++++++++++++++++++++++++++++++ src/gallium/drivers/trace/tr_screen.h | 65 ++++++ src/gallium/drivers/trace/tr_state.c | 72 ++++++ src/gallium/drivers/trace/tr_state.h | 44 ++++ src/gallium/drivers/trace/tr_stream.c | 100 +++++++++ src/gallium/drivers/trace/tr_stream.h | 52 +++++ 13 files changed, 1301 insertions(+), 1 deletion(-) create mode 100644 src/gallium/drivers/trace/README create mode 100644 src/gallium/drivers/trace/SConscript create mode 100644 src/gallium/drivers/trace/tr_context.c create mode 100644 src/gallium/drivers/trace/tr_context.h create mode 100644 src/gallium/drivers/trace/tr_dump.c create mode 100644 src/gallium/drivers/trace/tr_dump.h create mode 100644 src/gallium/drivers/trace/tr_screen.c create mode 100644 src/gallium/drivers/trace/tr_screen.h create mode 100644 src/gallium/drivers/trace/tr_state.c create mode 100644 src/gallium/drivers/trace/tr_state.h create mode 100644 src/gallium/drivers/trace/tr_stream.c create mode 100644 src/gallium/drivers/trace/tr_stream.h (limited to 'src/gallium/drivers') diff --git a/SConstruct b/SConstruct index fe27ceafe7b..ee96bc5ab22 100644 --- a/SConstruct +++ b/SConstruct @@ -46,7 +46,7 @@ common.AddOptions(opts) opts.Add(ListOption('statetrackers', 'state trackers to build', default_statetrackers, ['mesa', 'python'])) opts.Add(ListOption('drivers', 'pipe drivers to build', default_drivers, - ['softpipe', 'failover', 'i915simple', 'i965simple', 'cell'])) + ['softpipe', 'failover', 'i915simple', 'i965simple', 'cell', 'trace'])) opts.Add(ListOption('winsys', 'winsys drivers to build', default_winsys, ['xlib', 'intel', 'gdi'])) diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README new file mode 100644 index 00000000000..81da610bd5e --- /dev/null +++ b/src/gallium/drivers/trace/README @@ -0,0 +1,37 @@ +This directory contains a Gallium3D pipe driver which traces all incoming calls. + +To build, invoke scons on the top dir as + + scons statetrackers=mesa drivers=softpipe,i915simple,trace winsys=xlib + +To use do + + ln -s libGL.so build/linux-x86-debug/gallium/winsys/xlib/libGL.so.1 + export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/gallium/winsys/xlib + export GALLIUM_TRACE=y + +ensure the right libGL.so is being picked by doing + + ldd `which glxingo` + +and then try running + + glxinfo + +which should create a gallium.*.trace file, which is an XML file. You can view +copying trace.xsl and trace.css to the same directory, and opening with a +XSLT capable browser like Firefox or Internet Explorer. It often happens that +the trace file was not properly terminated, and a + + </trace> + +closing tag is missing from the file end. Add it before try to open or +further transform it by doing + + echo '</trace>' >> gallium.??.trace + + +This is still work in progress. + +-- +Jose Fonseca <jrfonseca@tungstengraphics.com> diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript new file mode 100644 index 00000000000..30225b5a54c --- /dev/null +++ b/src/gallium/drivers/trace/SConscript @@ -0,0 +1,15 @@ +Import('*') + +env = env.Clone() + +trace = env.ConvenienceLibrary( + target = 'trace', + source = [ + 'tr_context.c', + 'tr_dump.c', + 'tr_screen.c', + 'tr_state.c', + 'tr_stream.c', + ]) + +Export('trace') \ No newline at end of file diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c new file mode 100644 index 00000000000..67a67b3da62 --- /dev/null +++ b/src/gallium/drivers/trace/tr_context.c @@ -0,0 +1,41 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" + +#include "tr_stream.h" +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_context.h" + + +struct pipe_context * +trace_context_create(struct pipe_context *pipe) +{ + /* TODO */ + return pipe; +} diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h new file mode 100644 index 00000000000..75b8d11a7ac --- /dev/null +++ b/src/gallium/drivers/trace/tr_context.h @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_CONTEXT_H_ +#define TR_CONTEXT_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_context.h" + + +struct trace_context +{ + struct pipe_context base; + + /* TODO */ +}; + + +static INLINE struct trace_context * +trace_context(struct pipe_context *context) +{ + assert(context); + return (struct trace_context *)context; +} + + + +struct pipe_context * +trace_context_create(struct pipe_context *context); + + +#endif /* TR_CONTEXT_H_ */ diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c new file mode 100644 index 00000000000..30c09863634 --- /dev/null +++ b/src/gallium/drivers/trace/tr_dump.c @@ -0,0 +1,329 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Trace dumping functions. + * + * For now we just use standard XML for dumping the trace calls, as this is + * simple to write, parse, and visually inspect, but the actual representation + * is abstracted out of this file, so that we can switch to a binary + * representation if/when it becomes justified. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "pipe/p_compiler.h" +#include "util/u_string.h" + +#include "tr_stream.h" +#include "tr_dump.h" + + +static INLINE void +trace_dump_write(struct trace_stream *stream, const char *s) +{ + trace_stream_write(stream, s, strlen(s)); +} + + +static INLINE void +trace_dump_writef(struct trace_stream *stream, const char *format, ...) +{ + char buf[1024]; + va_list ap; + va_start(ap, format); + util_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + trace_dump_write(stream, buf); +} + + +static INLINE void +trace_dump_escape(struct trace_stream *stream, const char *str) +{ + const unsigned char *p = (const unsigned char *)str; + unsigned char c; + while((c = *p++) != 0) { + if(c == '<') + trace_dump_write(stream, "<"); + else if(c == '>') + trace_dump_write(stream, ">"); + else if(c == '&') + trace_dump_write(stream, "&"); + else if(c == '\'') + trace_dump_write(stream, "'"); + else if(c == '\"') + trace_dump_write(stream, """); + else if(c >= 0x20 && c <= 0x7e) + trace_dump_writef(stream, "%c", c); + else + trace_dump_writef(stream, "&#%u;", c); + } +} + + +static INLINE void +trace_dump_indent(struct trace_stream *stream, unsigned level) +{ + unsigned i; + for(i = 0; i < level; ++i) + trace_dump_write(stream, "\t"); +} + + +static INLINE void +trace_dump_newline(struct trace_stream *stream) +{ + trace_dump_write(stream, "\n"); +} + + +static INLINE void +trace_dump_tag(struct trace_stream *stream, + const char *name) +{ + trace_dump_write(stream, "<"); + trace_dump_write(stream, name); + trace_dump_write(stream, "/>"); +} + + +static INLINE void +trace_dump_tag_begin(struct trace_stream *stream, + const char *name) +{ + trace_dump_write(stream, "<"); + trace_dump_write(stream, name); + trace_dump_write(stream, ">"); +} + +static INLINE void +trace_dump_tag_begin1(struct trace_stream *stream, + const char *name, + const char *attr1, const char *value1) +{ + trace_dump_write(stream, "<"); + trace_dump_write(stream, name); + trace_dump_write(stream, " "); + trace_dump_write(stream, attr1); + trace_dump_write(stream, "='"); + trace_dump_escape(stream, value1); + trace_dump_write(stream, "'>"); +} + + +static INLINE void +trace_dump_tag_begin2(struct trace_stream *stream, + const char *name, + const char *attr1, const char *value1, + const char *attr2, const char *value2) +{ + trace_dump_write(stream, "<"); + trace_dump_write(stream, name); + trace_dump_write(stream, " "); + trace_dump_write(stream, attr1); + trace_dump_write(stream, "=\'"); + trace_dump_escape(stream, value1); + trace_dump_write(stream, "\' "); + trace_dump_write(stream, attr2); + trace_dump_write(stream, "=\'"); + trace_dump_escape(stream, value2); + trace_dump_write(stream, "\'>"); +} + + +static INLINE void +trace_dump_tag_begin3(struct trace_stream *stream, + const char *name, + const char *attr1, const char *value1, + const char *attr2, const char *value2, + const char *attr3, const char *value3) +{ + trace_dump_write(stream, "<"); + trace_dump_write(stream, name); + trace_dump_write(stream, " "); + trace_dump_write(stream, attr1); + trace_dump_write(stream, "=\'"); + trace_dump_escape(stream, value1); + trace_dump_write(stream, "\' "); + trace_dump_write(stream, attr2); + trace_dump_write(stream, "=\'"); + trace_dump_escape(stream, value2); + trace_dump_write(stream, "\' "); + trace_dump_write(stream, attr3); + trace_dump_write(stream, "=\'"); + trace_dump_escape(stream, value3); + trace_dump_write(stream, "\'>"); +} + + +static INLINE void +trace_dump_tag_end(struct trace_stream *stream, + const char *name) +{ + trace_dump_write(stream, "</"); + trace_dump_write(stream, name); + trace_dump_write(stream, ">"); +} + + +void trace_dump_trace_begin(struct trace_stream *stream, + unsigned version) +{ + trace_dump_write(stream, "<?xml version='1.0' encoding='UTF-8'?>\n"); + trace_dump_write(stream, "<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); + trace_dump_writef(stream, "<trace version='%u'>\n", version); +} + + +void trace_dump_trace_end(struct trace_stream *stream) +{ + trace_dump_write(stream, "</trace>\n"); +} + +void trace_dump_call_begin(struct trace_stream *stream, + const char *klass, const char *method) +{ + trace_dump_indent(stream, 1); + trace_dump_tag_begin2(stream, "call", "class", klass, "method", method); + trace_dump_newline(stream); +} + +void trace_dump_call_end(struct trace_stream *stream) +{ + trace_dump_indent(stream, 1); + trace_dump_tag_end(stream, "call"); + trace_dump_newline(stream); +} + +void trace_dump_arg_begin(struct trace_stream *stream, + const char *name) +{ + trace_dump_indent(stream, 2); + trace_dump_tag_begin1(stream, "arg", "name", name); +} + +void trace_dump_arg_end(struct trace_stream *stream) +{ + trace_dump_tag_end(stream, "arg"); + trace_dump_newline(stream); +} + +void trace_dump_ret_begin(struct trace_stream *stream) +{ + trace_dump_indent(stream, 2); + trace_dump_tag_begin(stream, "ret"); +} + +void trace_dump_ret_end(struct trace_stream *stream) +{ + trace_dump_tag_end(stream, "ret"); + trace_dump_newline(stream); +} + +void trace_dump_bool(struct trace_stream *stream, + int value) +{ + trace_dump_writef(stream, "<bool>%c</bool>", value ? '1' : '0'); +} + +void trace_dump_int(struct trace_stream *stream, + long int value) +{ + trace_dump_writef(stream, "<int>%li</int>", value); +} + +void trace_dump_uint(struct trace_stream *stream, + long unsigned value) +{ + trace_dump_writef(stream, "<uint>%lu</uint>", value); +} + +void trace_dump_float(struct trace_stream *stream, + double value) +{ + trace_dump_writef(stream, "<float>%g</float>", value); +} + +void trace_dump_string(struct trace_stream *stream, + const char *str) +{ + trace_dump_write(stream, "<string>"); + trace_dump_escape(stream, str); + trace_dump_write(stream, "</string>"); +} + +void trace_dump_array_begin(struct trace_stream *stream) +{ + trace_dump_write(stream, "<array>"); +} + +void trace_dump_array_end(struct trace_stream *stream) +{ + trace_dump_write(stream, "</array>"); +} + +void trace_dump_elem_begin(struct trace_stream *stream) +{ + trace_dump_write(stream, "<elem>"); +} + +void trace_dump_elem_end(struct trace_stream *stream) +{ + trace_dump_write(stream, "</elem>"); +} + +void trace_dump_struct_begin(struct trace_stream *stream, + const char *name) +{ + trace_dump_writef(stream, "<struct name='%s'>", name); +} + +void trace_dump_struct_end(struct trace_stream *stream) +{ + trace_dump_write(stream, "</struct>"); +} + +void trace_dump_member_begin(struct trace_stream *stream, + const char *name) +{ + trace_dump_writef(stream, "<member name='%s'>", name); +} + +void trace_dump_member_end(struct trace_stream *stream) +{ + trace_dump_write(stream, "</member>"); +} + +void trace_dump_ptr(struct trace_stream *stream, + const void *value) +{ + trace_dump_writef(stream, "<ptr>%p</ptr>", value); +} diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h new file mode 100644 index 00000000000..f0864865912 --- /dev/null +++ b/src/gallium/drivers/trace/tr_dump.h @@ -0,0 +1,102 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation streams (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Trace data dumping primitives. + */ + +#ifndef TR_DUMP_H +#define TR_DUMP_H + + +struct trace_stream; + + +void trace_dump_trace_begin(struct trace_stream *stream, unsigned version); +void trace_dump_trace_end(struct trace_stream *stream); +void trace_dump_call_begin(struct trace_stream *stream, const char *klass, const char *method); +void trace_dump_call_end(struct trace_stream *stream); +void trace_dump_arg_begin(struct trace_stream *stream, const char *name); +void trace_dump_arg_end(struct trace_stream *stream); +void trace_dump_ret_begin(struct trace_stream *stream); +void trace_dump_ret_end(struct trace_stream *stream); +void trace_dump_bool(struct trace_stream *stream, int value); +void trace_dump_int(struct trace_stream *stream, long int value); +void trace_dump_uint(struct trace_stream *stream, long unsigned value); +void trace_dump_float(struct trace_stream *stream, double value); +void trace_dump_string(struct trace_stream *stream, const char *str); +void trace_dump_array_begin(struct trace_stream *stream); +void trace_dump_array_end(struct trace_stream *stream); +void trace_dump_elem_begin(struct trace_stream *stream); +void trace_dump_elem_end(struct trace_stream *stream); +void trace_dump_struct_begin(struct trace_stream *stream, const char *name); +void trace_dump_struct_end(struct trace_stream *stream); +void trace_dump_member_begin(struct trace_stream *stream, const char *name); +void trace_dump_member_end(struct trace_stream *stream); +void trace_dump_ptr(struct trace_stream *stream, const void *value); + + +/* + * Code saving macros. + */ + +#define trace_dump_arg(_stream, _type, _arg) \ + do { \ + trace_dump_arg_begin(_stream, #_arg); \ + trace_dump_##_type(_stream, _arg); \ + trace_dump_arg_end(_stream); \ + } while(0) + +#define trace_dump_ret(_stream, _type, _arg) \ + do { \ + trace_dump_ret_begin(_stream); \ + trace_dump_##_type(_stream, _arg); \ + trace_dump_ret_end(_stream); \ + } while(0) + +#define trace_dump_array(_stream, _type, _obj, _size) \ + do { \ + unsigned long idx; \ + trace_dump_array_begin(_stream); \ + for(idx = 0; idx < _size; ++idx) { \ + trace_dump_elem_begin(_stream); \ + trace_dump_##_type(_stream, _obj[idx]); \ + trace_dump_elem_end(_stream); \ + } \ + trace_dump_array_end(_stream); \ + } while(0) + +#define trace_dump_member(_stream, _type, _obj, _member) \ + do { \ + trace_dump_member_begin(_stream, #_member); \ + trace_dump_##_type(_stream, _obj->_member); \ + trace_dump_member_end(_stream); \ + } while(0) + + +#endif /* TR_DUMP_H */ diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c new file mode 100644 index 00000000000..20a2026e1d0 --- /dev/null +++ b/src/gallium/drivers/trace/tr_screen.c @@ -0,0 +1,385 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" + +#include "tr_stream.h" +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_screen.h" + + +static const char * +trace_screen_get_name(struct pipe_screen *_screen) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + const char *result; + + trace_dump_call_begin(stream, "pipe_screen", "get_name"); + + trace_dump_arg(stream, ptr, screen); + + result = screen->get_name(screen); + + trace_dump_ret(stream, string, result); + + trace_dump_call_end(stream); + + return result; +} + + +static const char * +trace_screen_get_vendor(struct pipe_screen *_screen) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + const char *result; + + trace_dump_call_begin(stream, "pipe_screen", "get_vendor"); + + trace_dump_arg(stream, ptr, screen); + + result = screen->get_vendor(screen); + + trace_dump_ret(stream, string, result); + + trace_dump_call_end(stream); + + return result; +} + + +static int +trace_screen_get_param(struct pipe_screen *_screen, + int param) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + int result; + + trace_dump_call_begin(stream, "pipe_screen", "get_param"); + + trace_dump_arg(stream, ptr, screen); + trace_dump_arg(stream, int, param); + + result = screen->get_param(screen, param); + + trace_dump_ret(stream, int, result); + + trace_dump_call_end(stream); + + return result; +} + + +static float +trace_screen_get_paramf(struct pipe_screen *_screen, + int param) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + float result; + + trace_dump_call_begin(stream, "pipe_screen", "get_paramf"); + + trace_dump_arg(stream, ptr, screen); + trace_dump_arg(stream, int, param); + + result = screen->get_paramf(screen, param); + + trace_dump_ret(stream, float, result); + + trace_dump_call_end(stream); + + return result; +} + + +static boolean +trace_screen_is_format_supported(struct pipe_screen *_screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + boolean result; + + trace_dump_call_begin(stream, "pipe_screen", "is_format_supported"); + + trace_dump_arg(stream, ptr, screen); + trace_dump_arg(stream, int, format); + trace_dump_arg(stream, int, target); + trace_dump_arg(stream, uint, tex_usage); + trace_dump_arg(stream, uint, geom_flags); + + result = screen->is_format_supported(screen, format, target, tex_usage, geom_flags); + + trace_dump_ret(stream, bool, result); + + trace_dump_call_end(stream); + + return result; +} + + +static struct pipe_texture * +trace_screen_texture_create(struct pipe_screen *_screen, + const struct pipe_texture *templat) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + struct pipe_texture *result; + + trace_dump_call_begin(stream, "pipe_screen", "texture_create"); + + trace_dump_arg(stream, ptr, screen); + trace_dump_arg(stream, template, templat); + + result = screen->texture_create(screen, templat); + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static struct pipe_texture * +trace_screen_texture_blanket(struct pipe_screen *_screen, + const struct pipe_texture *templat, + const unsigned *ppitch, + struct pipe_buffer *buffer) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + unsigned pitch = *ppitch; + struct pipe_texture *result; + + trace_dump_call_begin(stream, "pipe_screen", "texture_blanket"); + + trace_dump_arg(stream, ptr, screen); + trace_dump_arg(stream, template, templat); + trace_dump_arg(stream, uint, pitch); + trace_dump_arg(stream, ptr, buffer); + + result = screen->texture_blanket(screen, templat, ppitch, buffer); + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static void +trace_screen_texture_release(struct pipe_screen *_screen, + struct pipe_texture **ptexture) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + struct pipe_texture *texture = *ptexture; + + trace_dump_call_begin(stream, "pipe_screen", "texture_release"); + + trace_dump_arg(stream, ptr, screen); + trace_dump_arg(stream, ptr, texture); + + screen->texture_release(screen, ptexture); + + trace_dump_call_end(stream); +} + +static struct pipe_surface * +trace_screen_get_tex_surface(struct pipe_screen *_screen, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice, + unsigned usage) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + struct pipe_surface *result; + + trace_dump_call_begin(stream, "pipe_screen", "get_tex_surface"); + + trace_dump_arg(stream, ptr, screen); + trace_dump_arg(stream, ptr, texture); + trace_dump_arg(stream, uint, face); + trace_dump_arg(stream, uint, level); + trace_dump_arg(stream, uint, zslice); + trace_dump_arg(stream, uint, usage); + + result = screen->get_tex_surface(screen, texture, face, level, zslice, usage); + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static void +trace_screen_tex_surface_release(struct pipe_screen *_screen, + struct pipe_surface **psurface) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + struct pipe_surface *surface = *psurface; + + trace_dump_call_begin(stream, "pipe_screen", "tex_surface_release"); + + trace_dump_arg(stream, ptr, screen); + trace_dump_arg(stream, ptr, surface); + + screen->tex_surface_release(screen, psurface); + + trace_dump_call_end(stream); +} + + +static void * +trace_screen_surface_map(struct pipe_screen *_screen, + struct pipe_surface *surface, + unsigned flags) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + struct pipe_surface *result; + + trace_dump_call_begin(stream, "pipe_screen", "surface_map"); + + trace_dump_arg(stream, ptr, screen); + trace_dump_arg(stream, ptr, surface); + trace_dump_arg(stream, uint, flags); + + result = screen->surface_map(screen, surface, flags); + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static void +trace_screen_surface_unmap(struct pipe_screen *_screen, + struct pipe_surface *surface) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + + trace_dump_call_begin(stream, "pipe_screen", "surface_unmap"); + + trace_dump_arg(stream, ptr, screen); + trace_dump_arg(stream, ptr, surface); + + screen->surface_unmap(screen, surface); + + trace_dump_call_end(stream); +} + + +static void +trace_screen_destroy(struct pipe_screen *_screen) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_screen *screen = tr_scr->screen; + + trace_dump_call_begin(stream, "pipe_screen", "destroy"); + + trace_dump_arg(stream, ptr, screen); + + screen->destroy(screen); + + trace_dump_call_end(stream); + + trace_dump_trace_end(stream); + + trace_stream_close(tr_scr->stream); + + FREE(tr_scr); +} + + +struct pipe_screen * +trace_screen_create(struct pipe_screen *screen) +{ + struct trace_screen *tr_scr; + + if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) + return screen; + + tr_scr = CALLOC_STRUCT(trace_screen); + if(!tr_scr) + return NULL; + + tr_scr->base.winsys = screen->winsys; + tr_scr->base.destroy = trace_screen_destroy; + tr_scr->base.get_name = trace_screen_get_name; + tr_scr->base.get_vendor = trace_screen_get_vendor; + tr_scr->base.get_param = trace_screen_get_param; + tr_scr->base.get_paramf = trace_screen_get_paramf; + tr_scr->base.is_format_supported = trace_screen_is_format_supported; + tr_scr->base.texture_create = trace_screen_texture_create; + tr_scr->base.texture_blanket = trace_screen_texture_blanket; + tr_scr->base.texture_release = trace_screen_texture_release; + tr_scr->base.get_tex_surface = trace_screen_get_tex_surface; + tr_scr->base.tex_surface_release = trace_screen_tex_surface_release; + tr_scr->base.surface_map = trace_screen_surface_map; + tr_scr->base.surface_unmap = trace_screen_surface_unmap; + + tr_scr->screen = screen; + + tr_scr->stream = trace_stream_create("gallium", "trace"); + if(!tr_scr->stream) + return NULL; + + trace_dump_trace_begin(tr_scr->stream, 0); + + return &tr_scr->base; +} diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h new file mode 100644 index 00000000000..5d7d667f719 --- /dev/null +++ b/src/gallium/drivers/trace/tr_screen.h @@ -0,0 +1,65 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_SCREEN_H_ +#define TR_SCREEN_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_screen.h" + + +struct trace_stream; + + +struct trace_screen +{ + struct pipe_screen base; + + struct pipe_screen *screen; + + struct trace_stream *stream; + + unsigned event_no; +}; + + +static INLINE struct trace_screen * +trace_screen(struct pipe_screen *screen) +{ + assert(screen); + return (struct trace_screen *)screen; +} + + + +struct pipe_screen * +trace_screen_create(struct pipe_screen *screen); + + +#endif /* TR_SCREEN_H_ */ diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c new file mode 100644 index 00000000000..1e0e87f7fd8 --- /dev/null +++ b/src/gallium/drivers/trace/tr_state.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "tr_dump.h" +#include "tr_state.h" + + +void trace_dump_block(struct trace_stream *stream, + const struct pipe_format_block *block) +{ + trace_dump_struct_begin(stream, "pipe_format_block"); + trace_dump_member(stream, uint, block, size); + trace_dump_member(stream, uint, block, width); + trace_dump_member(stream, uint, block, height); + trace_dump_struct_end(stream); +} + + +void trace_dump_template(struct trace_stream *stream, + const struct pipe_texture *templat) +{ + trace_dump_struct_begin(stream, "pipe_texture"); + + trace_dump_member(stream, int, templat, target); + trace_dump_member(stream, int, templat, format); + + trace_dump_member_begin(stream, "width"); + trace_dump_array(stream, uint, templat->width, 1); + trace_dump_member_end(stream); + + trace_dump_member_begin(stream, "height"); + trace_dump_array(stream, uint, templat->height, 1); + trace_dump_member_end(stream); + + trace_dump_member_begin(stream, "block"); + trace_dump_block(stream, &templat->block); + trace_dump_member_end(stream); + + trace_dump_member(stream, uint, templat, last_level); + trace_dump_member(stream, uint, templat, tex_usage); + + trace_dump_struct_end(stream); +} + diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h new file mode 100644 index 00000000000..e52524ee90f --- /dev/null +++ b/src/gallium/drivers/trace/tr_state.h @@ -0,0 +1,44 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation streams (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_STATE_H +#define TR_STATE_H + + +struct trace_stream; + +struct pipe_format_block; +struct pipe_texture; + +void trace_dump_block(struct trace_stream *stream, + const struct pipe_format_block *block); + +void trace_dump_template(struct trace_stream *stream, + const struct pipe_texture *templat); + + +#endif /* TR_STATE_H */ diff --git a/src/gallium/drivers/trace/tr_stream.c b/src/gallium/drivers/trace/tr_stream.c new file mode 100644 index 00000000000..14cc257e154 --- /dev/null +++ b/src/gallium/drivers/trace/tr_stream.c @@ -0,0 +1,100 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include <stdio.h> +#else +#error Unsupported platform +#endif + +#include "pipe/p_util.h" + +#include "tr_stream.h" + + +struct trace_stream +{ +#if defined(PIPE_OS_LINUX) + FILE *file; +#endif +}; + + +struct trace_stream * +trace_stream_create(const char *name, const char *ext) +{ + struct trace_stream *stream; + static unsigned file_no = 0; + char filename[1024]; + + stream = CALLOC_STRUCT(trace_stream); + if(!stream) + goto error1; + + snprintf(filename, sizeof(filename), "%s.%u.%s", name, file_no, ext); + +#if defined(PIPE_OS_LINUX) + stream->file = fopen(filename, "w"); + if(!stream->file) + goto error2; +#endif + + return stream; + +error2: + FREE(stream); +error1: + return NULL; +} + + +boolean +trace_stream_write(struct trace_stream *stream, const void *data, size_t size) +{ + if(!stream) + return FALSE; + +#if defined(PIPE_OS_LINUX) + return fwrite(data, size, 1, stream->file) == size ? TRUE : FALSE; +#endif +} + + +void +trace_stream_close(struct trace_stream *stream) +{ + if(!stream) + return; + +#if defined(PIPE_OS_LINUX) + fclose(stream->file); +#endif + + FREE(stream); +} diff --git a/src/gallium/drivers/trace/tr_stream.h b/src/gallium/drivers/trace/tr_stream.h new file mode 100644 index 00000000000..d50fed26917 --- /dev/null +++ b/src/gallium/drivers/trace/tr_stream.h @@ -0,0 +1,52 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation streams (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Cross-platform sequential access stream abstraction. + */ + +#ifndef TR_STREAM_H +#define TR_STREAM_H + + +#include "pipe/p_compiler.h" + +struct trace_stream; + + +struct trace_stream * +trace_stream_create(const char *name, const char *ext); + +boolean +trace_stream_write(struct trace_stream *stream, const void *data, size_t size); + +void +trace_stream_close(struct trace_stream *stream); + + +#endif /* TR_STREAM_H */ -- cgit v1.2.3 From f6e0514736bb763813a49ae5542b2c8648641595 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Wed, 6 Aug 2008 21:39:25 +0100 Subject: trace: Add missing XSL and CSS. --- src/gallium/drivers/trace/trace.css | 72 +++++++++++++++++++++++ src/gallium/drivers/trace/trace.xsl | 114 ++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 src/gallium/drivers/trace/trace.css create mode 100644 src/gallium/drivers/trace/trace.xsl (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/trace.css b/src/gallium/drivers/trace/trace.css new file mode 100644 index 00000000000..d92c4337910 --- /dev/null +++ b/src/gallium/drivers/trace/trace.css @@ -0,0 +1,72 @@ +/**************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc. + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ****************************************************************************/ + +body { + font-family: verdana, sans-serif; + font-size: 11px; + font-weight: normal; + text-align : left; +} + +ul.calls { + list-style: none; + margin-left: 0px; + padding-left: 0px; +} + +ul.args { + display:inline; + list-style: none; + margin-left: 0px; + padding-left: 0px; +} + +ul.args li { + display:inline; +} + +ul.elems { + list-style: none; + margin-left: 2em; + padding-left: 0px; +} + +ul.elems li { + display:block; +} + +.fun { + font-weight: bold; +} + +.var { + font-style: italic; +} + +.typ { + display: none; +} + +.lit { + color: #0000ff; +} + +.ptr { + color: #008000; +} diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl new file mode 100644 index 00000000000..ace4020cf94 --- /dev/null +++ b/src/gallium/drivers/trace/trace.xsl @@ -0,0 +1,114 @@ +<?xml version="1.0"?> + +<!-- + +Copyright 2008 Tungsten Graphics, Inc. + +This program is free software: you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +!--> + +<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> + + <xsl:output method="html" /> + + <xsl:strip-space elements="*" /> + + <xsl:template match="/trace"> + <html> + <head> + <title>Gallium Trace</title> + <link rel="stylesheet" type="text/css" href="trace.css"/> + </head> + <body> + <ul class="calls"> + <xsl:apply-templates/> + </ul> + </body> + </html> + </xsl:template> + + <xsl:template match="call"> + <li> + <span class="fun"> + <xsl:value-of select="@class"/> + <xsl:text>::</xsl:text> + <xsl:value-of select="@method"/> + </span> + <xsl:text>(</xsl:text> + <ul class="args"> + <xsl:apply-templates select="arg"/> + </ul> + <xsl:text>)</xsl:text> + <xsl:apply-templates select="ret"/> + </li> + </xsl:template> + + <xsl:template match="arg|member"> + <li> + <xsl:apply-templates select="@name"/> + <xsl:text> = </xsl:text> + <xsl:apply-templates /> + <xsl:if test="position() != last()"> + <xsl:text>, </xsl:text> + </xsl:if> + </li> + </xsl:template> + + <xsl:template match="ret"> + <xsl:text> = </xsl:text> + <xsl:apply-templates /> + </xsl:template> + + <xsl:template match="bool|int|uint"> + <span class="lit"> + <xsl:value-of select="text()"/> + </span> + </xsl:template> + + <xsl:template match="string"> + <span class="lit"> + <xsl:text>"</xsl:text> + <xsl:value-of select="text()"/> + <xsl:text>"</xsl:text> + </span> + </xsl:template> + + <xsl:template match="array|struct"> + <xsl:text>{</xsl:text> + <xsl:apply-templates /> + <xsl:text>}</xsl:text> + </xsl:template> + + <xsl:template match="elem"> + <li> + <xsl:apply-templates /> + <xsl:if test="position() != last()"> + <xsl:text>, </xsl:text> + </xsl:if> + </li> + </xsl:template> + + <xsl:template match="ptr"> + <span class="ptr"> + <xsl:value-of select="text()"/> + </span> + </xsl:template> + + <xsl:template match="@name"> + <span class="var"> + <xsl:value-of select="."/> + </span> + </xsl:template> +</xsl:transform> -- cgit v1.2.3 From f2e19c34e06dfc33557a481f764fc75a5aef15ff Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 7 Aug 2008 13:20:02 +0100 Subject: trace: Trace pipe context calls. --- src/gallium/drivers/trace/tr_context.c | 1017 +++++++++++++++++++++++++++++++- src/gallium/drivers/trace/tr_context.h | 10 +- src/gallium/drivers/trace/tr_dump.h | 7 + 3 files changed, 1027 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 67a67b3da62..6161e35fa5e 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -30,12 +30,1025 @@ #include "tr_stream.h" #include "tr_dump.h" #include "tr_state.h" +#include "tr_screen.h" #include "tr_context.h" +static INLINE void +trace_context_set_edgeflags(struct pipe_context *_pipe, + const unsigned *bitfield) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_edgeflags"); + + trace_dump_arg(stream, ptr, pipe); + /* FIXME: we don't know how big this array is */ + trace_dump_arg(stream, ptr, bitfield); + + pipe->set_edgeflags(pipe, bitfield);; + + trace_dump_call_end(stream); +} + + +static INLINE boolean +trace_context_draw_arrays(struct pipe_context *_pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + boolean result; + + trace_dump_call_begin(stream, "pipe_context", "draw_arrays"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, uint, mode); + trace_dump_arg(stream, uint, start); + trace_dump_arg(stream, uint, count); + + result = pipe->draw_arrays(pipe, mode, start, count);; + + trace_dump_ret(stream, bool, result); + + trace_dump_call_end(stream); + + return result; +} + + +static INLINE boolean +trace_context_draw_elements(struct pipe_context *_pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + boolean result; + + trace_dump_call_begin(stream, "pipe_context", "draw_elements"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, indexBuffer); + trace_dump_arg(stream, uint, indexSize); + trace_dump_arg(stream, uint, mode); + trace_dump_arg(stream, uint, start); + trace_dump_arg(stream, uint, count); + + result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);; + + trace_dump_ret(stream, bool, result); + + trace_dump_call_end(stream); + + return result; +} + + +static INLINE boolean +trace_context_draw_range_elements(struct pipe_context *_pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + boolean result; + + trace_dump_call_begin(stream, "pipe_context", "draw_range_elements"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, indexBuffer); + trace_dump_arg(stream, uint, indexSize); + trace_dump_arg(stream, uint, minIndex); + trace_dump_arg(stream, uint, maxIndex); + trace_dump_arg(stream, uint, mode); + trace_dump_arg(stream, uint, start); + trace_dump_arg(stream, uint, count); + + result = pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, minIndex, maxIndex, + mode, start, count); + + trace_dump_ret(stream, bool, result); + + trace_dump_call_end(stream); + + return result; +} + + +static INLINE struct pipe_query * +trace_context_create_query(struct pipe_context *_pipe, + unsigned query_type) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_query *result; + + trace_dump_call_begin(stream, "pipe_context", "create_query"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, uint, query_type); + + result = pipe->create_query(pipe, query_type);; + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static INLINE void +trace_context_destroy_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "destroy_query"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, query); + + pipe->destroy_query(pipe, query);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_begin_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "begin_query"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, query); + + pipe->begin_query(pipe, query);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_end_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "end_query"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, query); + + pipe->end_query(pipe, query); + + trace_dump_call_end(stream); +} + + +static INLINE boolean +trace_context_get_query_result(struct pipe_context *_pipe, + struct pipe_query *query, + boolean wait, + uint64 *presult) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + uint64 result; + boolean _result; + + trace_dump_call_begin(stream, "pipe_context", "get_query_result"); + + trace_dump_arg(stream, ptr, pipe); + + _result = pipe->get_query_result(pipe, query, wait, presult);; + result = *presult; + + trace_dump_arg(stream, uint, result); + trace_dump_ret(stream, bool, _result); + + trace_dump_call_end(stream); + + return _result; +} + + +static INLINE void * +trace_context_create_blend_state(struct pipe_context *_pipe, + const struct pipe_blend_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin(stream, "pipe_context", "create_blend_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + result = pipe->create_blend_state(pipe, state);; + + trace_dump_call_end(stream); + + return result; +} + + +static INLINE void +trace_context_bind_blend_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "bind_blend_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->bind_blend_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_delete_blend_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "delete_blend_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->delete_blend_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void * +trace_context_create_sampler_state(struct pipe_context *_pipe, + const struct pipe_sampler_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin(stream, "pipe_context", "create_sampler_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + result = pipe->create_sampler_state(pipe, state);; + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static INLINE void +trace_context_bind_sampler_states(struct pipe_context *_pipe, + unsigned num_states, void **states) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "bind_sampler_states"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, uint, num_states); + trace_dump_arg_array(stream, ptr, states, num_states); + + pipe->bind_sampler_states(pipe, num_states, states);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_delete_sampler_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "delete_sampler_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->delete_sampler_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void * +trace_context_create_rasterizer_state(struct pipe_context *_pipe, + const struct pipe_rasterizer_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin(stream, "pipe_context", "create_rasterizer_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + result = pipe->create_rasterizer_state(pipe, state);; + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static INLINE void +trace_context_bind_rasterizer_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "bind_rasterizer_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->bind_rasterizer_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_delete_rasterizer_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "delete_rasterizer_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->delete_rasterizer_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void * +trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin(stream, "pipe_context", "create_depth_stencil_alpha_state"); + + result = pipe->create_depth_stencil_alpha_state(pipe, state);; + + trace_dump_ret(stream, ptr, result); + trace_dump_arg(stream, ptr, state); + + trace_dump_call_end(stream); + + return result; +} + + +static INLINE void +trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "bind_depth_stencil_alpha_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->bind_depth_stencil_alpha_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "delete_depth_stencil_alpha_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->delete_depth_stencil_alpha_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void * +trace_context_create_fs_state(struct pipe_context *_pipe, + const struct pipe_shader_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin(stream, "pipe_context", "create_fs_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + result = pipe->create_fs_state(pipe, state);; + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static INLINE void +trace_context_bind_fs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "bind_fs_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->bind_fs_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_delete_fs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "delete_fs_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->delete_fs_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void * +trace_context_create_vs_state(struct pipe_context *_pipe, + const struct pipe_shader_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin(stream, "pipe_context", "create_vs_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + result = pipe->create_vs_state(pipe, state);; + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static INLINE void +trace_context_bind_vs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "bind_vs_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->bind_vs_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_delete_vs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "delete_vs_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->delete_vs_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_set_blend_color(struct pipe_context *_pipe, + const struct pipe_blend_color *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_blend_color"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->set_blend_color(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_set_clip_state(struct pipe_context *_pipe, + const struct pipe_clip_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_clip_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->set_clip_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_set_constant_buffer(struct pipe_context *_pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buffer) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_constant_buffer"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, uint, shader); + trace_dump_arg(stream, uint, index); + trace_dump_arg(stream, ptr, buffer); + + pipe->set_constant_buffer(pipe, shader, index, buffer);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_set_framebuffer_state(struct pipe_context *_pipe, + const struct pipe_framebuffer_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_framebuffer_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->set_framebuffer_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_set_polygon_stipple(struct pipe_context *_pipe, + const struct pipe_poly_stipple *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_polygon_stipple"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->set_polygon_stipple(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_set_scissor_state(struct pipe_context *_pipe, + const struct pipe_scissor_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_scissor_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->set_scissor_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_set_viewport_state(struct pipe_context *_pipe, + const struct pipe_viewport_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_viewport_state"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, state); + + pipe->set_viewport_state(pipe, state);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_set_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_sampler_textures"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, uint, num_textures); + trace_dump_arg_array(stream, ptr, textures, num_textures); + + pipe->set_sampler_textures(pipe, num_textures, textures);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_set_vertex_buffers(struct pipe_context *_pipe, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_vertex_buffers"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, uint, num_buffers); + trace_dump_arg(stream, ptr, buffers); + //trace_dump_arg_array(stream, ptr, buffers, num_buffers); + + pipe->set_vertex_buffers(pipe, num_buffers, buffers);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_set_vertex_elements(struct pipe_context *_pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "set_vertex_elements"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, uint, num_elements); + trace_dump_arg(stream, ptr, elements); + //trace_dump_arg_array(stream, ptr, elements, num_elements); + + pipe->set_vertex_elements(pipe, num_elements, elements);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_surface_copy(struct pipe_context *_pipe, + boolean do_flip, + struct pipe_surface *dest, + unsigned destx, unsigned desty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "surface_copy"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, bool, do_flip); + trace_dump_arg(stream, ptr, dest); + trace_dump_arg(stream, uint, destx); + trace_dump_arg(stream, uint, desty); + trace_dump_arg(stream, ptr, src); + trace_dump_arg(stream, uint, srcx); + trace_dump_arg(stream, uint, srcy); + trace_dump_arg(stream, uint, width); + trace_dump_arg(stream, uint, height); + + pipe->surface_copy(pipe, do_flip, + dest, destx, desty, + src, srcx, srcy, width, height); + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_surface_fill(struct pipe_context *_pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + unsigned value) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "surface_fill"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, dst); + trace_dump_arg(stream, uint, dstx); + trace_dump_arg(stream, uint, dsty); + trace_dump_arg(stream, uint, width); + trace_dump_arg(stream, uint, height); + + pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_clear(struct pipe_context *_pipe, + struct pipe_surface *surface, + unsigned clearValue) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "clear"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, ptr, surface); + trace_dump_arg(stream, uint, clearValue); + + pipe->clear(pipe, surface, clearValue);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_flush(struct pipe_context *_pipe, + unsigned flags, + struct pipe_fence_handle **fence) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "flush"); + + trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(stream, uint, flags); + trace_dump_arg(stream, ptr, fence); + + pipe->flush(pipe, flags, fence);; + + trace_dump_call_end(stream); +} + + +static INLINE void +trace_context_destroy(struct pipe_context *_pipe) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_screen *tr_scr = trace_screen(_pipe->screen); + struct trace_stream *stream = tr_scr->stream; + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin(stream, "pipe_context", "destroy"); + + trace_dump_arg(stream, ptr, pipe); + + pipe->destroy(pipe); + + trace_dump_call_end(stream); + + FREE(tr_ctx); +} + + struct pipe_context * trace_context_create(struct pipe_context *pipe) { - /* TODO */ - return pipe; + struct trace_context *tr_ctx; + + if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) + return pipe; + + tr_ctx = CALLOC_STRUCT(trace_context); + if(!tr_ctx) + return NULL; + + tr_ctx->base.winsys = pipe->winsys; + tr_ctx->base.screen = pipe->screen; + tr_ctx->base.destroy = trace_context_destroy; + tr_ctx->base.set_edgeflags = trace_context_set_edgeflags; + tr_ctx->base.draw_arrays = trace_context_draw_arrays; + tr_ctx->base.draw_elements = trace_context_draw_elements; + tr_ctx->base.draw_range_elements = trace_context_draw_range_elements; + tr_ctx->base.create_query = trace_context_create_query; + tr_ctx->base.destroy_query = trace_context_destroy_query; + tr_ctx->base.begin_query = trace_context_begin_query; + tr_ctx->base.end_query = trace_context_end_query; + tr_ctx->base.get_query_result = trace_context_get_query_result; + tr_ctx->base.create_blend_state = trace_context_create_blend_state; + tr_ctx->base.bind_blend_state = trace_context_bind_blend_state; + tr_ctx->base.delete_blend_state = trace_context_delete_blend_state; + tr_ctx->base.create_sampler_state = trace_context_create_sampler_state; + tr_ctx->base.bind_sampler_states = trace_context_bind_sampler_states; + tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state; + tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state; + tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state; + tr_ctx->base.delete_rasterizer_state = trace_context_delete_rasterizer_state; + tr_ctx->base.create_depth_stencil_alpha_state = trace_context_create_depth_stencil_alpha_state; + tr_ctx->base.bind_depth_stencil_alpha_state = trace_context_bind_depth_stencil_alpha_state; + tr_ctx->base.delete_depth_stencil_alpha_state = trace_context_delete_depth_stencil_alpha_state; + tr_ctx->base.create_fs_state = trace_context_create_fs_state; + tr_ctx->base.bind_fs_state = trace_context_bind_fs_state; + tr_ctx->base.delete_fs_state = trace_context_delete_fs_state; + tr_ctx->base.create_vs_state = trace_context_create_vs_state; + tr_ctx->base.bind_vs_state = trace_context_bind_vs_state; + tr_ctx->base.delete_vs_state = trace_context_delete_vs_state; + tr_ctx->base.set_blend_color = trace_context_set_blend_color; + tr_ctx->base.set_clip_state = trace_context_set_clip_state; + tr_ctx->base.set_constant_buffer = trace_context_set_constant_buffer; + tr_ctx->base.set_framebuffer_state = trace_context_set_framebuffer_state; + tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple; + tr_ctx->base.set_scissor_state = trace_context_set_scissor_state; + tr_ctx->base.set_viewport_state = trace_context_set_viewport_state; + tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures; + tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; + tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; + tr_ctx->base.surface_copy = trace_context_surface_copy; + tr_ctx->base.surface_fill = trace_context_surface_fill; + tr_ctx->base.clear = trace_context_clear; + tr_ctx->base.flush = trace_context_flush; + + tr_ctx->pipe = pipe; + + return &tr_ctx->base; } diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 75b8d11a7ac..80fb5980d6a 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -38,21 +38,21 @@ struct trace_context { struct pipe_context base; - /* TODO */ + struct pipe_context *pipe; }; static INLINE struct trace_context * -trace_context(struct pipe_context *context) +trace_context(struct pipe_context *pipe) { - assert(context); - return (struct trace_context *)context; + assert(pipe); + return (struct trace_context *)pipe; } struct pipe_context * -trace_context_create(struct pipe_context *context); +trace_context_create(struct pipe_context *pipe); #endif /* TR_CONTEXT_H_ */ diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index f0864865912..0be0812ce81 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -91,6 +91,13 @@ void trace_dump_ptr(struct trace_stream *stream, const void *value); trace_dump_array_end(_stream); \ } while(0) +#define trace_dump_arg_array(_stream, _type, _arg, _size) \ + do { \ + trace_dump_arg_begin(_stream, #_arg); \ + trace_dump_array(_stream, _type, _arg, _size); \ + trace_dump_arg_end(_stream); \ + } while(0) + #define trace_dump_member(_stream, _type, _obj, _member) \ do { \ trace_dump_member_begin(_stream, #_member); \ -- cgit v1.2.3 From 9dee60969df7ff263e05430e69ef26982fe2bd94 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 7 Aug 2008 18:55:05 +0100 Subject: trace: Dump state. --- src/gallium/drivers/trace/tr_context.c | 37 +-- src/gallium/drivers/trace/tr_dump.c | 10 +- src/gallium/drivers/trace/tr_dump.h | 31 ++- src/gallium/drivers/trace/tr_state.c | 433 ++++++++++++++++++++++++++++++++- src/gallium/drivers/trace/tr_state.h | 57 ++++- src/gallium/drivers/trace/trace.xsl | 6 + 6 files changed, 549 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 6161e35fa5e..ee8ad5eb97c 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -280,7 +280,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe, trace_dump_call_begin(stream, "pipe_context", "create_blend_state"); trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, blend_state, state); result = pipe->create_blend_state(pipe, state);; @@ -347,7 +347,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, result = pipe->create_sampler_state(pipe, state);; - trace_dump_ret(stream, ptr, result); + trace_dump_ret(stream, sampler_state, result); trace_dump_call_end(stream); @@ -409,7 +409,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe, trace_dump_call_begin(stream, "pipe_context", "create_rasterizer_state"); trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, rasterizer_state, state); result = pipe->create_rasterizer_state(pipe, state);; @@ -476,7 +476,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, result = pipe->create_depth_stencil_alpha_state(pipe, state);; trace_dump_ret(stream, ptr, result); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, depth_stencil_alpha_state, state); trace_dump_call_end(stream); @@ -537,7 +537,7 @@ trace_context_create_fs_state(struct pipe_context *_pipe, trace_dump_call_begin(stream, "pipe_context", "create_fs_state"); trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, shader_state, state); result = pipe->create_fs_state(pipe, state);; @@ -602,7 +602,7 @@ trace_context_create_vs_state(struct pipe_context *_pipe, trace_dump_call_begin(stream, "pipe_context", "create_vs_state"); trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, shader_state, state); result = pipe->create_vs_state(pipe, state);; @@ -666,7 +666,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe, trace_dump_call_begin(stream, "pipe_context", "set_blend_color"); trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, blend_color, state); pipe->set_blend_color(pipe, state);; @@ -686,7 +686,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe, trace_dump_call_begin(stream, "pipe_context", "set_clip_state"); trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, clip_state, state); pipe->set_clip_state(pipe, state);; @@ -709,7 +709,7 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe, trace_dump_arg(stream, ptr, pipe); trace_dump_arg(stream, uint, shader); trace_dump_arg(stream, uint, index); - trace_dump_arg(stream, ptr, buffer); + trace_dump_arg(stream, constant_buffer, buffer); pipe->set_constant_buffer(pipe, shader, index, buffer);; @@ -729,7 +729,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, trace_dump_call_begin(stream, "pipe_context", "set_framebuffer_state"); trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, framebuffer_state, state); pipe->set_framebuffer_state(pipe, state);; @@ -749,7 +749,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe, trace_dump_call_begin(stream, "pipe_context", "set_polygon_stipple"); trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, poly_stipple, state); pipe->set_polygon_stipple(pipe, state);; @@ -769,7 +769,7 @@ trace_context_set_scissor_state(struct pipe_context *_pipe, trace_dump_call_begin(stream, "pipe_context", "set_scissor_state"); trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, scissor_state, state); pipe->set_scissor_state(pipe, state);; @@ -789,7 +789,7 @@ trace_context_set_viewport_state(struct pipe_context *_pipe, trace_dump_call_begin(stream, "pipe_context", "set_viewport_state"); trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(stream, viewport_state, state); pipe->set_viewport_state(pipe, state);; @@ -833,8 +833,10 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, trace_dump_arg(stream, ptr, pipe); trace_dump_arg(stream, uint, num_buffers); - trace_dump_arg(stream, ptr, buffers); - //trace_dump_arg_array(stream, ptr, buffers, num_buffers); + + trace_dump_arg_begin(stream, "buffers"); + trace_dump_struct_array(stream, vertex_buffer, buffers, num_buffers); + trace_dump_arg_end(stream); pipe->set_vertex_buffers(pipe, num_buffers, buffers);; @@ -857,7 +859,10 @@ trace_context_set_vertex_elements(struct pipe_context *_pipe, trace_dump_arg(stream, ptr, pipe); trace_dump_arg(stream, uint, num_elements); trace_dump_arg(stream, ptr, elements); - //trace_dump_arg_array(stream, ptr, elements, num_elements); + + trace_dump_arg_begin(stream, "elements"); + trace_dump_struct_array(stream, vertex_element, elements, num_elements); + trace_dump_arg_end(stream); pipe->set_vertex_elements(pipe, num_elements, elements);; diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 30c09863634..0591b9fc336 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -322,8 +322,16 @@ void trace_dump_member_end(struct trace_stream *stream) trace_dump_write(stream, "</member>"); } +void trace_dump_null(struct trace_stream *stream) +{ + trace_dump_write(stream, "<null/>"); +} + void trace_dump_ptr(struct trace_stream *stream, const void *value) { - trace_dump_writef(stream, "<ptr>%p</ptr>", value); + if(value) + trace_dump_writef(stream, "<ptr>%p</ptr>", value); + else + trace_dump_null(stream); } diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index 0be0812ce81..5eeac8d3c5e 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -34,6 +34,9 @@ #define TR_DUMP_H +#include "pipe/p_util.h" + + struct trace_stream; @@ -58,6 +61,7 @@ void trace_dump_struct_begin(struct trace_stream *stream, const char *name); void trace_dump_struct_end(struct trace_stream *stream); void trace_dump_member_begin(struct trace_stream *stream, const char *name); void trace_dump_member_end(struct trace_stream *stream); +void trace_dump_null(struct trace_stream *stream); void trace_dump_ptr(struct trace_stream *stream, const void *value); @@ -83,14 +87,33 @@ void trace_dump_ptr(struct trace_stream *stream, const void *value); do { \ unsigned long idx; \ trace_dump_array_begin(_stream); \ - for(idx = 0; idx < _size; ++idx) { \ + for(idx = 0; idx < (_size); ++idx) { \ + trace_dump_elem_begin(_stream); \ + trace_dump_##_type(_stream, (_obj)[idx]); \ + trace_dump_elem_end(_stream); \ + } \ + trace_dump_array_end(_stream); \ + } while(0) + +#define trace_dump_struct_array(_stream, _type, _obj, _size) \ + do { \ + unsigned long idx; \ + trace_dump_array_begin(_stream); \ + for(idx = 0; idx < (_size); ++idx) { \ trace_dump_elem_begin(_stream); \ - trace_dump_##_type(_stream, _obj[idx]); \ + trace_dump_##_type(_stream, &(_obj)[idx]); \ trace_dump_elem_end(_stream); \ } \ trace_dump_array_end(_stream); \ } while(0) +#define trace_dump_member(_stream, _type, _obj, _member) \ + do { \ + trace_dump_member_begin(_stream, #_member); \ + trace_dump_##_type(_stream, (_obj)->_member); \ + trace_dump_member_end(_stream); \ + } while(0) + #define trace_dump_arg_array(_stream, _type, _arg, _size) \ do { \ trace_dump_arg_begin(_stream, #_arg); \ @@ -98,10 +121,10 @@ void trace_dump_ptr(struct trace_stream *stream, const void *value); trace_dump_arg_end(_stream); \ } while(0) -#define trace_dump_member(_stream, _type, _obj, _member) \ +#define trace_dump_member_array(_stream, _type, _obj, _member) \ do { \ trace_dump_member_begin(_stream, #_member); \ - trace_dump_##_type(_stream, _obj->_member); \ + trace_dump_array(_stream, _type, (_obj)->_member, sizeof((_obj)->_member)/sizeof((_obj)->_member[0])); \ trace_dump_member_end(_stream); \ } while(0) diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 1e0e87f7fd8..f18f5c611ed 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -27,12 +27,19 @@ #include "pipe/p_compiler.h" -#include "pipe/p_state.h" +#include "tgsi/tgsi_parse.h" #include "tr_dump.h" #include "tr_state.h" +void trace_dump_format(struct trace_stream *stream, + enum pipe_format format) +{ + trace_dump_int(stream, format); +} + + void trace_dump_block(struct trace_stream *stream, const struct pipe_format_block *block) { @@ -44,9 +51,32 @@ void trace_dump_block(struct trace_stream *stream, } +void trace_dump_buffer(struct trace_stream *stream, + const struct pipe_buffer *buffer) +{ + if(!buffer) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_buffer"); + trace_dump_member(stream, uint, buffer, alignment); + trace_dump_member(stream, uint, buffer, usage); + trace_dump_member(stream, uint, buffer, size); + /* TODO: buffer data */ + trace_dump_struct_end(stream); +} + + void trace_dump_template(struct trace_stream *stream, const struct pipe_texture *templat) { + assert(templat); + if(!templat) { + trace_dump_null(stream); + return; + } + trace_dump_struct_begin(stream, "pipe_texture"); trace_dump_member(stream, int, templat, target); @@ -70,3 +100,404 @@ void trace_dump_template(struct trace_stream *stream, trace_dump_struct_end(stream); } + +void trace_dump_rasterizer_state(struct trace_stream *stream, + const struct pipe_rasterizer_state *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_rasterizer_state"); + + trace_dump_member(stream, bool, state, flatshade); + trace_dump_member(stream, bool, state, light_twoside); + trace_dump_member(stream, uint, state, front_winding); + trace_dump_member(stream, uint, state, cull_mode); + trace_dump_member(stream, uint, state, fill_cw); + trace_dump_member(stream, uint, state, fill_ccw); + trace_dump_member(stream, bool, state, offset_cw); + trace_dump_member(stream, bool, state, offset_ccw); + trace_dump_member(stream, bool, state, scissor); + trace_dump_member(stream, bool, state, poly_smooth); + trace_dump_member(stream, bool, state, poly_stipple_enable); + trace_dump_member(stream, bool, state, point_smooth); + trace_dump_member(stream, bool, state, point_sprite); + trace_dump_member(stream, bool, state, point_size_per_vertex); + trace_dump_member(stream, bool, state, multisample); + trace_dump_member(stream, bool, state, line_smooth); + trace_dump_member(stream, bool, state, line_stipple_enable); + trace_dump_member(stream, uint, state, line_stipple_factor); + trace_dump_member(stream, uint, state, line_stipple_pattern); + trace_dump_member(stream, bool, state, line_last_pixel); + trace_dump_member(stream, bool, state, bypass_clipping); + trace_dump_member(stream, bool, state, bypass_vs); + trace_dump_member(stream, bool, state, origin_lower_left); + trace_dump_member(stream, bool, state, flatshade_first); + trace_dump_member(stream, bool, state, gl_rasterization_rules); + + trace_dump_member(stream, float, state, line_width); + trace_dump_member(stream, float, state, point_size); + trace_dump_member(stream, float, state, point_size_min); + trace_dump_member(stream, float, state, point_size_max); + trace_dump_member(stream, float, state, offset_units); + trace_dump_member(stream, float, state, offset_scale); + + trace_dump_member_array(stream, uint, state, sprite_coord_mode); + + trace_dump_struct_end(stream); +} + + +void trace_dump_poly_stipple(struct trace_stream *stream, + const struct pipe_poly_stipple *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_poly_stipple"); + + trace_dump_member_array(stream, uint, state, stipple); + + trace_dump_struct_end(stream); +} + + +void trace_dump_viewport_state(struct trace_stream *stream, + const struct pipe_viewport_state *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_viewport_state"); + + trace_dump_member_array(stream, float, state, scale); + trace_dump_member_array(stream, float, state, translate); + + trace_dump_struct_end(stream); +} + + +void trace_dump_scissor_state(struct trace_stream *stream, + const struct pipe_scissor_state *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_scissor_state"); + + trace_dump_member(stream, uint, state, minx); + trace_dump_member(stream, uint, state, miny); + trace_dump_member(stream, uint, state, maxx); + trace_dump_member(stream, uint, state, maxy); + + trace_dump_struct_end(stream); +} + + +void trace_dump_clip_state(struct trace_stream *stream, + const struct pipe_clip_state *state) +{ + unsigned i; + + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_scissor_state"); + + trace_dump_member_begin(stream, "ucp"); + trace_dump_array_begin(stream); + for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) + trace_dump_array(stream, float, state->ucp[i], 4); + trace_dump_array_end(stream); + trace_dump_member_end(stream); + + trace_dump_member(stream, uint, state, nr); + + trace_dump_struct_end(stream); +} + + +void trace_dump_constant_buffer(struct trace_stream *stream, + const struct pipe_constant_buffer *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_constant_buffer"); + + trace_dump_member(stream, buffer, state, buffer); + trace_dump_member(stream, uint, state, size); + + trace_dump_struct_end(stream); +} + + +void trace_dump_shader_state(struct trace_stream *stream, + const struct pipe_shader_state *state) +{ + uint32_t *p = (uint32_t *)state->tokens; + unsigned n = tgsi_num_tokens(state->tokens); + + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + assert(sizeof(struct tgsi_token) == 4); + + trace_dump_struct_begin(stream, "pipe_shader_state"); + + trace_dump_member_begin(stream, "tokens"); + trace_dump_array(stream, uint, p, n); + trace_dump_member_end(stream); + + trace_dump_struct_end(stream); +} + + +void trace_dump_depth_stencil_alpha_state(struct trace_stream *stream, + const struct pipe_depth_stencil_alpha_state *state) +{ + unsigned i; + + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_depth_stencil_alpha_state"); + + trace_dump_member_begin(stream, "depth"); + trace_dump_struct_begin(stream, ""); + trace_dump_member(stream, bool, &state->depth, enabled); + trace_dump_member(stream, bool, &state->depth, writemask); + trace_dump_member(stream, uint, &state->depth, func); + trace_dump_member(stream, bool, &state->depth, occlusion_count); + trace_dump_struct_end(stream); + trace_dump_member_end(stream); + + trace_dump_member_begin(stream, "stencil"); + trace_dump_array_begin(stream); + for(i = 0; i < Elements(state->stencil); ++i) { + trace_dump_elem_begin(stream); + trace_dump_struct_begin(stream, ""); + trace_dump_member(stream, bool, &state->stencil[i], enabled); + trace_dump_member(stream, uint, &state->stencil[i], func); + trace_dump_member(stream, uint, &state->stencil[i], fail_op); + trace_dump_member(stream, uint, &state->stencil[i], zpass_op); + trace_dump_member(stream, uint, &state->stencil[i], zfail_op); + trace_dump_member(stream, uint, &state->stencil[i], ref_value); + trace_dump_member(stream, uint, &state->stencil[i], value_mask); + trace_dump_member(stream, uint, &state->stencil[i], write_mask); + trace_dump_struct_end(stream); + trace_dump_elem_end(stream); + } + trace_dump_array_end(stream); + trace_dump_member_end(stream); + + trace_dump_member_begin(stream, "alpha"); + trace_dump_struct_begin(stream, ""); + trace_dump_member(stream, bool, &state->alpha, enabled); + trace_dump_member(stream, uint, &state->alpha, func); + trace_dump_member(stream, float, &state->alpha, ref); + trace_dump_struct_end(stream); + trace_dump_member_end(stream); + + trace_dump_struct_end(stream); +} + + +void trace_dump_blend_state(struct trace_stream *stream, + const struct pipe_blend_state *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_blend_state"); + + trace_dump_member(stream, bool, state, blend_enable); + + trace_dump_member(stream, uint, state, rgb_func); + trace_dump_member(stream, uint, state, rgb_src_factor); + trace_dump_member(stream, uint, state, rgb_dst_factor); + + trace_dump_member(stream, uint, state, alpha_func); + trace_dump_member(stream, uint, state, alpha_src_factor); + trace_dump_member(stream, uint, state, alpha_dst_factor); + + trace_dump_member(stream, bool, state, logicop_enable); + trace_dump_member(stream, uint, state, logicop_func); + + trace_dump_member(stream, uint, state, colormask); + trace_dump_member(stream, bool, state, dither); + + trace_dump_struct_end(stream); +} + + +void trace_dump_blend_color(struct trace_stream *stream, + const struct pipe_blend_color *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_blend_color"); + + trace_dump_member_array(stream, float, state, color); + + trace_dump_struct_end(stream); +} + + +void trace_dump_framebuffer_state(struct trace_stream *stream, + const struct pipe_framebuffer_state *state) +{ + trace_dump_struct_begin(stream, "pipe_framebuffer_state"); + + trace_dump_member(stream, uint, state, width); + trace_dump_member(stream, uint, state, height); + trace_dump_member(stream, uint, state, num_cbufs); + trace_dump_member_array(stream, ptr, state, cbufs); + trace_dump_member(stream, ptr, state, zsbuf); + + trace_dump_struct_end(stream); +} + + +void trace_dump_sampler_state(struct trace_stream *stream, + const struct pipe_sampler_state *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_sampler_state"); + + trace_dump_member(stream, uint, state, wrap_s); + trace_dump_member(stream, uint, state, wrap_t); + trace_dump_member(stream, uint, state, wrap_r); + trace_dump_member(stream, uint, state, min_img_filter); + trace_dump_member(stream, uint, state, min_mip_filter); + trace_dump_member(stream, uint, state, mag_img_filter); + trace_dump_member(stream, bool, state, compare_mode); + trace_dump_member(stream, uint, state, compare_func); + trace_dump_member(stream, bool, state, normalized_coords); + trace_dump_member(stream, uint, state, prefilter); + trace_dump_member(stream, float, state, shadow_ambient); + trace_dump_member(stream, float, state, lod_bias); + trace_dump_member(stream, float, state, min_lod); + trace_dump_member(stream, float, state, max_lod); + trace_dump_member_array(stream, float, state, border_color); + trace_dump_member(stream, float, state, max_anisotropy); + + trace_dump_struct_end(stream); +} + + +void trace_dump_surface(struct trace_stream *stream, + const struct pipe_surface *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_surface"); + + trace_dump_member(stream, buffer, state, buffer); + trace_dump_member(stream, format, state, format); + trace_dump_member(stream, uint, state, status); + trace_dump_member(stream, uint, state, clear_value); + trace_dump_member(stream, uint, state, width); + trace_dump_member(stream, uint, state, height); + + trace_dump_member_begin(stream, "block"); + trace_dump_block(stream, &state->block); + trace_dump_member_end(stream); + + trace_dump_member(stream, uint, state, nblocksx); + trace_dump_member(stream, uint, state, nblocksy); + trace_dump_member(stream, uint, state, stride); + trace_dump_member(stream, uint, state, layout); + trace_dump_member(stream, uint, state, offset); + trace_dump_member(stream, uint, state, refcount); + trace_dump_member(stream, uint, state, usage); + + trace_dump_member(stream, ptr, state, texture); + trace_dump_member(stream, uint, state, face); + trace_dump_member(stream, uint, state, level); + trace_dump_member(stream, uint, state, zslice); + + trace_dump_struct_end(stream); +} + + +void trace_dump_vertex_buffer(struct trace_stream *stream, + const struct pipe_vertex_buffer *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_vertex_buffer"); + + trace_dump_member(stream, uint, state, pitch); + trace_dump_member(stream, uint, state, max_index); + trace_dump_member(stream, uint, state, buffer_offset); + trace_dump_member(stream, buffer, state, buffer); + + trace_dump_struct_end(stream); +} + + +void trace_dump_vertex_element(struct trace_stream *stream, + const struct pipe_vertex_element *state) +{ + assert(state); + if(!state) { + trace_dump_null(stream); + return; + } + + trace_dump_struct_begin(stream, "pipe_vertex_element"); + + trace_dump_member(stream, uint, state, src_offset); + + trace_dump_member(stream, uint, state, vertex_buffer_index); + trace_dump_member(stream, uint, state, nr_components); + + trace_dump_member(stream, format, state, src_format); + + trace_dump_struct_end(stream); +} diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h index e52524ee90f..c1df63db6af 100644 --- a/src/gallium/drivers/trace/tr_state.h +++ b/src/gallium/drivers/trace/tr_state.h @@ -28,11 +28,13 @@ #ifndef TR_STATE_H #define TR_STATE_H +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" -struct trace_stream; -struct pipe_format_block; -struct pipe_texture; +void trace_dump_format(struct trace_stream *stream, + enum pipe_format format); void trace_dump_block(struct trace_stream *stream, const struct pipe_format_block *block); @@ -41,4 +43,53 @@ void trace_dump_template(struct trace_stream *stream, const struct pipe_texture *templat); +void trace_dump_rasterizer_state(struct trace_stream *stream, + const struct pipe_rasterizer_state *state); + +void trace_dump_poly_stipple(struct trace_stream *stream, + const struct pipe_poly_stipple *state); + +void trace_dump_viewport_state(struct trace_stream *stream, + const struct pipe_viewport_state *state); + +void trace_dump_scissor_state(struct trace_stream *stream, + const struct pipe_scissor_state *state); + +void trace_dump_clip_state(struct trace_stream *stream, + const struct pipe_clip_state *state); + +void trace_dump_constant_buffer(struct trace_stream *stream, + const struct pipe_constant_buffer *state); + +void trace_dump_token(struct trace_stream *stream, + const struct tgsi_token *token); + +void trace_dump_shader_state(struct trace_stream *stream, + const struct pipe_shader_state *state); + +void trace_dump_depth_stencil_alpha_state(struct trace_stream *stream, + const struct pipe_depth_stencil_alpha_state *state); + +void trace_dump_blend_state(struct trace_stream *stream, + const struct pipe_blend_state *state); + +void trace_dump_blend_color(struct trace_stream *stream, + const struct pipe_blend_color *state); + +void trace_dump_framebuffer_state(struct trace_stream *stream, + const struct pipe_framebuffer_state *state); + +void trace_dump_sampler_state(struct trace_stream *stream, + const struct pipe_sampler_state *state); + +void trace_dump_surface(struct trace_stream *stream, + const struct pipe_surface *state); + +void trace_dump_vertex_buffer(struct trace_stream *stream, + const struct pipe_vertex_buffer *state); + +void trace_dump_vertex_element(struct trace_stream *stream, + const struct pipe_vertex_element *state); + + #endif /* TR_STATE_H */ diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl index ace4020cf94..194c8b2efdb 100644 --- a/src/gallium/drivers/trace/trace.xsl +++ b/src/gallium/drivers/trace/trace.xsl @@ -100,6 +100,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. </li> </xsl:template> + <xsl:template match="null"> + <span class="ptr"> + <xsl:text>NULL</xsl:text> + </span> + </xsl:template> + <xsl:template match="ptr"> <span class="ptr"> <xsl:value-of select="text()"/> -- cgit v1.2.3 From b1ff7dac537947d412bf423a73e7eacd76f90d84 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 7 Aug 2008 13:30:49 -0600 Subject: gallium: new/better debug code (disabled) --- src/gallium/drivers/softpipe/sp_setup.c | 4 ++-- src/gallium/drivers/softpipe/sp_tex_sample.c | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 4321ca46f42..b48eec730b2 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -284,9 +284,9 @@ static void print_vertex(const struct setup_context *setup, const float (*v)[4]) { int i; - debug_printf("Vertex: (%p)\n", v); + debug_printf(" Vertex: (%p)\n", v); for (i = 0; i < setup->quad.nr_attrs; i++) { - debug_printf(" %d: %f %f %f %f\n", i, + debug_printf(" %d: %f %f %f %f\n", i, v[i][0], v[i][1], v[i][2], v[i][3]); } } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 8e392695336..5d7a8cc61a4 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -619,6 +619,13 @@ get_texel(struct tgsi_sampler *sampler, rgba[1][j] = tile->data.color[ty][tx][1]; rgba[2][j] = tile->data.color[ty][tx][2]; rgba[3][j] = tile->data.color[ty][tx][3]; + if (0) + { + char fmt[100]; + pf_sprint_name( fmt, sampler->texture->format); + printf("Get texel %f %f %f %f from %s\n", + rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], fmt); + } } } -- cgit v1.2.3 From ae2195caf56d2eb782475254c68858a25ee7c857 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 7 Aug 2008 13:31:57 -0600 Subject: gallium: s/printf/debug_printf/ --- src/gallium/drivers/softpipe/sp_tex_sample.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 5d7a8cc61a4..508db5aa619 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -623,8 +623,8 @@ get_texel(struct tgsi_sampler *sampler, { char fmt[100]; pf_sprint_name( fmt, sampler->texture->format); - printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], fmt); + debug_printf("Get texel %f %f %f %f from %s\n", + rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], fmt); } } } -- cgit v1.2.3 From 8fb55dab783f2de5111e7440093e1458fce5fb3d Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 7 Aug 2008 13:35:21 -0600 Subject: gallium: s/pf_sprint_name/pf_name/ --- src/gallium/drivers/softpipe/sp_tex_sample.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 508db5aa619..01f4d0ca811 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -621,10 +621,9 @@ get_texel(struct tgsi_sampler *sampler, rgba[3][j] = tile->data.color[ty][tx][3]; if (0) { - char fmt[100]; - pf_sprint_name( fmt, sampler->texture->format); debug_printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], fmt); + rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], + pf_name(sampler->texture->format)); } } } -- cgit v1.2.3 From ce2137a6a4c2648a77b9697a3aa0479c9c61bacc Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 7 Aug 2008 19:46:39 +0100 Subject: trace: Dump format names. --- src/gallium/drivers/trace/tr_dump.c | 8 ++++++++ src/gallium/drivers/trace/tr_dump.h | 1 + src/gallium/drivers/trace/tr_screen.c | 2 +- src/gallium/drivers/trace/tr_state.c | 4 ++-- src/gallium/drivers/trace/trace.xsl | 2 +- 5 files changed, 13 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 0591b9fc336..359a903ab91 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -280,6 +280,14 @@ void trace_dump_string(struct trace_stream *stream, trace_dump_write(stream, "</string>"); } +void trace_dump_enum(struct trace_stream *stream, + const char *value) +{ + trace_dump_write(stream, "<enum>"); + trace_dump_escape(stream, value); + trace_dump_write(stream, "</enum>"); +} + void trace_dump_array_begin(struct trace_stream *stream) { trace_dump_write(stream, "<array>"); diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index 5eeac8d3c5e..7b15da30330 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -53,6 +53,7 @@ void trace_dump_int(struct trace_stream *stream, long int value); void trace_dump_uint(struct trace_stream *stream, long unsigned value); void trace_dump_float(struct trace_stream *stream, double value); void trace_dump_string(struct trace_stream *stream, const char *str); +void trace_dump_enum(struct trace_stream *stream, const char *value); void trace_dump_array_begin(struct trace_stream *stream); void trace_dump_array_end(struct trace_stream *stream); void trace_dump_elem_begin(struct trace_stream *stream); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 20a2026e1d0..3a48654609a 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -140,7 +140,7 @@ trace_screen_is_format_supported(struct pipe_screen *_screen, trace_dump_call_begin(stream, "pipe_screen", "is_format_supported"); trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, int, format); + trace_dump_arg(stream, format, format); trace_dump_arg(stream, int, target); trace_dump_arg(stream, uint, tex_usage); trace_dump_arg(stream, uint, geom_flags); diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index f18f5c611ed..f17006dd816 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -36,7 +36,7 @@ void trace_dump_format(struct trace_stream *stream, enum pipe_format format) { - trace_dump_int(stream, format); + trace_dump_enum(stream, pf_name(format) ); } @@ -80,7 +80,7 @@ void trace_dump_template(struct trace_stream *stream, trace_dump_struct_begin(stream, "pipe_texture"); trace_dump_member(stream, int, templat, target); - trace_dump_member(stream, int, templat, format); + trace_dump_member(stream, format, templat, format); trace_dump_member_begin(stream, "width"); trace_dump_array(stream, uint, templat->width, 1); diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl index 194c8b2efdb..3c84c4c35af 100644 --- a/src/gallium/drivers/trace/trace.xsl +++ b/src/gallium/drivers/trace/trace.xsl @@ -71,7 +71,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. <xsl:apply-templates /> </xsl:template> - <xsl:template match="bool|int|uint"> + <xsl:template match="bool|int|uint|enum"> <span class="lit"> <xsl:value-of select="text()"/> </span> -- cgit v1.2.3 From 05e90964cfd99bb8cad2d9146ab042730052b8a4 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 7 Aug 2008 19:58:20 +0100 Subject: trace: Merge the CSS into the XSL. --- src/gallium/drivers/trace/trace.css | 72 ------------------------------------- src/gallium/drivers/trace/trace.xsl | 58 ++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 74 deletions(-) delete mode 100644 src/gallium/drivers/trace/trace.css (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/trace.css b/src/gallium/drivers/trace/trace.css deleted file mode 100644 index d92c4337910..00000000000 --- a/src/gallium/drivers/trace/trace.css +++ /dev/null @@ -1,72 +0,0 @@ -/**************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc. - * - * This program is free software: you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ****************************************************************************/ - -body { - font-family: verdana, sans-serif; - font-size: 11px; - font-weight: normal; - text-align : left; -} - -ul.calls { - list-style: none; - margin-left: 0px; - padding-left: 0px; -} - -ul.args { - display:inline; - list-style: none; - margin-left: 0px; - padding-left: 0px; -} - -ul.args li { - display:inline; -} - -ul.elems { - list-style: none; - margin-left: 2em; - padding-left: 0px; -} - -ul.elems li { - display:block; -} - -.fun { - font-weight: bold; -} - -.var { - font-style: italic; -} - -.typ { - display: none; -} - -.lit { - color: #0000ff; -} - -.ptr { - color: #008000; -} diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl index 3c84c4c35af..5a876e0f216 100644 --- a/src/gallium/drivers/trace/trace.xsl +++ b/src/gallium/drivers/trace/trace.xsl @@ -29,8 +29,62 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. <html> <head> <title>Gallium Trace</title> - <link rel="stylesheet" type="text/css" href="trace.css"/> </head> + <style> + body { + font-family: verdana, sans-serif; + font-size: 11px; + font-weight: normal; + text-align : left; + } + + ul.calls { + list-style: none; + margin-left: 0px; + padding-left: 0px; + } + + ul.args { + display:inline; + list-style: none; + margin-left: 0px; + padding-left: 0px; + } + + ul.args li { + display:inline; + } + + ul.elems { + list-style: none; + margin-left: 2em; + padding-left: 0px; + } + + ul.elems li { + display:block; + } + + .fun { + font-weight: bold; + } + + .var { + font-style: italic; + } + + .typ { + display: none; + } + + .lit { + color: #0000ff; + } + + .ptr { + color: #008000; + } + </style> <body> <ul class="calls"> <xsl:apply-templates/> @@ -71,7 +125,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. <xsl:apply-templates /> </xsl:template> - <xsl:template match="bool|int|uint|enum"> + <xsl:template match="bool|int|uint|float|enum"> <span class="lit"> <xsl:value-of select="text()"/> </span> -- cgit v1.2.3 From 6a82ea2ed21c47749676e765311baed97f5dea9f Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 7 Aug 2008 20:57:03 +0100 Subject: trace: Simplify HTML output. --- src/gallium/drivers/trace/trace.xsl | 47 ++++++------------------------------- 1 file changed, 7 insertions(+), 40 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl index 5a876e0f216..13039d44c42 100644 --- a/src/gallium/drivers/trace/trace.xsl +++ b/src/gallium/drivers/trace/trace.xsl @@ -38,33 +38,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. text-align : left; } - ul.calls { - list-style: none; - margin-left: 0px; - padding-left: 0px; - } - - ul.args { - display:inline; - list-style: none; - margin-left: 0px; - padding-left: 0px; - } - - ul.args li { - display:inline; - } - - ul.elems { - list-style: none; - margin-left: 2em; - padding-left: 0px; - } - - ul.elems li { - display:block; - } - .fun { font-weight: bold; } @@ -86,9 +59,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. } </style> <body> - <ul class="calls"> + <ol class="calls"> <xsl:apply-templates/> - </ul> + </ol> </body> </html> </xsl:template> @@ -101,23 +74,19 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. <xsl:value-of select="@method"/> </span> <xsl:text>(</xsl:text> - <ul class="args"> - <xsl:apply-templates select="arg"/> - </ul> + <xsl:apply-templates select="arg"/> <xsl:text>)</xsl:text> <xsl:apply-templates select="ret"/> </li> </xsl:template> <xsl:template match="arg|member"> - <li> <xsl:apply-templates select="@name"/> <xsl:text> = </xsl:text> <xsl:apply-templates /> <xsl:if test="position() != last()"> <xsl:text>, </xsl:text> </xsl:if> - </li> </xsl:template> <xsl:template match="ret"> @@ -146,12 +115,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. </xsl:template> <xsl:template match="elem"> - <li> - <xsl:apply-templates /> - <xsl:if test="position() != last()"> - <xsl:text>, </xsl:text> - </xsl:if> - </li> + <xsl:apply-templates /> + <xsl:if test="position() != last()"> + <xsl:text>, </xsl:text> + </xsl:if> </xsl:template> <xsl:template match="null"> -- cgit v1.2.3 From fdb7dc889f4251183915c811566ced083fdac40d Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 7 Aug 2008 15:05:13 -0600 Subject: softpipe: add support for PIPE_FORMAT_X8Z24_UNORM, PIPE_FORMAT_Z24X8_UNORM --- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 14 ++++++++++++++ src/gallium/drivers/softpipe/sp_tile_cache.c | 2 ++ 2 files changed, 16 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 33888abcc5e..0c82692c6ee 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -104,6 +104,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) } } break; + case PIPE_FORMAT_X8Z24_UNORM: + /* fall-through */ case PIPE_FORMAT_S8Z24_UNORM: { float scale = (float) ((1 << 24) - 1); @@ -119,6 +121,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) } } break; + case PIPE_FORMAT_Z24X8_UNORM: + /* fall-through */ case PIPE_FORMAT_Z24S8_UNORM: { float scale = (float) ((1 << 24) - 1); @@ -209,6 +213,9 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) tile->data.depth16[y][x] = (ushort) bzzzz[j]; } break; + case PIPE_FORMAT_X8Z24_UNORM: + /* fall-through */ + /* (yes, this falls through to a different case than above) */ case PIPE_FORMAT_Z32_UNORM: for (j = 0; j < QUAD_SIZE; j++) { int x = quad->x0 % TILE_SIZE + (j & 1); @@ -234,6 +241,13 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) tile->data.depth32[y][x] = z24s8; } break; + case PIPE_FORMAT_Z24X8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->x0 % TILE_SIZE + (j & 1); + int y = quad->y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = bzzzz[j] << 8; + } + break; default: assert(0); } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 5d10234945f..57c12ffe335 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -170,7 +170,9 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, PIPE_BUFFER_USAGE_CPU_WRITE); tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || + ps->format == PIPE_FORMAT_X8Z24_UNORM || ps->format == PIPE_FORMAT_Z24S8_UNORM || + ps->format == PIPE_FORMAT_Z24X8_UNORM || ps->format == PIPE_FORMAT_Z16_UNORM || ps->format == PIPE_FORMAT_Z32_UNORM || ps->format == PIPE_FORMAT_S8_UNORM); -- cgit v1.2.3 From be36f7869e8ecc4b00e414557a9699ba373e6bdd Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 7 Aug 2008 23:32:45 +0100 Subject: i965: Remove extraneous arg to debug_printf. --- src/gallium/drivers/i965simple/brw_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index f00eb34f92b..3079485180b 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -188,7 +188,7 @@ extern int BRW_DEBUG; } while(0) #define PRINT(...) do { \ - debug_printf(brw->pipe.winsys, __VA_ARGS__); \ + debug_printf(__VA_ARGS__); \ } while(0) struct brw_state_flags { -- cgit v1.2.3 From 9dcb956a0618931c97693f7c74493cf296cfe86c Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 8 Aug 2008 12:23:21 +0100 Subject: gallium: Add destroy callback to all *_winsys interfaces. For consistency and to simplify these objects' destruction. --- src/gallium/drivers/cell/ppu/cell_screen.c | 5 +++++ src/gallium/drivers/i915simple/i915_context.c | 3 +++ src/gallium/drivers/i915simple/i915_screen.c | 5 +++++ src/gallium/drivers/i915simple/i915_winsys.h | 2 ++ src/gallium/drivers/i965simple/brw_context.c | 3 +++ src/gallium/drivers/i965simple/brw_screen.c | 5 +++++ src/gallium/drivers/i965simple/brw_winsys.h | 2 ++ src/gallium/drivers/softpipe/sp_screen.c | 5 +++++ src/gallium/include/pipe/p_winsys.h | 2 ++ 9 files changed, 32 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index cf9b68b695f..2bf441a0c5f 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -132,6 +132,11 @@ cell_is_format_supported( struct pipe_screen *screen, static void cell_destroy_screen( struct pipe_screen *screen ) { + struct pipe_winsys *winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + FREE(screen); } diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 4c01b8d5b17..e3d19017b5d 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -44,6 +44,9 @@ static void i915_destroy( struct pipe_context *pipe ) struct i915_context *i915 = i915_context( pipe ); draw_destroy( i915->draw ); + + if(i915->winsys) + i915->winsys->destroy(i915->winsys); FREE( i915 ); } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 4b1b8af7da8..0afa17bed85 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -193,6 +193,11 @@ i915_is_format_supported( struct pipe_screen *screen, static void i915_destroy_screen( struct pipe_screen *screen ) { + struct pipe_winsys *winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + FREE(screen); } diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h index 9afaa16a62c..81904c2a742 100644 --- a/src/gallium/drivers/i915simple/i915_winsys.h +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -75,6 +75,8 @@ struct pipe_screen; */ struct i915_winsys { + void (*destroy)( struct i915_winsys *sws ); + /** * Get the current batch buffer from the winsys. */ diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index a276cc0535d..8326f7b9c40 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -52,6 +52,9 @@ static void brw_destroy(struct pipe_context *pipe) { struct brw_context *brw = brw_context(pipe); + if(brw->winsys->destroy) + brw->winsys->destroy(brw->winsys); + FREE(brw); } diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index 6d8f24d1c49..fadfbf94ab3 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -206,6 +206,11 @@ brw_is_format_supported( struct pipe_screen *screen, static void brw_destroy_screen( struct pipe_screen *screen ) { + struct pipe_winsys *winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + FREE(screen); } diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h index b67bd737502..ec1e400418f 100644 --- a/src/gallium/drivers/i965simple/brw_winsys.h +++ b/src/gallium/drivers/i965simple/brw_winsys.h @@ -112,6 +112,8 @@ enum brw_cache_id { */ struct brw_winsys { + void (*destroy)(struct brw_winsys *); + /** * Reserve space on batch buffer. * diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index ceb5616b5d0..f6b3d7ac24f 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -139,6 +139,11 @@ softpipe_is_format_supported( struct pipe_screen *screen, static void softpipe_destroy_screen( struct pipe_screen *screen ) { + struct pipe_winsys *winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + FREE(screen); } diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h index 7ebc2851928..5d18291dc6c 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/p_winsys.h @@ -62,6 +62,8 @@ struct pipe_surface; */ struct pipe_winsys { + void (*destroy)( struct pipe_winsys *ws ); + /** Returns name of this winsys interface */ const char *(*get_name)( struct pipe_winsys *ws ); -- cgit v1.2.3 From 74d649d9a9e85be6dfec170b018b31626d40914b Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 8 Aug 2008 12:31:23 +0100 Subject: trace: Prevent from internal calls from pipe_context to pipe_screen from being traced. --- src/gallium/drivers/trace/tr_context.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index ee8ad5eb97c..242a03ccb04 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -999,11 +999,14 @@ trace_context_destroy(struct pipe_context *_pipe) struct pipe_context * trace_context_create(struct pipe_context *pipe) { + struct trace_screen *tr_scr; struct trace_context *tr_ctx; if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) return pipe; + tr_scr = trace_screen(pipe->screen); + tr_ctx = CALLOC_STRUCT(trace_context); if(!tr_ctx) return NULL; @@ -1055,5 +1058,8 @@ trace_context_create(struct pipe_context *pipe) tr_ctx->pipe = pipe; + /* We don't want to trace the pipe calls */ + pipe->screen = tr_scr->screen; + return &tr_ctx->base; } -- cgit v1.2.3 From fcfe63805d1a4b1815dec7d85a21772d02ac12b8 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 8 Aug 2008 22:13:13 +0100 Subject: trace: Remove unused code. --- src/gallium/drivers/trace/tr_screen.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h index 5d7d667f719..40b844778fe 100644 --- a/src/gallium/drivers/trace/tr_screen.h +++ b/src/gallium/drivers/trace/tr_screen.h @@ -44,8 +44,6 @@ struct trace_screen struct pipe_screen *screen; struct trace_stream *stream; - - unsigned event_no; }; -- cgit v1.2.3 From 6c7aff209ca3b310008dd345836ebc020d2db004 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 8 Aug 2008 23:11:56 +0100 Subject: trace: Trace pipe_winsys calls. --- src/gallium/drivers/trace/SConscript | 1 + src/gallium/drivers/trace/tr_screen.c | 9 +- src/gallium/drivers/trace/tr_winsys.c | 410 ++++++++++++++++++++++++++++++++++ src/gallium/drivers/trace/tr_winsys.h | 63 ++++++ src/gallium/winsys/xlib/xm_winsys.c | 7 +- 5 files changed, 482 insertions(+), 8 deletions(-) create mode 100644 src/gallium/drivers/trace/tr_winsys.c create mode 100644 src/gallium/drivers/trace/tr_winsys.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript index 30225b5a54c..35507e21e4c 100644 --- a/src/gallium/drivers/trace/SConscript +++ b/src/gallium/drivers/trace/SConscript @@ -10,6 +10,7 @@ trace = env.ConvenienceLibrary( 'tr_screen.c', 'tr_state.c', 'tr_stream.c', + 'tr_winsys.c', ]) Export('trace') \ No newline at end of file diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 3a48654609a..b40d56bcff3 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -30,6 +30,7 @@ #include "tr_stream.h" #include "tr_dump.h" #include "tr_state.h" +#include "tr_winsys.h" #include "tr_screen.h" @@ -338,10 +339,6 @@ trace_screen_destroy(struct pipe_screen *_screen) trace_dump_call_end(stream); - trace_dump_trace_end(stream); - - trace_stream_close(tr_scr->stream); - FREE(tr_scr); } @@ -375,11 +372,9 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->screen = screen; - tr_scr->stream = trace_stream_create("gallium", "trace"); + tr_scr->stream = trace_winsys(screen->winsys)->stream; if(!tr_scr->stream) return NULL; - trace_dump_trace_begin(tr_scr->stream, 0); - return &tr_scr->base; } diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c new file mode 100644 index 00000000000..f1ef7cbebb6 --- /dev/null +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -0,0 +1,410 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" + +#include "tr_stream.h" +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_winsys.h" + + +static const char * +trace_winsys_get_name(struct pipe_winsys *_winsys) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + const char *result; + + trace_dump_call_begin(stream, "pipe_winsys", "get_name"); + + trace_dump_arg(stream, ptr, winsys); + + result = winsys->get_name(winsys); + + trace_dump_ret(stream, string, result); + + trace_dump_call_end(stream); + + return result; +} + + +static void +trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys, + struct pipe_surface *surface, + void *context_private) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + + trace_dump_call_begin(stream, "pipe_winsys", "flush_frontbuffer"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, surface); + trace_dump_arg(stream, ptr, context_private); + + winsys->flush_frontbuffer(winsys, surface, context_private); + + trace_dump_call_end(stream); +} + + +static struct pipe_surface * +trace_winsys_surface_alloc(struct pipe_winsys *_winsys) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_surface *result; + + trace_dump_call_begin(stream, "pipe_winsys", "surface_alloc"); + + trace_dump_arg(stream, ptr, winsys); + + result = winsys->surface_alloc(winsys); + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static int +trace_winsys_surface_alloc_storage(struct pipe_winsys *_winsys, + struct pipe_surface *surface, + unsigned width, unsigned height, + enum pipe_format format, + unsigned flags, + unsigned tex_usage) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + int result; + + trace_dump_call_begin(stream, "pipe_winsys", "surface_alloc_storage"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, surface); + trace_dump_arg(stream, uint, width); + trace_dump_arg(stream, uint, height); + trace_dump_arg(stream, format, format); + trace_dump_arg(stream, uint, flags); + trace_dump_arg(stream, uint, tex_usage); + + result = winsys->surface_alloc_storage(winsys, + surface, + width, height, + format, + flags, + tex_usage); + + trace_dump_ret(stream, int, result); + + trace_dump_call_end(stream); + + return result; +} + + +static void +trace_winsys_surface_release(struct pipe_winsys *_winsys, + struct pipe_surface **psurface) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_surface *surface = *psurface; + + trace_dump_call_begin(stream, "pipe_winsys", "surface_release"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, surface); + + winsys->surface_release(winsys, psurface); + + trace_dump_call_end(stream); +} + + +static struct pipe_buffer * +trace_winsys_buffer_create(struct pipe_winsys *_winsys, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_buffer *result; + + trace_dump_call_begin(stream, "pipe_winsys", "buffer_create"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, uint, alignment); + trace_dump_arg(stream, uint, usage); + trace_dump_arg(stream, uint, size); + + result = winsys->buffer_create(winsys, alignment, usage, size); + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static struct pipe_buffer * +trace_winsys_user_buffer_create(struct pipe_winsys *_winsys, + void *ptr, + unsigned bytes) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_buffer *result; + + trace_dump_call_begin(stream, "pipe_winsys", "user_buffer_create"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, ptr); + trace_dump_arg(stream, uint, bytes); + + result = winsys->user_buffer_create(winsys, ptr, bytes); + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static void * +trace_winsys_buffer_map(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer, + unsigned usage) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + void *result; + + trace_dump_call_begin(stream, "pipe_winsys", "buffer_map"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, buffer); + trace_dump_arg(stream, uint, usage); + + result = winsys->buffer_map(winsys, buffer, usage); + + trace_dump_ret(stream, ptr, result); + + trace_dump_call_end(stream); + + return result; +} + + +static void +trace_winsys_buffer_unmap(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + + trace_dump_call_begin(stream, "pipe_winsys", "buffer_unmap"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, buffer); + + winsys->buffer_unmap(winsys, buffer); + + trace_dump_call_end(stream); +} + + +static void +trace_winsys_buffer_destroy(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + + trace_dump_call_begin(stream, "pipe_winsys", "buffer_destroy"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, buffer); + + winsys->buffer_destroy(winsys, buffer); + + trace_dump_call_end(stream); +} + + +static void +trace_winsys_fence_reference(struct pipe_winsys *_winsys, + struct pipe_fence_handle **pdst, + struct pipe_fence_handle *src) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_fence_handle *dst = *pdst; + + trace_dump_call_begin(stream, "pipe_winsys", "fence_reference"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, dst); + trace_dump_arg(stream, ptr, src); + + winsys->fence_reference(winsys, pdst, src); + + trace_dump_call_end(stream); +} + + +static int +trace_winsys_fence_signalled(struct pipe_winsys *_winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + int result; + + trace_dump_call_begin(stream, "pipe_winsys", "fence_signalled"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, fence); + trace_dump_arg(stream, uint, flag); + + result = winsys->fence_signalled(winsys, fence, flag); + + trace_dump_ret(stream, int, result); + + trace_dump_call_end(stream); + + return result; +} + + +static int +trace_winsys_fence_finish(struct pipe_winsys *_winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + int result; + + trace_dump_call_begin(stream, "pipe_winsys", "fence_finish"); + + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, fence); + trace_dump_arg(stream, uint, flag); + + result = winsys->fence_finish(winsys, fence, flag); + + trace_dump_ret(stream, int, result); + + trace_dump_call_end(stream); + + return result; +} + + +static void +trace_winsys_destroy(struct pipe_winsys *_winsys) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct trace_stream *stream = tr_ws->stream; + struct pipe_winsys *winsys = tr_ws->winsys; + + trace_dump_call_begin(stream, "pipe_winsys", "destroy"); + + trace_dump_arg(stream, ptr, winsys); + + winsys->destroy(winsys); + + trace_dump_call_end(stream); + + trace_dump_trace_end(stream); + + trace_stream_close(tr_ws->stream); + + FREE(tr_ws); +} + + +struct pipe_winsys * +trace_winsys_create(struct pipe_winsys *winsys) +{ + struct trace_winsys *tr_ws; + + if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) + return winsys; + + tr_ws = CALLOC_STRUCT(trace_winsys); + if(!tr_ws) + return NULL; + + tr_ws->base.destroy = trace_winsys_destroy; + tr_ws->base.get_name = trace_winsys_get_name; + tr_ws->base.flush_frontbuffer = trace_winsys_flush_frontbuffer; + tr_ws->base.surface_alloc = trace_winsys_surface_alloc; + tr_ws->base.surface_alloc_storage = trace_winsys_surface_alloc_storage; + tr_ws->base.surface_release = trace_winsys_surface_release; + tr_ws->base.buffer_create = trace_winsys_buffer_create; + tr_ws->base.user_buffer_create = trace_winsys_user_buffer_create; + tr_ws->base.buffer_map = trace_winsys_buffer_map; + tr_ws->base.buffer_unmap = trace_winsys_buffer_unmap; + tr_ws->base.buffer_destroy = trace_winsys_buffer_destroy; + tr_ws->base.fence_reference = trace_winsys_fence_reference; + tr_ws->base.fence_signalled = trace_winsys_fence_signalled; + tr_ws->base.fence_finish = trace_winsys_fence_finish; + + tr_ws->winsys = winsys; + + tr_ws->stream = trace_stream_create("gallium", "trace"); + if(!tr_ws->stream) + return NULL; + + trace_dump_trace_begin(tr_ws->stream, 0); + + return &tr_ws->base; +} diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h new file mode 100644 index 00000000000..353b0ea7b6b --- /dev/null +++ b/src/gallium/drivers/trace/tr_winsys.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_WINSYS_H_ +#define TR_WINSYS_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" + + +struct trace_stream; + + +struct trace_winsys +{ + struct pipe_winsys base; + + struct pipe_winsys *winsys; + + struct trace_stream *stream; +}; + + +static INLINE struct trace_winsys * +trace_winsys(struct pipe_winsys *winsys) +{ + assert(winsys); + return (struct trace_winsys *)winsys; +} + + + +struct pipe_winsys * +trace_winsys_create(struct pipe_winsys *winsys); + + +#endif /* TR_WINSYS_H_ */ diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 5a01b6167ba..6071a5ad5e4 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -55,6 +55,7 @@ #endif #ifdef GALLIUM_TRACE +#include "trace/tr_winsys.h" #include "trace/tr_screen.h" #include "trace/tr_context.h" #endif @@ -650,7 +651,11 @@ xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis) ws->base.get_name = xm_get_name; } - return &ws->base; +#ifdef GALLIUM_TRACE + return trace_winsys_create(&ws->base); +#else + return &ws->base; +#endif } -- cgit v1.2.3 From b65259de6c0a2e77550bbef6b291c6d09dfb5867 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 8 Aug 2008 23:53:53 +0100 Subject: trace: Allow to dump binary data. --- src/gallium/drivers/trace/tr_dump.c | 19 ++++++++++++++++++ src/gallium/drivers/trace/tr_dump.h | 1 + src/gallium/drivers/trace/tr_state.c | 38 +++++++++++------------------------- src/gallium/drivers/trace/trace.xsl | 6 ++++++ 4 files changed, 37 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 359a903ab91..f545de30f4f 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -272,6 +272,25 @@ void trace_dump_float(struct trace_stream *stream, trace_dump_writef(stream, "<float>%g</float>", value); } +void trace_dump_bytes(struct trace_stream *stream, + const void *data, + long unsigned size) +{ + static char hex_table[] = "0123456789ABCDE"; + const uint8_t *p = data; + long unsigned i; + trace_dump_write(stream, "<bytes>"); + for(i = 0; i < size; ++i) { + uint8_t byte = *p++; + char str[3]; + str[0] = hex_table[byte >> 4]; + str[1] = hex_table[byte & 0xf]; + str[2] = 0; + trace_dump_write(stream, str); + } + trace_dump_write(stream, "</bytes>"); +} + void trace_dump_string(struct trace_stream *stream, const char *str) { diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index 7b15da30330..b2367c3288a 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -52,6 +52,7 @@ void trace_dump_bool(struct trace_stream *stream, int value); void trace_dump_int(struct trace_stream *stream, long int value); void trace_dump_uint(struct trace_stream *stream, long unsigned value); void trace_dump_float(struct trace_stream *stream, double value); +void trace_dump_bytes(struct trace_stream *stream, const void *data, long unsigned size); void trace_dump_string(struct trace_stream *stream, const char *str); void trace_dump_enum(struct trace_stream *stream, const char *value); void trace_dump_array_begin(struct trace_stream *stream); diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index f17006dd816..e0e48185fac 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -51,23 +51,6 @@ void trace_dump_block(struct trace_stream *stream, } -void trace_dump_buffer(struct trace_stream *stream, - const struct pipe_buffer *buffer) -{ - if(!buffer) { - trace_dump_null(stream); - return; - } - - trace_dump_struct_begin(stream, "pipe_buffer"); - trace_dump_member(stream, uint, buffer, alignment); - trace_dump_member(stream, uint, buffer, usage); - trace_dump_member(stream, uint, buffer, size); - /* TODO: buffer data */ - trace_dump_struct_end(stream); -} - - void trace_dump_template(struct trace_stream *stream, const struct pipe_texture *templat) { @@ -162,7 +145,11 @@ void trace_dump_poly_stipple(struct trace_stream *stream, trace_dump_struct_begin(stream, "pipe_poly_stipple"); - trace_dump_member_array(stream, uint, state, stipple); + trace_dump_member_begin(stream, "stipple"); + trace_dump_bytes(stream, + state->stipple, + sizeof(state->stipple)); + trace_dump_member_end(stream); trace_dump_struct_end(stream); } @@ -243,7 +230,7 @@ void trace_dump_constant_buffer(struct trace_stream *stream, trace_dump_struct_begin(stream, "pipe_constant_buffer"); - trace_dump_member(stream, buffer, state, buffer); + trace_dump_member(stream, ptr, state, buffer); trace_dump_member(stream, uint, state, size); trace_dump_struct_end(stream); @@ -253,21 +240,18 @@ void trace_dump_constant_buffer(struct trace_stream *stream, void trace_dump_shader_state(struct trace_stream *stream, const struct pipe_shader_state *state) { - uint32_t *p = (uint32_t *)state->tokens; - unsigned n = tgsi_num_tokens(state->tokens); - assert(state); if(!state) { trace_dump_null(stream); return; } - assert(sizeof(struct tgsi_token) == 4); - trace_dump_struct_begin(stream, "pipe_shader_state"); trace_dump_member_begin(stream, "tokens"); - trace_dump_array(stream, uint, p, n); + trace_dump_bytes(stream, + state->tokens, + sizeof(struct tgsi_token) * tgsi_num_tokens(state->tokens)); trace_dump_member_end(stream); trace_dump_struct_end(stream); @@ -433,7 +417,7 @@ void trace_dump_surface(struct trace_stream *stream, trace_dump_struct_begin(stream, "pipe_surface"); - trace_dump_member(stream, buffer, state, buffer); + trace_dump_member(stream, ptr, state, buffer); trace_dump_member(stream, format, state, format); trace_dump_member(stream, uint, state, status); trace_dump_member(stream, uint, state, clear_value); @@ -475,7 +459,7 @@ void trace_dump_vertex_buffer(struct trace_stream *stream, trace_dump_member(stream, uint, state, pitch); trace_dump_member(stream, uint, state, max_index); trace_dump_member(stream, uint, state, buffer_offset); - trace_dump_member(stream, buffer, state, buffer); + trace_dump_member(stream, ptr, state, buffer); trace_dump_struct_end(stream); } diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl index 13039d44c42..991e5bf9350 100644 --- a/src/gallium/drivers/trace/trace.xsl +++ b/src/gallium/drivers/trace/trace.xsl @@ -100,6 +100,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. </span> </xsl:template> + <xsl:template match="bytes"> + <span class="lit"> + <xsl:text>...</xsl:text> + </span> + </xsl:template> + <xsl:template match="string"> <span class="lit"> <xsl:text>"</xsl:text> -- cgit v1.2.3 From 696067e781977ad54bb31b3843355701124f1b22 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 9 Aug 2008 11:52:53 +0100 Subject: trace: Dump shaders as text. --- src/gallium/drivers/trace/tr_state.c | 9 ++++---- src/gallium/drivers/trace/trace.xsl | 40 +++++++++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index e0e48185fac..9ffe77146d9 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -27,7 +27,7 @@ #include "pipe/p_compiler.h" -#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" #include "tr_dump.h" #include "tr_state.h" @@ -240,18 +240,19 @@ void trace_dump_constant_buffer(struct trace_stream *stream, void trace_dump_shader_state(struct trace_stream *stream, const struct pipe_shader_state *state) { + static char str[8192]; assert(state); if(!state) { trace_dump_null(stream); return; } + tgsi_dump_str(state->tokens, 0, str, sizeof(str)); + trace_dump_struct_begin(stream, "pipe_shader_state"); trace_dump_member_begin(stream, "tokens"); - trace_dump_bytes(stream, - state->tokens, - sizeof(struct tgsi_token) * tgsi_num_tokens(state->tokens)); + trace_dump_string(stream, str); trace_dump_member_end(stream); trace_dump_struct_end(stream); diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl index 991e5bf9350..9cd621e7ab9 100644 --- a/src/gallium/drivers/trace/trace.xsl +++ b/src/gallium/drivers/trace/trace.xsl @@ -109,7 +109,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. <xsl:template match="string"> <span class="lit"> <xsl:text>"</xsl:text> - <xsl:value-of select="text()"/> + <xsl:call-template name="break"> + <xsl:with-param name="text" select="text()"/> + </xsl:call-template> <xsl:text>"</xsl:text> </span> </xsl:template> @@ -144,4 +146,40 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. <xsl:value-of select="."/> </span> </xsl:template> + + <xsl:template name="break"> + <xsl:param name="text" select="."/> + <xsl:choose> + <xsl:when test="contains($text, '
')"> + <xsl:value-of select="substring-before($text, '
')"/> + <br/> + <xsl:call-template name="break"> + <xsl:with-param name="text" select="substring-after($text, '
')"/> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="$text"/> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <xsl:template name="replace"> + <xsl:param name="text"/> + <xsl:param name="from"/> + <xsl:param name="to"/> + <xsl:choose> + <xsl:when test="contains($text,$from)"> + <xsl:value-of select="concat(substring-before($text,$from),$to)"/> + <xsl:call-template name="replace"> + <xsl:with-param name="text" select="substring-after($text,$from)"/> + <xsl:with-param name="from" select="$from"/> + <xsl:with-param name="to" select="$to"/> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="$text"/> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + </xsl:transform> -- cgit v1.2.3 From 376f2cbb190389807c8ba6df401e06743ead9eb8 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 9 Aug 2008 11:53:56 +0100 Subject: trace: Prevent tracing internal pipe driver calls. --- src/gallium/drivers/trace/tr_context.c | 9 ++++----- src/gallium/drivers/trace/tr_screen.c | 3 +++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 242a03ccb04..47a217ec7c7 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -30,6 +30,7 @@ #include "tr_stream.h" #include "tr_dump.h" #include "tr_state.h" +#include "tr_winsys.h" #include "tr_screen.h" #include "tr_context.h" @@ -999,14 +1000,11 @@ trace_context_destroy(struct pipe_context *_pipe) struct pipe_context * trace_context_create(struct pipe_context *pipe) { - struct trace_screen *tr_scr; struct trace_context *tr_ctx; if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) return pipe; - tr_scr = trace_screen(pipe->screen); - tr_ctx = CALLOC_STRUCT(trace_context); if(!tr_ctx) return NULL; @@ -1058,8 +1056,9 @@ trace_context_create(struct pipe_context *pipe) tr_ctx->pipe = pipe; - /* We don't want to trace the pipe calls */ - pipe->screen = tr_scr->screen; + /* We don't want to trace the internal pipe calls */ + pipe->winsys = trace_winsys(pipe->winsys)->winsys; + pipe->screen = trace_screen(pipe->screen)->screen; return &tr_ctx->base; } diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index b40d56bcff3..de885abae2c 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -376,5 +376,8 @@ trace_screen_create(struct pipe_screen *screen) if(!tr_scr->stream) return NULL; + /* We don't want to trace the internal pipe calls */ + screen->winsys = trace_winsys(screen->winsys)->winsys; + return &tr_scr->base; } -- cgit v1.2.3 From 5549d35db5323829702099af6e53a8dd7c451524 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 9 Aug 2008 11:54:35 +0100 Subject: trace: Dump writes to pipe_buffers. --- src/gallium/drivers/trace/tr_winsys.c | 54 ++++++++++++++++++++++++++++++++++- src/gallium/drivers/trace/tr_winsys.h | 3 ++ 2 files changed, 56 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index f1ef7cbebb6..964da5677b2 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "pipe/p_util.h" +#include "util/u_hash_table.h" #include "tr_stream.h" #include "tr_dump.h" @@ -33,6 +34,18 @@ #include "tr_winsys.h" +static unsigned trace_buffer_hash(void *buffer) +{ + return (unsigned)(uintptr_t)buffer; +} + + +static int trace_buffer_compare(void *buffer1, void *buffer2) +{ + return (char *)buffer2 - (char *)buffer1; +} + + static const char * trace_winsys_get_name(struct pipe_winsys *_winsys) { @@ -232,6 +245,13 @@ trace_winsys_buffer_map(struct pipe_winsys *_winsys, trace_dump_call_end(stream); + if(result) { + if(usage & PIPE_BUFFER_USAGE_CPU_WRITE) { + assert(!hash_table_get(tr_ws->buffer_maps, buffer)); + hash_table_set(tr_ws->buffer_maps, buffer, result); + } + } + return result; } @@ -243,6 +263,30 @@ trace_winsys_buffer_unmap(struct pipe_winsys *_winsys, struct trace_winsys *tr_ws = trace_winsys(_winsys); struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; + const void *map; + + map = hash_table_get(tr_ws->buffer_maps, buffer); + if(map) { + trace_dump_call_begin(stream, "", "memcpy"); + + trace_dump_arg_begin(stream, "dst"); + trace_dump_ptr(stream, map); + trace_dump_arg_end(stream); + + trace_dump_arg_begin(stream, "src"); + trace_dump_bytes(stream, map, buffer->size); + trace_dump_arg_end(stream); + + trace_dump_arg_begin(stream, "size"); + trace_dump_uint(stream, buffer->size); + trace_dump_arg_end(stream); + + trace_dump_call_end(stream); + + winsys->buffer_unmap(winsys, buffer); + + hash_table_remove(tr_ws->buffer_maps, buffer); + } trace_dump_call_begin(stream, "pipe_winsys", "buffer_unmap"); @@ -365,6 +409,8 @@ trace_winsys_destroy(struct pipe_winsys *_winsys) trace_dump_trace_end(stream); + hash_table_destroy(tr_ws->buffer_maps); + trace_stream_close(tr_ws->stream); FREE(tr_ws); @@ -399,11 +445,17 @@ trace_winsys_create(struct pipe_winsys *winsys) tr_ws->base.fence_finish = trace_winsys_fence_finish; tr_ws->winsys = winsys; - + tr_ws->stream = trace_stream_create("gallium", "trace"); if(!tr_ws->stream) return NULL; + tr_ws->buffer_maps = hash_table_create(trace_buffer_hash, + trace_buffer_compare); + if(!tr_ws->buffer_maps) + return NULL; + + trace_dump_trace_begin(tr_ws->stream, 0); return &tr_ws->base; diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h index 353b0ea7b6b..a3576da867e 100644 --- a/src/gallium/drivers/trace/tr_winsys.h +++ b/src/gallium/drivers/trace/tr_winsys.h @@ -34,6 +34,7 @@ #include "pipe/p_winsys.h" +struct hash_table; struct trace_stream; @@ -44,6 +45,8 @@ struct trace_winsys struct pipe_winsys *winsys; struct trace_stream *stream; + + struct hash_table *buffer_maps; }; -- cgit v1.2.3 From df4228deddea36b9d5b41ea395a216137e046205 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Mon, 11 Aug 2008 16:14:42 +1000 Subject: nouveau: pf_sprint_name -> pf_name --- src/gallium/drivers/nv04/nv04_fragtex.c | 4 +--- src/gallium/drivers/nv30/nv30_fragtex.c | 3 +-- src/gallium/drivers/nv30/nv30_vbo.c | 8 ++------ src/gallium/drivers/nv40/nv40_fragtex.c | 4 +--- src/gallium/drivers/nv40/nv40_vbo.c | 8 ++------ src/gallium/drivers/nv50/nv50_state_validate.c | 14 ++++---------- src/gallium/drivers/nv50/nv50_vbo.c | 5 ++--- 7 files changed, 13 insertions(+), 33 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c index 3db673cd2c1..1b866aae199 100644 --- a/src/gallium/drivers/nv04/nv04_fragtex.c +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -51,7 +51,6 @@ static uint32_t nv04_fragtex_format(uint pipe_format) { struct nv04_texture_format *tf = nv04_texture_formats; - char fs[128]; int i; for (i=0; i< sizeof(nv04_texture_formats)/sizeof(nv04_texture_formats[0]); i++) { @@ -60,8 +59,7 @@ nv04_fragtex_format(uint pipe_format) tf++; } - pf_sprint_name(fs, pipe_format); - NOUVEAU_ERR("unknown texture format %s\n", fs); + NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); return 0; } diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 4242f86f76f..91246f5ca0a 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -75,8 +75,7 @@ nv30_fragtex_format(uint pipe_format) tf++; } - pf_sprint_name(fs, pipe_format); - NOUVEAU_ERR("unknown texture format %s\n", fs); + NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); return NULL; } diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index d930557f6bb..b1c73793bf2 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -14,8 +14,6 @@ static INLINE int nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) { - char fs[128]; - switch (pipe) { case PIPE_FORMAT_R32_FLOAT: case PIPE_FORMAT_R32G32_FLOAT: @@ -36,8 +34,7 @@ nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) *fmt = NV34TCL_VTXFMT_TYPE_USHORT; break; default: - pf_sprint_name(fs, pipe); - NOUVEAU_ERR("Unknown format %s\n", fs); + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); return 1; } @@ -63,8 +60,7 @@ nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) *ncomp = 4; break; default: - pf_sprint_name(fs, pipe); - NOUVEAU_ERR("Unknown format %s\n", fs); + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); return 1; } diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 2d45c2545cd..566d5a8d5ba 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -45,7 +45,6 @@ static struct nv40_texture_format * nv40_fragtex_format(uint pipe_format) { struct nv40_texture_format *tf = nv40_texture_formats; - char fs[128]; while (tf->defined) { if (tf->pipe == pipe_format) @@ -53,8 +52,7 @@ nv40_fragtex_format(uint pipe_format) tf++; } - pf_sprint_name(fs, pipe_format); - NOUVEAU_ERR("unknown texture format %s\n", fs); + NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); return NULL; } diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 93669e6192f..755d5586b7b 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -14,8 +14,6 @@ static INLINE int nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) { - char fs[128]; - switch (pipe) { case PIPE_FORMAT_R32_FLOAT: case PIPE_FORMAT_R32G32_FLOAT: @@ -36,8 +34,7 @@ nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) *fmt = NV40TCL_VTXFMT_TYPE_USHORT; break; default: - pf_sprint_name(fs, pipe); - NOUVEAU_ERR("Unknown format %s\n", fs); + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); return 1; } @@ -63,8 +60,7 @@ nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) *ncomp = 4; break; default: - pf_sprint_name(fs, pipe); - NOUVEAU_ERR("Unknown format %s\n", fs); + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); return 1; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 8f9ee05acc8..198e25f4482 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -58,11 +58,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, 0xe8); break; default: - { - char fmt[128]; - pf_sprint_name(fmt, fb->cbufs[i]->format); - NOUVEAU_ERR("AIIII unknown format %s\n", fmt); - } + NOUVEAU_ERR("AIIII unknown format %s\n", + pf_name(fb->cbufs[i]->format)); so_data(so, 0xe6); break; } @@ -96,11 +93,8 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data(so, 0x15); break; default: - { - char fmt[128]; - pf_sprint_name(fmt, fb->zsbuf->format); - NOUVEAU_ERR("AIIII unknown format %s\n", fmt); - } + NOUVEAU_ERR("AIIII unknown format %s\n", + pf_name(fb->zsbuf->format)); so_data(so, 0x16); break; } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 8e420ad172e..c94531723bc 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -215,9 +215,8 @@ nv50_vbo_validate(struct nv50_context *nv50) break; default: { - char fmt[128]; - pf_sprint_name(fmt, ve->src_format); - NOUVEAU_ERR("invalid vbo format %s\n", fmt); + NOUVEAU_ERR("invalid vbo format %s\n", + pf_name(ve->src_format)); assert(0); return; } -- cgit v1.2.3 From d506fc0acf68c861c4e160a3d931a3f81757242b Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 11 Aug 2008 15:01:35 -0600 Subject: gallium: debug/print vertex tweak --- src/gallium/drivers/softpipe/sp_setup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index b48eec730b2..14aacd73c23 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1272,10 +1272,7 @@ void setup_prepare( struct setup_context *setup ) } /* Note: nr_attrs is only used for debugging (vertex printing) */ - { - const struct sp_fragment_shader *fs = setup->softpipe->fs; - setup->quad.nr_attrs = fs->info.num_inputs + 1; /* +1 for vert pos */ - } + setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw); sp->quad.first->begin(sp->quad.first); -- cgit v1.2.3 From 66ef96f6dc4cd898edb862e45965795fac46caed Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Tue, 12 Aug 2008 11:30:50 +0200 Subject: softpipe: Include missing header. --- src/gallium/drivers/softpipe/sp_setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 14aacd73c23..b7f2f16307e 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -39,6 +39,7 @@ #include "sp_quad.h" #include "sp_state.h" #include "sp_prim_setup.h" +#include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "pipe/p_util.h" -- cgit v1.2.3 From 91f6032919f8e5718004bb7ac0ee2b015fb403d7 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sun, 10 Aug 2008 16:24:15 +0100 Subject: trace: Trace texture depth. --- src/gallium/drivers/trace/tr_state.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 9ffe77146d9..3c7b007bfec 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -73,6 +73,10 @@ void trace_dump_template(struct trace_stream *stream, trace_dump_array(stream, uint, templat->height, 1); trace_dump_member_end(stream); + trace_dump_member_begin(stream, "depth"); + trace_dump_array(stream, uint, templat->depth, 1); + trace_dump_member_end(stream); + trace_dump_member_begin(stream, "block"); trace_dump_block(stream, &templat->block); trace_dump_member_end(stream); -- cgit v1.2.3 From 94cf4f15c3a94311ffeb670459e285d2c70a5d7e Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sun, 10 Aug 2008 16:24:43 +0100 Subject: trace: Trace winsys/screen/context creation. --- src/gallium/drivers/trace/tr_context.c | 135 +++++++++++++-------------------- src/gallium/drivers/trace/tr_context.h | 5 ++ src/gallium/drivers/trace/tr_screen.c | 13 +++- src/gallium/drivers/trace/tr_winsys.c | 8 +- 4 files changed, 71 insertions(+), 90 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 47a217ec7c7..c9c15b2bb9e 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -40,8 +40,7 @@ trace_context_set_edgeflags(struct pipe_context *_pipe, const unsigned *bitfield) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_edgeflags"); @@ -61,8 +60,7 @@ trace_context_draw_arrays(struct pipe_context *_pipe, unsigned mode, unsigned start, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; boolean result; @@ -90,8 +88,7 @@ trace_context_draw_elements(struct pipe_context *_pipe, unsigned mode, unsigned start, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; boolean result; @@ -125,8 +122,7 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; boolean result; @@ -159,8 +155,7 @@ trace_context_create_query(struct pipe_context *_pipe, unsigned query_type) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; struct pipe_query *result; @@ -184,8 +179,7 @@ trace_context_destroy_query(struct pipe_context *_pipe, struct pipe_query *query) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "destroy_query"); @@ -204,8 +198,7 @@ trace_context_begin_query(struct pipe_context *_pipe, struct pipe_query *query) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "begin_query"); @@ -224,8 +217,7 @@ trace_context_end_query(struct pipe_context *_pipe, struct pipe_query *query) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "end_query"); @@ -246,8 +238,7 @@ trace_context_get_query_result(struct pipe_context *_pipe, uint64 *presult) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; uint64 result; boolean _result; @@ -273,8 +264,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe, const struct pipe_blend_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; @@ -296,8 +286,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "bind_blend_state"); @@ -316,8 +305,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "delete_blend_state"); @@ -336,8 +324,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, const struct pipe_sampler_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; @@ -361,8 +348,7 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe, unsigned num_states, void **states) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "bind_sampler_states"); @@ -382,8 +368,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "delete_sampler_state"); @@ -402,8 +387,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe, const struct pipe_rasterizer_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; @@ -427,8 +411,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "bind_rasterizer_state"); @@ -447,8 +430,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "delete_rasterizer_state"); @@ -467,8 +449,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, const struct pipe_depth_stencil_alpha_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; @@ -490,8 +471,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "bind_depth_stencil_alpha_state"); @@ -510,8 +490,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "delete_depth_stencil_alpha_state"); @@ -530,8 +509,7 @@ trace_context_create_fs_state(struct pipe_context *_pipe, const struct pipe_shader_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; @@ -555,8 +533,7 @@ trace_context_bind_fs_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "bind_fs_state"); @@ -575,8 +552,7 @@ trace_context_delete_fs_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "delete_fs_state"); @@ -595,8 +571,7 @@ trace_context_create_vs_state(struct pipe_context *_pipe, const struct pipe_shader_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; @@ -620,8 +595,7 @@ trace_context_bind_vs_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "bind_vs_state"); @@ -640,8 +614,7 @@ trace_context_delete_vs_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "delete_vs_state"); @@ -660,8 +633,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe, const struct pipe_blend_color *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_blend_color"); @@ -680,8 +652,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe, const struct pipe_clip_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_clip_state"); @@ -701,8 +672,7 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe, const struct pipe_constant_buffer *buffer) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_constant_buffer"); @@ -723,8 +693,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, const struct pipe_framebuffer_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_framebuffer_state"); @@ -743,8 +712,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe, const struct pipe_poly_stipple *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_polygon_stipple"); @@ -763,8 +731,7 @@ trace_context_set_scissor_state(struct pipe_context *_pipe, const struct pipe_scissor_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_scissor_state"); @@ -783,8 +750,7 @@ trace_context_set_viewport_state(struct pipe_context *_pipe, const struct pipe_viewport_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_viewport_state"); @@ -804,8 +770,7 @@ trace_context_set_sampler_textures(struct pipe_context *_pipe, struct pipe_texture **textures) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_sampler_textures"); @@ -826,8 +791,7 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, const struct pipe_vertex_buffer *buffers) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_vertex_buffers"); @@ -851,8 +815,7 @@ trace_context_set_vertex_elements(struct pipe_context *_pipe, const struct pipe_vertex_element *elements) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "set_vertex_elements"); @@ -881,8 +844,7 @@ trace_context_surface_copy(struct pipe_context *_pipe, unsigned width, unsigned height) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "surface_copy"); @@ -914,8 +876,7 @@ trace_context_surface_fill(struct pipe_context *_pipe, unsigned value) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "surface_fill"); @@ -939,8 +900,7 @@ trace_context_clear(struct pipe_context *_pipe, unsigned clearValue) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "clear"); @@ -961,8 +921,7 @@ trace_context_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "flush"); @@ -981,8 +940,7 @@ static INLINE void trace_context_destroy(struct pipe_context *_pipe) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_screen *tr_scr = trace_screen(_pipe->screen); - struct trace_stream *stream = tr_scr->stream; + struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; trace_dump_call_begin(stream, "pipe_context", "destroy"); @@ -1000,6 +958,7 @@ trace_context_destroy(struct pipe_context *_pipe) struct pipe_context * trace_context_create(struct pipe_context *pipe) { + struct trace_stream *stream; struct trace_context *tr_ctx; if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) @@ -1055,10 +1014,18 @@ trace_context_create(struct pipe_context *pipe) tr_ctx->base.flush = trace_context_flush; tr_ctx->pipe = pipe; + tr_ctx->stream = stream = trace_winsys(pipe->winsys)->stream; /* We don't want to trace the internal pipe calls */ pipe->winsys = trace_winsys(pipe->winsys)->winsys; pipe->screen = trace_screen(pipe->screen)->screen; + trace_dump_call_begin(stream, "", "pipe_context_create"); + trace_dump_arg_begin(stream, "screen"); + trace_dump_ptr(stream, pipe->screen); + trace_dump_arg_end(stream); + trace_dump_ret(stream, ptr, pipe); + trace_dump_call_end(stream); + return &tr_ctx->base; } diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 80fb5980d6a..2c0b0c72e45 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -34,11 +34,16 @@ #include "pipe/p_context.h" +struct trace_stream; + + struct trace_context { struct pipe_context base; struct pipe_context *pipe; + + struct trace_stream *stream; }; diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index de885abae2c..27c218039ec 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -346,6 +346,7 @@ trace_screen_destroy(struct pipe_screen *_screen) struct pipe_screen * trace_screen_create(struct pipe_screen *screen) { + struct trace_stream *stream; struct trace_screen *tr_scr; if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) @@ -371,13 +372,17 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.surface_unmap = trace_screen_surface_unmap; tr_scr->screen = screen; - - tr_scr->stream = trace_winsys(screen->winsys)->stream; - if(!tr_scr->stream) - return NULL; + tr_scr->stream = stream = trace_winsys(screen->winsys)->stream; /* We don't want to trace the internal pipe calls */ screen->winsys = trace_winsys(screen->winsys)->winsys; + trace_dump_call_begin(stream, "", "pipe_screen_create"); + trace_dump_arg_begin(stream, "winsys"); + trace_dump_ptr(stream, screen->winsys); + trace_dump_arg_end(stream); + trace_dump_ret(stream, ptr, screen); + trace_dump_call_end(stream); + return &tr_scr->base; } diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 964da5677b2..128e502ffc3 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -420,6 +420,7 @@ trace_winsys_destroy(struct pipe_winsys *_winsys) struct pipe_winsys * trace_winsys_create(struct pipe_winsys *winsys) { + struct trace_stream *stream; struct trace_winsys *tr_ws; if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) @@ -446,7 +447,7 @@ trace_winsys_create(struct pipe_winsys *winsys) tr_ws->winsys = winsys; - tr_ws->stream = trace_stream_create("gallium", "trace"); + tr_ws->stream = stream = trace_stream_create("gallium", "trace"); if(!tr_ws->stream) return NULL; @@ -455,8 +456,11 @@ trace_winsys_create(struct pipe_winsys *winsys) if(!tr_ws->buffer_maps) return NULL; - trace_dump_trace_begin(tr_ws->stream, 0); + trace_dump_call_begin(stream, "", "pipe_winsys_create"); + trace_dump_ret(stream, ptr, winsys); + trace_dump_call_end(stream); + return &tr_ws->base; } -- cgit v1.2.3 From e5a606883f24980dd8e2378c25e6fb3b8f1937ce Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sun, 10 Aug 2008 18:51:23 +0100 Subject: trace: Fix hexadecimal dumping. --- src/gallium/drivers/trace/tr_dump.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index f545de30f4f..5269c4ddc84 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -276,17 +276,16 @@ void trace_dump_bytes(struct trace_stream *stream, const void *data, long unsigned size) { - static char hex_table[] = "0123456789ABCDE"; + static const char hex_table[16] = "0123456789ABCDEF"; const uint8_t *p = data; long unsigned i; trace_dump_write(stream, "<bytes>"); for(i = 0; i < size; ++i) { uint8_t byte = *p++; - char str[3]; - str[0] = hex_table[byte >> 4]; - str[1] = hex_table[byte & 0xf]; - str[2] = 0; - trace_dump_write(stream, str); + char hex[2]; + hex[0] = hex_table[byte >> 4]; + hex[1] = hex_table[byte & 0xf]; + trace_stream_write(stream, hex, 2); } trace_dump_write(stream, "</bytes>"); } -- cgit v1.2.3 From a318325b516b53fc321669453e4abe53c51f917a Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sun, 10 Aug 2008 18:54:10 +0100 Subject: trace: Zero the buffers to avoid dumping uninitialized memory. --- src/gallium/drivers/trace/tr_winsys.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 128e502ffc3..60a69626c01 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "pipe/p_util.h" +#include "pipe/p_state.h" #include "util/u_hash_table.h" #include "tr_stream.h" @@ -178,7 +179,7 @@ trace_winsys_buffer_create(struct pipe_winsys *_winsys, struct trace_winsys *tr_ws = trace_winsys(_winsys); struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; - struct pipe_buffer *result; + struct pipe_buffer *buffer; trace_dump_call_begin(stream, "pipe_winsys", "buffer_create"); @@ -187,13 +188,23 @@ trace_winsys_buffer_create(struct pipe_winsys *_winsys, trace_dump_arg(stream, uint, usage); trace_dump_arg(stream, uint, size); - result = winsys->buffer_create(winsys, alignment, usage, size); + buffer = winsys->buffer_create(winsys, alignment, usage, size); - trace_dump_ret(stream, ptr, result); + trace_dump_ret(stream, ptr, buffer); trace_dump_call_end(stream); + + /* Zero the buffer to avoid dumping uninitialized memory */ + if(buffer->usage & PIPE_BUFFER_USAGE_CPU_WRITE) { + void *map; + map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + if(map) { + memset(map, 0, buffer->size); + winsys->buffer_unmap(winsys, buffer); + } + } - return result; + return buffer; } -- cgit v1.2.3 From 8dbb3011a11ff3b6e8ebcf19e64ed4fbef17356b Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 11 Aug 2008 00:15:07 +0100 Subject: trace: Replace buffer_map+memcpy+buffer_unmap by buffer_write --- src/gallium/drivers/trace/tr_winsys.c | 39 +++++++---------------------------- 1 file changed, 8 insertions(+), 31 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 60a69626c01..0a43f1f7f46 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -240,30 +240,18 @@ trace_winsys_buffer_map(struct pipe_winsys *_winsys, unsigned usage) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; - void *result; - - trace_dump_call_begin(stream, "pipe_winsys", "buffer_map"); - - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, ptr, buffer); - trace_dump_arg(stream, uint, usage); - - result = winsys->buffer_map(winsys, buffer, usage); - - trace_dump_ret(stream, ptr, result); - - trace_dump_call_end(stream); + void *map; - if(result) { + map = winsys->buffer_map(winsys, buffer, usage); + if(map) { if(usage & PIPE_BUFFER_USAGE_CPU_WRITE) { assert(!hash_table_get(tr_ws->buffer_maps, buffer)); - hash_table_set(tr_ws->buffer_maps, buffer, result); + hash_table_set(tr_ws->buffer_maps, buffer, map); } } - return result; + return map; } @@ -278,13 +266,11 @@ trace_winsys_buffer_unmap(struct pipe_winsys *_winsys, map = hash_table_get(tr_ws->buffer_maps, buffer); if(map) { - trace_dump_call_begin(stream, "", "memcpy"); + trace_dump_call_begin(stream, "pipe_winsys", "buffer_write"); - trace_dump_arg_begin(stream, "dst"); - trace_dump_ptr(stream, map); - trace_dump_arg_end(stream); + trace_dump_arg(stream, ptr, buffer); - trace_dump_arg_begin(stream, "src"); + trace_dump_arg_begin(stream, "data"); trace_dump_bytes(stream, map, buffer->size); trace_dump_arg_end(stream); @@ -294,19 +280,10 @@ trace_winsys_buffer_unmap(struct pipe_winsys *_winsys, trace_dump_call_end(stream); - winsys->buffer_unmap(winsys, buffer); - hash_table_remove(tr_ws->buffer_maps, buffer); } - trace_dump_call_begin(stream, "pipe_winsys", "buffer_unmap"); - - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, ptr, buffer); - winsys->buffer_unmap(winsys, buffer); - - trace_dump_call_end(stream); } -- cgit v1.2.3 From 166b939d523a8683f1aee819f73e002e627a49ba Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 11 Aug 2008 16:56:08 +0100 Subject: trace: Dump pipe_{depth,stencil,alpha}_state names. --- src/gallium/drivers/trace/tr_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 3c7b007bfec..961705b9eec 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -277,7 +277,7 @@ void trace_dump_depth_stencil_alpha_state(struct trace_stream *stream, trace_dump_struct_begin(stream, "pipe_depth_stencil_alpha_state"); trace_dump_member_begin(stream, "depth"); - trace_dump_struct_begin(stream, ""); + trace_dump_struct_begin(stream, "pipe_depth_state"); trace_dump_member(stream, bool, &state->depth, enabled); trace_dump_member(stream, bool, &state->depth, writemask); trace_dump_member(stream, uint, &state->depth, func); @@ -289,7 +289,7 @@ void trace_dump_depth_stencil_alpha_state(struct trace_stream *stream, trace_dump_array_begin(stream); for(i = 0; i < Elements(state->stencil); ++i) { trace_dump_elem_begin(stream); - trace_dump_struct_begin(stream, ""); + trace_dump_struct_begin(stream, "pipe_stencil_state"); trace_dump_member(stream, bool, &state->stencil[i], enabled); trace_dump_member(stream, uint, &state->stencil[i], func); trace_dump_member(stream, uint, &state->stencil[i], fail_op); @@ -305,7 +305,7 @@ void trace_dump_depth_stencil_alpha_state(struct trace_stream *stream, trace_dump_member_end(stream); trace_dump_member_begin(stream, "alpha"); - trace_dump_struct_begin(stream, ""); + trace_dump_struct_begin(stream, "pipe_alpha_state"); trace_dump_member(stream, bool, &state->alpha, enabled); trace_dump_member(stream, uint, &state->alpha, func); trace_dump_member(stream, float, &state->alpha, ref); -- cgit v1.2.3 From d4d8f803884584525a36713b76ce04802658bd0f Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 11 Aug 2008 17:08:27 +0100 Subject: trace: Fix create_depth_stencil_alpha_state trace. --- src/gallium/drivers/trace/tr_context.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index c9c15b2bb9e..1df6842b714 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -457,8 +457,10 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, result = pipe->create_depth_stencil_alpha_state(pipe, state);; - trace_dump_ret(stream, ptr, result); + trace_dump_arg(stream, ptr, pipe); trace_dump_arg(stream, depth_stencil_alpha_state, state); + + trace_dump_ret(stream, ptr, result); trace_dump_call_end(stream); -- cgit v1.2.3 From e54fa77d130582ee48b699917324040ef254ed74 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 11 Aug 2008 18:09:36 +0100 Subject: trace: Dump polygon stipple state as an array. --- src/gallium/drivers/trace/tr_state.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 961705b9eec..e074ae7abc1 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -150,9 +150,10 @@ void trace_dump_poly_stipple(struct trace_stream *stream, trace_dump_struct_begin(stream, "pipe_poly_stipple"); trace_dump_member_begin(stream, "stipple"); - trace_dump_bytes(stream, + trace_dump_array(stream, + uint, state->stipple, - sizeof(state->stipple)); + Elements(state->stipple)); trace_dump_member_end(stream); trace_dump_struct_end(stream); -- cgit v1.2.3 From 7b8fa937eb9bbc63a59f2db399781c32b20f339f Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 11 Aug 2008 18:29:04 +0100 Subject: trace: Fix create_blend_state dump. --- src/gallium/drivers/trace/tr_context.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 1df6842b714..3e098128c96 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -275,6 +275,8 @@ trace_context_create_blend_state(struct pipe_context *_pipe, result = pipe->create_blend_state(pipe, state);; + trace_dump_ret(stream, ptr, result); + trace_dump_call_end(stream); return result; -- cgit v1.2.3 From 4a77fadd102264f1bf2caec3263e193eb831dade Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 12 Aug 2008 11:32:42 +0100 Subject: trace: More dump fixes. --- src/gallium/drivers/trace/tr_context.c | 1 - src/gallium/drivers/trace/tr_winsys.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 3e098128c96..f82126370cf 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -826,7 +826,6 @@ trace_context_set_vertex_elements(struct pipe_context *_pipe, trace_dump_arg(stream, ptr, pipe); trace_dump_arg(stream, uint, num_elements); - trace_dump_arg(stream, ptr, elements); trace_dump_arg_begin(stream, "elements"); trace_dump_struct_array(stream, vertex_element, elements, num_elements); diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 0a43f1f7f46..eec84a19816 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -268,6 +268,8 @@ trace_winsys_buffer_unmap(struct pipe_winsys *_winsys, if(map) { trace_dump_call_begin(stream, "pipe_winsys", "buffer_write"); + trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(stream, ptr, buffer); trace_dump_arg_begin(stream, "data"); -- cgit v1.2.3 From e8e75c8c865bb5bbff9db2682b130c8d147f3a38 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Wed, 13 Aug 2008 11:10:58 +0200 Subject: cell: KILP is a predicated discard, KIL is a conditional discard. --- src/gallium/drivers/cell/spu/spu_exec.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 96393732ed8..42e5022f30b 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -603,8 +603,8 @@ store_dest( * Kill fragment if any of the four values is less than zero. */ static void -exec_kilp(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_kil(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst) { uint uniquemask; uint chan_index; @@ -640,6 +640,20 @@ exec_kilp(struct spu_exec_machine *mach, mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } +/** + * Execute NVIDIA-style KIL which is predicated by a condition code. + * Kill fragment if the condition code is TRUE. + */ +static void +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + + /* TODO: build kilmask from CC mask */ + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} /* * Fetch a texel using STR texture coordinates. @@ -1336,8 +1350,7 @@ exec_instruction( break; case TGSI_OPCODE_KIL: - /* for enabled ExecMask bits, set the killed bit */ - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + exec_kil (mach, inst); break; case TGSI_OPCODE_PK2H: -- cgit v1.2.3 From db38708c43d7e9bbc744893ad2e9c2a77e9ec15c Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Wed, 13 Aug 2008 11:13:46 +0200 Subject: i915: Swap meanings of KIL and KILP. --- src/gallium/drivers/i915simple/i915_fpc_translate.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 04507ab8adc..64432982c42 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -560,18 +560,6 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_KIL: - /* unconditional kill */ - assert(0); /* not tested yet */ -#if 0 - src0 = src_vector(p, &inst->FullSrcRegisters[0]); - tmp = i915_get_utemp(p); - - i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ - 0, src0, T0_TEXKILL); -#endif - break; - - case TGSI_OPCODE_KILP: /* kill if src[0].x < 0 || src[0].y < 0 ... */ src0 = src_vector(p, &inst->FullSrcRegisters[0]); tmp = i915_get_utemp(p); @@ -584,6 +572,10 @@ i915_translate_instruction(struct i915_fp_compile *p, T0_TEXKILL); /* opcode */ break; + case TGSI_OPCODE_KILP: + assert(0); /* not tested yet */ + break; + case TGSI_OPCODE_LG2: src0 = src_vector(p, &inst->FullSrcRegisters[0]); -- cgit v1.2.3 From dbec107c2549c00120ca6d67282f2a1a4bd8bdbd Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 14 Aug 2008 00:22:49 +1000 Subject: nv30/nv40: KIL/KILP swapped meanings --- src/gallium/drivers/nv30/nv30_fragprog.c | 4 ++-- src/gallium/drivers/nv40/nv40_fragprog.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 68058264e39..d3693fdf567 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -494,10 +494,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, case TGSI_OPCODE_FRC: arith(fpc, sat, FRC, dst, mask, src[0], none, none); break; - case TGSI_OPCODE_KIL: + case TGSI_OPCODE_KILP: arith(fpc, 0, KIL, none, 0, none, none, none); break; - case TGSI_OPCODE_KILP: + case TGSI_OPCODE_KIL: dst = nv30_sr(NV30SR_NONE, 0); dst.cc_update = 1; arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index a361509e8f1..91dcbebda0d 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -533,10 +533,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, case TGSI_OPCODE_FRC: arith(fpc, sat, FRC, dst, mask, src[0], none, none); break; - case TGSI_OPCODE_KIL: + case TGSI_OPCODE_KILP: arith(fpc, 0, KIL, none, 0, none, none, none); break; - case TGSI_OPCODE_KILP: + case TGSI_OPCODE_KIL: dst = nv40_sr(NV40SR_NONE, 0); dst.cc_update = 1; arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); -- cgit v1.2.3 From 73467e1080d94e6d5cfb52f71c845c09a78adcd9 Mon Sep 17 00:00:00 2001 From: Alan Hourihane <alanh@tungstengraphics.com> Date: Thu, 14 Aug 2008 09:52:15 +0100 Subject: check for winsys->destroy before calling --- src/gallium/drivers/i915simple/i915_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index e3d19017b5d..e2bf5ab678f 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -45,7 +45,7 @@ static void i915_destroy( struct pipe_context *pipe ) draw_destroy( i915->draw ); - if(i915->winsys) + if(i915->winsys->destroy) i915->winsys->destroy(i915->winsys); FREE( i915 ); -- cgit v1.2.3 From 3c90678ea69ee8be832e16d42a1b8049a49535e3 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 14 Aug 2008 10:46:38 +0100 Subject: trace: Separate the trace screen/context vs the original screen/context. --- src/gallium/drivers/trace/tr_context.c | 13 +++++-------- src/gallium/drivers/trace/tr_context.h | 3 ++- src/gallium/drivers/trace/tr_screen.c | 24 ++++++++++++++++++------ src/gallium/drivers/trace/tr_winsys.c | 23 +++++++---------------- src/gallium/drivers/trace/tr_winsys.h | 3 ++- 5 files changed, 34 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index f82126370cf..e43fc01c557 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -959,7 +959,8 @@ trace_context_destroy(struct pipe_context *_pipe) struct pipe_context * -trace_context_create(struct pipe_context *pipe) +trace_context_create(struct pipe_screen *screen, + struct pipe_context *pipe) { struct trace_stream *stream; struct trace_context *tr_ctx; @@ -971,8 +972,8 @@ trace_context_create(struct pipe_context *pipe) if(!tr_ctx) return NULL; - tr_ctx->base.winsys = pipe->winsys; - tr_ctx->base.screen = pipe->screen; + tr_ctx->base.winsys = screen->winsys; + tr_ctx->base.screen = screen; tr_ctx->base.destroy = trace_context_destroy; tr_ctx->base.set_edgeflags = trace_context_set_edgeflags; tr_ctx->base.draw_arrays = trace_context_draw_arrays; @@ -1017,11 +1018,7 @@ trace_context_create(struct pipe_context *pipe) tr_ctx->base.flush = trace_context_flush; tr_ctx->pipe = pipe; - tr_ctx->stream = stream = trace_winsys(pipe->winsys)->stream; - - /* We don't want to trace the internal pipe calls */ - pipe->winsys = trace_winsys(pipe->winsys)->winsys; - pipe->screen = trace_screen(pipe->screen)->screen; + tr_ctx->stream = stream = trace_screen(screen)->stream; trace_dump_call_begin(stream, "", "pipe_context_create"); trace_dump_arg_begin(stream, "screen"); diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 2c0b0c72e45..1aa822ba02e 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -57,7 +57,8 @@ trace_context(struct pipe_context *pipe) struct pipe_context * -trace_context_create(struct pipe_context *pipe); +trace_context_create(struct pipe_screen *screen, + struct pipe_context *pipe); #endif /* TR_CONTEXT_H_ */ diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 27c218039ec..0e253123aee 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -338,7 +338,11 @@ trace_screen_destroy(struct pipe_screen *_screen) screen->destroy(screen); trace_dump_call_end(stream); - + + trace_dump_trace_end(stream); + + trace_stream_close(stream); + FREE(tr_scr); } @@ -348,6 +352,7 @@ trace_screen_create(struct pipe_screen *screen) { struct trace_stream *stream; struct trace_screen *tr_scr; + struct pipe_winsys *winsys; if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) return screen; @@ -356,7 +361,17 @@ trace_screen_create(struct pipe_screen *screen) if(!tr_scr) return NULL; - tr_scr->base.winsys = screen->winsys; + tr_scr->stream = stream = trace_stream_create("gallium", "trace"); + if(!tr_scr->stream) + return NULL; + + trace_dump_trace_begin(stream, 0); + + winsys = trace_winsys_create(stream, screen->winsys); + if(!winsys) + return NULL; + + tr_scr->base.winsys = winsys; tr_scr->base.destroy = trace_screen_destroy; tr_scr->base.get_name = trace_screen_get_name; tr_scr->base.get_vendor = trace_screen_get_vendor; @@ -372,10 +387,7 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.surface_unmap = trace_screen_surface_unmap; tr_scr->screen = screen; - tr_scr->stream = stream = trace_winsys(screen->winsys)->stream; - - /* We don't want to trace the internal pipe calls */ - screen->winsys = trace_winsys(screen->winsys)->winsys; + tr_scr->stream = stream = trace_winsys(winsys)->stream; trace_dump_call_begin(stream, "", "pipe_screen_create"); trace_dump_arg_begin(stream, "winsys"); diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index eec84a19816..524049148d0 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -393,28 +393,24 @@ trace_winsys_destroy(struct pipe_winsys *_winsys) trace_dump_arg(stream, ptr, winsys); - winsys->destroy(winsys); + /* + winsys->destroy(winsys); + */ trace_dump_call_end(stream); - trace_dump_trace_end(stream); - hash_table_destroy(tr_ws->buffer_maps); - trace_stream_close(tr_ws->stream); - FREE(tr_ws); } struct pipe_winsys * -trace_winsys_create(struct pipe_winsys *winsys) +trace_winsys_create(struct trace_stream *stream, + struct pipe_winsys *winsys) { - struct trace_stream *stream; - struct trace_winsys *tr_ws; - if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) - return winsys; + struct trace_winsys *tr_ws; tr_ws = CALLOC_STRUCT(trace_winsys); if(!tr_ws) @@ -436,18 +432,13 @@ trace_winsys_create(struct pipe_winsys *winsys) tr_ws->base.fence_finish = trace_winsys_fence_finish; tr_ws->winsys = winsys; - - tr_ws->stream = stream = trace_stream_create("gallium", "trace"); - if(!tr_ws->stream) - return NULL; + tr_ws->stream = stream; tr_ws->buffer_maps = hash_table_create(trace_buffer_hash, trace_buffer_compare); if(!tr_ws->buffer_maps) return NULL; - trace_dump_trace_begin(tr_ws->stream, 0); - trace_dump_call_begin(stream, "", "pipe_winsys_create"); trace_dump_ret(stream, ptr, winsys); trace_dump_call_end(stream); diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h index a3576da867e..704d2c57c86 100644 --- a/src/gallium/drivers/trace/tr_winsys.h +++ b/src/gallium/drivers/trace/tr_winsys.h @@ -60,7 +60,8 @@ trace_winsys(struct pipe_winsys *winsys) struct pipe_winsys * -trace_winsys_create(struct pipe_winsys *winsys); +trace_winsys_create(struct trace_stream *stream, + struct pipe_winsys *winsys); #endif /* TR_WINSYS_H_ */ -- cgit v1.2.3 From 19aee90179135387c3236c38b207cc47176226ad Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 14 Aug 2008 11:00:49 +0100 Subject: trace: Update status. --- src/gallium/drivers/trace/README | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index 81da610bd5e..4b763f2488c 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -12,7 +12,7 @@ To use do ensure the right libGL.so is being picked by doing - ldd `which glxingo` + ldd `which glxinfo` and then try running @@ -31,7 +31,9 @@ further transform it by doing echo '</trace>' >> gallium.??.trace -This is still work in progress. +This is still work in progress, namely: +- surface writes are not traced +- no way to know the start/end of a frame -- Jose Fonseca <jrfonseca@tungstengraphics.com> -- cgit v1.2.3 From a7ea6bae4e618e28636909ac2db7783632bc81b8 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 14 Aug 2008 16:36:52 +0200 Subject: nv30: does not support mirror clamp, only mirror repeat --- src/gallium/drivers/nv30/nv30_screen.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 0ffcd772351..d5514c2aba5 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -55,6 +55,10 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 0; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; case NOUVEAU_CAP_HW_VTXBUF: case NOUVEAU_CAP_HW_IDXBUF: return 1; -- cgit v1.2.3 From a145c107c12715105e14bb56b245eeb660cf433a Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 14 Aug 2008 16:52:51 +0200 Subject: nv30: disable setting nv40 RECT bit, this is not the same on nv30, plus gallium does not support rectangle textures currently, only full POT or NPOT --- src/gallium/drivers/nv30/nv30_state.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 8d88d6c806e..77b0c9e08b0 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -127,8 +127,10 @@ nv30_sampler_state_create(struct pipe_context *pipe, in sampler state structure, and set appropriate format in nvxx_fragtex_build() */ - if (!cso->normalized_coords) - ps->fmt |= (1<<14) /*NV34TCL_TX_FORMAT_RECT*/; + /*NV34TCL_TX_FORMAT_RECT*/ + /*if (!cso->normalized_coords) { + ps->fmt |= (1<<14) ; + }*/ ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | -- cgit v1.2.3 From d4c199d05691878bbc4c72a06d3042ef00ff38e0 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Thu, 14 Aug 2008 17:24:06 +0200 Subject: nv30: set mipmap min/max lod accordingly --- src/gallium/drivers/nv30/nv30_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 77b0c9e08b0..eceb5353152 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -199,10 +199,10 @@ nv30_sampler_state_create(struct pipe_context *pipe, ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 7; + ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 19; + ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; } if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { -- cgit v1.2.3 From 196167e9d5c84f9f6dfe6f15b3e2f2c3ec6825dc Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 14 Aug 2008 12:50:52 +0100 Subject: trace: Make stream a global variable. This not only simplifies the code, but allows to use atexit() to ensure the log is closed when applications don't exit cleanly. --- src/gallium/drivers/trace/README | 11 +- src/gallium/drivers/trace/tr_context.c | 497 +++++++++++++++------------------ src/gallium/drivers/trace/tr_context.h | 5 - src/gallium/drivers/trace/tr_dump.c | 337 +++++++++++----------- src/gallium/drivers/trace/tr_dump.h | 120 ++++---- src/gallium/drivers/trace/tr_screen.c | 196 ++++++------- src/gallium/drivers/trace/tr_screen.h | 5 - src/gallium/drivers/trace/tr_state.c | 497 ++++++++++++++++----------------- src/gallium/drivers/trace/tr_state.h | 59 ++-- src/gallium/drivers/trace/tr_stream.h | 5 +- src/gallium/drivers/trace/tr_winsys.c | 185 ++++++------ src/gallium/drivers/trace/tr_winsys.h | 6 +- 12 files changed, 906 insertions(+), 1017 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index 4b763f2488c..5752448b934 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -20,16 +20,7 @@ and then try running which should create a gallium.*.trace file, which is an XML file. You can view copying trace.xsl and trace.css to the same directory, and opening with a -XSLT capable browser like Firefox or Internet Explorer. It often happens that -the trace file was not properly terminated, and a - - </trace> - -closing tag is missing from the file end. Add it before try to open or -further transform it by doing - - echo '</trace>' >> gallium.??.trace - +XSLT capable browser like Firefox or Internet Explorer. This is still work in progress, namely: - surface writes are not traced diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index e43fc01c557..868b4f010d0 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -26,12 +26,10 @@ **************************************************************************/ #include "pipe/p_util.h" +#include "pipe/p_screen.h" -#include "tr_stream.h" #include "tr_dump.h" #include "tr_state.h" -#include "tr_winsys.h" -#include "tr_screen.h" #include "tr_context.h" @@ -40,18 +38,17 @@ trace_context_set_edgeflags(struct pipe_context *_pipe, const unsigned *bitfield) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_edgeflags"); + trace_dump_call_begin("pipe_context", "set_edgeflags"); - trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(ptr, pipe); /* FIXME: we don't know how big this array is */ - trace_dump_arg(stream, ptr, bitfield); + trace_dump_arg(ptr, bitfield); pipe->set_edgeflags(pipe, bitfield);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -60,22 +57,21 @@ trace_context_draw_arrays(struct pipe_context *_pipe, unsigned mode, unsigned start, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; boolean result; - trace_dump_call_begin(stream, "pipe_context", "draw_arrays"); + trace_dump_call_begin("pipe_context", "draw_arrays"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, uint, mode); - trace_dump_arg(stream, uint, start); - trace_dump_arg(stream, uint, count); + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, mode); + trace_dump_arg(uint, start); + trace_dump_arg(uint, count); result = pipe->draw_arrays(pipe, mode, start, count);; - trace_dump_ret(stream, bool, result); + trace_dump_ret(bool, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -88,24 +84,23 @@ trace_context_draw_elements(struct pipe_context *_pipe, unsigned mode, unsigned start, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; boolean result; - trace_dump_call_begin(stream, "pipe_context", "draw_elements"); + trace_dump_call_begin("pipe_context", "draw_elements"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, indexBuffer); - trace_dump_arg(stream, uint, indexSize); - trace_dump_arg(stream, uint, mode); - trace_dump_arg(stream, uint, start); - trace_dump_arg(stream, uint, count); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, indexBuffer); + trace_dump_arg(uint, indexSize); + trace_dump_arg(uint, mode); + trace_dump_arg(uint, start); + trace_dump_arg(uint, count); result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);; - trace_dump_ret(stream, bool, result); + trace_dump_ret(bool, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -122,29 +117,28 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; boolean result; - trace_dump_call_begin(stream, "pipe_context", "draw_range_elements"); + trace_dump_call_begin("pipe_context", "draw_range_elements"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, indexBuffer); - trace_dump_arg(stream, uint, indexSize); - trace_dump_arg(stream, uint, minIndex); - trace_dump_arg(stream, uint, maxIndex); - trace_dump_arg(stream, uint, mode); - trace_dump_arg(stream, uint, start); - trace_dump_arg(stream, uint, count); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, indexBuffer); + trace_dump_arg(uint, indexSize); + trace_dump_arg(uint, minIndex); + trace_dump_arg(uint, maxIndex); + trace_dump_arg(uint, mode); + trace_dump_arg(uint, start); + trace_dump_arg(uint, count); result = pipe->draw_range_elements(pipe, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); - trace_dump_ret(stream, bool, result); + trace_dump_ret(bool, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -155,20 +149,19 @@ trace_context_create_query(struct pipe_context *_pipe, unsigned query_type) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; struct pipe_query *result; - trace_dump_call_begin(stream, "pipe_context", "create_query"); + trace_dump_call_begin("pipe_context", "create_query"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, uint, query_type); + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, query_type); result = pipe->create_query(pipe, query_type);; - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -179,17 +172,16 @@ trace_context_destroy_query(struct pipe_context *_pipe, struct pipe_query *query) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "destroy_query"); + trace_dump_call_begin("pipe_context", "destroy_query"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, query); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, query); pipe->destroy_query(pipe, query);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -198,17 +190,16 @@ trace_context_begin_query(struct pipe_context *_pipe, struct pipe_query *query) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "begin_query"); + trace_dump_call_begin("pipe_context", "begin_query"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, query); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, query); pipe->begin_query(pipe, query);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -217,17 +208,16 @@ trace_context_end_query(struct pipe_context *_pipe, struct pipe_query *query) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "end_query"); + trace_dump_call_begin("pipe_context", "end_query"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, query); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, query); pipe->end_query(pipe, query); - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -238,22 +228,21 @@ trace_context_get_query_result(struct pipe_context *_pipe, uint64 *presult) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; uint64 result; boolean _result; - trace_dump_call_begin(stream, "pipe_context", "get_query_result"); + trace_dump_call_begin("pipe_context", "get_query_result"); - trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(ptr, pipe); _result = pipe->get_query_result(pipe, query, wait, presult);; result = *presult; - trace_dump_arg(stream, uint, result); - trace_dump_ret(stream, bool, _result); + trace_dump_arg(uint, result); + trace_dump_ret(bool, _result); - trace_dump_call_end(stream); + trace_dump_call_end(); return _result; } @@ -264,20 +253,19 @@ trace_context_create_blend_state(struct pipe_context *_pipe, const struct pipe_blend_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; - trace_dump_call_begin(stream, "pipe_context", "create_blend_state"); + trace_dump_call_begin("pipe_context", "create_blend_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, blend_state, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(blend_state, state); result = pipe->create_blend_state(pipe, state);; - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -288,17 +276,16 @@ trace_context_bind_blend_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "bind_blend_state"); + trace_dump_call_begin("pipe_context", "bind_blend_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->bind_blend_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -307,17 +294,16 @@ trace_context_delete_blend_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "delete_blend_state"); + trace_dump_call_begin("pipe_context", "delete_blend_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->delete_blend_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -326,20 +312,19 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, const struct pipe_sampler_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; - trace_dump_call_begin(stream, "pipe_context", "create_sampler_state"); + trace_dump_call_begin("pipe_context", "create_sampler_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); result = pipe->create_sampler_state(pipe, state);; - trace_dump_ret(stream, sampler_state, result); + trace_dump_ret(sampler_state, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -350,18 +335,17 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe, unsigned num_states, void **states) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "bind_sampler_states"); + trace_dump_call_begin("pipe_context", "bind_sampler_states"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, uint, num_states); - trace_dump_arg_array(stream, ptr, states, num_states); + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_states); + trace_dump_arg_array(ptr, states, num_states); pipe->bind_sampler_states(pipe, num_states, states);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -370,17 +354,16 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "delete_sampler_state"); + trace_dump_call_begin("pipe_context", "delete_sampler_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->delete_sampler_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -389,20 +372,19 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe, const struct pipe_rasterizer_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; - trace_dump_call_begin(stream, "pipe_context", "create_rasterizer_state"); + trace_dump_call_begin("pipe_context", "create_rasterizer_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, rasterizer_state, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(rasterizer_state, state); result = pipe->create_rasterizer_state(pipe, state);; - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -413,17 +395,16 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "bind_rasterizer_state"); + trace_dump_call_begin("pipe_context", "bind_rasterizer_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->bind_rasterizer_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -432,17 +413,16 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "delete_rasterizer_state"); + trace_dump_call_begin("pipe_context", "delete_rasterizer_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->delete_rasterizer_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -451,20 +431,19 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, const struct pipe_depth_stencil_alpha_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; - trace_dump_call_begin(stream, "pipe_context", "create_depth_stencil_alpha_state"); + trace_dump_call_begin("pipe_context", "create_depth_stencil_alpha_state"); result = pipe->create_depth_stencil_alpha_state(pipe, state);; - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, depth_stencil_alpha_state, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(depth_stencil_alpha_state, state); - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -475,17 +454,16 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "bind_depth_stencil_alpha_state"); + trace_dump_call_begin("pipe_context", "bind_depth_stencil_alpha_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->bind_depth_stencil_alpha_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -494,17 +472,16 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "delete_depth_stencil_alpha_state"); + trace_dump_call_begin("pipe_context", "delete_depth_stencil_alpha_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->delete_depth_stencil_alpha_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -513,20 +490,19 @@ trace_context_create_fs_state(struct pipe_context *_pipe, const struct pipe_shader_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; - trace_dump_call_begin(stream, "pipe_context", "create_fs_state"); + trace_dump_call_begin("pipe_context", "create_fs_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, shader_state, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(shader_state, state); result = pipe->create_fs_state(pipe, state);; - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -537,17 +513,16 @@ trace_context_bind_fs_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "bind_fs_state"); + trace_dump_call_begin("pipe_context", "bind_fs_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->bind_fs_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -556,17 +531,16 @@ trace_context_delete_fs_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "delete_fs_state"); + trace_dump_call_begin("pipe_context", "delete_fs_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->delete_fs_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -575,20 +549,19 @@ trace_context_create_vs_state(struct pipe_context *_pipe, const struct pipe_shader_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; void * result; - trace_dump_call_begin(stream, "pipe_context", "create_vs_state"); + trace_dump_call_begin("pipe_context", "create_vs_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, shader_state, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(shader_state, state); result = pipe->create_vs_state(pipe, state);; - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -599,17 +572,16 @@ trace_context_bind_vs_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "bind_vs_state"); + trace_dump_call_begin("pipe_context", "bind_vs_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->bind_vs_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -618,17 +590,16 @@ trace_context_delete_vs_state(struct pipe_context *_pipe, void *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "delete_vs_state"); + trace_dump_call_begin("pipe_context", "delete_vs_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); pipe->delete_vs_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -637,17 +608,16 @@ trace_context_set_blend_color(struct pipe_context *_pipe, const struct pipe_blend_color *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_blend_color"); + trace_dump_call_begin("pipe_context", "set_blend_color"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, blend_color, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(blend_color, state); pipe->set_blend_color(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -656,17 +626,16 @@ trace_context_set_clip_state(struct pipe_context *_pipe, const struct pipe_clip_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_clip_state"); + trace_dump_call_begin("pipe_context", "set_clip_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, clip_state, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(clip_state, state); pipe->set_clip_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -676,19 +645,18 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe, const struct pipe_constant_buffer *buffer) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_constant_buffer"); + trace_dump_call_begin("pipe_context", "set_constant_buffer"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, uint, shader); - trace_dump_arg(stream, uint, index); - trace_dump_arg(stream, constant_buffer, buffer); + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, shader); + trace_dump_arg(uint, index); + trace_dump_arg(constant_buffer, buffer); pipe->set_constant_buffer(pipe, shader, index, buffer);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -697,17 +665,16 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, const struct pipe_framebuffer_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_framebuffer_state"); + trace_dump_call_begin("pipe_context", "set_framebuffer_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, framebuffer_state, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(framebuffer_state, state); pipe->set_framebuffer_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -716,17 +683,16 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe, const struct pipe_poly_stipple *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_polygon_stipple"); + trace_dump_call_begin("pipe_context", "set_polygon_stipple"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, poly_stipple, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(poly_stipple, state); pipe->set_polygon_stipple(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -735,17 +701,16 @@ trace_context_set_scissor_state(struct pipe_context *_pipe, const struct pipe_scissor_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_scissor_state"); + trace_dump_call_begin("pipe_context", "set_scissor_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, scissor_state, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(scissor_state, state); pipe->set_scissor_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -754,17 +719,16 @@ trace_context_set_viewport_state(struct pipe_context *_pipe, const struct pipe_viewport_state *state) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_viewport_state"); + trace_dump_call_begin("pipe_context", "set_viewport_state"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, viewport_state, state); + trace_dump_arg(ptr, pipe); + trace_dump_arg(viewport_state, state); pipe->set_viewport_state(pipe, state);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -774,18 +738,17 @@ trace_context_set_sampler_textures(struct pipe_context *_pipe, struct pipe_texture **textures) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_sampler_textures"); + trace_dump_call_begin("pipe_context", "set_sampler_textures"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, uint, num_textures); - trace_dump_arg_array(stream, ptr, textures, num_textures); + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_textures); + trace_dump_arg_array(ptr, textures, num_textures); pipe->set_sampler_textures(pipe, num_textures, textures);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -795,21 +758,20 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, const struct pipe_vertex_buffer *buffers) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_vertex_buffers"); + trace_dump_call_begin("pipe_context", "set_vertex_buffers"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, uint, num_buffers); + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_buffers); - trace_dump_arg_begin(stream, "buffers"); - trace_dump_struct_array(stream, vertex_buffer, buffers, num_buffers); - trace_dump_arg_end(stream); + trace_dump_arg_begin("buffers"); + trace_dump_struct_array(vertex_buffer, buffers, num_buffers); + trace_dump_arg_end(); pipe->set_vertex_buffers(pipe, num_buffers, buffers);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -819,21 +781,20 @@ trace_context_set_vertex_elements(struct pipe_context *_pipe, const struct pipe_vertex_element *elements) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "set_vertex_elements"); + trace_dump_call_begin("pipe_context", "set_vertex_elements"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, uint, num_elements); + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_elements); - trace_dump_arg_begin(stream, "elements"); - trace_dump_struct_array(stream, vertex_element, elements, num_elements); - trace_dump_arg_end(stream); + trace_dump_arg_begin("elements"); + trace_dump_struct_array(vertex_element, elements, num_elements); + trace_dump_arg_end(); pipe->set_vertex_elements(pipe, num_elements, elements);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -847,27 +808,26 @@ trace_context_surface_copy(struct pipe_context *_pipe, unsigned width, unsigned height) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "surface_copy"); + trace_dump_call_begin("pipe_context", "surface_copy"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, bool, do_flip); - trace_dump_arg(stream, ptr, dest); - trace_dump_arg(stream, uint, destx); - trace_dump_arg(stream, uint, desty); - trace_dump_arg(stream, ptr, src); - trace_dump_arg(stream, uint, srcx); - trace_dump_arg(stream, uint, srcy); - trace_dump_arg(stream, uint, width); - trace_dump_arg(stream, uint, height); + trace_dump_arg(ptr, pipe); + trace_dump_arg(bool, do_flip); + trace_dump_arg(ptr, dest); + trace_dump_arg(uint, destx); + trace_dump_arg(uint, desty); + trace_dump_arg(ptr, src); + trace_dump_arg(uint, srcx); + trace_dump_arg(uint, srcy); + trace_dump_arg(uint, width); + trace_dump_arg(uint, height); pipe->surface_copy(pipe, do_flip, dest, destx, desty, src, srcx, srcy, width, height); - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -879,21 +839,20 @@ trace_context_surface_fill(struct pipe_context *_pipe, unsigned value) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "surface_fill"); + trace_dump_call_begin("pipe_context", "surface_fill"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, dst); - trace_dump_arg(stream, uint, dstx); - trace_dump_arg(stream, uint, dsty); - trace_dump_arg(stream, uint, width); - trace_dump_arg(stream, uint, height); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, dst); + trace_dump_arg(uint, dstx); + trace_dump_arg(uint, dsty); + trace_dump_arg(uint, width); + trace_dump_arg(uint, height); pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -903,18 +862,17 @@ trace_context_clear(struct pipe_context *_pipe, unsigned clearValue) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "clear"); + trace_dump_call_begin("pipe_context", "clear"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, ptr, surface); - trace_dump_arg(stream, uint, clearValue); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, surface); + trace_dump_arg(uint, clearValue); pipe->clear(pipe, surface, clearValue);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -924,18 +882,17 @@ trace_context_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "flush"); + trace_dump_call_begin("pipe_context", "flush"); - trace_dump_arg(stream, ptr, pipe); - trace_dump_arg(stream, uint, flags); - trace_dump_arg(stream, ptr, fence); + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, flags); + trace_dump_arg(ptr, fence); pipe->flush(pipe, flags, fence);; - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -943,16 +900,15 @@ static INLINE void trace_context_destroy(struct pipe_context *_pipe) { struct trace_context *tr_ctx = trace_context(_pipe); - struct trace_stream *stream = tr_ctx->stream; struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin(stream, "pipe_context", "destroy"); + trace_dump_call_begin("pipe_context", "destroy"); - trace_dump_arg(stream, ptr, pipe); + trace_dump_arg(ptr, pipe); pipe->destroy(pipe); - trace_dump_call_end(stream); + trace_dump_call_end(); FREE(tr_ctx); } @@ -962,15 +918,14 @@ struct pipe_context * trace_context_create(struct pipe_screen *screen, struct pipe_context *pipe) { - struct trace_stream *stream; struct trace_context *tr_ctx; - if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) - return pipe; + if(!pipe) + goto error1; tr_ctx = CALLOC_STRUCT(trace_context); if(!tr_ctx) - return NULL; + goto error1; tr_ctx->base.winsys = screen->winsys; tr_ctx->base.screen = screen; @@ -1018,14 +973,16 @@ trace_context_create(struct pipe_screen *screen, tr_ctx->base.flush = trace_context_flush; tr_ctx->pipe = pipe; - tr_ctx->stream = stream = trace_screen(screen)->stream; - trace_dump_call_begin(stream, "", "pipe_context_create"); - trace_dump_arg_begin(stream, "screen"); - trace_dump_ptr(stream, pipe->screen); - trace_dump_arg_end(stream); - trace_dump_ret(stream, ptr, pipe); - trace_dump_call_end(stream); + trace_dump_call_begin("", "pipe_context_create"); + trace_dump_arg_begin("screen"); + trace_dump_ptr(pipe->screen); + trace_dump_arg_end(); + trace_dump_ret(ptr, pipe); + trace_dump_call_end(); return &tr_ctx->base; + +error1: + return pipe; } diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 1aa822ba02e..679371e3105 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -34,16 +34,11 @@ #include "pipe/p_context.h" -struct trace_stream; - - struct trace_context { struct pipe_context base; struct pipe_context *pipe; - - struct trace_stream *stream; }; diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 5269c4ddc84..6ebb639b7c7 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -38,6 +38,11 @@ * @author Jose Fonseca <jrfonseca@tungstengraphics.com> */ +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include <stdlib.h> +#endif #include "pipe/p_compiler.h" #include "util/u_string.h" @@ -46,318 +51,328 @@ #include "tr_dump.h" +static struct trace_stream *stream = NULL; + + static INLINE void -trace_dump_write(struct trace_stream *stream, const char *s) +trace_dump_write(const char *buf, size_t size) { - trace_stream_write(stream, s, strlen(s)); + if(stream) + trace_stream_write(stream, buf, size); } static INLINE void -trace_dump_writef(struct trace_stream *stream, const char *format, ...) +trace_dump_writes(const char *s) { - char buf[1024]; + trace_dump_write(s, strlen(s)); +} + + +static INLINE void +trace_dump_writef(const char *format, ...) +{ + static char buf[1024]; + unsigned len; va_list ap; va_start(ap, format); - util_vsnprintf(buf, sizeof(buf), format, ap); + len = util_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); - trace_dump_write(stream, buf); + trace_dump_write(buf, len); } static INLINE void -trace_dump_escape(struct trace_stream *stream, const char *str) +trace_dump_escape(const char *str) { const unsigned char *p = (const unsigned char *)str; unsigned char c; while((c = *p++) != 0) { if(c == '<') - trace_dump_write(stream, "<"); + trace_dump_writes("<"); else if(c == '>') - trace_dump_write(stream, ">"); + trace_dump_writes(">"); else if(c == '&') - trace_dump_write(stream, "&"); + trace_dump_writes("&"); else if(c == '\'') - trace_dump_write(stream, "'"); + trace_dump_writes("'"); else if(c == '\"') - trace_dump_write(stream, """); + trace_dump_writes("""); else if(c >= 0x20 && c <= 0x7e) - trace_dump_writef(stream, "%c", c); + trace_dump_writef("%c", c); else - trace_dump_writef(stream, "&#%u;", c); + trace_dump_writef("&#%u;", c); } } static INLINE void -trace_dump_indent(struct trace_stream *stream, unsigned level) +trace_dump_indent(unsigned level) { unsigned i; for(i = 0; i < level; ++i) - trace_dump_write(stream, "\t"); + trace_dump_writes("\t"); } static INLINE void -trace_dump_newline(struct trace_stream *stream) +trace_dump_newline(void) { - trace_dump_write(stream, "\n"); + trace_dump_writes("\n"); } static INLINE void -trace_dump_tag(struct trace_stream *stream, - const char *name) +trace_dump_tag(const char *name) { - trace_dump_write(stream, "<"); - trace_dump_write(stream, name); - trace_dump_write(stream, "/>"); + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes("/>"); } static INLINE void -trace_dump_tag_begin(struct trace_stream *stream, - const char *name) +trace_dump_tag_begin(const char *name) { - trace_dump_write(stream, "<"); - trace_dump_write(stream, name); - trace_dump_write(stream, ">"); + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(">"); } static INLINE void -trace_dump_tag_begin1(struct trace_stream *stream, - const char *name, +trace_dump_tag_begin1(const char *name, const char *attr1, const char *value1) { - trace_dump_write(stream, "<"); - trace_dump_write(stream, name); - trace_dump_write(stream, " "); - trace_dump_write(stream, attr1); - trace_dump_write(stream, "='"); - trace_dump_escape(stream, value1); - trace_dump_write(stream, "'>"); + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(" "); + trace_dump_writes(attr1); + trace_dump_writes("='"); + trace_dump_escape(value1); + trace_dump_writes("'>"); } static INLINE void -trace_dump_tag_begin2(struct trace_stream *stream, - const char *name, +trace_dump_tag_begin2(const char *name, const char *attr1, const char *value1, const char *attr2, const char *value2) { - trace_dump_write(stream, "<"); - trace_dump_write(stream, name); - trace_dump_write(stream, " "); - trace_dump_write(stream, attr1); - trace_dump_write(stream, "=\'"); - trace_dump_escape(stream, value1); - trace_dump_write(stream, "\' "); - trace_dump_write(stream, attr2); - trace_dump_write(stream, "=\'"); - trace_dump_escape(stream, value2); - trace_dump_write(stream, "\'>"); + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(" "); + trace_dump_writes(attr1); + trace_dump_writes("=\'"); + trace_dump_escape(value1); + trace_dump_writes("\' "); + trace_dump_writes(attr2); + trace_dump_writes("=\'"); + trace_dump_escape(value2); + trace_dump_writes("\'>"); } static INLINE void -trace_dump_tag_begin3(struct trace_stream *stream, - const char *name, +trace_dump_tag_begin3(const char *name, const char *attr1, const char *value1, const char *attr2, const char *value2, const char *attr3, const char *value3) { - trace_dump_write(stream, "<"); - trace_dump_write(stream, name); - trace_dump_write(stream, " "); - trace_dump_write(stream, attr1); - trace_dump_write(stream, "=\'"); - trace_dump_escape(stream, value1); - trace_dump_write(stream, "\' "); - trace_dump_write(stream, attr2); - trace_dump_write(stream, "=\'"); - trace_dump_escape(stream, value2); - trace_dump_write(stream, "\' "); - trace_dump_write(stream, attr3); - trace_dump_write(stream, "=\'"); - trace_dump_escape(stream, value3); - trace_dump_write(stream, "\'>"); + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(" "); + trace_dump_writes(attr1); + trace_dump_writes("=\'"); + trace_dump_escape(value1); + trace_dump_writes("\' "); + trace_dump_writes(attr2); + trace_dump_writes("=\'"); + trace_dump_escape(value2); + trace_dump_writes("\' "); + trace_dump_writes(attr3); + trace_dump_writes("=\'"); + trace_dump_escape(value3); + trace_dump_writes("\'>"); } static INLINE void -trace_dump_tag_end(struct trace_stream *stream, - const char *name) -{ - trace_dump_write(stream, "</"); - trace_dump_write(stream, name); - trace_dump_write(stream, ">"); -} - - -void trace_dump_trace_begin(struct trace_stream *stream, - unsigned version) -{ - trace_dump_write(stream, "<?xml version='1.0' encoding='UTF-8'?>\n"); - trace_dump_write(stream, "<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); - trace_dump_writef(stream, "<trace version='%u'>\n", version); -} - - -void trace_dump_trace_end(struct trace_stream *stream) -{ - trace_dump_write(stream, "</trace>\n"); +trace_dump_tag_end(const char *name) +{ + trace_dump_writes("</"); + trace_dump_writes(name); + trace_dump_writes(">"); +} + +boolean trace_dump_trace_begin() +{ + if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) + return FALSE; + + stream = trace_stream_create("gallium", "trace"); + if(!stream) + return FALSE; + + trace_dump_writes("<?xml version='1.0' encoding='UTF-8'?>\n"); + trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); + trace_dump_writes("<trace version='0.1'>\n"); + +#if defined(PIPE_OS_LINUX) + /* Linux applications rarely cleanup GL / Gallium resources so catch + * application exit here */ + atexit(trace_dump_trace_end); +#endif + + return TRUE; +} + +void trace_dump_trace_end(void) +{ + if(stream) { + trace_dump_writes("</trace>\n"); + trace_stream_close(stream); + stream = NULL; + } } -void trace_dump_call_begin(struct trace_stream *stream, - const char *klass, const char *method) +void trace_dump_call_begin(const char *klass, const char *method) { - trace_dump_indent(stream, 1); - trace_dump_tag_begin2(stream, "call", "class", klass, "method", method); - trace_dump_newline(stream); + trace_dump_indent(1); + trace_dump_tag_begin2("call", "class", klass, "method", method); + trace_dump_newline(); } -void trace_dump_call_end(struct trace_stream *stream) +void trace_dump_call_end(void) { - trace_dump_indent(stream, 1); - trace_dump_tag_end(stream, "call"); - trace_dump_newline(stream); + trace_dump_indent(1); + trace_dump_tag_end("call"); + trace_dump_newline(); } -void trace_dump_arg_begin(struct trace_stream *stream, - const char *name) +void trace_dump_arg_begin(const char *name) { - trace_dump_indent(stream, 2); - trace_dump_tag_begin1(stream, "arg", "name", name); + trace_dump_indent(2); + trace_dump_tag_begin1("arg", "name", name); } -void trace_dump_arg_end(struct trace_stream *stream) +void trace_dump_arg_end(void) { - trace_dump_tag_end(stream, "arg"); - trace_dump_newline(stream); + trace_dump_tag_end("arg"); + trace_dump_newline(); } -void trace_dump_ret_begin(struct trace_stream *stream) +void trace_dump_ret_begin(void) { - trace_dump_indent(stream, 2); - trace_dump_tag_begin(stream, "ret"); + trace_dump_indent(2); + trace_dump_tag_begin("ret"); } -void trace_dump_ret_end(struct trace_stream *stream) +void trace_dump_ret_end(void) { - trace_dump_tag_end(stream, "ret"); - trace_dump_newline(stream); + trace_dump_tag_end("ret"); + trace_dump_newline(); } -void trace_dump_bool(struct trace_stream *stream, - int value) +void trace_dump_bool(int value) { - trace_dump_writef(stream, "<bool>%c</bool>", value ? '1' : '0'); + trace_dump_writef("<bool>%c</bool>", value ? '1' : '0'); } -void trace_dump_int(struct trace_stream *stream, - long int value) +void trace_dump_int(long int value) { - trace_dump_writef(stream, "<int>%li</int>", value); + trace_dump_writef("<int>%li</int>", value); } -void trace_dump_uint(struct trace_stream *stream, - long unsigned value) +void trace_dump_uint(long unsigned value) { - trace_dump_writef(stream, "<uint>%lu</uint>", value); + trace_dump_writef("<uint>%lu</uint>", value); } -void trace_dump_float(struct trace_stream *stream, - double value) +void trace_dump_float(double value) { - trace_dump_writef(stream, "<float>%g</float>", value); + trace_dump_writef("<float>%g</float>", value); } -void trace_dump_bytes(struct trace_stream *stream, - const void *data, +void trace_dump_bytes(const void *data, long unsigned size) { static const char hex_table[16] = "0123456789ABCDEF"; const uint8_t *p = data; long unsigned i; - trace_dump_write(stream, "<bytes>"); + trace_dump_writes("<bytes>"); for(i = 0; i < size; ++i) { uint8_t byte = *p++; char hex[2]; hex[0] = hex_table[byte >> 4]; hex[1] = hex_table[byte & 0xf]; - trace_stream_write(stream, hex, 2); + trace_dump_write(hex, 2); } - trace_dump_write(stream, "</bytes>"); + trace_dump_writes("</bytes>"); } -void trace_dump_string(struct trace_stream *stream, - const char *str) +void trace_dump_string(const char *str) { - trace_dump_write(stream, "<string>"); - trace_dump_escape(stream, str); - trace_dump_write(stream, "</string>"); + trace_dump_writes("<string>"); + trace_dump_escape(str); + trace_dump_writes("</string>"); } -void trace_dump_enum(struct trace_stream *stream, - const char *value) +void trace_dump_enum(const char *value) { - trace_dump_write(stream, "<enum>"); - trace_dump_escape(stream, value); - trace_dump_write(stream, "</enum>"); + trace_dump_writes("<enum>"); + trace_dump_escape(value); + trace_dump_writes("</enum>"); } -void trace_dump_array_begin(struct trace_stream *stream) +void trace_dump_array_begin(void) { - trace_dump_write(stream, "<array>"); + trace_dump_writes("<array>"); } -void trace_dump_array_end(struct trace_stream *stream) +void trace_dump_array_end(void) { - trace_dump_write(stream, "</array>"); + trace_dump_writes("</array>"); } -void trace_dump_elem_begin(struct trace_stream *stream) +void trace_dump_elem_begin(void) { - trace_dump_write(stream, "<elem>"); + trace_dump_writes("<elem>"); } -void trace_dump_elem_end(struct trace_stream *stream) +void trace_dump_elem_end(void) { - trace_dump_write(stream, "</elem>"); + trace_dump_writes("</elem>"); } -void trace_dump_struct_begin(struct trace_stream *stream, - const char *name) +void trace_dump_struct_begin(const char *name) { - trace_dump_writef(stream, "<struct name='%s'>", name); + trace_dump_writef("<struct name='%s'>", name); } -void trace_dump_struct_end(struct trace_stream *stream) +void trace_dump_struct_end(void) { - trace_dump_write(stream, "</struct>"); + trace_dump_writes("</struct>"); } -void trace_dump_member_begin(struct trace_stream *stream, - const char *name) +void trace_dump_member_begin(const char *name) { - trace_dump_writef(stream, "<member name='%s'>", name); + trace_dump_writef("<member name='%s'>", name); } -void trace_dump_member_end(struct trace_stream *stream) +void trace_dump_member_end(void) { - trace_dump_write(stream, "</member>"); + trace_dump_writes("</member>"); } -void trace_dump_null(struct trace_stream *stream) +void trace_dump_null(void) { - trace_dump_write(stream, "<null/>"); + trace_dump_writes("<null/>"); } -void trace_dump_ptr(struct trace_stream *stream, - const void *value) +void trace_dump_ptr(const void *value) { if(value) - trace_dump_writef(stream, "<ptr>%p</ptr>", value); + trace_dump_writef("<ptr>%p</ptr>", value); else - trace_dump_null(stream); + trace_dump_null(); } diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index b2367c3288a..0beb1023b15 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -4,7 +4,7 @@ * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation streams (the + * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to @@ -34,100 +34,98 @@ #define TR_DUMP_H +#include "pipe/p_compiler.h" #include "pipe/p_util.h" -struct trace_stream; - - -void trace_dump_trace_begin(struct trace_stream *stream, unsigned version); -void trace_dump_trace_end(struct trace_stream *stream); -void trace_dump_call_begin(struct trace_stream *stream, const char *klass, const char *method); -void trace_dump_call_end(struct trace_stream *stream); -void trace_dump_arg_begin(struct trace_stream *stream, const char *name); -void trace_dump_arg_end(struct trace_stream *stream); -void trace_dump_ret_begin(struct trace_stream *stream); -void trace_dump_ret_end(struct trace_stream *stream); -void trace_dump_bool(struct trace_stream *stream, int value); -void trace_dump_int(struct trace_stream *stream, long int value); -void trace_dump_uint(struct trace_stream *stream, long unsigned value); -void trace_dump_float(struct trace_stream *stream, double value); -void trace_dump_bytes(struct trace_stream *stream, const void *data, long unsigned size); -void trace_dump_string(struct trace_stream *stream, const char *str); -void trace_dump_enum(struct trace_stream *stream, const char *value); -void trace_dump_array_begin(struct trace_stream *stream); -void trace_dump_array_end(struct trace_stream *stream); -void trace_dump_elem_begin(struct trace_stream *stream); -void trace_dump_elem_end(struct trace_stream *stream); -void trace_dump_struct_begin(struct trace_stream *stream, const char *name); -void trace_dump_struct_end(struct trace_stream *stream); -void trace_dump_member_begin(struct trace_stream *stream, const char *name); -void trace_dump_member_end(struct trace_stream *stream); -void trace_dump_null(struct trace_stream *stream); -void trace_dump_ptr(struct trace_stream *stream, const void *value); +boolean trace_dump_trace_begin(void); +void trace_dump_trace_end(void); +void trace_dump_call_begin(const char *klass, const char *method); +void trace_dump_call_end(void); +void trace_dump_arg_begin(const char *name); +void trace_dump_arg_end(void); +void trace_dump_ret_begin(void); +void trace_dump_ret_end(void); +void trace_dump_bool(int value); +void trace_dump_int(long int value); +void trace_dump_uint(long unsigned value); +void trace_dump_float(double value); +void trace_dump_bytes(const void *data, long unsigned size); +void trace_dump_string(const char *str); +void trace_dump_enum(const char *value); +void trace_dump_array_begin(void); +void trace_dump_array_end(void); +void trace_dump_elem_begin(void); +void trace_dump_elem_end(void); +void trace_dump_struct_begin(const char *name); +void trace_dump_struct_end(void); +void trace_dump_member_begin(const char *name); +void trace_dump_member_end(void); +void trace_dump_null(void); +void trace_dump_ptr(const void *value); /* * Code saving macros. */ -#define trace_dump_arg(_stream, _type, _arg) \ +#define trace_dump_arg(_type, _arg) \ do { \ - trace_dump_arg_begin(_stream, #_arg); \ - trace_dump_##_type(_stream, _arg); \ - trace_dump_arg_end(_stream); \ + trace_dump_arg_begin(#_arg); \ + trace_dump_##_type(_arg); \ + trace_dump_arg_end(); \ } while(0) -#define trace_dump_ret(_stream, _type, _arg) \ +#define trace_dump_ret(_type, _arg) \ do { \ - trace_dump_ret_begin(_stream); \ - trace_dump_##_type(_stream, _arg); \ - trace_dump_ret_end(_stream); \ + trace_dump_ret_begin(); \ + trace_dump_##_type(_arg); \ + trace_dump_ret_end(); \ } while(0) -#define trace_dump_array(_stream, _type, _obj, _size) \ +#define trace_dump_array(_type, _obj, _size) \ do { \ unsigned long idx; \ - trace_dump_array_begin(_stream); \ + trace_dump_array_begin(); \ for(idx = 0; idx < (_size); ++idx) { \ - trace_dump_elem_begin(_stream); \ - trace_dump_##_type(_stream, (_obj)[idx]); \ - trace_dump_elem_end(_stream); \ + trace_dump_elem_begin(); \ + trace_dump_##_type((_obj)[idx]); \ + trace_dump_elem_end(); \ } \ - trace_dump_array_end(_stream); \ + trace_dump_array_end(); \ } while(0) -#define trace_dump_struct_array(_stream, _type, _obj, _size) \ +#define trace_dump_struct_array(_type, _obj, _size) \ do { \ unsigned long idx; \ - trace_dump_array_begin(_stream); \ + trace_dump_array_begin(); \ for(idx = 0; idx < (_size); ++idx) { \ - trace_dump_elem_begin(_stream); \ - trace_dump_##_type(_stream, &(_obj)[idx]); \ - trace_dump_elem_end(_stream); \ + trace_dump_elem_begin(); \ + trace_dump_##_type(&(_obj)[idx]); \ + trace_dump_elem_end(); \ } \ - trace_dump_array_end(_stream); \ + trace_dump_array_end(); \ } while(0) -#define trace_dump_member(_stream, _type, _obj, _member) \ +#define trace_dump_member(_type, _obj, _member) \ do { \ - trace_dump_member_begin(_stream, #_member); \ - trace_dump_##_type(_stream, (_obj)->_member); \ - trace_dump_member_end(_stream); \ + trace_dump_member_begin(#_member); \ + trace_dump_##_type((_obj)->_member); \ + trace_dump_member_end(); \ } while(0) -#define trace_dump_arg_array(_stream, _type, _arg, _size) \ +#define trace_dump_arg_array(_type, _arg, _size) \ do { \ - trace_dump_arg_begin(_stream, #_arg); \ - trace_dump_array(_stream, _type, _arg, _size); \ - trace_dump_arg_end(_stream); \ + trace_dump_arg_begin(#_arg); \ + trace_dump_array(_type, _arg, _size); \ + trace_dump_arg_end(); \ } while(0) -#define trace_dump_member_array(_stream, _type, _obj, _member) \ +#define trace_dump_member_array(_type, _obj, _member) \ do { \ - trace_dump_member_begin(_stream, #_member); \ - trace_dump_array(_stream, _type, (_obj)->_member, sizeof((_obj)->_member)/sizeof((_obj)->_member[0])); \ - trace_dump_member_end(_stream); \ + trace_dump_member_begin(#_member); \ + trace_dump_array(_type, (_obj)->_member, sizeof((_obj)->_member)/sizeof((_obj)->_member[0])); \ + trace_dump_member_end(); \ } while(0) diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 0e253123aee..68165a4553b 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -27,7 +27,6 @@ #include "pipe/p_util.h" -#include "tr_stream.h" #include "tr_dump.h" #include "tr_state.h" #include "tr_winsys.h" @@ -38,19 +37,18 @@ static const char * trace_screen_get_name(struct pipe_screen *_screen) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; const char *result; - trace_dump_call_begin(stream, "pipe_screen", "get_name"); + trace_dump_call_begin("pipe_screen", "get_name"); - trace_dump_arg(stream, ptr, screen); + trace_dump_arg(ptr, screen); result = screen->get_name(screen); - trace_dump_ret(stream, string, result); + trace_dump_ret(string, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -60,19 +58,18 @@ static const char * trace_screen_get_vendor(struct pipe_screen *_screen) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; const char *result; - trace_dump_call_begin(stream, "pipe_screen", "get_vendor"); + trace_dump_call_begin("pipe_screen", "get_vendor"); - trace_dump_arg(stream, ptr, screen); + trace_dump_arg(ptr, screen); result = screen->get_vendor(screen); - trace_dump_ret(stream, string, result); + trace_dump_ret(string, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -83,20 +80,19 @@ trace_screen_get_param(struct pipe_screen *_screen, int param) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; int result; - trace_dump_call_begin(stream, "pipe_screen", "get_param"); + trace_dump_call_begin("pipe_screen", "get_param"); - trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, int, param); + trace_dump_arg(ptr, screen); + trace_dump_arg(int, param); result = screen->get_param(screen, param); - trace_dump_ret(stream, int, result); + trace_dump_ret(int, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -107,20 +103,19 @@ trace_screen_get_paramf(struct pipe_screen *_screen, int param) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; float result; - trace_dump_call_begin(stream, "pipe_screen", "get_paramf"); + trace_dump_call_begin("pipe_screen", "get_paramf"); - trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, int, param); + trace_dump_arg(ptr, screen); + trace_dump_arg(int, param); result = screen->get_paramf(screen, param); - trace_dump_ret(stream, float, result); + trace_dump_ret(float, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -134,23 +129,22 @@ trace_screen_is_format_supported(struct pipe_screen *_screen, unsigned geom_flags) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; boolean result; - trace_dump_call_begin(stream, "pipe_screen", "is_format_supported"); + trace_dump_call_begin("pipe_screen", "is_format_supported"); - trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, format, format); - trace_dump_arg(stream, int, target); - trace_dump_arg(stream, uint, tex_usage); - trace_dump_arg(stream, uint, geom_flags); + trace_dump_arg(ptr, screen); + trace_dump_arg(format, format); + trace_dump_arg(int, target); + trace_dump_arg(uint, tex_usage); + trace_dump_arg(uint, geom_flags); result = screen->is_format_supported(screen, format, target, tex_usage, geom_flags); - trace_dump_ret(stream, bool, result); + trace_dump_ret(bool, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -161,20 +155,19 @@ trace_screen_texture_create(struct pipe_screen *_screen, const struct pipe_texture *templat) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; struct pipe_texture *result; - trace_dump_call_begin(stream, "pipe_screen", "texture_create"); + trace_dump_call_begin("pipe_screen", "texture_create"); - trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, template, templat); + trace_dump_arg(ptr, screen); + trace_dump_arg(template, templat); result = screen->texture_create(screen, templat); - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -187,23 +180,22 @@ trace_screen_texture_blanket(struct pipe_screen *_screen, struct pipe_buffer *buffer) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; unsigned pitch = *ppitch; struct pipe_texture *result; - trace_dump_call_begin(stream, "pipe_screen", "texture_blanket"); + trace_dump_call_begin("pipe_screen", "texture_blanket"); - trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, template, templat); - trace_dump_arg(stream, uint, pitch); - trace_dump_arg(stream, ptr, buffer); + trace_dump_arg(ptr, screen); + trace_dump_arg(template, templat); + trace_dump_arg(uint, pitch); + trace_dump_arg(ptr, buffer); result = screen->texture_blanket(screen, templat, ppitch, buffer); - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -214,18 +206,17 @@ trace_screen_texture_release(struct pipe_screen *_screen, struct pipe_texture **ptexture) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; struct pipe_texture *texture = *ptexture; - trace_dump_call_begin(stream, "pipe_screen", "texture_release"); + trace_dump_call_begin("pipe_screen", "texture_release"); - trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, ptr, texture); + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, texture); screen->texture_release(screen, ptexture); - trace_dump_call_end(stream); + trace_dump_call_end(); } static struct pipe_surface * @@ -236,24 +227,23 @@ trace_screen_get_tex_surface(struct pipe_screen *_screen, unsigned usage) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; struct pipe_surface *result; - trace_dump_call_begin(stream, "pipe_screen", "get_tex_surface"); + trace_dump_call_begin("pipe_screen", "get_tex_surface"); - trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, ptr, texture); - trace_dump_arg(stream, uint, face); - trace_dump_arg(stream, uint, level); - trace_dump_arg(stream, uint, zslice); - trace_dump_arg(stream, uint, usage); + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, texture); + trace_dump_arg(uint, face); + trace_dump_arg(uint, level); + trace_dump_arg(uint, zslice); + trace_dump_arg(uint, usage); result = screen->get_tex_surface(screen, texture, face, level, zslice, usage); - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -264,18 +254,17 @@ trace_screen_tex_surface_release(struct pipe_screen *_screen, struct pipe_surface **psurface) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; struct pipe_surface *surface = *psurface; - trace_dump_call_begin(stream, "pipe_screen", "tex_surface_release"); + trace_dump_call_begin("pipe_screen", "tex_surface_release"); - trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, ptr, surface); + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, surface); screen->tex_surface_release(screen, psurface); - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -285,21 +274,20 @@ trace_screen_surface_map(struct pipe_screen *_screen, unsigned flags) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; struct pipe_surface *result; - trace_dump_call_begin(stream, "pipe_screen", "surface_map"); + trace_dump_call_begin("pipe_screen", "surface_map"); - trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, ptr, surface); - trace_dump_arg(stream, uint, flags); + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, surface); + trace_dump_arg(uint, flags); result = screen->surface_map(screen, surface, flags); - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -310,17 +298,16 @@ trace_screen_surface_unmap(struct pipe_screen *_screen, struct pipe_surface *surface) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; - trace_dump_call_begin(stream, "pipe_screen", "surface_unmap"); + trace_dump_call_begin("pipe_screen", "surface_unmap"); - trace_dump_arg(stream, ptr, screen); - trace_dump_arg(stream, ptr, surface); + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, surface); screen->surface_unmap(screen, surface); - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -328,20 +315,17 @@ static void trace_screen_destroy(struct pipe_screen *_screen) { struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_stream *stream = tr_scr->stream; struct pipe_screen *screen = tr_scr->screen; - trace_dump_call_begin(stream, "pipe_screen", "destroy"); + trace_dump_call_begin("pipe_screen", "destroy"); - trace_dump_arg(stream, ptr, screen); + trace_dump_arg(ptr, screen); screen->destroy(screen); - trace_dump_call_end(stream); + trace_dump_call_end(); - trace_dump_trace_end(stream); - - trace_stream_close(stream); + trace_dump_trace_end(); FREE(tr_scr); } @@ -350,26 +334,22 @@ trace_screen_destroy(struct pipe_screen *_screen) struct pipe_screen * trace_screen_create(struct pipe_screen *screen) { - struct trace_stream *stream; struct trace_screen *tr_scr; struct pipe_winsys *winsys; - if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) - return screen; - - tr_scr = CALLOC_STRUCT(trace_screen); - if(!tr_scr) - return NULL; + if(!screen) + goto error1; - tr_scr->stream = stream = trace_stream_create("gallium", "trace"); - if(!tr_scr->stream) - return NULL; + if(!trace_dump_trace_begin()) + goto error1; - trace_dump_trace_begin(stream, 0); + tr_scr = CALLOC_STRUCT(trace_screen); + if(!tr_scr) + goto error2; - winsys = trace_winsys_create(stream, screen->winsys); + winsys = trace_winsys_create(screen->winsys); if(!winsys) - return NULL; + goto error3; tr_scr->base.winsys = winsys; tr_scr->base.destroy = trace_screen_destroy; @@ -387,14 +367,20 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.surface_unmap = trace_screen_surface_unmap; tr_scr->screen = screen; - tr_scr->stream = stream = trace_winsys(winsys)->stream; - trace_dump_call_begin(stream, "", "pipe_screen_create"); - trace_dump_arg_begin(stream, "winsys"); - trace_dump_ptr(stream, screen->winsys); - trace_dump_arg_end(stream); - trace_dump_ret(stream, ptr, screen); - trace_dump_call_end(stream); + trace_dump_call_begin("", "pipe_screen_create"); + trace_dump_arg_begin("winsys"); + trace_dump_ptr(screen->winsys); + trace_dump_arg_end(); + trace_dump_ret(ptr, screen); + trace_dump_call_end(); return &tr_scr->base; + +error3: + FREE(tr_scr); +error2: + trace_dump_trace_end(); +error1: + return screen; } diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h index 40b844778fe..446c4af6a6b 100644 --- a/src/gallium/drivers/trace/tr_screen.h +++ b/src/gallium/drivers/trace/tr_screen.h @@ -34,16 +34,11 @@ #include "pipe/p_screen.h" -struct trace_stream; - - struct trace_screen { struct pipe_screen base; struct pipe_screen *screen; - - struct trace_stream *stream; }; diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index e074ae7abc1..7b35e89e754 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -33,461 +33,442 @@ #include "tr_state.h" -void trace_dump_format(struct trace_stream *stream, - enum pipe_format format) +void trace_dump_format(enum pipe_format format) { - trace_dump_enum(stream, pf_name(format) ); + trace_dump_enum(pf_name(format) ); } -void trace_dump_block(struct trace_stream *stream, - const struct pipe_format_block *block) +void trace_dump_block(const struct pipe_format_block *block) { - trace_dump_struct_begin(stream, "pipe_format_block"); - trace_dump_member(stream, uint, block, size); - trace_dump_member(stream, uint, block, width); - trace_dump_member(stream, uint, block, height); - trace_dump_struct_end(stream); + trace_dump_struct_begin("pipe_format_block"); + trace_dump_member(uint, block, size); + trace_dump_member(uint, block, width); + trace_dump_member(uint, block, height); + trace_dump_struct_end(); } -void trace_dump_template(struct trace_stream *stream, - const struct pipe_texture *templat) +void trace_dump_template(const struct pipe_texture *templat) { assert(templat); if(!templat) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_texture"); + trace_dump_struct_begin("pipe_texture"); - trace_dump_member(stream, int, templat, target); - trace_dump_member(stream, format, templat, format); + trace_dump_member(int, templat, target); + trace_dump_member(format, templat, format); - trace_dump_member_begin(stream, "width"); - trace_dump_array(stream, uint, templat->width, 1); - trace_dump_member_end(stream); + trace_dump_member_begin("width"); + trace_dump_array(uint, templat->width, 1); + trace_dump_member_end(); - trace_dump_member_begin(stream, "height"); - trace_dump_array(stream, uint, templat->height, 1); - trace_dump_member_end(stream); + trace_dump_member_begin("height"); + trace_dump_array(uint, templat->height, 1); + trace_dump_member_end(); - trace_dump_member_begin(stream, "depth"); - trace_dump_array(stream, uint, templat->depth, 1); - trace_dump_member_end(stream); + trace_dump_member_begin("depth"); + trace_dump_array(uint, templat->depth, 1); + trace_dump_member_end(); - trace_dump_member_begin(stream, "block"); - trace_dump_block(stream, &templat->block); - trace_dump_member_end(stream); + trace_dump_member_begin("block"); + trace_dump_block(&templat->block); + trace_dump_member_end(); - trace_dump_member(stream, uint, templat, last_level); - trace_dump_member(stream, uint, templat, tex_usage); + trace_dump_member(uint, templat, last_level); + trace_dump_member(uint, templat, tex_usage); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_rasterizer_state(struct trace_stream *stream, - const struct pipe_rasterizer_state *state) +void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_rasterizer_state"); - - trace_dump_member(stream, bool, state, flatshade); - trace_dump_member(stream, bool, state, light_twoside); - trace_dump_member(stream, uint, state, front_winding); - trace_dump_member(stream, uint, state, cull_mode); - trace_dump_member(stream, uint, state, fill_cw); - trace_dump_member(stream, uint, state, fill_ccw); - trace_dump_member(stream, bool, state, offset_cw); - trace_dump_member(stream, bool, state, offset_ccw); - trace_dump_member(stream, bool, state, scissor); - trace_dump_member(stream, bool, state, poly_smooth); - trace_dump_member(stream, bool, state, poly_stipple_enable); - trace_dump_member(stream, bool, state, point_smooth); - trace_dump_member(stream, bool, state, point_sprite); - trace_dump_member(stream, bool, state, point_size_per_vertex); - trace_dump_member(stream, bool, state, multisample); - trace_dump_member(stream, bool, state, line_smooth); - trace_dump_member(stream, bool, state, line_stipple_enable); - trace_dump_member(stream, uint, state, line_stipple_factor); - trace_dump_member(stream, uint, state, line_stipple_pattern); - trace_dump_member(stream, bool, state, line_last_pixel); - trace_dump_member(stream, bool, state, bypass_clipping); - trace_dump_member(stream, bool, state, bypass_vs); - trace_dump_member(stream, bool, state, origin_lower_left); - trace_dump_member(stream, bool, state, flatshade_first); - trace_dump_member(stream, bool, state, gl_rasterization_rules); - - trace_dump_member(stream, float, state, line_width); - trace_dump_member(stream, float, state, point_size); - trace_dump_member(stream, float, state, point_size_min); - trace_dump_member(stream, float, state, point_size_max); - trace_dump_member(stream, float, state, offset_units); - trace_dump_member(stream, float, state, offset_scale); + trace_dump_struct_begin("pipe_rasterizer_state"); + + trace_dump_member(bool, state, flatshade); + trace_dump_member(bool, state, light_twoside); + trace_dump_member(uint, state, front_winding); + trace_dump_member(uint, state, cull_mode); + trace_dump_member(uint, state, fill_cw); + trace_dump_member(uint, state, fill_ccw); + trace_dump_member(bool, state, offset_cw); + trace_dump_member(bool, state, offset_ccw); + trace_dump_member(bool, state, scissor); + trace_dump_member(bool, state, poly_smooth); + trace_dump_member(bool, state, poly_stipple_enable); + trace_dump_member(bool, state, point_smooth); + trace_dump_member(bool, state, point_sprite); + trace_dump_member(bool, state, point_size_per_vertex); + trace_dump_member(bool, state, multisample); + trace_dump_member(bool, state, line_smooth); + trace_dump_member(bool, state, line_stipple_enable); + trace_dump_member(uint, state, line_stipple_factor); + trace_dump_member(uint, state, line_stipple_pattern); + trace_dump_member(bool, state, line_last_pixel); + trace_dump_member(bool, state, bypass_clipping); + trace_dump_member(bool, state, bypass_vs); + trace_dump_member(bool, state, origin_lower_left); + trace_dump_member(bool, state, flatshade_first); + trace_dump_member(bool, state, gl_rasterization_rules); + + trace_dump_member(float, state, line_width); + trace_dump_member(float, state, point_size); + trace_dump_member(float, state, point_size_min); + trace_dump_member(float, state, point_size_max); + trace_dump_member(float, state, offset_units); + trace_dump_member(float, state, offset_scale); - trace_dump_member_array(stream, uint, state, sprite_coord_mode); + trace_dump_member_array(uint, state, sprite_coord_mode); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_poly_stipple(struct trace_stream *stream, - const struct pipe_poly_stipple *state) +void trace_dump_poly_stipple(const struct pipe_poly_stipple *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_poly_stipple"); + trace_dump_struct_begin("pipe_poly_stipple"); - trace_dump_member_begin(stream, "stipple"); - trace_dump_array(stream, - uint, + trace_dump_member_begin("stipple"); + trace_dump_array(uint, state->stipple, Elements(state->stipple)); - trace_dump_member_end(stream); + trace_dump_member_end(); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_viewport_state(struct trace_stream *stream, - const struct pipe_viewport_state *state) +void trace_dump_viewport_state(const struct pipe_viewport_state *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_viewport_state"); + trace_dump_struct_begin("pipe_viewport_state"); - trace_dump_member_array(stream, float, state, scale); - trace_dump_member_array(stream, float, state, translate); + trace_dump_member_array(float, state, scale); + trace_dump_member_array(float, state, translate); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_scissor_state(struct trace_stream *stream, - const struct pipe_scissor_state *state) +void trace_dump_scissor_state(const struct pipe_scissor_state *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_scissor_state"); + trace_dump_struct_begin("pipe_scissor_state"); - trace_dump_member(stream, uint, state, minx); - trace_dump_member(stream, uint, state, miny); - trace_dump_member(stream, uint, state, maxx); - trace_dump_member(stream, uint, state, maxy); + trace_dump_member(uint, state, minx); + trace_dump_member(uint, state, miny); + trace_dump_member(uint, state, maxx); + trace_dump_member(uint, state, maxy); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_clip_state(struct trace_stream *stream, - const struct pipe_clip_state *state) +void trace_dump_clip_state(const struct pipe_clip_state *state) { unsigned i; assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_scissor_state"); + trace_dump_struct_begin("pipe_scissor_state"); - trace_dump_member_begin(stream, "ucp"); - trace_dump_array_begin(stream); + trace_dump_member_begin("ucp"); + trace_dump_array_begin(); for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) - trace_dump_array(stream, float, state->ucp[i], 4); - trace_dump_array_end(stream); - trace_dump_member_end(stream); + trace_dump_array(float, state->ucp[i], 4); + trace_dump_array_end(); + trace_dump_member_end(); - trace_dump_member(stream, uint, state, nr); + trace_dump_member(uint, state, nr); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_constant_buffer(struct trace_stream *stream, - const struct pipe_constant_buffer *state) +void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_constant_buffer"); + trace_dump_struct_begin("pipe_constant_buffer"); - trace_dump_member(stream, ptr, state, buffer); - trace_dump_member(stream, uint, state, size); + trace_dump_member(ptr, state, buffer); + trace_dump_member(uint, state, size); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_shader_state(struct trace_stream *stream, - const struct pipe_shader_state *state) +void trace_dump_shader_state(const struct pipe_shader_state *state) { static char str[8192]; assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } tgsi_dump_str(state->tokens, 0, str, sizeof(str)); - trace_dump_struct_begin(stream, "pipe_shader_state"); + trace_dump_struct_begin("pipe_shader_state"); - trace_dump_member_begin(stream, "tokens"); - trace_dump_string(stream, str); - trace_dump_member_end(stream); + trace_dump_member_begin("tokens"); + trace_dump_string(str); + trace_dump_member_end(); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_depth_stencil_alpha_state(struct trace_stream *stream, - const struct pipe_depth_stencil_alpha_state *state) +void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state) { unsigned i; assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_depth_stencil_alpha_state"); + trace_dump_struct_begin("pipe_depth_stencil_alpha_state"); - trace_dump_member_begin(stream, "depth"); - trace_dump_struct_begin(stream, "pipe_depth_state"); - trace_dump_member(stream, bool, &state->depth, enabled); - trace_dump_member(stream, bool, &state->depth, writemask); - trace_dump_member(stream, uint, &state->depth, func); - trace_dump_member(stream, bool, &state->depth, occlusion_count); - trace_dump_struct_end(stream); - trace_dump_member_end(stream); + trace_dump_member_begin("depth"); + trace_dump_struct_begin("pipe_depth_state"); + trace_dump_member(bool, &state->depth, enabled); + trace_dump_member(bool, &state->depth, writemask); + trace_dump_member(uint, &state->depth, func); + trace_dump_member(bool, &state->depth, occlusion_count); + trace_dump_struct_end(); + trace_dump_member_end(); - trace_dump_member_begin(stream, "stencil"); - trace_dump_array_begin(stream); + trace_dump_member_begin("stencil"); + trace_dump_array_begin(); for(i = 0; i < Elements(state->stencil); ++i) { - trace_dump_elem_begin(stream); - trace_dump_struct_begin(stream, "pipe_stencil_state"); - trace_dump_member(stream, bool, &state->stencil[i], enabled); - trace_dump_member(stream, uint, &state->stencil[i], func); - trace_dump_member(stream, uint, &state->stencil[i], fail_op); - trace_dump_member(stream, uint, &state->stencil[i], zpass_op); - trace_dump_member(stream, uint, &state->stencil[i], zfail_op); - trace_dump_member(stream, uint, &state->stencil[i], ref_value); - trace_dump_member(stream, uint, &state->stencil[i], value_mask); - trace_dump_member(stream, uint, &state->stencil[i], write_mask); - trace_dump_struct_end(stream); - trace_dump_elem_end(stream); + trace_dump_elem_begin(); + trace_dump_struct_begin("pipe_stencil_state"); + trace_dump_member(bool, &state->stencil[i], enabled); + trace_dump_member(uint, &state->stencil[i], func); + trace_dump_member(uint, &state->stencil[i], fail_op); + trace_dump_member(uint, &state->stencil[i], zpass_op); + trace_dump_member(uint, &state->stencil[i], zfail_op); + trace_dump_member(uint, &state->stencil[i], ref_value); + trace_dump_member(uint, &state->stencil[i], value_mask); + trace_dump_member(uint, &state->stencil[i], write_mask); + trace_dump_struct_end(); + trace_dump_elem_end(); } - trace_dump_array_end(stream); - trace_dump_member_end(stream); - - trace_dump_member_begin(stream, "alpha"); - trace_dump_struct_begin(stream, "pipe_alpha_state"); - trace_dump_member(stream, bool, &state->alpha, enabled); - trace_dump_member(stream, uint, &state->alpha, func); - trace_dump_member(stream, float, &state->alpha, ref); - trace_dump_struct_end(stream); - trace_dump_member_end(stream); - - trace_dump_struct_end(stream); + trace_dump_array_end(); + trace_dump_member_end(); + + trace_dump_member_begin("alpha"); + trace_dump_struct_begin("pipe_alpha_state"); + trace_dump_member(bool, &state->alpha, enabled); + trace_dump_member(uint, &state->alpha, func); + trace_dump_member(float, &state->alpha, ref); + trace_dump_struct_end(); + trace_dump_member_end(); + + trace_dump_struct_end(); } -void trace_dump_blend_state(struct trace_stream *stream, - const struct pipe_blend_state *state) +void trace_dump_blend_state(const struct pipe_blend_state *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_blend_state"); + trace_dump_struct_begin("pipe_blend_state"); - trace_dump_member(stream, bool, state, blend_enable); + trace_dump_member(bool, state, blend_enable); - trace_dump_member(stream, uint, state, rgb_func); - trace_dump_member(stream, uint, state, rgb_src_factor); - trace_dump_member(stream, uint, state, rgb_dst_factor); + trace_dump_member(uint, state, rgb_func); + trace_dump_member(uint, state, rgb_src_factor); + trace_dump_member(uint, state, rgb_dst_factor); - trace_dump_member(stream, uint, state, alpha_func); - trace_dump_member(stream, uint, state, alpha_src_factor); - trace_dump_member(stream, uint, state, alpha_dst_factor); + trace_dump_member(uint, state, alpha_func); + trace_dump_member(uint, state, alpha_src_factor); + trace_dump_member(uint, state, alpha_dst_factor); - trace_dump_member(stream, bool, state, logicop_enable); - trace_dump_member(stream, uint, state, logicop_func); + trace_dump_member(bool, state, logicop_enable); + trace_dump_member(uint, state, logicop_func); - trace_dump_member(stream, uint, state, colormask); - trace_dump_member(stream, bool, state, dither); + trace_dump_member(uint, state, colormask); + trace_dump_member(bool, state, dither); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_blend_color(struct trace_stream *stream, - const struct pipe_blend_color *state) +void trace_dump_blend_color(const struct pipe_blend_color *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_blend_color"); + trace_dump_struct_begin("pipe_blend_color"); - trace_dump_member_array(stream, float, state, color); + trace_dump_member_array(float, state, color); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_framebuffer_state(struct trace_stream *stream, - const struct pipe_framebuffer_state *state) +void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) { - trace_dump_struct_begin(stream, "pipe_framebuffer_state"); + trace_dump_struct_begin("pipe_framebuffer_state"); - trace_dump_member(stream, uint, state, width); - trace_dump_member(stream, uint, state, height); - trace_dump_member(stream, uint, state, num_cbufs); - trace_dump_member_array(stream, ptr, state, cbufs); - trace_dump_member(stream, ptr, state, zsbuf); + trace_dump_member(uint, state, width); + trace_dump_member(uint, state, height); + trace_dump_member(uint, state, num_cbufs); + trace_dump_member_array(ptr, state, cbufs); + trace_dump_member(ptr, state, zsbuf); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_sampler_state(struct trace_stream *stream, - const struct pipe_sampler_state *state) +void trace_dump_sampler_state(const struct pipe_sampler_state *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_sampler_state"); - - trace_dump_member(stream, uint, state, wrap_s); - trace_dump_member(stream, uint, state, wrap_t); - trace_dump_member(stream, uint, state, wrap_r); - trace_dump_member(stream, uint, state, min_img_filter); - trace_dump_member(stream, uint, state, min_mip_filter); - trace_dump_member(stream, uint, state, mag_img_filter); - trace_dump_member(stream, bool, state, compare_mode); - trace_dump_member(stream, uint, state, compare_func); - trace_dump_member(stream, bool, state, normalized_coords); - trace_dump_member(stream, uint, state, prefilter); - trace_dump_member(stream, float, state, shadow_ambient); - trace_dump_member(stream, float, state, lod_bias); - trace_dump_member(stream, float, state, min_lod); - trace_dump_member(stream, float, state, max_lod); - trace_dump_member_array(stream, float, state, border_color); - trace_dump_member(stream, float, state, max_anisotropy); - - trace_dump_struct_end(stream); + trace_dump_struct_begin("pipe_sampler_state"); + + trace_dump_member(uint, state, wrap_s); + trace_dump_member(uint, state, wrap_t); + trace_dump_member(uint, state, wrap_r); + trace_dump_member(uint, state, min_img_filter); + trace_dump_member(uint, state, min_mip_filter); + trace_dump_member(uint, state, mag_img_filter); + trace_dump_member(bool, state, compare_mode); + trace_dump_member(uint, state, compare_func); + trace_dump_member(bool, state, normalized_coords); + trace_dump_member(uint, state, prefilter); + trace_dump_member(float, state, shadow_ambient); + trace_dump_member(float, state, lod_bias); + trace_dump_member(float, state, min_lod); + trace_dump_member(float, state, max_lod); + trace_dump_member_array(float, state, border_color); + trace_dump_member(float, state, max_anisotropy); + + trace_dump_struct_end(); } -void trace_dump_surface(struct trace_stream *stream, - const struct pipe_surface *state) +void trace_dump_surface(const struct pipe_surface *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_surface"); + trace_dump_struct_begin("pipe_surface"); - trace_dump_member(stream, ptr, state, buffer); - trace_dump_member(stream, format, state, format); - trace_dump_member(stream, uint, state, status); - trace_dump_member(stream, uint, state, clear_value); - trace_dump_member(stream, uint, state, width); - trace_dump_member(stream, uint, state, height); + trace_dump_member(ptr, state, buffer); + trace_dump_member(format, state, format); + trace_dump_member(uint, state, status); + trace_dump_member(uint, state, clear_value); + trace_dump_member(uint, state, width); + trace_dump_member(uint, state, height); - trace_dump_member_begin(stream, "block"); - trace_dump_block(stream, &state->block); - trace_dump_member_end(stream); + trace_dump_member_begin("block"); + trace_dump_block(&state->block); + trace_dump_member_end(); - trace_dump_member(stream, uint, state, nblocksx); - trace_dump_member(stream, uint, state, nblocksy); - trace_dump_member(stream, uint, state, stride); - trace_dump_member(stream, uint, state, layout); - trace_dump_member(stream, uint, state, offset); - trace_dump_member(stream, uint, state, refcount); - trace_dump_member(stream, uint, state, usage); - - trace_dump_member(stream, ptr, state, texture); - trace_dump_member(stream, uint, state, face); - trace_dump_member(stream, uint, state, level); - trace_dump_member(stream, uint, state, zslice); - - trace_dump_struct_end(stream); + trace_dump_member(uint, state, nblocksx); + trace_dump_member(uint, state, nblocksy); + trace_dump_member(uint, state, stride); + trace_dump_member(uint, state, layout); + trace_dump_member(uint, state, offset); + trace_dump_member(uint, state, refcount); + trace_dump_member(uint, state, usage); + + trace_dump_member(ptr, state, texture); + trace_dump_member(uint, state, face); + trace_dump_member(uint, state, level); + trace_dump_member(uint, state, zslice); + + trace_dump_struct_end(); } -void trace_dump_vertex_buffer(struct trace_stream *stream, - const struct pipe_vertex_buffer *state) +void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_vertex_buffer"); + trace_dump_struct_begin("pipe_vertex_buffer"); - trace_dump_member(stream, uint, state, pitch); - trace_dump_member(stream, uint, state, max_index); - trace_dump_member(stream, uint, state, buffer_offset); - trace_dump_member(stream, ptr, state, buffer); + trace_dump_member(uint, state, pitch); + trace_dump_member(uint, state, max_index); + trace_dump_member(uint, state, buffer_offset); + trace_dump_member(ptr, state, buffer); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } -void trace_dump_vertex_element(struct trace_stream *stream, - const struct pipe_vertex_element *state) +void trace_dump_vertex_element(const struct pipe_vertex_element *state) { assert(state); if(!state) { - trace_dump_null(stream); + trace_dump_null(); return; } - trace_dump_struct_begin(stream, "pipe_vertex_element"); + trace_dump_struct_begin("pipe_vertex_element"); - trace_dump_member(stream, uint, state, src_offset); + trace_dump_member(uint, state, src_offset); - trace_dump_member(stream, uint, state, vertex_buffer_index); - trace_dump_member(stream, uint, state, nr_components); + trace_dump_member(uint, state, vertex_buffer_index); + trace_dump_member(uint, state, nr_components); - trace_dump_member(stream, format, state, src_format); + trace_dump_member(format, state, src_format); - trace_dump_struct_end(stream); + trace_dump_struct_end(); } diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h index c1df63db6af..5ae533dc664 100644 --- a/src/gallium/drivers/trace/tr_state.h +++ b/src/gallium/drivers/trace/tr_state.h @@ -4,7 +4,7 @@ * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation streams (the + * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to @@ -33,63 +33,44 @@ #include "pipe/p_shader_tokens.h" -void trace_dump_format(struct trace_stream *stream, - enum pipe_format format); +void trace_dump_format(enum pipe_format format); -void trace_dump_block(struct trace_stream *stream, - const struct pipe_format_block *block); +void trace_dump_block(const struct pipe_format_block *block); -void trace_dump_template(struct trace_stream *stream, - const struct pipe_texture *templat); +void trace_dump_template(const struct pipe_texture *templat); -void trace_dump_rasterizer_state(struct trace_stream *stream, - const struct pipe_rasterizer_state *state); +void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state); -void trace_dump_poly_stipple(struct trace_stream *stream, - const struct pipe_poly_stipple *state); +void trace_dump_poly_stipple(const struct pipe_poly_stipple *state); -void trace_dump_viewport_state(struct trace_stream *stream, - const struct pipe_viewport_state *state); +void trace_dump_viewport_state(const struct pipe_viewport_state *state); -void trace_dump_scissor_state(struct trace_stream *stream, - const struct pipe_scissor_state *state); +void trace_dump_scissor_state(const struct pipe_scissor_state *state); -void trace_dump_clip_state(struct trace_stream *stream, - const struct pipe_clip_state *state); +void trace_dump_clip_state(const struct pipe_clip_state *state); -void trace_dump_constant_buffer(struct trace_stream *stream, - const struct pipe_constant_buffer *state); +void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); -void trace_dump_token(struct trace_stream *stream, - const struct tgsi_token *token); +void trace_dump_token(const struct tgsi_token *token); -void trace_dump_shader_state(struct trace_stream *stream, - const struct pipe_shader_state *state); +void trace_dump_shader_state(const struct pipe_shader_state *state); -void trace_dump_depth_stencil_alpha_state(struct trace_stream *stream, - const struct pipe_depth_stencil_alpha_state *state); +void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state); -void trace_dump_blend_state(struct trace_stream *stream, - const struct pipe_blend_state *state); +void trace_dump_blend_state(const struct pipe_blend_state *state); -void trace_dump_blend_color(struct trace_stream *stream, - const struct pipe_blend_color *state); +void trace_dump_blend_color(const struct pipe_blend_color *state); -void trace_dump_framebuffer_state(struct trace_stream *stream, - const struct pipe_framebuffer_state *state); +void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state); -void trace_dump_sampler_state(struct trace_stream *stream, - const struct pipe_sampler_state *state); +void trace_dump_sampler_state(const struct pipe_sampler_state *state); -void trace_dump_surface(struct trace_stream *stream, - const struct pipe_surface *state); +void trace_dump_surface(const struct pipe_surface *state); -void trace_dump_vertex_buffer(struct trace_stream *stream, - const struct pipe_vertex_buffer *state); +void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state); -void trace_dump_vertex_element(struct trace_stream *stream, - const struct pipe_vertex_element *state); +void trace_dump_vertex_element(const struct pipe_vertex_element *state); #endif /* TR_STATE_H */ diff --git a/src/gallium/drivers/trace/tr_stream.h b/src/gallium/drivers/trace/tr_stream.h index d50fed26917..53e854aa913 100644 --- a/src/gallium/drivers/trace/tr_stream.h +++ b/src/gallium/drivers/trace/tr_stream.h @@ -4,7 +4,7 @@ * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation streams (the + * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to @@ -28,6 +28,9 @@ /** * @file * Cross-platform sequential access stream abstraction. + * + * These are really general purpose file access functions, and might one day + * be moved into the util module. */ #ifndef TR_STREAM_H diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 524049148d0..be76c0716f0 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -29,7 +29,6 @@ #include "pipe/p_state.h" #include "util/u_hash_table.h" -#include "tr_stream.h" #include "tr_dump.h" #include "tr_state.h" #include "tr_winsys.h" @@ -51,19 +50,18 @@ static const char * trace_winsys_get_name(struct pipe_winsys *_winsys) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; const char *result; - trace_dump_call_begin(stream, "pipe_winsys", "get_name"); + trace_dump_call_begin("pipe_winsys", "get_name"); - trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(ptr, winsys); result = winsys->get_name(winsys); - trace_dump_ret(stream, string, result); + trace_dump_ret(string, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -75,18 +73,17 @@ trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys, void *context_private) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; - trace_dump_call_begin(stream, "pipe_winsys", "flush_frontbuffer"); + trace_dump_call_begin("pipe_winsys", "flush_frontbuffer"); - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, ptr, surface); - trace_dump_arg(stream, ptr, context_private); + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, surface); + trace_dump_arg(ptr, context_private); winsys->flush_frontbuffer(winsys, surface, context_private); - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -94,19 +91,18 @@ static struct pipe_surface * trace_winsys_surface_alloc(struct pipe_winsys *_winsys) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; struct pipe_surface *result; - trace_dump_call_begin(stream, "pipe_winsys", "surface_alloc"); + trace_dump_call_begin("pipe_winsys", "surface_alloc"); - trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(ptr, winsys); result = winsys->surface_alloc(winsys); - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -121,19 +117,18 @@ trace_winsys_surface_alloc_storage(struct pipe_winsys *_winsys, unsigned tex_usage) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; int result; - trace_dump_call_begin(stream, "pipe_winsys", "surface_alloc_storage"); + trace_dump_call_begin("pipe_winsys", "surface_alloc_storage"); - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, ptr, surface); - trace_dump_arg(stream, uint, width); - trace_dump_arg(stream, uint, height); - trace_dump_arg(stream, format, format); - trace_dump_arg(stream, uint, flags); - trace_dump_arg(stream, uint, tex_usage); + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, surface); + trace_dump_arg(uint, width); + trace_dump_arg(uint, height); + trace_dump_arg(format, format); + trace_dump_arg(uint, flags); + trace_dump_arg(uint, tex_usage); result = winsys->surface_alloc_storage(winsys, surface, @@ -142,9 +137,9 @@ trace_winsys_surface_alloc_storage(struct pipe_winsys *_winsys, flags, tex_usage); - trace_dump_ret(stream, int, result); + trace_dump_ret(int, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -155,18 +150,17 @@ trace_winsys_surface_release(struct pipe_winsys *_winsys, struct pipe_surface **psurface) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; struct pipe_surface *surface = *psurface; - trace_dump_call_begin(stream, "pipe_winsys", "surface_release"); + trace_dump_call_begin("pipe_winsys", "surface_release"); - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, ptr, surface); + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, surface); winsys->surface_release(winsys, psurface); - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -177,22 +171,21 @@ trace_winsys_buffer_create(struct pipe_winsys *_winsys, unsigned size) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; struct pipe_buffer *buffer; - trace_dump_call_begin(stream, "pipe_winsys", "buffer_create"); + trace_dump_call_begin("pipe_winsys", "buffer_create"); - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, uint, alignment); - trace_dump_arg(stream, uint, usage); - trace_dump_arg(stream, uint, size); + trace_dump_arg(ptr, winsys); + trace_dump_arg(uint, alignment); + trace_dump_arg(uint, usage); + trace_dump_arg(uint, size); buffer = winsys->buffer_create(winsys, alignment, usage, size); - trace_dump_ret(stream, ptr, buffer); + trace_dump_ret(ptr, buffer); - trace_dump_call_end(stream); + trace_dump_call_end(); /* Zero the buffer to avoid dumping uninitialized memory */ if(buffer->usage & PIPE_BUFFER_USAGE_CPU_WRITE) { @@ -214,21 +207,20 @@ trace_winsys_user_buffer_create(struct pipe_winsys *_winsys, unsigned bytes) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; struct pipe_buffer *result; - trace_dump_call_begin(stream, "pipe_winsys", "user_buffer_create"); + trace_dump_call_begin("pipe_winsys", "user_buffer_create"); - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, ptr, ptr); - trace_dump_arg(stream, uint, bytes); + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, ptr); + trace_dump_arg(uint, bytes); result = winsys->user_buffer_create(winsys, ptr, bytes); - trace_dump_ret(stream, ptr, result); + trace_dump_ret(ptr, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -260,27 +252,26 @@ trace_winsys_buffer_unmap(struct pipe_winsys *_winsys, struct pipe_buffer *buffer) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; const void *map; map = hash_table_get(tr_ws->buffer_maps, buffer); if(map) { - trace_dump_call_begin(stream, "pipe_winsys", "buffer_write"); + trace_dump_call_begin("pipe_winsys", "buffer_write"); - trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(ptr, winsys); - trace_dump_arg(stream, ptr, buffer); + trace_dump_arg(ptr, buffer); - trace_dump_arg_begin(stream, "data"); - trace_dump_bytes(stream, map, buffer->size); - trace_dump_arg_end(stream); + trace_dump_arg_begin("data"); + trace_dump_bytes(map, buffer->size); + trace_dump_arg_end(); - trace_dump_arg_begin(stream, "size"); - trace_dump_uint(stream, buffer->size); - trace_dump_arg_end(stream); + trace_dump_arg_begin("size"); + trace_dump_uint(buffer->size); + trace_dump_arg_end(); - trace_dump_call_end(stream); + trace_dump_call_end(); hash_table_remove(tr_ws->buffer_maps, buffer); } @@ -294,17 +285,16 @@ trace_winsys_buffer_destroy(struct pipe_winsys *_winsys, struct pipe_buffer *buffer) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; - trace_dump_call_begin(stream, "pipe_winsys", "buffer_destroy"); + trace_dump_call_begin("pipe_winsys", "buffer_destroy"); - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, ptr, buffer); + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, buffer); winsys->buffer_destroy(winsys, buffer); - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -314,19 +304,18 @@ trace_winsys_fence_reference(struct pipe_winsys *_winsys, struct pipe_fence_handle *src) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; struct pipe_fence_handle *dst = *pdst; - trace_dump_call_begin(stream, "pipe_winsys", "fence_reference"); + trace_dump_call_begin("pipe_winsys", "fence_reference"); - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, ptr, dst); - trace_dump_arg(stream, ptr, src); + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, dst); + trace_dump_arg(ptr, src); winsys->fence_reference(winsys, pdst, src); - trace_dump_call_end(stream); + trace_dump_call_end(); } @@ -336,21 +325,20 @@ trace_winsys_fence_signalled(struct pipe_winsys *_winsys, unsigned flag) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; int result; - trace_dump_call_begin(stream, "pipe_winsys", "fence_signalled"); + trace_dump_call_begin("pipe_winsys", "fence_signalled"); - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, ptr, fence); - trace_dump_arg(stream, uint, flag); + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, fence); + trace_dump_arg(uint, flag); result = winsys->fence_signalled(winsys, fence, flag); - trace_dump_ret(stream, int, result); + trace_dump_ret(int, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -362,21 +350,20 @@ trace_winsys_fence_finish(struct pipe_winsys *_winsys, unsigned flag) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; int result; - trace_dump_call_begin(stream, "pipe_winsys", "fence_finish"); + trace_dump_call_begin("pipe_winsys", "fence_finish"); - trace_dump_arg(stream, ptr, winsys); - trace_dump_arg(stream, ptr, fence); - trace_dump_arg(stream, uint, flag); + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, fence); + trace_dump_arg(uint, flag); result = winsys->fence_finish(winsys, fence, flag); - trace_dump_ret(stream, int, result); + trace_dump_ret(int, result); - trace_dump_call_end(stream); + trace_dump_call_end(); return result; } @@ -386,18 +373,17 @@ static void trace_winsys_destroy(struct pipe_winsys *_winsys) { struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct trace_stream *stream = tr_ws->stream; struct pipe_winsys *winsys = tr_ws->winsys; - trace_dump_call_begin(stream, "pipe_winsys", "destroy"); + trace_dump_call_begin("pipe_winsys", "destroy"); - trace_dump_arg(stream, ptr, winsys); + trace_dump_arg(ptr, winsys); /* winsys->destroy(winsys); */ - trace_dump_call_end(stream); + trace_dump_call_end(); hash_table_destroy(tr_ws->buffer_maps); @@ -406,15 +392,16 @@ trace_winsys_destroy(struct pipe_winsys *_winsys) struct pipe_winsys * -trace_winsys_create(struct trace_stream *stream, - struct pipe_winsys *winsys) +trace_winsys_create(struct pipe_winsys *winsys) { - struct trace_winsys *tr_ws; + if(!winsys) + goto error1; + tr_ws = CALLOC_STRUCT(trace_winsys); if(!tr_ws) - return NULL; + goto error1; tr_ws->base.destroy = trace_winsys_destroy; tr_ws->base.get_name = trace_winsys_get_name; @@ -432,16 +419,20 @@ trace_winsys_create(struct trace_stream *stream, tr_ws->base.fence_finish = trace_winsys_fence_finish; tr_ws->winsys = winsys; - tr_ws->stream = stream; tr_ws->buffer_maps = hash_table_create(trace_buffer_hash, trace_buffer_compare); if(!tr_ws->buffer_maps) - return NULL; + goto error2; - trace_dump_call_begin(stream, "", "pipe_winsys_create"); - trace_dump_ret(stream, ptr, winsys); - trace_dump_call_end(stream); + trace_dump_call_begin("", "pipe_winsys_create"); + trace_dump_ret(ptr, winsys); + trace_dump_call_end(); return &tr_ws->base; + +error2: + FREE(tr_ws); +error1: + return winsys; } diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h index 704d2c57c86..2218117347c 100644 --- a/src/gallium/drivers/trace/tr_winsys.h +++ b/src/gallium/drivers/trace/tr_winsys.h @@ -35,7 +35,6 @@ struct hash_table; -struct trace_stream; struct trace_winsys @@ -44,8 +43,6 @@ struct trace_winsys struct pipe_winsys *winsys; - struct trace_stream *stream; - struct hash_table *buffer_maps; }; @@ -60,8 +57,7 @@ trace_winsys(struct pipe_winsys *winsys) struct pipe_winsys * -trace_winsys_create(struct trace_stream *stream, - struct pipe_winsys *winsys); +trace_winsys_create(struct pipe_winsys *winsys); #endif /* TR_WINSYS_H_ */ -- cgit v1.2.3 From 424dea98d47e77f61efc79134b230f2046061ebe Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 14 Aug 2008 14:34:33 +0100 Subject: trace: Trace surface contents. --- src/gallium/drivers/trace/README | 4 -- src/gallium/drivers/trace/tr_screen.c | 75 +++++++++++++++++++++++++---------- src/gallium/drivers/trace/tr_screen.h | 5 +++ 3 files changed, 60 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index 5752448b934..5f0ae2c6419 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -22,9 +22,5 @@ which should create a gallium.*.trace file, which is an XML file. You can view copying trace.xsl and trace.css to the same directory, and opening with a XSLT capable browser like Firefox or Internet Explorer. -This is still work in progress, namely: -- surface writes are not traced -- no way to know the start/end of a frame - -- Jose Fonseca <jrfonseca@tungstengraphics.com> diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 68165a4553b..2eecfdc1875 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "pipe/p_util.h" +#include "util/u_hash_table.h" #include "tr_dump.h" #include "tr_state.h" @@ -33,6 +34,18 @@ #include "tr_screen.h" +static unsigned trace_surface_hash(void *surface) +{ + return (unsigned)(uintptr_t)surface; +} + + +static int trace_surface_compare(void *surface1, void *surface2) +{ + return (char *)surface2 - (char *)surface1; +} + + static const char * trace_screen_get_name(struct pipe_screen *_screen) { @@ -275,21 +288,17 @@ trace_screen_surface_map(struct pipe_screen *_screen, { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; - struct pipe_surface *result; - - trace_dump_call_begin("pipe_screen", "surface_map"); + void *map; - trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, surface); - trace_dump_arg(uint, flags); - - result = screen->surface_map(screen, surface, flags); + map = screen->surface_map(screen, surface, flags); + if(map) { + if(flags & PIPE_BUFFER_USAGE_CPU_WRITE) { + assert(!hash_table_get(tr_scr->surface_maps, surface)); + hash_table_set(tr_scr->surface_maps, surface, map); + } + } - trace_dump_ret(ptr, result); - - trace_dump_call_end(); - - return result; + return map; } @@ -299,15 +308,36 @@ trace_screen_surface_unmap(struct pipe_screen *_screen, { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; - - trace_dump_call_begin("pipe_screen", "surface_unmap"); - - trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, surface); + const void *map; + + map = hash_table_get(tr_scr->surface_maps, surface); + if(map) { + size_t size = surface->nblocksy * surface->stride; + + trace_dump_call_begin("pipe_winsys", "surface_write"); + + trace_dump_arg(ptr, screen); + + trace_dump_arg(ptr, surface); + + trace_dump_arg_begin("data"); + trace_dump_bytes(map, size); + trace_dump_arg_end(); + + trace_dump_arg_begin("stride"); + trace_dump_uint(surface->stride); + trace_dump_arg_end(); + + trace_dump_arg_begin("size"); + trace_dump_uint(size); + trace_dump_arg_end(); + + trace_dump_call_end(); + + hash_table_remove(tr_scr->surface_maps, surface); + } screen->surface_unmap(screen, surface); - - trace_dump_call_end(); } @@ -368,6 +398,11 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->screen = screen; + tr_scr->surface_maps = hash_table_create(trace_surface_hash, + trace_surface_compare); + if(!tr_scr->surface_maps) + goto error3; + trace_dump_call_begin("", "pipe_screen_create"); trace_dump_arg_begin("winsys"); trace_dump_ptr(screen->winsys); diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h index 446c4af6a6b..90103aa922d 100644 --- a/src/gallium/drivers/trace/tr_screen.h +++ b/src/gallium/drivers/trace/tr_screen.h @@ -34,11 +34,16 @@ #include "pipe/p_screen.h" +struct hash_table; + + struct trace_screen { struct pipe_screen base; struct pipe_screen *screen; + + struct hash_table *surface_maps; }; -- cgit v1.2.3 From 4fffc9d63f117cfc5d5b2400536e8757c786cc1e Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 14 Aug 2008 14:38:58 +0100 Subject: trace: Several fixes. --- src/gallium/drivers/trace/tr_context.c | 8 +++++--- src/gallium/drivers/trace/tr_winsys.c | 16 ++++++++++------ 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 868b4f010d0..e81cab252c3 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -318,11 +318,11 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, trace_dump_call_begin("pipe_context", "create_sampler_state"); trace_dump_arg(ptr, pipe); - trace_dump_arg(ptr, state); + trace_dump_arg(sampler_state, state); result = pipe->create_sampler_state(pipe, state);; - trace_dump_ret(sampler_state, result); + trace_dump_ret(ptr, result); trace_dump_call_end(); @@ -888,10 +888,12 @@ trace_context_flush(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(uint, flags); - trace_dump_arg(ptr, fence); pipe->flush(pipe, flags, fence);; + if(fence) + trace_dump_ret(ptr, *fence); + trace_dump_call_end(); } diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index be76c0716f0..356579809ad 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -74,12 +74,14 @@ trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys, { struct trace_winsys *tr_ws = trace_winsys(_winsys); struct pipe_winsys *winsys = tr_ws->winsys; - + trace_dump_call_begin("pipe_winsys", "flush_frontbuffer"); trace_dump_arg(ptr, winsys); trace_dump_arg(ptr, surface); + /* XXX: hide, as there is nothing we can do with this trace_dump_arg(ptr, context_private); + */ winsys->flush_frontbuffer(winsys, surface, context_private); @@ -203,8 +205,8 @@ trace_winsys_buffer_create(struct pipe_winsys *_winsys, static struct pipe_buffer * trace_winsys_user_buffer_create(struct pipe_winsys *_winsys, - void *ptr, - unsigned bytes) + void *data, + unsigned size) { struct trace_winsys *tr_ws = trace_winsys(_winsys); struct pipe_winsys *winsys = tr_ws->winsys; @@ -213,10 +215,12 @@ trace_winsys_user_buffer_create(struct pipe_winsys *_winsys, trace_dump_call_begin("pipe_winsys", "user_buffer_create"); trace_dump_arg(ptr, winsys); - trace_dump_arg(ptr, ptr); - trace_dump_arg(uint, bytes); + trace_dump_arg_begin("data"); + trace_dump_bytes(data, size); + trace_dump_arg_end(); + trace_dump_arg(uint, size); - result = winsys->user_buffer_create(winsys, ptr, bytes); + result = winsys->user_buffer_create(winsys, data, size); trace_dump_ret(ptr, result); -- cgit v1.2.3 From f121d0e54f39d8f6361dcf0bf4d938ccb5ae4b5e Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 15 Aug 2008 10:24:09 +0100 Subject: trace: Allow multiple screens. Flush after call. --- src/gallium/drivers/trace/tr_dump.c | 53 ++++++++++++++++++++++++----------- src/gallium/drivers/trace/tr_dump.h | 1 + src/gallium/drivers/trace/tr_stream.c | 12 ++++++++ src/gallium/drivers/trace/tr_stream.h | 3 ++ 4 files changed, 53 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 6ebb639b7c7..c711a56b947 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -52,6 +52,7 @@ static struct trace_stream *stream = NULL; +static unsigned refcount = 0; static INLINE void @@ -204,35 +205,54 @@ trace_dump_tag_end(const char *name) trace_dump_writes(">"); } +static void +trace_dump_trace_close(void) +{ + if(stream) { + trace_dump_writes("</trace>\n"); + trace_stream_close(stream); + stream = NULL; + refcount = 0; + } +} + boolean trace_dump_trace_begin() { if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) return FALSE; - stream = trace_stream_create("gallium", "trace"); - if(!stream) - return FALSE; - - trace_dump_writes("<?xml version='1.0' encoding='UTF-8'?>\n"); - trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); - trace_dump_writes("<trace version='0.1'>\n"); + if(!stream) { + stream = trace_stream_create("gallium", "trace"); + if(!stream) + return FALSE; + + trace_dump_writes("<?xml version='1.0' encoding='UTF-8'?>\n"); + trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); + trace_dump_writes("<trace version='0.1'>\n"); + #if defined(PIPE_OS_LINUX) - /* Linux applications rarely cleanup GL / Gallium resources so catch - * application exit here */ - atexit(trace_dump_trace_end); + /* Linux applications rarely cleanup GL / Gallium resources so catch + * application exit here */ + atexit(trace_dump_trace_close); #endif + } + + ++refcount; return TRUE; } +boolean trace_dump_enabled(void) +{ + return stream ? TRUE : FALSE; +} + void trace_dump_trace_end(void) { - if(stream) { - trace_dump_writes("</trace>\n"); - trace_stream_close(stream); - stream = NULL; - } + if(stream) + if(!--refcount) + trace_dump_trace_close(); } void trace_dump_call_begin(const char *klass, const char *method) @@ -247,6 +267,7 @@ void trace_dump_call_end(void) trace_dump_indent(1); trace_dump_tag_end("call"); trace_dump_newline(); + trace_stream_flush(stream); } void trace_dump_arg_begin(const char *name) @@ -372,7 +393,7 @@ void trace_dump_null(void) void trace_dump_ptr(const void *value) { if(value) - trace_dump_writef("<ptr>%p</ptr>", value); + trace_dump_writef("<ptr>0x%08lx</ptr>", (unsigned long)(uintptr_t)value); else trace_dump_null(); } diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index 0beb1023b15..14176a78e97 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -39,6 +39,7 @@ boolean trace_dump_trace_begin(void); +boolean trace_dump_enabled(void); void trace_dump_trace_end(void); void trace_dump_call_begin(const char *klass, const char *method); void trace_dump_call_end(void); diff --git a/src/gallium/drivers/trace/tr_stream.c b/src/gallium/drivers/trace/tr_stream.c index 14cc257e154..aecc1286b8d 100644 --- a/src/gallium/drivers/trace/tr_stream.c +++ b/src/gallium/drivers/trace/tr_stream.c @@ -86,6 +86,18 @@ trace_stream_write(struct trace_stream *stream, const void *data, size_t size) } +void +trace_stream_flush(struct trace_stream *stream) +{ + if(!stream) + return; + +#if defined(PIPE_OS_LINUX) + fflush(stream->file); +#endif +} + + void trace_stream_close(struct trace_stream *stream) { diff --git a/src/gallium/drivers/trace/tr_stream.h b/src/gallium/drivers/trace/tr_stream.h index 53e854aa913..679c4a725d7 100644 --- a/src/gallium/drivers/trace/tr_stream.h +++ b/src/gallium/drivers/trace/tr_stream.h @@ -48,6 +48,9 @@ trace_stream_create(const char *name, const char *ext); boolean trace_stream_write(struct trace_stream *stream, const void *data, size_t size); +void +trace_stream_flush(struct trace_stream *stream); + void trace_stream_close(struct trace_stream *stream); -- cgit v1.2.3 From f40de50def1b7ee75dd320b151c025b7ddff45be Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 15 Aug 2008 10:31:23 +0100 Subject: trace: Wrap all textures and surface created by the pipe driver. That is, Unfortunately, this causes a regression in softpipe, where the output gets tyled. --- src/gallium/drivers/trace/SConscript | 11 ++-- src/gallium/drivers/trace/tr_context.c | 73 ++++++++++++++++++++- src/gallium/drivers/trace/tr_screen.c | 104 +++++++++++++++++++++--------- src/gallium/drivers/trace/tr_screen.h | 16 +---- src/gallium/drivers/trace/tr_texture.c | 112 +++++++++++++++++++++++++++++++++ src/gallium/drivers/trace/tr_texture.h | 95 ++++++++++++++++++++++++++++ src/gallium/drivers/trace/tr_winsys.c | 16 +++++ 7 files changed, 378 insertions(+), 49 deletions(-) create mode 100644 src/gallium/drivers/trace/tr_texture.c create mode 100644 src/gallium/drivers/trace/tr_texture.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript index 35507e21e4c..a21ced9176a 100644 --- a/src/gallium/drivers/trace/SConscript +++ b/src/gallium/drivers/trace/SConscript @@ -3,14 +3,15 @@ Import('*') env = env.Clone() trace = env.ConvenienceLibrary( - target = 'trace', - source = [ + target = 'trace', + source = [ 'tr_context.c', 'tr_dump.c', - 'tr_screen.c', + 'tr_screen.c', 'tr_state.c', - 'tr_stream.c', + 'tr_stream.c', + 'tr_texture.c', 'tr_winsys.c', - ]) + ]) Export('trace') \ No newline at end of file diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index e81cab252c3..529bed3c6bb 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -30,9 +30,53 @@ #include "tr_dump.h" #include "tr_state.h" +#include "tr_screen.h" +#include "tr_texture.h" #include "tr_context.h" +static INLINE struct pipe_texture * +trace_texture_unwrap(struct trace_context *tr_ctx, + struct pipe_texture *texture) +{ + struct trace_screen *tr_scr = trace_screen(tr_ctx->base.screen); + struct trace_texture *tr_tex; + + if(!texture) + return NULL; + + tr_tex = trace_texture(tr_scr, texture); + + assert(tr_tex->texture); + assert(tr_tex->texture->screen == tr_scr->screen); + return tr_tex->texture; +} + + +static INLINE struct pipe_surface * +trace_surface_unwrap(struct trace_context *tr_ctx, + struct pipe_surface *surface) +{ + struct trace_screen *tr_scr = trace_screen(tr_ctx->base.screen); + struct trace_texture *tr_tex; + struct trace_surface *tr_surf; + + if(!surface) + return NULL; + + assert(surface->texture); + if(!surface->texture) + return surface; + + tr_tex = trace_texture(tr_scr, surface->texture); + tr_surf = trace_surface(tr_tex, surface); + + assert(tr_surf->surface); + assert(tr_surf->surface->texture->screen == tr_scr->screen); + return tr_surf->surface; +} + + static INLINE void trace_context_set_edgeflags(struct pipe_context *_pipe, const unsigned *bitfield) @@ -666,7 +710,18 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - + struct pipe_framebuffer_state unwrapped_state; + unsigned i; + + /* Unwrap the input state */ + memcpy(&unwrapped_state, state, sizeof(unwrapped_state)); + for(i = 0; i < state->num_cbufs; ++i) + unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]); + for(i = state->num_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) + unwrapped_state.cbufs[i] = NULL; + unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf); + state = &unwrapped_state; + trace_dump_call_begin("pipe_context", "set_framebuffer_state"); trace_dump_arg(ptr, pipe); @@ -739,6 +794,12 @@ trace_context_set_sampler_textures(struct pipe_context *_pipe, { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_texture *unwrapped_textures[PIPE_MAX_SAMPLERS]; + unsigned i; + + for(i = 0; i < num_textures; ++i) + unwrapped_textures[i] = trace_texture_unwrap(tr_ctx, textures[i]); + textures = unwrapped_textures; trace_dump_call_begin("pipe_context", "set_sampler_textures"); @@ -810,6 +871,9 @@ trace_context_surface_copy(struct pipe_context *_pipe, struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; + dest = trace_surface_unwrap(tr_ctx, dest); + src = trace_surface_unwrap(tr_ctx, src); + trace_dump_call_begin("pipe_context", "surface_copy"); trace_dump_arg(ptr, pipe); @@ -841,6 +905,8 @@ trace_context_surface_fill(struct pipe_context *_pipe, struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; + dst = trace_surface_unwrap(tr_ctx, dst); + trace_dump_call_begin("pipe_context", "surface_fill"); trace_dump_arg(ptr, pipe); @@ -864,6 +930,8 @@ trace_context_clear(struct pipe_context *_pipe, struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; + surface = trace_surface_unwrap(tr_ctx, surface); + trace_dump_call_begin("pipe_context", "clear"); trace_dump_arg(ptr, pipe); @@ -925,6 +993,9 @@ trace_context_create(struct pipe_screen *screen, if(!pipe) goto error1; + if(!trace_dump_enabled()) + goto error1; + tr_ctx = CALLOC_STRUCT(trace_context); if(!tr_ctx) goto error1; diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 2eecfdc1875..cca8597a87e 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -26,26 +26,14 @@ **************************************************************************/ #include "pipe/p_util.h" -#include "util/u_hash_table.h" #include "tr_dump.h" #include "tr_state.h" #include "tr_winsys.h" +#include "tr_texture.h" #include "tr_screen.h" -static unsigned trace_surface_hash(void *surface) -{ - return (unsigned)(uintptr_t)surface; -} - - -static int trace_surface_compare(void *surface1, void *surface2) -{ - return (char *)surface2 - (char *)surface1; -} - - static const char * trace_screen_get_name(struct pipe_screen *_screen) { @@ -182,6 +170,8 @@ trace_screen_texture_create(struct pipe_screen *_screen, trace_dump_call_end(); + result = trace_texture_create(tr_scr, result); + return result; } @@ -210,6 +200,8 @@ trace_screen_texture_blanket(struct pipe_screen *_screen, trace_dump_call_end(); + result = trace_texture_create(tr_scr, result); + return result; } @@ -220,18 +212,34 @@ trace_screen_texture_release(struct pipe_screen *_screen, { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; - struct pipe_texture *texture = *ptexture; + struct trace_texture *tr_tex; + struct pipe_texture *texture; + + assert(ptexture); + if(*ptexture) { + tr_tex = trace_texture(tr_scr, *ptexture); + texture = tr_tex->texture; + assert(texture->screen == screen); + } + else + texture = NULL; trace_dump_call_begin("pipe_screen", "texture_release"); trace_dump_arg(ptr, screen); trace_dump_arg(ptr, texture); - screen->texture_release(screen, ptexture); + if (*ptexture) { + if (!--(*ptexture)->refcount) + trace_texture_destroy(tr_scr, *ptexture); + + *ptexture = NULL; + } trace_dump_call_end(); } + static struct pipe_surface * trace_screen_get_tex_surface(struct pipe_screen *_screen, struct pipe_texture *texture, @@ -241,8 +249,14 @@ trace_screen_get_tex_surface(struct pipe_screen *_screen, { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; struct pipe_surface *result; + assert(texture); + tr_tex = trace_texture(tr_scr, texture); + texture = tr_tex->texture; + assert(texture->screen == screen); + trace_dump_call_begin("pipe_screen", "get_tex_surface"); trace_dump_arg(ptr, screen); @@ -258,6 +272,8 @@ trace_screen_get_tex_surface(struct pipe_screen *_screen, trace_dump_call_end(); + result = trace_surface_create(tr_tex, result); + return result; } @@ -268,14 +284,30 @@ trace_screen_tex_surface_release(struct pipe_screen *_screen, { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; - struct pipe_surface *surface = *psurface; + struct trace_texture *tr_tex; + struct trace_surface *tr_surf; + struct pipe_surface *surface; + + assert(psurface); + if(*psurface) { + tr_tex = trace_texture(tr_scr, (*psurface)->texture); + tr_surf = trace_surface(tr_tex, *psurface); + surface = tr_surf->surface; + } + else + surface = NULL; trace_dump_call_begin("pipe_screen", "tex_surface_release"); trace_dump_arg(ptr, screen); trace_dump_arg(ptr, surface); - screen->tex_surface_release(screen, psurface); + if (*psurface) { + if (!--(*psurface)->refcount) + trace_surface_destroy(tr_tex, *psurface); + + *psurface = NULL; + } trace_dump_call_end(); } @@ -288,13 +320,19 @@ trace_screen_surface_map(struct pipe_screen *_screen, { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct trace_surface *tr_surf; void *map; + tr_tex = trace_texture(tr_scr, surface->texture); + tr_surf = trace_surface(tr_tex, surface); + surface = tr_surf->surface; + map = screen->surface_map(screen, surface, flags); if(map) { if(flags & PIPE_BUFFER_USAGE_CPU_WRITE) { - assert(!hash_table_get(tr_scr->surface_maps, surface)); - hash_table_set(tr_scr->surface_maps, surface, map); + assert(!tr_surf->map); + tr_surf->map = map; } } @@ -308,10 +346,14 @@ trace_screen_surface_unmap(struct pipe_screen *_screen, { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; - const void *map; + struct trace_texture *tr_tex; + struct trace_surface *tr_surf; - map = hash_table_get(tr_scr->surface_maps, surface); - if(map) { + tr_tex = trace_texture(tr_scr, surface->texture); + tr_surf = trace_surface(tr_tex, surface); + surface = tr_surf->surface; + + if(tr_surf->map) { size_t size = surface->nblocksy * surface->stride; trace_dump_call_begin("pipe_winsys", "surface_write"); @@ -321,7 +363,7 @@ trace_screen_surface_unmap(struct pipe_screen *_screen, trace_dump_arg(ptr, surface); trace_dump_arg_begin("data"); - trace_dump_bytes(map, size); + trace_dump_bytes(tr_surf->map, size); trace_dump_arg_end(); trace_dump_arg_begin("stride"); @@ -334,7 +376,7 @@ trace_screen_surface_unmap(struct pipe_screen *_screen, trace_dump_call_end(); - hash_table_remove(tr_scr->surface_maps, surface); + tr_surf->map = NULL; } screen->surface_unmap(screen, surface); @@ -398,11 +440,6 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->screen = screen; - tr_scr->surface_maps = hash_table_create(trace_surface_hash, - trace_surface_compare); - if(!tr_scr->surface_maps) - goto error3; - trace_dump_call_begin("", "pipe_screen_create"); trace_dump_arg_begin("winsys"); trace_dump_ptr(screen->winsys); @@ -419,3 +456,12 @@ error2: error1: return screen; } + + +struct trace_screen * +trace_screen(struct pipe_screen *screen) +{ + assert(screen); + assert(screen->destroy == trace_screen_destroy); + return (struct trace_screen *)screen; +} diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h index 90103aa922d..698b84811d2 100644 --- a/src/gallium/drivers/trace/tr_screen.h +++ b/src/gallium/drivers/trace/tr_screen.h @@ -29,31 +29,19 @@ #define TR_SCREEN_H_ -#include "pipe/p_compiler.h" -#include "pipe/p_debug.h" #include "pipe/p_screen.h" -struct hash_table; - - struct trace_screen { struct pipe_screen base; struct pipe_screen *screen; - - struct hash_table *surface_maps; }; -static INLINE struct trace_screen * -trace_screen(struct pipe_screen *screen) -{ - assert(screen); - return (struct trace_screen *)screen; -} - +struct trace_screen * +trace_screen(struct pipe_screen *screen); struct pipe_screen * diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c new file mode 100644 index 00000000000..99ba74d3667 --- /dev/null +++ b/src/gallium/drivers/trace/tr_texture.c @@ -0,0 +1,112 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "util/u_hash_table.h" + +#include "tr_screen.h" +#include "tr_texture.h" + + +struct pipe_texture * +trace_texture_create(struct trace_screen *tr_scr, + struct pipe_texture *texture) +{ + struct trace_texture *tr_tex; + + if(!texture) + goto error; + + assert(texture->screen == tr_scr->screen); + + tr_tex = CALLOC_STRUCT(trace_texture); + if(!tr_tex) + goto error; + + memcpy(&tr_tex->base, texture, sizeof(struct pipe_texture)); + tr_tex->base.screen = &tr_scr->base; + tr_tex->texture = texture; + + return &tr_tex->base; + +error: + pipe_texture_reference(&texture, NULL); + return NULL; +} + + +void +trace_texture_destroy(struct trace_screen *tr_scr, + struct pipe_texture *texture) +{ + struct trace_texture *tr_tex = trace_texture(tr_scr, texture); + pipe_texture_reference(&tr_tex->texture, NULL); + FREE(tr_tex); +} + + +struct pipe_surface * +trace_surface_create(struct trace_texture *tr_tex, + struct pipe_surface *surface) +{ + struct trace_surface *tr_surf; + + if(!surface) + goto error; + + assert(surface->texture == tr_tex->texture); + + tr_surf = CALLOC_STRUCT(trace_surface); + if(!tr_surf) + goto error; + + memcpy(&tr_surf->base, surface, sizeof(struct pipe_surface)); + + tr_surf->base.winsys = tr_tex->base.screen->winsys; + tr_surf->base.texture = NULL; + pipe_texture_reference(&tr_surf->base.texture, &tr_tex->base); + tr_surf->surface = surface; + + return &tr_surf->base; + +error: + pipe_surface_reference(&surface, NULL); + return NULL; +} + + +void +trace_surface_destroy(struct trace_texture *tr_tex, + struct pipe_surface *surface) +{ + struct trace_surface *tr_surf = trace_surface(tr_tex, surface); + pipe_texture_reference(&tr_surf->base.texture, NULL); + pipe_surface_reference(&tr_surf->surface, NULL); + FREE(tr_surf); +} + diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h new file mode 100644 index 00000000000..9e72edb8a3c --- /dev/null +++ b/src/gallium/drivers/trace/tr_texture.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_TEXTURE_H_ +#define TR_TEXTURE_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "tr_screen.h" + + +struct trace_texture +{ + struct pipe_texture base; + + struct pipe_texture *texture; +}; + + +struct trace_surface +{ + struct pipe_surface base; + + struct pipe_surface *surface; + + void *map; +}; + + +static INLINE struct trace_texture * +trace_texture(struct trace_screen *tr_scr, + struct pipe_texture *texture) +{ + if(!texture) + return NULL; + assert(texture->screen == &tr_scr->base); + return (struct trace_texture *)texture; +} + + +static INLINE struct trace_surface * +trace_surface(struct trace_texture *tr_tex, + struct pipe_surface *surface) +{ + if(!surface) + return NULL; + assert(surface->texture == &tr_tex->base); + return (struct trace_surface *)surface; +} + + +struct pipe_texture * +trace_texture_create(struct trace_screen *tr_scr, + struct pipe_texture *texture); + +void +trace_texture_destroy(struct trace_screen *tr_scr, + struct pipe_texture *texture); + +struct pipe_surface * +trace_surface_create(struct trace_texture *tr_tex, + struct pipe_surface *surface); + +void +trace_surface_destroy(struct trace_texture *tr_tex, + struct pipe_surface *surface); + + +#endif /* TR_TEXTURE_H_ */ diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 356579809ad..c4262114082 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -31,6 +31,8 @@ #include "tr_dump.h" #include "tr_state.h" +#include "tr_screen.h" +#include "tr_texture.h" #include "tr_winsys.h" @@ -75,6 +77,14 @@ trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys, struct trace_winsys *tr_ws = trace_winsys(_winsys); struct pipe_winsys *winsys = tr_ws->winsys; + assert(surface); + if(surface->texture) { + struct trace_screen *tr_scr = trace_screen(surface->texture->screen); + struct trace_screen *tr_tex = trace_texture(tr_scr, surface->texture); + struct trace_surface *tr_surf = trace_surface(tr_tex, surface); + surface = tr_surf->surface; + } + trace_dump_call_begin("pipe_winsys", "flush_frontbuffer"); trace_dump_arg(ptr, winsys); @@ -106,6 +116,8 @@ trace_winsys_surface_alloc(struct pipe_winsys *_winsys) trace_dump_call_end(); + assert(!result || !result->texture); + return result; } @@ -122,6 +134,8 @@ trace_winsys_surface_alloc_storage(struct pipe_winsys *_winsys, struct pipe_winsys *winsys = tr_ws->winsys; int result; + assert(surface && !surface->texture); + trace_dump_call_begin("pipe_winsys", "surface_alloc_storage"); trace_dump_arg(ptr, winsys); @@ -155,6 +169,8 @@ trace_winsys_surface_release(struct pipe_winsys *_winsys, struct pipe_winsys *winsys = tr_ws->winsys; struct pipe_surface *surface = *psurface; + assert(psurface && *psurface && !(*psurface)->texture); + trace_dump_call_begin("pipe_winsys", "surface_release"); trace_dump_arg(ptr, winsys); -- cgit v1.2.3 From 9d58b2a432d44fc2861f2df1a611188f92dc288f Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 15 Aug 2008 11:20:57 +0100 Subject: trace: Fix typo. --- src/gallium/drivers/trace/tr_winsys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index c4262114082..120006ea9f0 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -80,7 +80,7 @@ trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys, assert(surface); if(surface->texture) { struct trace_screen *tr_scr = trace_screen(surface->texture->screen); - struct trace_screen *tr_tex = trace_texture(tr_scr, surface->texture); + struct trace_texture *tr_tex = trace_texture(tr_scr, surface->texture); struct trace_surface *tr_surf = trace_surface(tr_tex, surface); surface = tr_surf->surface; } -- cgit v1.2.3 From f90e50dff9e5cdbad6e9bb74c0aeaaaa82242b25 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin <marchesin@icps.u-strasbg.fr> Date: Mon, 18 Aug 2008 03:00:25 +0200 Subject: nv30: add some opcodes. --- src/gallium/drivers/nv30/nv30_fragprog.c | 11 +++++++++++ src/gallium/drivers/nv30/nv30_vertprog.c | 6 ++++++ 2 files changed, 17 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index d3693fdf567..02f30ad9bad 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -526,6 +526,12 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, case TGSI_OPCODE_MUL: arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + arith(fpc, sat, SFL, dst, mask, none, none, none); + break; case TGSI_OPCODE_POW: arith(fpc, sat, POW, dst, mask, src[0], src[1], none); break; @@ -557,6 +563,9 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, case TGSI_OPCODE_SGE: arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); break; + case TGSI_OPCODE_SGT: + arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); + break; case TGSI_OPCODE_SLT: arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); break; @@ -730,6 +739,8 @@ nv30_fragprog_translate(struct nv30_context *nv30, struct tgsi_parse_context parse; struct nv30_fpc *fpc = NULL; + tgsi_dump(fp->pipe.tokens,0); + fpc = CALLOC(1, sizeof(struct nv30_fpc)); if (!fpc) return; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 6c075cdb752..72824559e8b 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -4,6 +4,7 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" #include "nv30_context.h" #include "nv30_state.h" @@ -457,6 +458,9 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, case TGSI_OPCODE_SGE: arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); break; + case TGSI_OPCODE_SGT: + arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); + break; case TGSI_OPCODE_SLT: arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); break; @@ -570,6 +574,8 @@ nv30_vertprog_translate(struct nv30_context *nv30, struct tgsi_parse_context parse; struct nv30_vpc *vpc = NULL; + tgsi_dump(vp->pipe.tokens,0); + vpc = CALLOC(1, sizeof(struct nv30_vpc)); if (!vpc) return; -- cgit v1.2.3 From 6a31bb6ad8251ae977327e64562f373a89f55c70 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 16 Aug 2008 18:45:00 +0100 Subject: trace: Get the trace file from the GALLIUM_TRACE option itself. --- src/gallium/drivers/trace/README | 9 ++++----- src/gallium/drivers/trace/tr_dump.c | 7 +++++-- src/gallium/drivers/trace/tr_stream.c | 6 +----- src/gallium/drivers/trace/tr_stream.h | 3 ++- 4 files changed, 12 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index 5f0ae2c6419..7e98ec24546 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -8,7 +8,6 @@ To use do ln -s libGL.so build/linux-x86-debug/gallium/winsys/xlib/libGL.so.1 export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/gallium/winsys/xlib - export GALLIUM_TRACE=y ensure the right libGL.so is being picked by doing @@ -16,11 +15,11 @@ ensure the right libGL.so is being picked by doing and then try running - glxinfo + GALLIUM_TRACE=tri.trace ./progs/trivial/tri -which should create a gallium.*.trace file, which is an XML file. You can view -copying trace.xsl and trace.css to the same directory, and opening with a -XSLT capable browser like Firefox or Internet Explorer. +which should create a tri.trace file, which is an XML file. You can view copying +trace.xsl to the same directory, and opening with a XSLT capable browser like +Firefox or Internet Explorer. -- Jose Fonseca <jrfonseca@tungstengraphics.com> diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index c711a56b947..ecd0d6830d2 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -218,12 +218,15 @@ trace_dump_trace_close(void) boolean trace_dump_trace_begin() { - if(!debug_get_bool_option("GALLIUM_TRACE", FALSE)) + const char *filename; + + filename = debug_get_option("GALLIUM_TRACE", NULL); + if(!filename) return FALSE; if(!stream) { - stream = trace_stream_create("gallium", "trace"); + stream = trace_stream_create(filename); if(!stream) return FALSE; diff --git a/src/gallium/drivers/trace/tr_stream.c b/src/gallium/drivers/trace/tr_stream.c index aecc1286b8d..d008dbac27b 100644 --- a/src/gallium/drivers/trace/tr_stream.c +++ b/src/gallium/drivers/trace/tr_stream.c @@ -47,18 +47,14 @@ struct trace_stream struct trace_stream * -trace_stream_create(const char *name, const char *ext) +trace_stream_create(const char *filename) { struct trace_stream *stream; - static unsigned file_no = 0; - char filename[1024]; stream = CALLOC_STRUCT(trace_stream); if(!stream) goto error1; - snprintf(filename, sizeof(filename), "%s.%u.%s", name, file_no, ext); - #if defined(PIPE_OS_LINUX) stream->file = fopen(filename, "w"); if(!stream->file) diff --git a/src/gallium/drivers/trace/tr_stream.h b/src/gallium/drivers/trace/tr_stream.h index 679c4a725d7..6111174d6a8 100644 --- a/src/gallium/drivers/trace/tr_stream.h +++ b/src/gallium/drivers/trace/tr_stream.h @@ -39,11 +39,12 @@ #include "pipe/p_compiler.h" + struct trace_stream; struct trace_stream * -trace_stream_create(const char *name, const char *ext); +trace_stream_create(const char *filename); boolean trace_stream_write(struct trace_stream *stream, const void *data, size_t size); -- cgit v1.2.3 From d042f415fc9edcc174573fc2cc06afa373a7ef9b Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 16 Aug 2008 19:44:37 +0100 Subject: trace: Use long longs to ensure covering 64bits integers. --- src/gallium/drivers/trace/tr_dump.c | 8 ++++---- src/gallium/drivers/trace/tr_dump.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index ecd0d6830d2..1613a626df4 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -302,14 +302,14 @@ void trace_dump_bool(int value) trace_dump_writef("<bool>%c</bool>", value ? '1' : '0'); } -void trace_dump_int(long int value) +void trace_dump_int(long long int value) { - trace_dump_writef("<int>%li</int>", value); + trace_dump_writef("<int>%lli</int>", value); } -void trace_dump_uint(long unsigned value) +void trace_dump_uint(long long unsigned value) { - trace_dump_writef("<uint>%lu</uint>", value); + trace_dump_writef("<uint>%llu</uint>", value); } void trace_dump_float(double value) diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index 14176a78e97..6ddc8fc15c4 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -48,8 +48,8 @@ void trace_dump_arg_end(void); void trace_dump_ret_begin(void); void trace_dump_ret_end(void); void trace_dump_bool(int value); -void trace_dump_int(long int value); -void trace_dump_uint(long unsigned value); +void trace_dump_int(long long int value); +void trace_dump_uint(long long unsigned value); void trace_dump_float(double value); void trace_dump_bytes(const void *data, long unsigned size); void trace_dump_string(const char *str); -- cgit v1.2.3 From 747762f379f05d127865de77615d6e4dd46be191 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Sat, 16 Aug 2008 19:45:05 +0100 Subject: trace: Preliminary stream implementation for GDI. --- src/gallium/drivers/trace/SConscript | 3 +- src/gallium/drivers/trace/tr_stream.c | 108 ----------------------- src/gallium/drivers/trace/tr_stream_stdc.c | 104 ++++++++++++++++++++++ src/gallium/drivers/trace/tr_stream_wd.c | 134 +++++++++++++++++++++++++++++ 4 files changed, 240 insertions(+), 109 deletions(-) delete mode 100644 src/gallium/drivers/trace/tr_stream.c create mode 100644 src/gallium/drivers/trace/tr_stream_stdc.c create mode 100644 src/gallium/drivers/trace/tr_stream_wd.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript index a21ced9176a..5c49468c4eb 100644 --- a/src/gallium/drivers/trace/SConscript +++ b/src/gallium/drivers/trace/SConscript @@ -9,7 +9,8 @@ trace = env.ConvenienceLibrary( 'tr_dump.c', 'tr_screen.c', 'tr_state.c', - 'tr_stream.c', + 'tr_stream_stdc.c', + 'tr_stream_wd.c', 'tr_texture.c', 'tr_winsys.c', ]) diff --git a/src/gallium/drivers/trace/tr_stream.c b/src/gallium/drivers/trace/tr_stream.c deleted file mode 100644 index d008dbac27b..00000000000 --- a/src/gallium/drivers/trace/tr_stream.c +++ /dev/null @@ -1,108 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_config.h" - -#if defined(PIPE_OS_LINUX) -#include <stdio.h> -#else -#error Unsupported platform -#endif - -#include "pipe/p_util.h" - -#include "tr_stream.h" - - -struct trace_stream -{ -#if defined(PIPE_OS_LINUX) - FILE *file; -#endif -}; - - -struct trace_stream * -trace_stream_create(const char *filename) -{ - struct trace_stream *stream; - - stream = CALLOC_STRUCT(trace_stream); - if(!stream) - goto error1; - -#if defined(PIPE_OS_LINUX) - stream->file = fopen(filename, "w"); - if(!stream->file) - goto error2; -#endif - - return stream; - -error2: - FREE(stream); -error1: - return NULL; -} - - -boolean -trace_stream_write(struct trace_stream *stream, const void *data, size_t size) -{ - if(!stream) - return FALSE; - -#if defined(PIPE_OS_LINUX) - return fwrite(data, size, 1, stream->file) == size ? TRUE : FALSE; -#endif -} - - -void -trace_stream_flush(struct trace_stream *stream) -{ - if(!stream) - return; - -#if defined(PIPE_OS_LINUX) - fflush(stream->file); -#endif -} - - -void -trace_stream_close(struct trace_stream *stream) -{ - if(!stream) - return; - -#if defined(PIPE_OS_LINUX) - fclose(stream->file); -#endif - - FREE(stream); -} diff --git a/src/gallium/drivers/trace/tr_stream_stdc.c b/src/gallium/drivers/trace/tr_stream_stdc.c new file mode 100644 index 00000000000..4c77e1c995c --- /dev/null +++ b/src/gallium/drivers/trace/tr_stream_stdc.c @@ -0,0 +1,104 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Stream implementation based on the Standard C Library. + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) + +#include <stdio.h> + +#include "pipe/p_util.h" + +#include "tr_stream.h" + + +struct trace_stream +{ + FILE *file; +}; + + +struct trace_stream * +trace_stream_create(const char *filename) +{ + struct trace_stream *stream; + + stream = CALLOC_STRUCT(trace_stream); + if(!stream) + goto error1; + + stream->file = fopen(filename, "w"); + if(!stream->file) + goto error2; + + return stream; + +error2: + FREE(stream); +error1: + return NULL; +} + + +boolean +trace_stream_write(struct trace_stream *stream, const void *data, size_t size) +{ + if(!stream) + return FALSE; + + return fwrite(data, size, 1, stream->file) == size ? TRUE : FALSE; +} + + +void +trace_stream_flush(struct trace_stream *stream) +{ + if(!stream) + return; + + fflush(stream->file); +} + + +void +trace_stream_close(struct trace_stream *stream) +{ + if(!stream) + return; + + fclose(stream->file); + + FREE(stream); +} + + +#endif diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c new file mode 100644 index 00000000000..9a05c0cec89 --- /dev/null +++ b/src/gallium/drivers/trace/tr_stream_wd.c @@ -0,0 +1,134 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Stream implementation for the Windows Display driver. + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +#include <windows.h> +#include <winddi.h> + +#include "pipe/p_util.h" + +#include "tr_stream.h" + + +#define MAP_FILE_SIZE (4*1024*1024) + + +struct trace_stream +{ + WCHAR wFileName[MAX_PATH + 1]; + ULONG_PTR iFile; + char *pMap; + size_t written; +}; + + +struct trace_stream * +trace_stream_create(const char *filename) +{ + struct trace_stream *stream; + ULONG BytesInUnicodeString; + + stream = CALLOC_STRUCT(trace_stream); + if(!stream) + goto error1; + + EngMultiByteToUnicodeN( + stream->wFileName, + sizeof(stream->wFileName), + &BytesInUnicodeString, + (char *)filename, + strlen(filename)); + + stream->pMap = EngMapFile(stream->wFileName, MAP_FILE_SIZE, &stream->iFile); + if(!stream->pMap) + goto error2; + + return stream; + +error2: + FREE(stream); +error1: + return NULL; +} + + +boolean +trace_stream_write(struct trace_stream *stream, const void *data, size_t size) +{ + boolean ret; + + if(!stream) + return FALSE; + + if(stream->written + size > MAP_FILE_SIZE) { + ret = FALSE; + size = MAP_FILE_SIZE - stream->written; + } + else + ret = TRUE; + + memcpy(stream->pMap + stream->written, data, size); + stream->written += size; + + return ret; +} + + +void +trace_stream_flush(struct trace_stream *stream) +{ + (void)stream; +} + + +void +trace_stream_close(struct trace_stream *stream) +{ + if(!stream) + return; + + EngUnmapFile(stream->iFile); + if(stream->written < MAP_FILE_SIZE) { + /* Truncate file size */ + stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile); + if(stream->pMap) + EngUnmapFile(stream->iFile); + } + + FREE(stream); +} + + +#endif -- cgit v1.2.3 From 90a1c6e4032571a1c3e43daeb357068ba14136fe Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 18 Aug 2008 12:42:08 +0100 Subject: trace: Explain how to integrate with a state tracker or winsys. --- src/gallium/drivers/trace/README | 45 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index 7e98ec24546..e7a2f12b022 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -1,9 +1,20 @@ + TRACE PIPE DRIVER + + += About = + This directory contains a Gallium3D pipe driver which traces all incoming calls. + += Build Instructions = + To build, invoke scons on the top dir as scons statetrackers=mesa drivers=softpipe,i915simple,trace winsys=xlib + += Usage = + To use do ln -s libGL.so build/linux-x86-debug/gallium/winsys/xlib/libGL.so.1 @@ -11,15 +22,43 @@ To use do ensure the right libGL.so is being picked by doing - ldd `which glxinfo` + ldd progs/trivial/tri and then try running - GALLIUM_TRACE=tri.trace ./progs/trivial/tri + GALLIUM_TRACE=tri.trace progs/trivial/tri which should create a tri.trace file, which is an XML file. You can view copying -trace.xsl to the same directory, and opening with a XSLT capable browser like +trace.xsl to the same directory, and opening with a XSLT capable browser such as Firefox or Internet Explorer. + += Integrating = + +You can integrate the trace pipe driver either inside the state tracker or the +winsys. The procedure on both cases is the same. Let's assume you have a +pipe_screen and a pipe_context pair obtained by the usual means (variable and +function names are just for illustration purposes): + + real_screen = real_screen_create(...); + + real_context = real_context_create(...); + +The trace screen and pipe_context is then created by doing + + trace_screen = trace_screen_create(real_screen); + + trace_context = trace_context_create(trace_screen, real_context); + +You can then simply use trace_screen and trace_context instead of real_screen +and real_context. + +Do not call trace_winsys_create. Simply pass trace_screen->winsys or +trace_context->winsys in places you would pass winsys. + +You can create as many contexts you wish. Just ensure that you don't mistake +trace_screen with real_screen when creating them. + + -- Jose Fonseca <jrfonseca@tungstengraphics.com> -- cgit v1.2.3 From 200d6dcc83ea9ac0bf6f1506214e0bd9b65714f2 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 19 Aug 2008 17:08:41 +0100 Subject: trace: Support C++. --- src/gallium/drivers/trace/tr_context.h | 9 +++++++++ src/gallium/drivers/trace/tr_screen.h | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 679371e3105..7831900ec29 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -34,6 +34,11 @@ #include "pipe/p_context.h" +#ifdef __cplusplus +extern "C" { +#endif + + struct trace_context { struct pipe_context base; @@ -56,4 +61,8 @@ trace_context_create(struct pipe_screen *screen, struct pipe_context *pipe); +#ifdef __cplusplus +} +#endif + #endif /* TR_CONTEXT_H_ */ diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h index 698b84811d2..93fefdb9a5f 100644 --- a/src/gallium/drivers/trace/tr_screen.h +++ b/src/gallium/drivers/trace/tr_screen.h @@ -32,6 +32,11 @@ #include "pipe/p_screen.h" +#ifdef __cplusplus +extern "C" { +#endif + + struct trace_screen { struct pipe_screen base; @@ -48,4 +53,8 @@ struct pipe_screen * trace_screen_create(struct pipe_screen *screen); +#ifdef __cplusplus +} +#endif + #endif /* TR_SCREEN_H_ */ -- cgit v1.2.3 From d27ffb8c6d4551371995608a6d752dfeb26c2351 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 19 Aug 2008 19:11:47 +0100 Subject: trace: Fix pipe_clip_state dump. --- src/gallium/drivers/trace/tr_state.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 7b35e89e754..30ab5a8fdcb 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -51,7 +51,6 @@ void trace_dump_block(const struct pipe_format_block *block) void trace_dump_template(const struct pipe_texture *templat) { - assert(templat); if(!templat) { trace_dump_null(); return; @@ -87,7 +86,6 @@ void trace_dump_template(const struct pipe_texture *templat) void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) { - assert(state); if(!state) { trace_dump_null(); return; @@ -136,7 +134,6 @@ void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) void trace_dump_poly_stipple(const struct pipe_poly_stipple *state) { - assert(state); if(!state) { trace_dump_null(); return; @@ -156,7 +153,6 @@ void trace_dump_poly_stipple(const struct pipe_poly_stipple *state) void trace_dump_viewport_state(const struct pipe_viewport_state *state) { - assert(state); if(!state) { trace_dump_null(); return; @@ -173,7 +169,6 @@ void trace_dump_viewport_state(const struct pipe_viewport_state *state) void trace_dump_scissor_state(const struct pipe_scissor_state *state) { - assert(state); if(!state) { trace_dump_null(); return; @@ -194,18 +189,20 @@ void trace_dump_clip_state(const struct pipe_clip_state *state) { unsigned i; - assert(state); if(!state) { trace_dump_null(); return; } - trace_dump_struct_begin("pipe_scissor_state"); + trace_dump_struct_begin("pipe_clip_state"); trace_dump_member_begin("ucp"); trace_dump_array_begin(); - for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) + for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) { + trace_dump_elem_begin(); trace_dump_array(float, state->ucp[i], 4); + trace_dump_elem_end(); + } trace_dump_array_end(); trace_dump_member_end(); @@ -217,7 +214,6 @@ void trace_dump_clip_state(const struct pipe_clip_state *state) void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) { - assert(state); if(!state) { trace_dump_null(); return; @@ -235,7 +231,7 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) void trace_dump_shader_state(const struct pipe_shader_state *state) { static char str[8192]; - assert(state); + if(!state) { trace_dump_null(); return; @@ -257,7 +253,6 @@ void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_ { unsigned i; - assert(state); if(!state) { trace_dump_null(); return; @@ -307,7 +302,6 @@ void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_ void trace_dump_blend_state(const struct pipe_blend_state *state) { - assert(state); if(!state) { trace_dump_null(); return; @@ -337,7 +331,6 @@ void trace_dump_blend_state(const struct pipe_blend_state *state) void trace_dump_blend_color(const struct pipe_blend_color *state) { - assert(state); if(!state) { trace_dump_null(); return; @@ -367,7 +360,6 @@ void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) void trace_dump_sampler_state(const struct pipe_sampler_state *state) { - assert(state); if(!state) { trace_dump_null(); return; @@ -398,7 +390,6 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state) void trace_dump_surface(const struct pipe_surface *state) { - assert(state); if(!state) { trace_dump_null(); return; @@ -436,7 +427,6 @@ void trace_dump_surface(const struct pipe_surface *state) void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) { - assert(state); if(!state) { trace_dump_null(); return; @@ -455,7 +445,6 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) void trace_dump_vertex_element(const struct pipe_vertex_element *state) { - assert(state); if(!state) { trace_dump_null(); return; -- cgit v1.2.3 From 7f9959ae8394f8e52a180f5e261b0f9470f6c5bc Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 19 Aug 2008 17:41:34 -0600 Subject: gallium: fix do_flip bug in sp_surface_copy() Surfaces are always in y=0=top raster order so the caller should invert the Y coordinate if needed; don't do it in sp_surface_copy(). Fixes a glCopyTexture regression. --- src/gallium/drivers/softpipe/sp_surface.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 7dc15c38d10..4f1bb881cb5 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -39,6 +39,9 @@ * Copy a rectangular region from one surface to another. * Surfaces must have same bpp. * + * Note that it's always the case that Y=0=top of the raster. + * If do_flip is non-zero, the region being copied will be flipped vertically. + * * Assumes all values are within bounds -- no checking at this level - * do it higher up if required. */ @@ -72,7 +75,7 @@ sp_surface_copy(struct pipe_context *pipe, width, height, src_map, do_flip ? -(int) src->stride : src->stride, - srcx, do_flip ? src->height - 1 - srcy : srcy); + srcx, srcy); pipe->screen->surface_unmap(pipe->screen, src); pipe->screen->surface_unmap(pipe->screen, dst); -- cgit v1.2.3 From 0fae7648987d8264f85a9b6b6d7f903bff82a0f0 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 20 Aug 2008 10:31:38 -0600 Subject: gallium: test for and cull prims with inf/nan vertices in sp_setup.c code. --- src/gallium/drivers/softpipe/sp_setup.c | 45 +++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index b7f2f16307e..dd0562e7efe 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -104,6 +104,16 @@ struct setup_context { +/** + * Test if x is NaN or +/- infinity. + */ +static INLINE boolean +is_inf_or_nan(float x) +{ + union fi tmp; + tmp.f = x; + return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31); +} static boolean cull_tri( struct setup_context *setup, @@ -293,6 +303,9 @@ static void print_vertex(const struct setup_context *setup, } #endif +/** + * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise + */ static boolean setup_sort_vertices( struct setup_context *setup, float det, const float (*v0)[4], @@ -370,10 +383,13 @@ static boolean setup_sort_vertices( struct setup_context *setup, setup->ebot.dx * setup->emaj.dy); setup->oneoverarea = 1.0f / area; + /* debug_printf("%s one-over-area %f area %f det %f\n", __FUNCTION__, setup->oneoverarea, area, det ); */ + if (is_inf_or_nan(setup->oneoverarea)) + return FALSE; } /* We need to know if this is a front or back-facing triangle for: @@ -742,7 +758,8 @@ void setup_tri( struct setup_context *setup, if (cull_tri( setup, det )) return; - setup_sort_vertices( setup, det, v0, v1, v2 ); + if (!setup_sort_vertices( setup, det, v0, v1, v2 )) + return; setup_tri_coefficients( setup ); setup_tri_edges( setup ); @@ -827,7 +844,7 @@ line_persp_coeff(struct setup_context *setup, * Compute the setup->coef[] array dadx, dady, a0 values. * Must be called after setup->vmin,vmax are initialized. */ -static INLINE void +static INLINE boolean setup_line_coefficients(struct setup_context *setup, const float (*v0)[4], const float (*v1)[4]) @@ -836,6 +853,7 @@ setup_line_coefficients(struct setup_context *setup, const struct sp_fragment_shader *spfs = softpipe->fs; const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); uint fragSlot; + float area; /* use setup->vmin, vmax to point to vertices */ setup->vprovoke = v1; @@ -844,9 +862,12 @@ setup_line_coefficients(struct setup_context *setup, setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; - /* NOTE: this is not really 1/area */ - setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx + - setup->emaj.dy * setup->emaj.dy); + + /* NOTE: this is not really area but something proportional to it */ + area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; + if (area == 0.0f || is_inf_or_nan(area)) + return FALSE; + setup->oneoverarea = 1.0f / area; /* z and w are done by linear interpolation: */ @@ -886,6 +907,7 @@ setup_line_coefficients(struct setup_context *setup, setup->coef[fragSlot].dady[1] = 0.0; } } + return TRUE; } @@ -942,18 +964,19 @@ setup_line(struct setup_context *setup, print_vertex(setup, v1); #endif - assert(v0[0][0] < 1.0e9); - assert(v0[0][1] < 1.0e9); - assert(v1[0][0] < 1.0e9); - assert(v1[0][1] < 1.0e9); - if (setup->softpipe->no_rast) return; if (dx == 0 && dy == 0) return; - setup_line_coefficients(setup, v0, v1); + if (!setup_line_coefficients(setup, v0, v1)) + return; + + assert(v0[0][0] < 1.0e9); + assert(v0[0][1] < 1.0e9); + assert(v1[0][0] < 1.0e9); + assert(v1[0][1] < 1.0e9); if (dx < 0) { dx = -dx; /* make positive */ -- cgit v1.2.3 From 0fff3e4ea991ce2f841739ee8c8e8937452e56fa Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 21 Aug 2008 13:39:52 +0100 Subject: trace: Split the output stream on windows. Because windows limits the ammount of memory that can be mapped. --- src/gallium/drivers/trace/tr_stream_wd.c | 102 +++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 27 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c index 9a05c0cec89..c375f50b951 100644 --- a/src/gallium/drivers/trace/tr_stream_wd.c +++ b/src/gallium/drivers/trace/tr_stream_wd.c @@ -47,32 +47,73 @@ struct trace_stream { + char filename[MAX_PATH + 1]; WCHAR wFileName[MAX_PATH + 1]; ULONG_PTR iFile; char *pMap; size_t written; + unsigned suffix; }; +static INLINE boolean +trace_stream_map(struct trace_stream *stream) +{ + ULONG BytesInUnicodeString; + static char filename[MAX_PATH + 1]; + unsigned filename_len; + + filename_len = util_snprintf(filename, + sizeof(filename), + "\\??\\%s.%03u", + stream->filename, + stream->suffix++); + + EngMultiByteToUnicodeN( + stream->wFileName, + sizeof(stream->wFileName), + &BytesInUnicodeString, + filename, + filename_len); + + stream->pMap = EngMapFile(stream->wFileName, MAP_FILE_SIZE, &stream->iFile); + if(!stream->pMap) + return FALSE; + + memset(stream->pMap, 0, MAP_FILE_SIZE); + stream->written = 0; + + return TRUE; +} + + +static INLINE void +trace_stream_unmap(struct trace_stream *stream) +{ + EngUnmapFile(stream->iFile); + if(stream->written < MAP_FILE_SIZE) { + /* Truncate file size */ + stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile); + if(stream->pMap) + EngUnmapFile(stream->iFile); + } + + stream->pMap = NULL; +} + + struct trace_stream * trace_stream_create(const char *filename) { struct trace_stream *stream; - ULONG BytesInUnicodeString; stream = CALLOC_STRUCT(trace_stream); if(!stream) goto error1; - EngMultiByteToUnicodeN( - stream->wFileName, - sizeof(stream->wFileName), - &BytesInUnicodeString, - (char *)filename, - strlen(filename)); + strncpy(stream->filename, filename, sizeof(stream->filename)); - stream->pMap = EngMapFile(stream->wFileName, MAP_FILE_SIZE, &stream->iFile); - if(!stream->pMap) + if(!trace_stream_map(stream)) goto error2; return stream; @@ -84,25 +125,38 @@ error1: } +static INLINE void +trace_stream_copy(struct trace_stream *stream, const char *data, size_t size) +{ + assert(stream->written + size <= MAP_FILE_SIZE); + memcpy(stream->pMap + stream->written, data, size); + stream->written += size; +} + + boolean trace_stream_write(struct trace_stream *stream, const void *data, size_t size) { - boolean ret; - if(!stream) return FALSE; - if(stream->written + size > MAP_FILE_SIZE) { - ret = FALSE; - size = MAP_FILE_SIZE - stream->written; - } - else - ret = TRUE; + if(!stream->pMap) + return FALSE; - memcpy(stream->pMap + stream->written, data, size); - stream->written += size; + while(stream->written + size > MAP_FILE_SIZE) { + size_t step = MAP_FILE_SIZE - stream->written; + trace_stream_copy(stream, data, step); + data = (const char *)data + step; + size -= step; + + trace_stream_unmap(stream); + if(!trace_stream_map(stream)) + return FALSE; + } + + trace_stream_copy(stream, data, size); - return ret; + return TRUE; } @@ -119,13 +173,7 @@ trace_stream_close(struct trace_stream *stream) if(!stream) return; - EngUnmapFile(stream->iFile); - if(stream->written < MAP_FILE_SIZE) { - /* Truncate file size */ - stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile); - if(stream->pMap) - EngUnmapFile(stream->iFile); - } + trace_stream_unmap(stream); FREE(stream); } -- cgit v1.2.3 From 34d12c1787116c254e528dd981810b7b78b7a2ee Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 21 Aug 2008 13:57:59 +0100 Subject: trace: Hack to detect writes to user buffers. It often happens that new data is written directly to the user buffers without mapping/unmapping. This hack marks user buffers and dumps them before passing them to pipe context. --- src/gallium/drivers/trace/tr_context.c | 11 ++++++++++ src/gallium/drivers/trace/tr_winsys.c | 40 ++++++++++++++++++++++++++++++++++ src/gallium/drivers/trace/tr_winsys.h | 13 +++++++++++ 3 files changed, 64 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 529bed3c6bb..f16359e8ad6 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -32,6 +32,7 @@ #include "tr_state.h" #include "tr_screen.h" #include "tr_texture.h" +#include "tr_winsys.h" #include "tr_context.h" @@ -131,6 +132,8 @@ trace_context_draw_elements(struct pipe_context *_pipe, struct pipe_context *pipe = tr_ctx->pipe; boolean result; + trace_winsys_user_buffer_update(_pipe->winsys, indexBuffer); + trace_dump_call_begin("pipe_context", "draw_elements"); trace_dump_arg(ptr, pipe); @@ -164,6 +167,8 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, struct pipe_context *pipe = tr_ctx->pipe; boolean result; + trace_winsys_user_buffer_update(_pipe->winsys, indexBuffer); + trace_dump_call_begin("pipe_context", "draw_range_elements"); trace_dump_arg(ptr, pipe); @@ -691,6 +696,8 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe, struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; + trace_winsys_user_buffer_update(_pipe->winsys, (struct pipe_buffer *)buffer); + trace_dump_call_begin("pipe_context", "set_constant_buffer"); trace_dump_arg(ptr, pipe); @@ -820,6 +827,10 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; + unsigned i; + + for(i = 0; i < num_buffers; ++i) + trace_winsys_user_buffer_update(_pipe->winsys, buffers[i].buffer); trace_dump_call_begin("pipe_context", "set_vertex_buffers"); diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 120006ea9f0..2c7a6f893b0 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -242,10 +242,50 @@ trace_winsys_user_buffer_create(struct pipe_winsys *_winsys, trace_dump_call_end(); + /* XXX: Mark the user buffers. (we should wrap pipe_buffers, but is is + * impossible to do so while texture-less surfaces are still around */ + if(result) { + assert(!(result->usage & TRACE_BUFFER_USAGE_USER)); + result->usage |= TRACE_BUFFER_USAGE_USER; + } + return result; } +void +trace_winsys_user_buffer_update(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + const void *map; + + if(buffer && buffer->usage & TRACE_BUFFER_USAGE_USER) { + map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_READ); + if(map) { + trace_dump_call_begin("pipe_winsys", "buffer_write"); + + trace_dump_arg(ptr, winsys); + + trace_dump_arg(ptr, buffer); + + trace_dump_arg_begin("data"); + trace_dump_bytes(map, buffer->size); + trace_dump_arg_end(); + + trace_dump_arg_begin("size"); + trace_dump_uint(buffer->size); + trace_dump_arg_end(); + + trace_dump_call_end(); + + winsys->buffer_unmap(winsys, buffer); + } + } +} + + static void * trace_winsys_buffer_map(struct pipe_winsys *_winsys, struct pipe_buffer *buffer, diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h index 2218117347c..062ddf66a00 100644 --- a/src/gallium/drivers/trace/tr_winsys.h +++ b/src/gallium/drivers/trace/tr_winsys.h @@ -34,6 +34,14 @@ #include "pipe/p_winsys.h" +/** + * It often happens that new data is written directly to the user buffers + * without mapping/unmapping. This flag marks user buffers, so that their + * contents can be dumpped before being used by the pipe context. + */ +#define TRACE_BUFFER_USAGE_USER (1 << 31) + + struct hash_table; @@ -60,4 +68,9 @@ struct pipe_winsys * trace_winsys_create(struct pipe_winsys *winsys); +void +trace_winsys_user_buffer_update(struct pipe_winsys *winsys, + struct pipe_buffer *buffer); + + #endif /* TR_WINSYS_H_ */ -- cgit v1.2.3 From 152d00d199f2c8c46a96be450cfd017b4798b4d1 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 22 Aug 2008 03:13:27 +0100 Subject: trace: Use a 4 hexadecimal digit suffix. --- src/gallium/drivers/trace/tr_stream_wd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c index c375f50b951..45309dcb4e6 100644 --- a/src/gallium/drivers/trace/tr_stream_wd.c +++ b/src/gallium/drivers/trace/tr_stream_wd.c @@ -65,7 +65,7 @@ trace_stream_map(struct trace_stream *stream) filename_len = util_snprintf(filename, sizeof(filename), - "\\??\\%s.%03u", + "\\??\\%s.%04x", stream->filename, stream->suffix++); -- cgit v1.2.3 From 807a7487ff6f0af60240ce467bd1ac160b0b054e Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 22 Aug 2008 03:14:24 +0100 Subject: trace: Don't trace texture/surfaces releases, only destructions. --- src/gallium/drivers/trace/tr_screen.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index cca8597a87e..a6467ec35fd 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -224,19 +224,20 @@ trace_screen_texture_release(struct pipe_screen *_screen, else texture = NULL; - trace_dump_call_begin("pipe_screen", "texture_release"); - - trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, texture); - if (*ptexture) { - if (!--(*ptexture)->refcount) + if (!--(*ptexture)->refcount) { + trace_dump_call_begin("pipe_screen", "texture_destroy"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, texture); + trace_texture_destroy(tr_scr, *ptexture); + + trace_dump_call_end(); + } *ptexture = NULL; } - - trace_dump_call_end(); } @@ -297,19 +298,20 @@ trace_screen_tex_surface_release(struct pipe_screen *_screen, else surface = NULL; - trace_dump_call_begin("pipe_screen", "tex_surface_release"); - - trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, surface); - if (*psurface) { - if (!--(*psurface)->refcount) + if (!--(*psurface)->refcount) { + trace_dump_call_begin("pipe_screen", "tex_surface_destroy"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, surface); + trace_surface_destroy(tr_tex, *psurface); + + trace_dump_call_end(); + } *psurface = NULL; } - - trace_dump_call_end(); } -- cgit v1.2.3 From 1a46dcc8a927dfb38ca1381e7b3dafb789f8257c Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 22 Aug 2008 15:25:21 -0600 Subject: gallium: replace LOG2() macro with util_fast_log2() inline func --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 8 ++++---- src/gallium/drivers/softpipe/sp_context.c | 3 +++ src/gallium/drivers/softpipe/sp_tex_sample.c | 3 ++- src/gallium/include/pipe/p_util.h | 15 --------------- 4 files changed, 9 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index e3906070237..00ed4da4507 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -713,10 +713,10 @@ lg24f( { const unsigned X = 0; - store[X + 0] = LOG2( store[X + 0] ); - store[X + 1] = LOG2( store[X + 1] ); - store[X + 2] = LOG2( store[X + 2] ); - store[X + 3] = LOG2( store[X + 3] ); + store[X + 0] = util_fast_log2( store[X + 0] ); + store[X + 1] = util_fast_log2( store[X + 1] ); + store[X + 2] = util_fast_log2( store[X + 2] ); + store[X + 3] = util_fast_log2( store[X + 3] ); } static void diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 626c3a9d4e1..9b1313bc83e 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -33,6 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" +#include "util/u_math.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_flush.h" @@ -128,6 +129,8 @@ softpipe_create( struct pipe_screen *screen, struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); uint i; + util_init_math(); + #ifdef PIPE_ARCH_X86 softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE ); #else diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 01f4d0ca811..d7680ffa81d 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -41,6 +41,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "tgsi/tgsi_exec.h" +#include "util/u_math.h" /* @@ -499,7 +500,7 @@ compute_lambda(struct tgsi_sampler *sampler, rho = MAX2(rho, max); } - lambda = LOG2(rho); + lambda = util_fast_log2(rho); lambda += lodbias + sampler->state->lod_bias; lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod); diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 473a8d94abb..660192b28e7 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -443,21 +443,6 @@ static INLINE int iround(float f) #define FABSF(x) ((float) fabs(x)) #endif -/* Pretty fast, and accurate. - * Based on code from http://www.flipcode.com/totd/ - */ -static INLINE float LOG2(float val) -{ - union fi num; - int log_2; - - num.f = val; - log_2 = ((num.i >> 23) & 255) - 128; - num.i &= ~(255 << 23); - num.i += 127 << 23; - num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3; - return num.f + log_2; -} #if defined(__GNUC__) #define CEILF(x) ceilf(x) -- cgit v1.2.3 From 9935e3b7303da656e258d4bd5bc799ffbfbc737b Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 22 Aug 2008 15:51:02 -0600 Subject: gallium: stop using ifloor(), FABSF(), etc --- src/gallium/drivers/softpipe/sp_setup.c | 14 +++--- src/gallium/drivers/softpipe/sp_tex_sample.c | 72 ++++++++++++++-------------- 2 files changed, 44 insertions(+), 42 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index dd0562e7efe..c8c55fa6e85 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -44,6 +44,8 @@ #include "draw/draw_vertex.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" + #define DEBUG_VERTS 0 #define DEBUG_FRAGS 0 @@ -611,18 +613,18 @@ static void setup_tri_edges( struct setup_context *setup ) float vmid_y = setup->vmid[0][1] - 0.5f; float vmax_y = setup->vmax[0][1] - 0.5f; - setup->emaj.sy = CEILF(vmin_y); - setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy); + setup->emaj.sy = ceilf(vmin_y); + setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; - setup->etop.sy = CEILF(vmid_y); - setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy); + setup->etop.sy = ceilf(vmid_y); + setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy); setup->etop.dxdy = setup->etop.dx / setup->etop.dy; setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; - setup->ebot.sy = CEILF(vmin_y); - setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy); + setup->ebot.sy = ceilf(vmin_y); + setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy); setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index d7680ffa81d..58a95d13e13 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -51,7 +51,7 @@ * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). */ -#define FRAC(f) ((f) - ifloor(f)) +#define FRAC(f) ((f) - util_ifloor(f)) /** @@ -100,7 +100,7 @@ nearest_texcoord(unsigned wrapMode, float s, unsigned size) case PIPE_TEX_WRAP_REPEAT: /* s limited to [0,1) */ /* i limited to [0,size-1] */ - i = ifloor(s * size); + i = util_ifloor(s * size); i = REMAINDER(i, size); return i; case PIPE_TEX_WRAP_CLAMP: @@ -111,7 +111,7 @@ nearest_texcoord(unsigned wrapMode, float s, unsigned size) else if (s >= 1.0F) i = size - 1; else - i = ifloor(s * size); + i = util_ifloor(s * size); return i; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: { @@ -124,7 +124,7 @@ nearest_texcoord(unsigned wrapMode, float s, unsigned size) else if (s > max) i = size - 1; else - i = ifloor(s * size); + i = util_ifloor(s * size); } return i; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: @@ -138,14 +138,14 @@ nearest_texcoord(unsigned wrapMode, float s, unsigned size) else if (s >= max) i = size; else - i = ifloor(s * size); + i = util_ifloor(s * size); } return i; case PIPE_TEX_WRAP_MIRROR_REPEAT: { const float min = 1.0F / (2.0F * size); const float max = 1.0F - min; - const int flr = ifloor(s); + const int flr = util_ifloor(s); float u; if (flr & 1) u = 1.0F - (s - (float) flr); @@ -156,20 +156,20 @@ nearest_texcoord(unsigned wrapMode, float s, unsigned size) else if (u > max) i = size - 1; else - i = ifloor(u * size); + i = util_ifloor(u * size); } return i; case PIPE_TEX_WRAP_MIRROR_CLAMP: { /* s limited to [0,1] */ /* i limited to [0,size-1] */ - const float u = FABSF(s); + const float u = fabsf(s); if (u <= 0.0F) i = 0; else if (u >= 1.0F) i = size - 1; else - i = ifloor(u * size); + i = util_ifloor(u * size); } return i; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: @@ -178,13 +178,13 @@ nearest_texcoord(unsigned wrapMode, float s, unsigned size) /* i limited to [0, size-1] */ const float min = 1.0F / (2.0F * size); const float max = 1.0F - min; - const float u = FABSF(s); + const float u = fabsf(s); if (u < min) i = 0; else if (u > max) i = size - 1; else - i = ifloor(u * size); + i = util_ifloor(u * size); } return i; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: @@ -193,13 +193,13 @@ nearest_texcoord(unsigned wrapMode, float s, unsigned size) /* i limited to [0, size-1] */ const float min = -1.0F / (2.0F * size); const float max = 1.0F - min; - const float u = FABSF(s); + const float u = fabsf(s); if (u < min) i = -1; else if (u > max) i = size; else - i = ifloor(u * size); + i = util_ifloor(u * size); } return i; default: @@ -226,7 +226,7 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size, switch (wrapMode) { case PIPE_TEX_WRAP_REPEAT: u = s * size - 0.5F; - *i0 = REMAINDER(ifloor(u), size); + *i0 = REMAINDER(util_ifloor(u), size); *i1 = REMAINDER(*i0 + 1, size); break; case PIPE_TEX_WRAP_CLAMP: @@ -237,7 +237,7 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size, else u = s * size; u -= 0.5F; - *i0 = ifloor(u); + *i0 = util_ifloor(u); *i1 = *i0 + 1; break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: @@ -248,7 +248,7 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size, else u = s * size; u -= 0.5F; - *i0 = ifloor(u); + *i0 = util_ifloor(u); *i1 = *i0 + 1; if (*i0 < 0) *i0 = 0; @@ -266,19 +266,19 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size, else u = s * size; u -= 0.5F; - *i0 = ifloor(u); + *i0 = util_ifloor(u); *i1 = *i0 + 1; } break; case PIPE_TEX_WRAP_MIRROR_REPEAT: { - const int flr = ifloor(s); + const int flr = util_ifloor(s); if (flr & 1) u = 1.0F - (s - (float) flr); else u = s - (float) flr; u = (u * size) - 0.5F; - *i0 = ifloor(u); + *i0 = util_ifloor(u); *i1 = *i0 + 1; if (*i0 < 0) *i0 = 0; @@ -287,23 +287,23 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size, } break; case PIPE_TEX_WRAP_MIRROR_CLAMP: - u = FABSF(s); + u = fabsf(s); if (u >= 1.0F) u = (float) size; else u *= size; u -= 0.5F; - *i0 = ifloor(u); + *i0 = util_ifloor(u); *i1 = *i0 + 1; break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - u = FABSF(s); + u = fabsf(s); if (u >= 1.0F) u = (float) size; else u *= size; u -= 0.5F; - *i0 = ifloor(u); + *i0 = util_ifloor(u); *i1 = *i0 + 1; if (*i0 < 0) *i0 = 0; @@ -314,7 +314,7 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size, { const float min = -1.0F / (2.0F * size); const float max = 1.0F - min; - u = FABSF(s); + u = fabsf(s); if (u <= min) u = min * size; else if (u >= max) @@ -322,7 +322,7 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size, else u *= size; u -= 0.5F; - *i0 = ifloor(u); + *i0 = util_ifloor(u); *i1 = *i0 + 1; } break; @@ -343,12 +343,12 @@ nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size) int i; switch (wrapMode) { case PIPE_TEX_WRAP_CLAMP: - i = ifloor(s); + i = util_ifloor(s); return CLAMP(i, 0, (int) size-1); case PIPE_TEX_WRAP_CLAMP_TO_EDGE: /* fall-through */ case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) ); + return util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) ); default: assert(0); return 0; @@ -368,7 +368,7 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, case PIPE_TEX_WRAP_CLAMP: /* Not exactly what the spec says, but it matches NVIDIA output */ s = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f); - *i0 = ifloor(s); + *i0 = util_ifloor(s); *i1 = *i0 + 1; break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: @@ -376,7 +376,7 @@ linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, case PIPE_TEX_WRAP_CLAMP_TO_BORDER: s = CLAMP(s, 0.5F, (float) size - 0.5F); s -= 0.5F; - *i0 = ifloor(s); + *i0 = util_ifloor(s); *i1 = *i0 + 1; if (*i1 > (int) size - 1) *i1 = size - 1; @@ -402,7 +402,7 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz */ - const float arx = FABSF(rx), ary = FABSF(ry), arz = FABSF(rz); + const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); unsigned face; float sc, tc, ma; @@ -477,16 +477,16 @@ compute_lambda(struct tgsi_sampler *sampler, { float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; - dsdx = FABSF(dsdx); - dsdy = FABSF(dsdy); + dsdx = fabsf(dsdx); + dsdy = fabsf(dsdy); rho = MAX2(dsdx, dsdy) * sampler->texture->width[0]; } if (t) { float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; float max; - dtdx = FABSF(dtdx); - dtdy = FABSF(dtdy); + dtdx = fabsf(dtdx); + dtdy = fabsf(dtdy); max = MAX2(dtdx, dtdy) * sampler->texture->height[0]; rho = MAX2(rho, max); } @@ -494,8 +494,8 @@ compute_lambda(struct tgsi_sampler *sampler, float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; float max; - dpdx = FABSF(dpdx); - dpdy = FABSF(dpdy); + dpdx = fabsf(dpdx); + dpdy = fabsf(dpdy); max = MAX2(dpdx, dpdy) * sampler->texture->depth[0]; rho = MAX2(rho, max); } -- cgit v1.2.3 From a13475ff0057f1de8e3bc08d6ca42b9e135a3f5a Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 22 Aug 2008 16:09:37 -0600 Subject: gallium: replace align_int() with align() The two functions are identical. Removed align_int() from p_util.h --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 6 +++--- src/gallium/drivers/i915simple/i915_texture.c | 8 ++++---- src/gallium/drivers/i965simple/brw_tex_layout.c | 8 ++++---- src/gallium/include/pipe/p_util.h | 11 +++-------- 4 files changed, 14 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 0aec4b71baf..be3535ed9e1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -119,7 +119,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; unsigned opt = fpme->opt; - unsigned alloc_count = align_int( fetch_count, 4 ); + unsigned alloc_count = align( fetch_count, 4 ); struct vertex_header *pipeline_verts = (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); @@ -195,7 +195,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; unsigned opt = fpme->opt; - unsigned alloc_count = align_int( count, 4 ); + unsigned alloc_count = align( count, 4 ); struct vertex_header *pipeline_verts = (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); @@ -271,7 +271,7 @@ static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; unsigned opt = fpme->opt; - unsigned alloc_count = align_int( count, 4 ); + unsigned alloc_count = align( count, 4 ); struct vertex_header *pipeline_verts = (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index cf4964b26b3..ca0fb8761bf 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -220,7 +220,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) */ if (pt->last_level > 0) { unsigned mip1_nblocksx - = align_int(pf_get_nblocksx(&pt->block, minify(width)), align_x) + = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) + pf_get_nblocksx(&pt->block, minify(minify(width))); if (mip1_nblocksx > nblocksx) @@ -229,14 +229,14 @@ i945_miptree_layout_2d( struct i915_texture *tex ) /* Pitch must be a whole number of dwords */ - tex->stride = align_int(tex->stride, 64); + tex->stride = align(tex->stride, 64); tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 1, width, height, 1); i915_miptree_set_image_offset(tex, level, 0, x, y); - nblocksy = align_int(nblocksy, align_y); + nblocksy = align(nblocksy, align_y); /* Because the images are packed better, the final offset * might not be the maximal one: @@ -246,7 +246,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) /* Layout_below: step right after second mipmap level. */ if (level == 1) { - x += align_int(nblocksx, align_x); + x += align(nblocksx, align_x); } else { y += nblocksy; diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 8c7725605be..9b6cf817238 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -144,7 +144,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) */ if (pt->last_level > 0) { unsigned mip1_nblocksx - = align_int(pf_get_nblocksx(&pt->block, minify(width)), align_x) + = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) + pf_get_nblocksx(&pt->block, minify(minify(width))); if (mip1_nblocksx > nblocksx) @@ -153,14 +153,14 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) /* Pitch must be a whole number of dwords */ - tex->stride = align_int(tex->stride, 64); + tex->stride = align(tex->stride, 64); tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { intel_miptree_set_level_info(tex, level, 1, x, y, width, height, 1); - nblocksy = align_int(nblocksy, align_y); + nblocksy = align(nblocksy, align_y); /* Because the images are packed better, the final offset * might not be the maximal one: @@ -170,7 +170,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) /* Layout_below: step right after second mipmap level. */ if (level == 1) { - x += align_int(nblocksx, align_x); + x += align(nblocksx, align_x); } else { y += nblocksy; diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 8f5cb4ddc5b..cac0039e123 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -289,13 +289,14 @@ align16( void *unaligned ) } -static INLINE int align_int(int x, int align) +static INLINE int align(int value, int alignment) { - return (x + align - 1) & ~(align - 1); + return (value + alignment - 1) & ~(alignment - 1); } + #if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) static INLINE unsigned ffs( unsigned u ) { @@ -399,12 +400,6 @@ do { \ } while (0) -static INLINE int align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} - - /* util/p_util.c */ extern void pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block, -- cgit v1.2.3 From fe1e39afbb147deab60ecc932c24f921b46f1364 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 22 Aug 2008 16:19:22 -0600 Subject: gallium: move pipe_copy_rect(), pipe_fill_rect() protos into new u_rect.h header --- src/gallium/auxiliary/util/p_util.c | 1 + src/gallium/auxiliary/util/u_rect.h | 54 +++++++++++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_surface.c | 1 + src/gallium/drivers/i915simple/i915_surface.c | 1 + src/gallium/drivers/i965simple/brw_surface.c | 1 + src/gallium/drivers/softpipe/sp_surface.c | 1 + src/gallium/include/pipe/p_util.h | 13 ------- src/mesa/state_tracker/st_texture.c | 1 + 8 files changed, 60 insertions(+), 13 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_rect.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/p_util.c b/src/gallium/auxiliary/util/p_util.c index 271be4edf14..787881b1923 100644 --- a/src/gallium/auxiliary/util/p_util.c +++ b/src/gallium/auxiliary/util/p_util.c @@ -33,6 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_format.h" +#include "util/u_rect.h" /** diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h new file mode 100644 index 00000000000..fba48088641 --- /dev/null +++ b/src/gallium/auxiliary/util/u_rect.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Pipe copy/fill rect helpers. + */ + + +#ifndef U_RECT_H +#define U_RECT_H + + +#include "pipe/p_format.h" + + +extern void +pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, const ubyte * src, + int src_stride, unsigned src_x, int src_y); + +extern void +pipe_fill_rect(ubyte * dst, const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, uint32_t value); + + + +#endif /* U_RECT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 5549eb496d8..01ffa31c2c2 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -30,6 +30,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" +#include "util/u_rect.h" #include "cell_context.h" #include "cell_surface.h" diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 4430e816263..17b5125e569 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -34,6 +34,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" +#include "util/u_rect.h" /* Assumes all values are within bounds -- no checking at this level - diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 0be3dfc7438..69da2522859 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -33,6 +33,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" +#include "util/u_rect.h" diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 4f1bb881cb5..bfbae234f1d 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -30,6 +30,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" +#include "util/u_rect.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index cac0039e123..4a3fca59621 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -31,7 +31,6 @@ #include "p_config.h" #include "p_compiler.h" #include "p_debug.h" -#include "p_format.h" #include "p_pointer.h" #if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) @@ -400,18 +399,6 @@ do { \ } while (0) -/* util/p_util.c - */ -extern void pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block, - unsigned dst_stride, unsigned dst_x, unsigned dst_y, - unsigned width, unsigned height, const ubyte * src, - int src_stride, unsigned src_x, int src_y); - -extern void -pipe_fill_rect(ubyte * dst, const struct pipe_format_block *block, - unsigned dst_stride, unsigned dst_x, unsigned dst_y, - unsigned width, unsigned height, uint32_t value); - #if defined(_MSC_VER) #if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 289b78b38b3..63046a0ecce 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -38,6 +38,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_rect.h" #define DBG if(0) printf -- cgit v1.2.3 From ec7415642d7eb192164e7a513198b86756de484c Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Sat, 23 Aug 2008 12:31:50 +0200 Subject: trace: Include u_string.h. --- src/gallium/drivers/trace/tr_stream_wd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c index 45309dcb4e6..b3b65f09716 100644 --- a/src/gallium/drivers/trace/tr_stream_wd.c +++ b/src/gallium/drivers/trace/tr_stream_wd.c @@ -38,6 +38,7 @@ #include <winddi.h> #include "pipe/p_util.h" +#include "util/u_string.h" #include "tr_stream.h" -- cgit v1.2.3 From 4f25420bdd834e81a3e22733304efc5261c2998a Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Sun, 24 Aug 2008 17:48:55 -0600 Subject: gallium: refactor/replace p_util.h with util/u_memory.h and util/u_math.h Also, rename p_tile.[ch] to u_tile.[ch] --- src/gallium/README.portability | 4 +- src/gallium/auxiliary/cso_cache/cso_cache.c | 3 +- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- src/gallium/auxiliary/cso_cache/cso_hash.c | 2 +- src/gallium/auxiliary/draw/draw_context.c | 3 +- src/gallium/auxiliary/draw/draw_pipe.c | 1 - src/gallium/auxiliary/draw/draw_pipe_aaline.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_clip.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_cull.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_offset.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 6 +- src/gallium/auxiliary/draw/draw_pipe_twoside.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_util.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_validate.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_wide_line.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 3 +- src/gallium/auxiliary/draw/draw_pt.c | 1 - src/gallium/auxiliary/draw/draw_pt_emit.c | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch.c | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 +- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 3 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 +- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 2 +- src/gallium/auxiliary/draw/draw_pt_util.c | 1 - src/gallium/auxiliary/draw/draw_pt_varray.c | 4 +- src/gallium/auxiliary/draw/draw_pt_vcache.c | 2 +- src/gallium/auxiliary/draw/draw_vbuf.h | 2 - src/gallium/auxiliary/draw/draw_vs.c | 6 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 4 +- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 +- src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 3 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 3 +- src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 - src/gallium/auxiliary/draw/draw_vs_sse.c | 3 +- src/gallium/auxiliary/draw/draw_vs_varient.c | 3 +- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 3 +- src/gallium/auxiliary/gallivm/instructions.cpp | 2 +- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 2 +- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 2 +- .../auxiliary/pipebuffer/pb_buffer_malloc.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 2 +- .../auxiliary/pipebuffer/pb_bufmgr_fenced.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_validate.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_winsys.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 2 +- src/gallium/auxiliary/sct/sct.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_build.c | 1 - src/gallium/auxiliary/tgsi/tgsi_build.h | 4 + src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 1 - src/gallium/auxiliary/tgsi/tgsi_exec.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_parse.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_scan.c | 6 +- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_transform.c | 1 + src/gallium/auxiliary/tgsi/tgsi_transform.h | 1 - src/gallium/auxiliary/tgsi/tgsi_util.c | 1 - src/gallium/auxiliary/translate/translate.c | 1 - src/gallium/auxiliary/translate/translate_cache.c | 2 +- .../auxiliary/translate/translate_generic.c | 2 +- src/gallium/auxiliary/translate/translate_sse.c | 2 +- src/gallium/auxiliary/util/Makefile | 2 +- src/gallium/auxiliary/util/SConscript | 2 +- src/gallium/auxiliary/util/p_debug.c | 1 - src/gallium/auxiliary/util/u_blit.c | 5 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- src/gallium/auxiliary/util/u_handle_table.c | 4 +- src/gallium/auxiliary/util/u_hash_table.c | 5 +- src/gallium/auxiliary/util/u_math.h | 240 +++- src/gallium/auxiliary/util/u_memory.h | 222 ++++ src/gallium/auxiliary/util/u_mm.c | 2 +- src/gallium/auxiliary/util/u_pack_color.h | 36 +- src/gallium/auxiliary/util/u_pointer.h | 107 ++ src/gallium/auxiliary/util/u_rect.c | 1 - src/gallium/auxiliary/util/u_simple_shaders.c | 2 +- src/gallium/auxiliary/util/u_tile.c | 1169 ++++++++++++++++++++ src/gallium/auxiliary/util/u_tile.h | 101 ++ src/gallium/drivers/cell/common.h | 1 - src/gallium/drivers/cell/ppu/cell_clear.c | 2 +- src/gallium/drivers/cell/ppu/cell_context.c | 2 +- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 +- src/gallium/drivers/cell/ppu/cell_render.c | 2 +- src/gallium/drivers/cell/ppu/cell_screen.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_derived.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_shader.c | 2 +- src/gallium/drivers/cell/ppu/cell_surface.c | 2 +- src/gallium/drivers/cell/ppu/cell_texture.c | 2 +- src/gallium/drivers/cell/ppu/cell_winsys.c | 2 +- src/gallium/drivers/cell/spu/spu_exec.c | 1 - src/gallium/drivers/cell/spu/spu_tri.c | 1 - src/gallium/drivers/cell/spu/spu_util.c | 1 - src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 1 - src/gallium/drivers/cell/spu/spu_vertex_shader.c | 1 - src/gallium/drivers/failover/fo_context.c | 2 +- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_debug_fp.c | 2 +- src/gallium/drivers/i915simple/i915_fpc.h | 1 - .../drivers/i915simple/i915_fpc_translate.c | 2 + src/gallium/drivers/i915simple/i915_prim_emit.c | 4 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 3 +- src/gallium/drivers/i915simple/i915_screen.c | 2 +- src/gallium/drivers/i915simple/i915_state.c | 3 +- .../drivers/i915simple/i915_state_derived.c | 2 +- .../drivers/i915simple/i915_state_dynamic.c | 4 +- .../drivers/i915simple/i915_state_immediate.c | 2 +- .../drivers/i915simple/i915_state_sampler.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 3 +- src/gallium/drivers/i915simple/i915_texture.c | 3 +- src/gallium/drivers/i965simple/brw_cc.c | 6 +- src/gallium/drivers/i965simple/brw_clip_state.c | 3 +- src/gallium/drivers/i965simple/brw_context.c | 2 +- src/gallium/drivers/i965simple/brw_curbe.c | 3 +- src/gallium/drivers/i965simple/brw_draw_upload.c | 1 + src/gallium/drivers/i965simple/brw_gs_state.c | 3 +- src/gallium/drivers/i965simple/brw_screen.c | 2 +- src/gallium/drivers/i965simple/brw_sf_state.c | 5 +- src/gallium/drivers/i965simple/brw_shader_info.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 2 +- src/gallium/drivers/i965simple/brw_state_batch.c | 2 +- src/gallium/drivers/i965simple/brw_state_cache.c | 2 +- src/gallium/drivers/i965simple/brw_state_pool.c | 3 +- src/gallium/drivers/i965simple/brw_state_upload.c | 2 +- src/gallium/drivers/i965simple/brw_surface.c | 3 +- src/gallium/drivers/i965simple/brw_tex_layout.c | 8 +- src/gallium/drivers/i965simple/brw_vs_state.c | 3 +- src/gallium/drivers/i965simple/brw_wm.c | 2 +- src/gallium/drivers/i965simple/brw_wm_decl.c | 3 +- src/gallium/drivers/i965simple/brw_wm_glsl.c | 3 +- .../drivers/i965simple/brw_wm_sampler_state.c | 3 +- src/gallium/drivers/i965simple/brw_wm_state.c | 3 +- src/gallium/drivers/softpipe/sp_context.c | 2 +- src/gallium/drivers/softpipe/sp_fs_exec.c | 2 +- src/gallium/drivers/softpipe/sp_fs_llvm.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 2 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 1 + src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 2 +- src/gallium/drivers/softpipe/sp_quad_blend.c | 29 +- src/gallium/drivers/softpipe/sp_quad_bufloop.c | 2 +- src/gallium/drivers/softpipe/sp_quad_colormask.c | 3 +- src/gallium/drivers/softpipe/sp_quad_coverage.c | 2 +- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 2 +- src/gallium/drivers/softpipe/sp_quad_earlyz.c | 2 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 3 +- src/gallium/drivers/softpipe/sp_quad_occlusion.c | 2 +- src/gallium/drivers/softpipe/sp_quad_output.c | 2 +- src/gallium/drivers/softpipe/sp_quad_stencil.c | 2 +- src/gallium/drivers/softpipe/sp_quad_stipple.c | 2 +- src/gallium/drivers/softpipe/sp_query.c | 2 +- src/gallium/drivers/softpipe/sp_screen.c | 2 +- src/gallium/drivers/softpipe/sp_setup.c | 2 +- src/gallium/drivers/softpipe/sp_state_blend.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 3 +- src/gallium/drivers/softpipe/sp_state_fs.c | 2 +- src/gallium/drivers/softpipe/sp_state_rasterizer.c | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 2 +- src/gallium/drivers/softpipe/sp_surface.c | 3 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- src/gallium/drivers/softpipe/sp_texture.c | 3 +- src/gallium/drivers/softpipe/sp_tile_cache.c | 4 +- src/gallium/drivers/trace/tr_context.c | 2 +- src/gallium/drivers/trace/tr_dump.c | 2 + src/gallium/drivers/trace/tr_dump.h | 1 - src/gallium/drivers/trace/tr_screen.c | 2 +- src/gallium/drivers/trace/tr_state.c | 1 + src/gallium/drivers/trace/tr_stream_stdc.c | 2 +- src/gallium/drivers/trace/tr_stream_wd.c | 2 +- src/gallium/drivers/trace/tr_texture.c | 2 +- src/gallium/drivers/trace/tr_winsys.c | 3 +- src/gallium/include/pipe/p_util.h | 460 -------- src/gallium/state_trackers/python/gallium.i | 2 +- src/gallium/state_trackers/python/st_device.c | 3 +- src/gallium/state_trackers/python/st_sample.c | 5 +- .../state_trackers/python/st_softpipe_winsys.c | 3 +- .../winsys/drm/intel/common/intel_be_device.c | 2 +- .../winsys/drm/intel/dri/intel_winsys_softpipe.c | 2 +- src/gallium/winsys/egl_xlib/egl_xlib.c | 2 +- src/gallium/winsys/egl_xlib/sw_winsys.c | 3 +- src/gallium/winsys/gdi/wmesa.c | 2 +- src/gallium/winsys/xlib/brw_aub.c | 1 - src/gallium/winsys/xlib/xm_winsys.c | 3 +- src/gallium/winsys/xlib/xm_winsys_aub.c | 2 +- src/mesa/state_tracker/acc2.c | 319 ++++++ src/mesa/state_tracker/st_cb_accum.c | 2 +- src/mesa/state_tracker/st_cb_bitmap.c | 2 +- src/mesa/state_tracker/st_cb_drawpixels.c | 2 +- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/state_tracker/st_cb_texture.c | 2 +- src/mesa/state_tracker/st_program.c | 2 +- src/mesa/state_tracker/st_texture.c | 1 - 201 files changed, 2453 insertions(+), 686 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_memory.h create mode 100644 src/gallium/auxiliary/util/u_pointer.h create mode 100644 src/gallium/auxiliary/util/u_tile.c create mode 100644 src/gallium/auxiliary/util/u_tile.h delete mode 100644 src/gallium/include/pipe/p_util.h create mode 100644 src/mesa/state_tracker/acc2.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/README.portability b/src/gallium/README.portability index d5d5987a7ff..adecf4bb798 100644 --- a/src/gallium/README.portability +++ b/src/gallium/README.portability @@ -35,8 +35,8 @@ not available in Windows Kernel Mode. Use the appropriate p_*.h include. * Use MALLOC, CALLOC, FREE instead of the malloc, calloc, free functions. -* Use align_pointer() function defined in p_util.h for aligning pointers in a -portable way. +* Use align_pointer() function defined in u_memory.h for aligning pointers + in a portable way. == Debugging == diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 36dc46ff809..6b1754ea002 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -28,9 +28,10 @@ /* Authors: Zack Rusin <zack@tungstengraphics.com> */ -#include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "util/u_memory.h" + #include "cso_cache.h" #include "cso_hash.h" diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 86e4d46a200..f22ba40824d 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -36,7 +36,7 @@ */ #include "pipe/p_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 0646efd9527..7f0044c5a7f 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -31,7 +31,7 @@ */ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cso_hash.h" diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 2f263cf06a9..1c26cb31a39 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -31,7 +31,8 @@ */ -#include "pipe/p_util.h" +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw_context.h" #include "draw_vbuf.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 1db43876efb..3cde9d36d3a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -30,7 +30,6 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" #include "draw/draw_private.h" #include "draw/draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 991304b2c84..20841bb5d69 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -32,11 +32,12 @@ */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index c7f4349cb3e..2c1cacbdb47 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -38,7 +38,6 @@ */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" @@ -47,6 +46,9 @@ #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "draw_context.h" #include "draw_vs.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index fa10f8efca2..3265dcd154a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -32,7 +32,9 @@ */ -#include "pipe/p_util.h" +#include "util/u_memory.h" +#include "util/u_math.h" + #include "pipe/p_shader_tokens.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index d0d22a38e07..053be5f050d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -33,7 +33,7 @@ */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 4741b22d023..43d1fecc4dd 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -28,7 +28,9 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "pipe/p_shader_tokens.h" #include "draw_vs.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 2f5865741c1..1fea5e6dcbc 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -32,7 +32,8 @@ * \author Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index e97136fa1f8..b764d9c518c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -34,12 +34,14 @@ */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index bf0db18a684..b65e2aa1021 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -36,10 +36,12 @@ */ -#include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_pipe.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "draw/draw_pipe.h" /** Subclass of draw_stage */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 3ac825f5656..c329d923390 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -28,7 +28,8 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index 8f97fdedaac..68835fd1a59 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -33,7 +33,7 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "draw_private.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c index 04438f4dd08..e22e5fed0c6 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_util.c +++ b/src/gallium/auxiliary/draw/draw_pipe_util.c @@ -30,7 +30,7 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_private.h" #include "draw/draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index 6be1d369c33..f34c68728ef 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -28,7 +28,7 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "draw_private.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index a6fde77a0ed..c0cf4269dbb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -35,7 +35,8 @@ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw_vbuf.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 48ec2f1239c..184e363594d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -28,9 +28,10 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw_private.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 54590984c6b..4f1326053df 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -28,7 +28,8 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 85a75525c8b..669c11c993c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -30,7 +30,6 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 40f05cb9e0b..d4eca80588b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 07f4c991642..6377f896fb0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 4a1f3b09536..0684c93d102 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -30,7 +30,7 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index fdf9b6fe6a8..87094f30924 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -31,7 +31,8 @@ */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index be3535ed9e1..f617aac9f79 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -25,7 +25,8 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index af6306b1c67..96dc706b998 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_context.h" #include "draw/draw_context.h" #include "draw/draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index 32c8a9632cb..3bc7939c556 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -30,7 +30,6 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 46e722a154e..c15afe65f1a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -25,7 +25,9 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index cda2987c9ed..b8b5de729d2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -30,7 +30,7 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index e90f37872a1..62247ccd9f9 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -37,8 +37,6 @@ #define DRAW_VBUF_H_ -#include "pipe/p_util.h" - struct draw_context; struct vertex_info; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index f798b204929..34adbd49b00 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -31,11 +31,15 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "pipe/p_shader_tokens.h" + #include "draw_private.h" #include "draw_context.h" #include "draw_vs.h" + #include "translate/translate.h" #include "translate/translate_cache.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 41bdd012d56..760fcb389fc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -29,9 +29,9 @@ */ -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" #include "util/u_math.h" +#include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index eda677cc62c..ab3c5b94a50 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index e029b7b4bb5..b358bd2df47 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -29,8 +29,9 @@ #include "pipe/p_config.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index e26903d8cc5..44563803f90 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -31,7 +31,8 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index fc03473b919..2ce30b9a02b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -32,7 +32,6 @@ * Brian Paul */ -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" #include "draw_context.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 61f0c084c38..0efabd9de8b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -31,13 +31,14 @@ * Brian Paul */ +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_config.h" #include "draw_vs.h" #if defined(PIPE_ARCH_X86) -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 994ce3e8896..4daf05dae7c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -30,7 +30,8 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index cf5b9788375..e64bfb1c6cb 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -41,11 +41,12 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_util.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" +#include "util/u_memory.h" + #include <llvm/Module.h> #include <llvm/CallingConv.h> #include <llvm/Constants.h> diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 035224e8f3c..a82dc30306d 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -35,7 +35,7 @@ #include "storage.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include <llvm/CallingConv.h> #include <llvm/Constants.h> diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 76049ade7c0..efddc04e818 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -29,7 +29,7 @@ #include "storagesoa.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include <llvm/CallingConv.h> #include <llvm/Constants.h> diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index ce41418a0f9..8ae052e8750 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -45,7 +45,7 @@ #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index e90d2e56231..20fc87b39d2 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -35,7 +35,7 @@ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c index 0d2d6c0c1b2..2afaeafa1a1 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index bed4bec4fe7..b914c2d0fea 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -38,7 +38,7 @@ #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index d02e3500fff..5e518370d0e 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -37,7 +37,7 @@ #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index 05efd8ce41a..8fc63ce648c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -35,7 +35,7 @@ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index c51e582611d..b40eb6cc90c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -36,7 +36,7 @@ #include "pipe/p_defines.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_mm.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 95af08929a9..93d2cc96351 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -39,7 +39,7 @@ #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 598d9ce3105..af307e265a2 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -39,7 +39,7 @@ #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 362fd896f31..1e54fc39d44 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_debug.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c index 978944091fd..28d137dbc43 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -35,7 +35,7 @@ #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 300c1c2d9d7..dfa5c35ab6a 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -33,7 +33,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "rtasm_execmem.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 7f6bf577b26..285ddc0e3f3 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -30,7 +30,7 @@ */ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "rtasm_ppc_spe.h" #ifdef GALLIUM_CELL diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c index 5e4126e014d..49bb7ea92e2 100644 --- a/src/gallium/auxiliary/sct/sct.c +++ b/src/gallium/auxiliary/sct/sct.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" #include "sct.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 050b448fe7c..74614d36884 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" #include "tgsi_parse.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 6ae7f324f85..7d6234746a2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -28,6 +28,10 @@ #ifndef TGSI_BUILD_H #define TGSI_BUILD_H + +struct tgsi_token; + + #if defined __cplusplus extern "C" { #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 1025866a252..be25cb45a0a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "util/u_string.h" #include "tgsi_dump_c.h" #include "tgsi_build.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index e28b56c842f..fb573fe1f0c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -52,11 +52,11 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" +#include "util/u_memory.h" #include "util/u_math.h" #define FAST_MATH 1 diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index d16f0cdcad4..3757486ba9b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -26,10 +26,10 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" +#include "util/u_memory.h" void tgsi_full_token_init( diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 59bcf10b530..be4870a4983 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -33,11 +33,11 @@ */ -#include "tgsi_scan.h" -#include "tgsi/tgsi_parse.h" +#include "util/u_math.h" #include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" -#include "pipe/p_util.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 00ed4da4507..626724ad4e4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index 357f77b05a6..ea87da31e50 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -31,6 +31,7 @@ * Authors: Brian Paul */ +#include "pipe/p_debug.h" #include "tgsi_transform.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h index 3da0b382711..a121adbaef4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -29,7 +29,6 @@ #define TGSI_TRANSFORM_H -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_build.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 09486e649e1..50101a9bb0c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c index b93fbf9033e..7678903f75c 100644 --- a/src/gallium/auxiliary/translate/translate.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -31,7 +31,6 @@ */ #include "pipe/p_config.h" -#include "pipe/p_util.h" #include "pipe/p_state.h" #include "translate.h" diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c index 115dc9287e0..d8069a149cf 100644 --- a/src/gallium/auxiliary/translate/translate_cache.c +++ b/src/gallium/auxiliary/translate/translate_cache.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_state.h" #include "translate.h" #include "translate_cache.h" diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 4c8179ffa87..4d336f47ea3 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -30,7 +30,7 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_state.h" #include "translate.h" diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 18a212ac1cf..7955186e168 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -28,7 +28,7 @@ #include "pipe/p_config.h" #include "pipe/p_compiler.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_simple_list.h" #include "translate.h" diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 6eebf6d29b6..6e5fd26c057 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -5,7 +5,6 @@ LIBNAME = util C_SOURCES = \ p_debug.c \ - p_tile.c \ u_blit.c \ u_draw_quad.c \ u_gen_mipmap.c \ @@ -16,6 +15,7 @@ C_SOURCES = \ u_rect.c \ u_simple_shaders.c \ u_snprintf.c \ + u_tile.c \ u_time.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 94382fe1f9e..ce3fad7068e 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -6,7 +6,6 @@ util = env.ConvenienceLibrary( 'p_debug.c', 'p_debug_mem.c', 'p_debug_prof.c', - 'p_tile.c', 'u_blit.c', 'u_draw_quad.c', 'u_gen_mipmap.c', @@ -17,6 +16,7 @@ util = env.ConvenienceLibrary( 'u_rect.c', 'u_simple_shaders.c', 'u_snprintf.c', + 'u_tile.c', 'u_time.c', ]) diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 2c2f2f8931f..7d1dba5a247 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -51,7 +51,6 @@ #endif #include "pipe/p_compiler.h" -#include "pipe/p_util.h" #include "pipe/p_debug.h" #include "pipe/p_format.h" #include "pipe/p_state.h" diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index ae087df4cf7..05399f9885e 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -37,12 +37,13 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" -#include "util/u_draw_quad.h" #include "util/u_blit.h" +#include "util/u_draw_quad.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "cso_cache/cso_context.h" diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 8713ff5d584..c1e2c19f877 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -37,10 +37,10 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" #include "util/u_draw_quad.h" #include "util/u_gen_mipmap.h" #include "util/u_simple_shaders.h" diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 2176a009592..2c40011923d 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -35,9 +35,9 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "u_handle_table.h" +#include "util/u_memory.h" +#include "util/u_handle_table.h" #define HANDLE_TABLE_INITIAL_SIZE 16 diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c index dd5eca7fca3..0bc8de9632c 100644 --- a/src/gallium/auxiliary/util/u_hash_table.c +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -40,10 +40,11 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "cso_cache/cso_hash.h" -#include "u_hash_table.h" + +#include "util/u_memory.h" +#include "util/u_hash_table.h" struct hash_table diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index a541d30a5d7..9b4ca393714 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -40,8 +40,6 @@ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" -#include "util/u_math.h" #ifdef __cplusplus @@ -49,6 +47,132 @@ extern "C" { #endif +#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +__inline double ceil(double val) +{ + double ceil_val; + + if((val - (long) val) == 0) { + ceil_val = val; + } + else { + if(val > 0) { + ceil_val = (long) val + 1; + } + else { + ceil_val = (long) val; + } + } + + return ceil_val; +} + +#ifndef PIPE_SUBSYSTEM_WINDOWS_CE +__inline double floor(double val) +{ + double floor_val; + + if((val - (long) val) == 0) { + floor_val = val; + } + else { + if(val > 0) { + floor_val = (long) val; + } + else { + floor_val = (long) val - 1; + } + } + + return floor_val; +} +#endif + +#pragma function(pow) +__inline double __cdecl pow(double val, double exponent) +{ + /* XXX */ + assert(0); + return 0; +} + +#pragma function(log) +__inline double __cdecl log(double val) +{ + /* XXX */ + assert(0); + return 0; +} + +#pragma function(atan2) +__inline double __cdecl atan2(double val) +{ + /* XXX */ + assert(0); + return 0; +} +#else +#include <math.h> +#include <stdarg.h> +#endif + + +#if defined(_MSC_VER) +#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +static INLINE float cosf( float f ) +{ + return (float) cos( (double) f ); +} + +static INLINE float sinf( float f ) +{ + return (float) sin( (double) f ); +} + +static INLINE float ceilf( float f ) +{ + return (float) ceil( (double) f ); +} + +static INLINE float floorf( float f ) +{ + return (float) floor( (double) f ); +} + +static INLINE float powf( float f, float g ) +{ + return (float) pow( (double) f, (double) g ); +} + +static INLINE float sqrtf( float f ) +{ + return (float) sqrt( (double) f ); +} + +static INLINE float fabsf( float f ) +{ + return (float) fabs( (double) f ); +} + +static INLINE float logf( float f ) +{ + return (float) log( (double) f ); +} + +#else +/* Work-around an extra semi-colon in VS 2005 logf definition */ +#ifdef logf +#undef logf +#define logf(x) ((float)log((double)(x))) +#endif /* logf */ +#endif +#endif /* _MSC_VER */ + + + + + #define POW2_TABLE_SIZE 256 #define POW2_TABLE_SCALE ((float) (POW2_TABLE_SIZE-1)) extern float pow2_table[POW2_TABLE_SIZE]; @@ -59,6 +183,11 @@ extern void util_init_math(void); +union fi { + float f; + int i; + unsigned ui; +}; /** @@ -195,6 +324,113 @@ util_iround(float f) +#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) +/** + * Find first bit set in word. Least significant bit is 1. + * Return 0 if no bits set. + */ +static INLINE +unsigned ffs( unsigned u ) +{ + unsigned i; + + if( u == 0 ) { + return 0; + } + + __asm bsf eax, [u] + __asm inc eax + __asm mov [i], eax + + return i; +} +#endif + + +/** + * Return float bits. + */ +static INLINE unsigned +fui( float f ) +{ + union fi fi; + fi.f = f; + return fi.ui; +} + + + +static INLINE float +ubyte_to_float(ubyte ub) +{ + return (float) ub * (1.0f / 255.0f); +} + + +/** + * Convert float in [0,1] to ubyte in [0,255] with clamping. + */ +static INLINE ubyte +float_to_ubyte(float f) +{ + const int ieee_0996 = 0x3f7f0000; /* 0.996 or so */ + union fi tmp; + + tmp.f = f; + if (tmp.i < 0) { + return (ubyte) 0; + } + else if (tmp.i >= ieee_0996) { + return (ubyte) 255; + } + else { + tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f; + return (ubyte) tmp.i; + } +} + + + +#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) ) + +#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) +#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) + + +static INLINE int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + + +#ifndef COPY_4V +#define COPY_4V( DST, SRC ) \ +do { \ + (DST)[0] = (SRC)[0]; \ + (DST)[1] = (SRC)[1]; \ + (DST)[2] = (SRC)[2]; \ + (DST)[3] = (SRC)[3]; \ +} while (0) +#endif + + +#ifndef COPY_4FV +#define COPY_4FV( DST, SRC ) COPY_4V(DST, SRC) +#endif + + +#ifndef ASSIGN_4V +#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \ +do { \ + (DST)[0] = (V0); \ + (DST)[1] = (V1); \ + (DST)[2] = (V2); \ + (DST)[3] = (V3); \ +} while (0) +#endif + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h new file mode 100644 index 00000000000..148a5cb9972 --- /dev/null +++ b/src/gallium/auxiliary/util/u_memory.h @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Memory functions + */ + + +#ifndef U_MEMORY_H +#define U_MEMORY_H + + +#include "util/u_pointer.h" + + + /* Define ENOMEM for WINCE */ +#if (_WIN32_WCE < 600) +#ifndef ENOMEM +#define ENOMEM 12 +#endif +#endif + + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) + +/* memory debugging */ + +#include "p_debug.h" + +#define MALLOC( _size ) \ + debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) +#define CALLOC( _count, _size ) \ + debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) +#define FREE( _ptr ) \ + debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) +#define REALLOC( _ptr, _old_size, _size ) \ + debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _size ) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void * __stdcall +EngAllocMem( + unsigned long Flags, + unsigned long MemSize, + unsigned long Tag ); + +void __stdcall +EngFreeMem( + void *Mem ); + +#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' ) +#define _FREE( _ptr ) EngFreeMem( _ptr ) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + +void * +ExAllocatePool( + unsigned long PoolType, + size_t NumberOfBytes); + +void +ExFreePool(void *P); + +#define MALLOC(_size) ExAllocatePool(0, _size) +#define _FREE(_ptr) ExFreePool(_ptr) + +#else + +#define MALLOC( SIZE ) malloc( SIZE ) +#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE ) +#define FREE( PTR ) free( PTR ) +#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE ) + +#endif + + +#ifndef CALLOC +static INLINE void * +CALLOC( unsigned count, unsigned size ) +{ + void *ptr = MALLOC( count * size ); + if( ptr ) { + memset( ptr, 0, count * size ); + } + return ptr; +} +#endif /* !CALLOC */ + +#ifndef FREE +static INLINE void +FREE( void *ptr ) +{ + if( ptr ) { + _FREE( ptr ); + } +} +#endif /* !FREE */ + +#ifndef REALLOC +static INLINE void * +REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) +{ + void *new_ptr = NULL; + + if (new_size != 0) { + unsigned copy_size = old_size < new_size ? old_size : new_size; + new_ptr = MALLOC( new_size ); + if (new_ptr && old_ptr && copy_size) { + memcpy( new_ptr, old_ptr, copy_size ); + } + } + + FREE( old_ptr ); + return new_ptr; +} +#endif /* !REALLOC */ + + +#define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) + +#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) + + +/** + * Return memory on given byte alignment + */ +static INLINE void * +align_malloc(size_t bytes, uint alignment) +{ +#if defined(HAVE_POSIX_MEMALIGN) + void *mem; + alignment = (alignment + (uint)sizeof(void*) - 1) & ~((uint)sizeof(void*) - 1); + if(posix_memalign(& mem, alignment, bytes) != 0) + return NULL; + return mem; +#else + char *ptr, *buf; + + assert( alignment > 0 ); + + ptr = (char *) MALLOC(bytes + alignment + sizeof(void *)); + if (!ptr) + return NULL; + + buf = (char *) align_pointer( ptr + sizeof(void *), alignment ); + *(char **)(buf - sizeof(void *)) = ptr; + + return buf; +#endif /* defined(HAVE_POSIX_MEMALIGN) */ +} + +/** + * Free memory returned by align_malloc(). + */ +static INLINE void +align_free(void *ptr) +{ +#if defined(HAVE_POSIX_MEMALIGN) + FREE(ptr); +#else + void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); + void *realAddr = *cubbyHole; + FREE(realAddr); +#endif /* defined(HAVE_POSIX_MEMALIGN) */ +} + + +/** + * Duplicate a block of memory. + */ +static INLINE void * +mem_dup(const void *src, uint size) +{ + void *dup = MALLOC(size); + if (dup) + memcpy(dup, src, size); + return dup; +} + + +/** + * Number of elements in an array. + */ +#ifndef Elements +#define Elements(x) (sizeof(x)/sizeof((x)[0])) +#endif + + +/** + * Offset of a field in a struct, in bytes. + */ +#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER)) + + + +#endif /* U_MEMORY_H */ diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c index b49ae074e0e..0f51dd59777 100644 --- a/src/gallium/auxiliary/util/u_mm.c +++ b/src/gallium/auxiliary/util/u_mm.c @@ -24,9 +24,9 @@ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "util/u_memory.h" #include "util/u_mm.h" diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 06abb34d5a0..39e4ae9d071 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -37,6 +37,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" +#include "util/u_math.h" /** @@ -150,10 +151,10 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) if (pf_size_x(format) <= 8) { /* format uses 8-bit components or less */ - UNCLAMPED_FLOAT_TO_UBYTE(r, rgba[0]); - UNCLAMPED_FLOAT_TO_UBYTE(g, rgba[1]); - UNCLAMPED_FLOAT_TO_UBYTE(b, rgba[2]); - UNCLAMPED_FLOAT_TO_UBYTE(a, rgba[3]); + r = float_to_ubyte(rgba[0]); + g = float_to_ubyte(rgba[1]); + b = float_to_ubyte(rgba[2]); + a = float_to_ubyte(rgba[3]); } switch (format) { @@ -286,4 +287,31 @@ util_pack_z(enum pipe_format format, double z) } +/** + * Pack 4 ubytes into a 4-byte word + */ +static INLINE unsigned +pack_ub4(ubyte b0, ubyte b1, ubyte b2, ubyte b3) +{ + return ((((unsigned int)b0) << 0) | + (((unsigned int)b1) << 8) | + (((unsigned int)b2) << 16) | + (((unsigned int)b3) << 24)); +} + + +/** + * Pack/convert 4 floats into one 4-byte word. + */ +static INLINE unsigned +pack_ui32_float4(float a, float b, float c, float d) +{ + return pack_ub4( float_to_ubyte(a), + float_to_ubyte(b), + float_to_ubyte(c), + float_to_ubyte(d) ); +} + + + #endif /* U_PACK_COLOR_H */ diff --git a/src/gallium/auxiliary/util/u_pointer.h b/src/gallium/auxiliary/util/u_pointer.h new file mode 100644 index 00000000000..e1af9f11cb9 --- /dev/null +++ b/src/gallium/auxiliary/util/u_pointer.h @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_POINTER_H +#define U_POINTER_H + +#include "pipe/p_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE intptr_t +pointer_to_intptr( const void *p ) +{ + union { + const void *p; + intptr_t i; + } pi; + pi.p = p; + return pi.i; +} + +static INLINE void * +intptr_to_pointer( intptr_t i ) +{ + union { + void *p; + intptr_t i; + } pi; + pi.i = i; + return pi.p; +} + +static INLINE uintptr_t +pointer_to_uintptr( const void *ptr ) +{ + union { + const void *p; + uintptr_t u; + } pu; + pu.p = ptr; + return pu.u; +} + +static INLINE void * +uintptr_to_pointer( uintptr_t u ) +{ + union { + void *p; + uintptr_t u; + } pu; + pu.u = u; + return pu.p; +} + +/** + * Return a pointer aligned to next multiple of N bytes. + */ +static INLINE void * +align_pointer( const void *unaligned, uintptr_t alignment ) +{ + uintptr_t aligned = (pointer_to_uintptr( unaligned ) + alignment - 1) & ~(alignment - 1); + return uintptr_to_pointer( aligned ); +} + + +/** + * Return a pointer aligned to next multiple of 16 bytes. + */ +static INLINE void * +align16( void *unaligned ) +{ + return align_pointer( unaligned, 16 ); +} + + + +#ifdef __cplusplus +} +#endif + +#endif /* U_POINTER_H */ diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 94e447b9d53..b31ab5415fa 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -31,7 +31,6 @@ #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_format.h" #include "util/u_rect.h" diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index c34fb6ee334..f06d13c2c4a 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -37,10 +37,10 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "tgsi/tgsi_build.h" diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c new file mode 100644 index 00000000000..853c503f4ff --- /dev/null +++ b/src/gallium/auxiliary/util/u_tile.c @@ -0,0 +1,1169 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * RGBA/float tile get/put functions. + * Usable both by drivers and state trackers. + * Surfaces should already be in a mapped state. + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_rect.h" +#include "util/u_tile.h" + + +/** + * Move raw block of pixels from surface to user memory. + * This should be usable by any hw driver that has mappable surfaces. + */ +void +pipe_get_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *dst, int dst_stride) +{ + const void *src; + + if (dst_stride == 0) + dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); + assert(src); + if(!src) + return; + + pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y); + + pipe_surface_unmap(ps); +} + + +/** + * Move raw block of pixels from user memory to surface. + * This should be usable by any hw driver that has mappable surfaces. + */ +void +pipe_put_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *src, int src_stride) +{ + void *dst; + + if (src_stride == 0) + src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(dst); + if(!dst) + return; + + pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0); + + pipe_surface_unmap(ps); +} + + + + +/** Convert short in [-32768,32767] to GLfloat in [-1.0,1.0] */ +#define SHORT_TO_FLOAT(S) ((2.0F * (S) + 1.0F) * (1.0F/65535.0F)) + +#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ + us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) ) + + + +/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ + +static void +a8r8g8b8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); + pRow[3] = ubyte_to_float((pixel >> 24) & 0xff); + } + p += dst_stride; + } +} + + +static void +a8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + *dst++ = (a << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ + +static void +x8r8g8b8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); + pRow[3] = ubyte_to_float(0xff); + } + p += dst_stride; + } +} + + +static void +x8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ + +static void +b8g8r8a8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 24) & 0xff); + pRow[3] = ubyte_to_float((pixel >> 0) & 0xff); + } + p += dst_stride; + } +} + + +static void +b8g8r8a8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + *dst++ = (b << 24) | (g << 16) | (r << 8) | a; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ + +static void +a1r5g5b5_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 10) & 0x1f) * (1.0f / 31.0f); + pRow[1] = ((pixel >> 5) & 0x1f) * (1.0f / 31.0f); + pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); + pRow[3] = ((pixel >> 15) ) * 1.0f; + } + p += dst_stride; + } +} + + +static void +a1r5g5b5_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + r = r >> 3; /* 5 bits */ + g = g >> 3; /* 5 bits */ + b = b >> 3; /* 5 bits */ + a = a >> 7; /* 1 bit */ + *dst++ = (a << 15) | (r << 10) | (g << 5) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/ + +static void +a4r4g4b4_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f); + pRow[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f); + pRow[2] = ((pixel ) & 0xf) * (1.0f / 15.0f); + pRow[3] = ((pixel >> 12) ) * (1.0f / 15.0f); + } + p += dst_stride; + } +} + + +static void +a4r4g4b4_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + r >>= 4; + g >>= 4; + b >>= 4; + a >>= 4; + *dst++ = (a << 12) | (r << 16) | (g << 4) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_R5G6B5_UNORM ***/ + +static void +r5g6b5_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f); + pRow[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f); + pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); + pRow[3] = 1.0f; + } + p += dst_stride; + } +} + + +static void +r5g6b5_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0); + uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0); + uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0); + *dst++ = (r << 11) | (g << 5) | (b); + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_Z16_UNORM ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z16_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const float scale = 1.0f / 65535.0f; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = *src++ * scale; + } + p += dst_stride; + } +} + + + + +/*** PIPE_FORMAT_L8_UNORM ***/ + +static void +l8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = ubyte_to_float(*src); + pRow[3] = 1.0; + } + p += dst_stride; + } +} + + +static void +l8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r; + r = float_to_ubyte(pRow[0]); + *dst++ = r; + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_A8_UNORM ***/ + +static void +a8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = 0.0; + pRow[3] = ubyte_to_float(*src); + } + p += dst_stride; + } +} + + +static void +a8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned a; + a = float_to_ubyte(pRow[3]); + *dst++ = a; + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_R16_SNORM ***/ + +static void +r16_get_tile_rgba(const short *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = SHORT_TO_FLOAT(src[0]); + pRow[1] = + pRow[2] = 0.0; + pRow[3] = 1.0; + } + p += dst_stride; + } +} + + +static void +r16_put_tile_rgba(short *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, dst++, pRow += 4) { + UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ + +static void +r16g16b16a16_get_tile_rgba(const short *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src += 4, pRow += 4) { + pRow[0] = SHORT_TO_FLOAT(src[0]); + pRow[1] = SHORT_TO_FLOAT(src[1]); + pRow[2] = SHORT_TO_FLOAT(src[2]); + pRow[3] = SHORT_TO_FLOAT(src[3]); + } + p += dst_stride; + } +} + + +static void +r16g16b16a16_put_tile_rgba(short *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, dst += 4, pRow += 4) { + UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); + UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]); + UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]); + UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]); + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_I8_UNORM ***/ + +static void +i8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = ubyte_to_float(*src); + } + p += dst_stride; + } +} + + +static void +i8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r; + r = float_to_ubyte(pRow[0]); + *dst++ = r; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A8L8_UNORM ***/ + +static void +a8l8_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + ushort p = *src++; + pRow[0] = + pRow[1] = + pRow[2] = ubyte_to_float(p & 0xff); + pRow[3] = ubyte_to_float(p >> 8); + } + p += dst_stride; + } +} + + +static void +a8l8_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, a; + r = float_to_ubyte(pRow[0]); + a = float_to_ubyte(pRow[3]); + *dst++ = (a << 8) | r; + } + p += src_stride; + } +} + + + + +/*** PIPE_FORMAT_Z32_UNORM ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z32_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / (double) 0xffffffff; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (*src++ * scale); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_S8Z24_UNORM ***/ + +/** + * Return Z component as four float in [0,1]. Stencil part ignored. + */ +static void +s8z24_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / ((1 << 24) - 1); + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (scale * (*src++ & 0xffffff)); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_Z24S8_UNORM ***/ + +/** + * Return Z component as four float in [0,1]. Stencil part ignored. + */ +static void +z24s8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / ((1 << 24) - 1); + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (scale * (*src++ >> 8)); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/ + +/** + * Convert YCbCr (or YCrCb) to RGBA. + */ +static void +ycbcr_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride, + boolean rev) +{ + const float scale = 1.0f / 255.0f; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + /* do two texels at a time */ + for (j = 0; j < (w & ~1); j += 2, src += 2) { + const ushort t0 = src[0]; + const ushort t1 = src[1]; + const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ + const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */ + ubyte cb, cr; + float r, g, b; + + if (rev) { + cb = t1 & 0xff; /* chroma U */ + cr = t0 & 0xff; /* chroma V */ + } + else { + cb = t0 & 0xff; /* chroma U */ + cr = t1 & 0xff; /* chroma V */ + } + + /* even pixel: y0,cr,cb */ + r = 1.164f * (y0-16) + 1.596f * (cr-128); + g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y0-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + + /* odd pixel: use y1,cr,cb */ + r = 1.164f * (y1-16) + 1.596f * (cr-128); + g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y1-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + + } + /* do the last texel */ + if (w & 1) { + const ushort t0 = src[0]; + const ushort t1 = src[1]; + const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ + ubyte cb, cr; + float r, g, b; + + if (rev) { + cb = t1 & 0xff; /* chroma U */ + cr = t0 & 0xff; /* chroma V */ + } + else { + cb = t0 & 0xff; /* chroma U */ + cr = t1 & 0xff; /* chroma V */ + } + + /* even pixel: y0,cr,cb */ + r = 1.164f * (y0-16) + 1.596f * (cr-128); + g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y0-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + } + p += dst_stride; + } +} + + +void +pipe_tile_raw_to_rgba(enum pipe_format format, + void *src, + uint w, uint h, + float *dst, unsigned dst_stride) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + a4r4g4b4_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_L8_UNORM: + l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A8_UNORM: + a8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_I8_UNORM: + i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A8L8_UNORM: + a8l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R16_SNORM: + r16_get_tile_rgba((short *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z16_UNORM: + z16_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z32_UNORM: + z32_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z24S8_UNORM: + z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_YCBCR: + ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE); + break; + case PIPE_FORMAT_YCBCR_REV: + ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE); + break; + default: + assert(0); + } +} + + +void +pipe_get_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p) +{ + unsigned dst_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + + if (!packed) + return; + + if(ps->format == PIPE_FORMAT_YCBCR || ps->format == PIPE_FORMAT_YCBCR_REV) + assert((x & 1) == 0); + + pipe_get_tile_raw(ps, x, y, w, h, packed, 0); + + pipe_tile_raw_to_rgba(ps->format, packed, w, h, p, dst_stride); + + FREE(packed); +} + + +void +pipe_put_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const float *p) +{ + unsigned src_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + + if (!packed) + return; + + switch (ps->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + assert(0); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_L8_UNORM: + l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A8_UNORM: + a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_I8_UNORM: + i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A8L8_UNORM: + a8l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R16_SNORM: + r16_put_tile_rgba((short *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_Z16_UNORM: + /*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z32_UNORM: + /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z24S8_UNORM: + /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + default: + assert(0); + } + + pipe_put_tile_raw(ps, x, y, w, h, packed, 0); + + FREE(packed); +} + + +/** + * Get a block of Z values, converted to 32-bit range. + */ +void +pipe_get_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z) +{ + const uint dstStride = w; + ubyte *map; + uint *pDest = z; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); + if (!map) { + assert(0); + return; + } + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + const uint *pSrc + = (const uint *)(map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += dstStride; + pSrc += ps->stride/4; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + { + const uint *pSrc + = (const uint *)(map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 24-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff); + } + pDest += dstStride; + pSrc += ps->stride/4; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + const ushort *pSrc + = (const ushort *)(map + y * ps->stride + x*2); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 16-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 16) | pSrc[j]; + } + pDest += dstStride; + pSrc += ps->stride/2; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + +void +pipe_put_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *zSrc) +{ + const uint srcStride = w; + const uint *pSrc = zSrc; + ubyte *map; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); + if (!map) { + assert(0); + return; + } + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + uint *pDest = (uint *) (map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += ps->stride/4; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + { + uint *pDest = (uint *) (map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 24-bit Z (0 stencil) */ + pDest[j] = pSrc[j] >> 8; + } + pDest += ps->stride/4; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + ushort *pDest = (ushort *) (map + y * ps->stride + x*2); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 16-bit Z */ + pDest[j] = pSrc[j] >> 16; + } + pDest += ps->stride/2; + pSrc += srcStride; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h new file mode 100644 index 00000000000..a8ac8053083 --- /dev/null +++ b/src/gallium/auxiliary/util/u_tile.h @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef P_TILE_H +#define P_TILE_H + +#include "pipe/p_compiler.h" + +struct pipe_surface; + + +/** + * Clip tile against surface dims. + * \return TRUE if tile is totally clipped, FALSE otherwise + */ +static INLINE boolean +pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps) +{ + if (x >= ps->width) + return TRUE; + if (y >= ps->height) + return TRUE; + if (x + *w > ps->width) + *w = ps->width - x; + if (y + *h > ps->height) + *h = ps->height - y; + return FALSE; +} + +#ifdef __cplusplus +extern "C" { +#endif + +void +pipe_get_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *p, int dst_stride); + +void +pipe_put_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *p, int src_stride); + + +void +pipe_get_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p); + +void +pipe_put_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const float *p); + + +void +pipe_get_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z); + +void +pipe_put_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *z); + +void +pipe_tile_raw_to_rgba(enum pipe_format format, + void *src, + uint w, uint h, + float *dst, unsigned dst_stride); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index f430e88b9c1..6bace0bb11a 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -34,7 +34,6 @@ #define CELL_COMMON_H #include "pipe/p_compiler.h" -#include "pipe/p_util.h" #include "pipe/p_format.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 3ffe09add66..cee0917b631 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -34,7 +34,7 @@ #include <assert.h> #include <stdint.h> #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 12eb5aa2547..5af95a3c103 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -35,7 +35,7 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 67b87f16d7f..971d65d09e4 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -30,7 +30,7 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "draw/draw_context.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index b663b376222..dd25ae880e5 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -33,7 +33,7 @@ #include "cell_context.h" #include "cell_render.h" #include "cell_spu.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_private.h" diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 2bf441a0c5f..139b3719b62 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 5480534ad90..8ab938a02aa 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 9cae67f0912..3646a0ee4f3 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cell_context.h" #include "cell_state.h" #include "cell_state_emit.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index f5707f2bb8b..cd96b317faa 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 01ffa31c2c2..2d31ad89a66 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 533b64227d6..1add81373db 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -33,7 +33,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/ppu/cell_winsys.c index ebabce3c8f5..d570bbd2f91 100644 --- a/src/gallium/drivers/cell/ppu/cell_winsys.c +++ b/src/gallium/drivers/cell/ppu/cell_winsys.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cell_winsys.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 42e5022f30b..89c61136a4c 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -63,7 +63,6 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index ab4ff8160a9..8944ef171eb 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -32,7 +32,6 @@ #include <transpose_matrix4x4.h> #include "pipe/p_compiler.h" #include "pipe/p_format.h" -#include "pipe/p_util.h" #include "spu_colorpack.h" #include "spu_main.h" #include "spu_texture.h" diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index 74ab2bbd1f0..dbcf4b0eb93 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,4 +1,3 @@ -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" //#include "tgsi_build.h" diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 219fd90cc0e..26f23637492 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -32,7 +32,6 @@ * Ian Romanick <idr@us.ibm.com> */ -#include "pipe/p_util.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" #include "spu_exec.h" diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index 3119a78c060..a1e81975e65 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -34,7 +34,6 @@ #include <spu_mfcio.h> -#include "pipe/p_util.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" #include "spu_vertex_shader.h" diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 014a3e31d50..10c4ffc2096 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -28,7 +28,7 @@ #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_context.h" #include "fo_context.h" diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index e2bf5ab678f..c6776716a2f 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -35,7 +35,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c index c024a051a58..48be3e14727 100644 --- a/src/gallium/drivers/i915simple/i915_debug_fp.c +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -29,7 +29,7 @@ #include "i915_reg.h" #include "i915_debug.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" static void diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h index 80a9576304c..2f0f99d0468 100644 --- a/src/gallium/drivers/i915simple/i915_fpc.h +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -29,7 +29,6 @@ #ifndef I915_FPC_H #define I915_FPC_H -#include "pipe/p_util.h" #include "i915_context.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 64432982c42..34b4a846c11 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -33,6 +33,8 @@ #include "i915_fpc.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_string.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index 9ffa4601380..d194c2fb158 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -27,7 +27,9 @@ #include "draw/draw_pipe.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" #include "i915_context.h" #include "i915_winsys.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index aef3682bbfc..e4ece550985 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -41,9 +41,10 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "i915_context.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 0afa17bed85..e9e40c3f0b2 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "util/u_string.h" diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index e8521b385ef..d2487d82778 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -31,8 +31,9 @@ #include "draw/draw_context.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 4daccec6e0c..488615067c5 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c index 8cfbdddd19b..86126a5a152 100644 --- a/src/gallium/drivers/i915simple/i915_state_dynamic.c +++ b/src/gallium/drivers/i915simple/i915_state_dynamic.c @@ -30,7 +30,9 @@ #include "i915_context.h" #include "i915_reg.h" #include "i915_state.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" #define FILE_DEBUG_FLAG DEBUG_STATE diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c index 2501f2d7cb8..8c16bb4e271 100644 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -33,7 +33,7 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 7868f21ca6a..c09c10601b4 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -27,7 +27,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "i915_state_inlines.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 17b5125e569..62f1926644a 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -30,10 +30,9 @@ #include "i915_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_rect.h" diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index ca0fb8761bf..32344da4d5a 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -34,8 +34,9 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "i915_context.h" #include "i915_texture.h" diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c index 337e4f95f69..79d4150383a 100644 --- a/src/gallium/drivers/i965simple/brw_cc.c +++ b/src/gallium/drivers/i965simple/brw_cc.c @@ -29,7 +29,8 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "brw_context.h" #include "brw_state.h" @@ -232,8 +233,7 @@ static void upload_cc_unit( struct brw_context *brw ) cc.cc3.alpha_test_func = brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], - brw->attribs.DepthStencil->alpha.ref); + cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref); cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; } diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c index ea5c05a2796..8e78dd51be9 100644 --- a/src/gallium/drivers/i965simple/brw_clip_state.c +++ b/src/gallium/drivers/i965simple/brw_clip_state.c @@ -32,7 +32,8 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" static void upload_clip_unit( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index 8326f7b9c40..96920df0087 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -39,7 +39,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c index 52bbd525c16..824ee7fd6df 100644 --- a/src/gallium/drivers/i965simple/brw_curbe.c +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -39,7 +39,8 @@ #include "brw_wm.h" #include "pipe/p_state.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #define FILE_DEBUG_FLAG DEBUG_FALLBACKS diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c index 9c0c78c2369..7c20ea52af5 100644 --- a/src/gallium/drivers/i965simple/brw_draw_upload.c +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -33,6 +33,7 @@ #include "brw_context.h" #include "brw_state.h" + struct brw_array_state { union header_union header; diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c index 3932e9e9394..5b8016b2e93 100644 --- a/src/gallium/drivers/i965simple/brw_gs_state.c +++ b/src/gallium/drivers/i965simple/brw_gs_state.c @@ -34,7 +34,8 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index fadfbf94ab3..ab7cd624b27 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "util/u_string.h" diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c index 9acd3ea61b2..2a5de61c219 100644 --- a/src/gallium/drivers/i965simple/brw_sf_state.c +++ b/src/gallium/drivers/i965simple/brw_sf_state.c @@ -30,11 +30,12 @@ */ - #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + static void upload_sf_vp(struct brw_context *brw) { diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index 30f37a99d46..86d877d7efd 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -1,7 +1,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 27ca32843da..af46cb546fa 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -31,7 +31,7 @@ #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c index 35db76b5941..43a1c89fc40 100644 --- a/src/gallium/drivers/i965simple/brw_state_batch.c +++ b/src/gallium/drivers/i965simple/brw_state_batch.c @@ -32,7 +32,7 @@ #include "brw_state.h" #include "brw_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /* A facility similar to the data caching code above, which aims to * prevent identical commands being issued repeatedly. diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c index b3a5124461d..094248fa691 100644 --- a/src/gallium/drivers/i965simple/brw_state_cache.c +++ b/src/gallium/drivers/i965simple/brw_state_cache.c @@ -38,7 +38,7 @@ #include "brw_sf.h" #include "brw_gs.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c index f3174bfe0ae..78d4c0e411b 100644 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -43,7 +43,8 @@ */ #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "brw_context.h" #include "brw_state.h" diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c index e727601e1e7..bac9161b5f1 100644 --- a/src/gallium/drivers/i965simple/brw_state_upload.c +++ b/src/gallium/drivers/i965simple/brw_state_upload.c @@ -33,7 +33,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /* This is used to initialize brw->state.atoms[]. We could use this * list directly except for a single atom, brw_constant_buffer, which diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 69da2522859..b89756c47b0 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -29,10 +29,9 @@ #include "brw_context.h" #include "brw_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_rect.h" diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 9b6cf817238..05eda9d1f26 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -33,16 +33,16 @@ /* Code to layout images in a mipmap tree for i965. */ -#include "brw_tex_layout.h" - #include "pipe/p_state.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" - +#include "util/u_math.h" +#include "util/u_memory.h" #include "brw_context.h" +#include "brw_tex_layout.h" + #define FILE_DEBUG_FLAG DEBUG_TEXTURE diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c index c73469929ce..1eaff878928 100644 --- a/src/gallium/drivers/i965simple/brw_vs_state.c +++ b/src/gallium/drivers/i965simple/brw_vs_state.c @@ -34,7 +34,8 @@ #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" static void upload_vs_unit( struct brw_context *brw ) { diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c index 7fc5f59a98c..8de565b96cd 100644 --- a/src/gallium/drivers/i965simple/brw_wm.c +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -35,7 +35,7 @@ #include "brw_wm.h" #include "brw_eu.h" #include "brw_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index e6f1a44817d..d50e66f613f 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -2,7 +2,8 @@ #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index 6a4a5aef092..ab6410aa607 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -2,7 +2,8 @@ #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c index b9eaee56ee8..52b2909a651 100644 --- a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -34,7 +34,8 @@ #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #define COMPAREFUNC_ALWAYS 0 diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c index f3aa36b07f8..37a9bf919cd 100644 --- a/src/gallium/drivers/i965simple/brw_wm_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_state.c @@ -34,7 +34,8 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_wm.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" /*********************************************************************** * WM unit - fragment programs and rasterization diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 9b1313bc83e..dda90f760a3 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -32,8 +32,8 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_flush.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index cc171bbc396..d0456731bea 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -34,7 +34,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 20226da78c3..34adac5226c 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -36,7 +36,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_sse2.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 8b7da7c7473..35653a8e48c 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -34,7 +34,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_sse2.h" diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 941ab62e00c..038ff04d4f1 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -41,7 +41,7 @@ #include "sp_prim_setup.h" #include "draw/draw_pipe.h" #include "draw/draw_vertex.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /** * Triangle setup info (derived from draw_stage). diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index e9fae951e0b..425e13cd283 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -43,6 +43,7 @@ #include "sp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" +#include "util/u_memory.h" #define SP_MAX_VBUF_INDEXES 1024 diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 7a42b08ef53..7d3580fb4f2 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -7,7 +7,7 @@ #include "sp_headers.h" #include "sp_quad.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" static void diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 74c6bff84ae..a834accb863 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -31,7 +31,8 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" @@ -128,15 +129,15 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) /* convert to ubyte */ for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */ - - UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */ + dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ + dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ + dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ + dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ + + src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ + src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ + src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ + src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ } switch (softpipe->blend->logicop_func) { @@ -209,10 +210,10 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) } for (j = 0; j < 4; j++) { - quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]); - quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]); - quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]); - quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]); + quadColor[j][0] = ubyte_to_float(res[j][0]); + quadColor[j][1] = ubyte_to_float(res[j][1]); + quadColor[j][2] = ubyte_to_float(res[j][2]); + quadColor[j][3] = ubyte_to_float(res[j][3]); } } diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c index b3db428ef18..92e9af09c18 100644 --- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -1,5 +1,5 @@ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index 7fe080990bd..f72f31db973 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -31,7 +31,8 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index dd5ebb22961..ad907ec25fe 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_quad.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 0c82692c6ee..227cb2014e1 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -30,7 +30,7 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c index 22ea99049fe..5a66a866993 100644 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -30,7 +30,7 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_headers.h" #include "sp_quad.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 8c88c192f8f..5499ba5361f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -35,7 +35,8 @@ * all the enabled attributes run contiguously. */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c index 54254df1f11..db13e73ae35 100644 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index 40083138a43..b64646a449f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index b4c7e942fa5..ce9562e07c6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -10,7 +10,7 @@ #include "sp_tile_cache.h" #include "sp_quad.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /** Only 8-bit stencil supported */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index f1e9b80e09a..a39ecc2e9d4 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -7,7 +7,7 @@ #include "sp_headers.h" #include "sp_quad.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /** diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index adf9ccf64c6..2106ee1d235 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -32,7 +32,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_query.h" diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index f6b3d7ac24f..9644dbd168f 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index c8c55fa6e85..87336ab6e31 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -42,9 +42,9 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vertex.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#include "util/u_memory.h" #define DEBUG_VERTS 0 diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index 2d40d6bd8f5..384fe559afd 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -28,7 +28,7 @@ /* Authors: Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f10a1fa4718..6b6a4c3ff34 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -25,7 +25,8 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 76fe6bfef9f..1be461b3a46 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -30,7 +30,7 @@ #include "sp_fs.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 98e04352dbc..87b72196838 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 033288a0aa3..99a28c0d7e0 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -29,7 +29,7 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index bfbae234f1d..389aceb27ce 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -26,10 +26,9 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_rect.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 58a95d13e13..49250ec084c 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -39,9 +39,9 @@ #include "sp_tile_cache.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "tgsi/tgsi_exec.h" #include "util/u_math.h" +#include "util/u_memory.h" /* diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index f775591352b..3a737d6f722 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -33,8 +33,9 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 57c12ffe335..b50c9845133 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -32,9 +32,9 @@ * Brian Paul */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_memory.h" +#include "util/u_tile.h" #include "sp_context.h" #include "sp_surface.h" #include "sp_texture.h" diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index f16359e8ad6..1dd77193791 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_screen.h" #include "tr_dump.h" diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 1613a626df4..48032c1617f 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -45,6 +45,8 @@ #endif #include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "util/u_memory.h" #include "util/u_string.h" #include "tr_stream.h" diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index 6ddc8fc15c4..76a53731b31 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -35,7 +35,6 @@ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" boolean trace_dump_trace_begin(void); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index a6467ec35fd..8789f86b1a8 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "tr_dump.h" #include "tr_state.h" diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 30ab5a8fdcb..986d939e0c6 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -27,6 +27,7 @@ #include "pipe/p_compiler.h" +#include "util/u_memory.h" #include "tgsi/tgsi_dump.h" #include "tr_dump.h" diff --git a/src/gallium/drivers/trace/tr_stream_stdc.c b/src/gallium/drivers/trace/tr_stream_stdc.c index 4c77e1c995c..4c19ec0b243 100644 --- a/src/gallium/drivers/trace/tr_stream_stdc.c +++ b/src/gallium/drivers/trace/tr_stream_stdc.c @@ -36,7 +36,7 @@ #include <stdio.h> -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "tr_stream.h" diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c index b3b65f09716..704eb15bd71 100644 --- a/src/gallium/drivers/trace/tr_stream_wd.c +++ b/src/gallium/drivers/trace/tr_stream_wd.c @@ -37,7 +37,7 @@ #include <windows.h> #include <winddi.h> -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_string.h" #include "tr_stream.h" diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c index 99ba74d3667..440a78704ab 100644 --- a/src/gallium/drivers/trace/tr_texture.c +++ b/src/gallium/drivers/trace/tr_texture.c @@ -25,9 +25,9 @@ * **************************************************************************/ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "util/u_hash_table.h" +#include "util/u_memory.h" #include "tr_screen.h" #include "tr_texture.h" diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 2c7a6f893b0..177835854e1 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -25,8 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" -#include "pipe/p_state.h" +#include "util/u_memory.h" #include "util/u_hash_table.h" #include "tr_dump.h" diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h deleted file mode 100644 index 4a3fca59621..00000000000 --- a/src/gallium/include/pipe/p_util.h +++ /dev/null @@ -1,460 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef P_UTIL_H -#define P_UTIL_H - -#include "p_config.h" -#include "p_compiler.h" -#include "p_debug.h" -#include "p_pointer.h" - -#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) -__inline double ceil(double val) -{ - double ceil_val; - - if((val - (long) val) == 0) { - ceil_val = val; - } else { - if(val > 0) { - ceil_val = (long) val + 1; - } else { - ceil_val = (long) val; - } - } - - return ceil_val; -} - -#ifndef PIPE_SUBSYSTEM_WINDOWS_CE -__inline double floor(double val) -{ - double floor_val; - - if((val - (long) val) == 0) { - floor_val = val; - } else { - if(val > 0) { - floor_val = (long) val; - } else { - floor_val = (long) val - 1; - } - } - - return floor_val; -} -#endif - -#pragma function(pow) -__inline double __cdecl pow(double val, double exponent) -{ - /* XXX */ - assert(0); - return 0; -} - -#pragma function(log) -__inline double __cdecl log(double val) -{ - /* XXX */ - assert(0); - return 0; -} - -#pragma function(atan2) -__inline double __cdecl atan2(double val) -{ - /* XXX */ - assert(0); - return 0; -} -#else -#include <math.h> -#include <stdarg.h> -#endif - - /* Define ENOMEM for WINCE */ -#if (_WIN32_WCE < 600) -#ifndef ENOMEM -#define ENOMEM 12 -#endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) - -/* memory debugging */ - -#include "p_debug.h" - -#define MALLOC( _size ) \ - debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) -#define CALLOC( _count, _size ) \ - debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) -#define FREE( _ptr ) \ - debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) -#define REALLOC( _ptr, _old_size, _size ) \ - debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _size ) - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - -void * __stdcall -EngAllocMem( - unsigned long Flags, - unsigned long MemSize, - unsigned long Tag ); - -void __stdcall -EngFreeMem( - void *Mem ); - -#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' ) -#define _FREE( _ptr ) EngFreeMem( _ptr ) - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - -void * -ExAllocatePool( - unsigned long PoolType, - size_t NumberOfBytes); - -void -ExFreePool(void *P); - -#define MALLOC(_size) ExAllocatePool(0, _size) -#define _FREE(_ptr) ExFreePool(_ptr) - -#else - -#define MALLOC( SIZE ) malloc( SIZE ) -#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE ) -#define FREE( PTR ) free( PTR ) -#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE ) - -#endif - - -#ifndef CALLOC -static INLINE void * -CALLOC( unsigned count, unsigned size ) -{ - void *ptr = MALLOC( count * size ); - if( ptr ) { - memset( ptr, 0, count * size ); - } - return ptr; -} -#endif /* !CALLOC */ - -#ifndef FREE -static INLINE void -FREE( void *ptr ) -{ - if( ptr ) { - _FREE( ptr ); - } -} -#endif /* !FREE */ - -#ifndef REALLOC -static INLINE void * -REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) -{ - void *new_ptr = NULL; - - if (new_size != 0) { - unsigned copy_size = old_size < new_size ? old_size : new_size; - new_ptr = MALLOC( new_size ); - if (new_ptr && old_ptr && copy_size) { - memcpy( new_ptr, old_ptr, copy_size ); - } - } - - FREE( old_ptr ); - return new_ptr; -} -#endif /* !REALLOC */ - - -#define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) - -#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) - - -/** - * Return memory on given byte alignment - */ -static INLINE void * -align_malloc(size_t bytes, uint alignment) -{ -#if defined(HAVE_POSIX_MEMALIGN) - void *mem; - alignment = (alignment + (uint)sizeof(void*) - 1) & ~((uint)sizeof(void*) - 1); - if(posix_memalign(& mem, alignment, bytes) != 0) - return NULL; - return mem; -#else - char *ptr, *buf; - - assert( alignment > 0 ); - - ptr = (char *) MALLOC(bytes + alignment + sizeof(void *)); - if (!ptr) - return NULL; - - buf = (char *) align_pointer( ptr + sizeof(void *), alignment ); - *(char **)(buf - sizeof(void *)) = ptr; - - return buf; -#endif /* defined(HAVE_POSIX_MEMALIGN) */ -} - -/** - * Free memory returned by align_malloc(). - */ -static INLINE void -align_free(void *ptr) -{ -#if defined(HAVE_POSIX_MEMALIGN) - FREE(ptr); -#else - void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); - void *realAddr = *cubbyHole; - FREE(realAddr); -#endif /* defined(HAVE_POSIX_MEMALIGN) */ -} - - - -/** - * Duplicate a block of memory. - */ -static INLINE void * -mem_dup(const void *src, uint size) -{ - void *dup = MALLOC(size); - if (dup) - memcpy(dup, src, size); - return dup; -} - - - -#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) ) -#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) -#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) - -#ifndef Elements -#define Elements(x) (sizeof(x)/sizeof((x)[0])) -#endif -#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER)) - -/** - * Return a pointer aligned to next multiple of 16 bytes. - */ -static INLINE void * -align16( void *unaligned ) -{ - return align_pointer( unaligned, 16 ); -} - - -static INLINE int align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} - - - - -#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) -static INLINE unsigned ffs( unsigned u ) -{ - unsigned i; - - if( u == 0 ) { - return 0; - } - - __asm bsf eax, [u] - __asm inc eax - __asm mov [i], eax - - return i; -} -#endif - -union fi { - float f; - int i; - unsigned ui; -}; - -#define UBYTE_TO_FLOAT( ub ) ((float)(ub) / 255.0F) - -#define IEEE_0996 0x3f7f0000 /* 0.996 or so */ - -/* This function/macro is sensitive to precision. Test very carefully - * if you change it! - */ -#define UNCLAMPED_FLOAT_TO_UBYTE(UB, F) \ - do { \ - union fi __tmp; \ - __tmp.f = (F); \ - if (__tmp.i < 0) \ - UB = (ubyte) 0; \ - else if (__tmp.i >= IEEE_0996) \ - UB = (ubyte) 255; \ - else { \ - __tmp.f = __tmp.f * (255.0f/256.0f) + 32768.0f; \ - UB = (ubyte) __tmp.i; \ - } \ - } while (0) - - - -static INLINE unsigned pack_ub4( unsigned char b0, - unsigned char b1, - unsigned char b2, - unsigned char b3 ) -{ - return ((((unsigned int)b0) << 0) | - (((unsigned int)b1) << 8) | - (((unsigned int)b2) << 16) | - (((unsigned int)b3) << 24)); -} - -static INLINE unsigned fui( float f ) -{ - union fi fi; - fi.f = f; - return fi.ui; -} - -static INLINE unsigned char float_to_ubyte( float f ) -{ - unsigned char ub; - UNCLAMPED_FLOAT_TO_UBYTE(ub, f); - return ub; -} - -static INLINE unsigned pack_ui32_float4( float a, - float b, - float c, - float d ) -{ - return pack_ub4( float_to_ubyte(a), - float_to_ubyte(b), - float_to_ubyte(c), - float_to_ubyte(d) ); -} - -#define COPY_4V( DST, SRC ) \ -do { \ - (DST)[0] = (SRC)[0]; \ - (DST)[1] = (SRC)[1]; \ - (DST)[2] = (SRC)[2]; \ - (DST)[3] = (SRC)[3]; \ -} while (0) - - -#define COPY_4FV( DST, SRC ) COPY_4V(DST, SRC) - - -#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \ -do { \ - (DST)[0] = (V0); \ - (DST)[1] = (V1); \ - (DST)[2] = (V2); \ - (DST)[3] = (V3); \ -} while (0) - - - -#if defined(_MSC_VER) -#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - -static INLINE float cosf( float f ) -{ - return (float) cos( (double) f ); -} - -static INLINE float sinf( float f ) -{ - return (float) sin( (double) f ); -} - -static INLINE float ceilf( float f ) -{ - return (float) ceil( (double) f ); -} - -static INLINE float floorf( float f ) -{ - return (float) floor( (double) f ); -} - -static INLINE float powf( float f, float g ) -{ - return (float) pow( (double) f, (double) g ); -} - -static INLINE float sqrtf( float f ) -{ - return (float) sqrt( (double) f ); -} - -static INLINE float fabsf( float f ) -{ - return (float) fabs( (double) f ); -} - -static INLINE float logf( float f ) -{ - return (float) log( (double) f ); -} - -#else -/* Work-around an extra semi-colon in VS 2005 logf definition */ -#ifdef logf -#undef logf -#define logf(x) ((float)log((double)(x))) -#endif /* logf */ -#endif -#endif /* _MSC_VER */ - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 641b19e9406..a67372c6239 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -42,7 +42,7 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index a1889539dce..f71d85dd9b4 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -26,12 +26,13 @@ **************************************************************************/ -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_inlines.h" #include "cso_cache/cso_context.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "trace/tr_screen.h" #include "trace/tr_context.h" diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c index b47c7be2933..7765df3c4a4 100644 --- a/src/gallium/state_trackers/python/st_sample.c +++ b/src/gallium/state_trackers/python/st_sample.c @@ -29,9 +29,10 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "st_sample.h" diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index 6ea3c9a5cf0..2d4f5434b35 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -39,8 +39,9 @@ #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "st_winsys.h" diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.c b/src/gallium/winsys/drm/intel/common/intel_be_device.c index 8db03296156..019ee5cbd2e 100644 --- a/src/gallium/winsys/drm/intel/common/intel_be_device.c +++ b/src/gallium/winsys/drm/intel/common/intel_be_device.c @@ -13,8 +13,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_memory.h" #include "i915simple/i915_screen.h" diff --git a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c index 0d98d16cf1f..20920a20529 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c +++ b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c @@ -32,8 +32,8 @@ #include "intel_context.h" #include "intel_winsys_softpipe.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_format.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index 829732eea8b..e9f821d2764 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -38,8 +38,8 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "eglconfig.h" diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c index f4199e6f891..ae81d7f801b 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -37,8 +37,9 @@ #include "pipe/p_winsys.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sw_winsys.h" diff --git a/src/gallium/winsys/gdi/wmesa.c b/src/gallium/winsys/gdi/wmesa.c index ff52ceb8c41..730fb1b5417 100644 --- a/src/gallium/winsys/gdi/wmesa.c +++ b/src/gallium/winsys/gdi/wmesa.c @@ -12,8 +12,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "glapi/glapi.h" #include "colors.h" diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/brw_aub.c index 6e814ce5d11..f3198029629 100644 --- a/src/gallium/winsys/xlib/brw_aub.c +++ b/src/gallium/winsys/xlib/brw_aub.c @@ -34,7 +34,6 @@ #include "brw_aub.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_debug.h" diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 4b4dc56e843..68ead7f528e 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -42,8 +42,9 @@ #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #ifdef GALLIUM_CELL diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index 7fc9debdd54..3439367636d 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -37,7 +37,7 @@ #include "xmesaP.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "i965simple/brw_winsys.h" #include "i965simple/brw_screen.h" diff --git a/src/mesa/state_tracker/acc2.c b/src/mesa/state_tracker/acc2.c new file mode 100644 index 00000000000..fa5de2b764c --- /dev/null +++ b/src/mesa/state_tracker/acc2.c @@ -0,0 +1,319 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" + +#include "st_context.h" +#include "st_cb_accum.h" +#include "st_cb_fbo.h" +#include "st_draw.h" +#include "st_format.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/p_tile.h" + + +#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ + us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) ) + + +/** + * For hardware that supports deep color buffers, we could accelerate + * most/all the accum operations with blending/texturing. + * For now, just use the get/put_tile() functions and do things in software. + */ + + +static void +acc_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, + uint x, uint y, uint w, uint h, float *p) +{ + const enum pipe_format f = acc_ps->format; + const int cpp = acc_ps->cpp; + + acc_ps->format = PIPE_FORMAT_R16G16B16A16_SNORM; + acc_ps->cpp = 8; + + pipe_get_tile_rgba(pipe, acc_ps, x, y, w, h, p); + + acc_ps->format = f; + acc_ps->cpp = cpp; +} + + +static void +acc_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, + uint x, uint y, uint w, uint h, const float *p) +{ + enum pipe_format f = acc_ps->format; + const int cpp = acc_ps->cpp; + + acc_ps->format = PIPE_FORMAT_R16G16B16A16_SNORM; + acc_ps->cpp = 8; + + pipe_put_tile_rgba(pipe, acc_ps, x, y, w, h, p); + + acc_ps->format = f; + acc_ps->cpp = cpp; +} + + + +void +st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct st_renderbuffer *acc_strb = st_renderbuffer(rb); + struct pipe_surface *acc_ps = acc_strb->surface; + const GLint xpos = ctx->DrawBuffer->_Xmin; + const GLint ypos = ctx->DrawBuffer->_Ymin; + const GLint width = ctx->DrawBuffer->_Xmax - xpos; + const GLint height = ctx->DrawBuffer->_Ymax - ypos; + const GLfloat r = ctx->Accum.ClearColor[0]; + const GLfloat g = ctx->Accum.ClearColor[1]; + const GLfloat b = ctx->Accum.ClearColor[2]; + const GLfloat a = ctx->Accum.ClearColor[3]; + GLfloat *accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + int i; + +#if 1 + GLvoid *map; + + map = pipe_surface_map(acc_ps); + switch (acc_strb->format) { + case PIPE_FORMAT_R16G16B16A16_SNORM: + { + GLshort r = FLOAT_TO_SHORT(ctx->Accum.ClearColor[0]); + GLshort g = FLOAT_TO_SHORT(ctx->Accum.ClearColor[1]); + GLshort b = FLOAT_TO_SHORT(ctx->Accum.ClearColor[2]); + GLshort a = FLOAT_TO_SHORT(ctx->Accum.ClearColor[3]); + int i, j; + for (i = 0; i < height; i++) { + GLshort *dst = ((GLshort *) map + + ((ypos + i) * acc_ps->pitch + xpos) * 4); + for (j = 0; j < width; j++) { + dst[0] = r; + dst[1] = g; + dst[2] = b; + dst[3] = a; + dst += 4; + } + } + } + break; + default: + _mesa_problem(ctx, "unexpected format in st_clear_accum_buffer()"); + } + + pipe_surface_unmap(acc_ps); + +#else + for (i = 0; i < width * height; i++) { + accBuf[i*4+0] = r; + accBuf[i*4+1] = g; + accBuf[i*4+2] = b; + accBuf[i*4+3] = a; + } + + acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); +#endif +} + + +/** For ADD/MULT */ +static void +accum_mad(struct pipe_context *pipe, GLfloat scale, GLfloat bias, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps) +{ + GLfloat *accBuf; + GLint i; + + accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + for (i = 0; i < 4 * width * height; i++) { + accBuf[i] = accBuf[i] * scale + bias; + } + + pipe_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + free(accBuf); +} + + +static void +accum_accum(struct pipe_context *pipe, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps, + struct pipe_surface *color_ps) +{ + GLfloat *colorBuf, *accBuf; + GLint i; + + colorBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, colorBuf); + acc_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + for (i = 0; i < 4 * width * height; i++) { + accBuf[i] = accBuf[i] + colorBuf[i] * value; + } + + acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + free(colorBuf); + free(accBuf); +} + + +static void +accum_load(struct pipe_context *pipe, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps, + struct pipe_surface *color_ps) +{ + GLfloat *buf; + GLint i; + + buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, buf); + + for (i = 0; i < 4 * width * height; i++) { + buf[i] = buf[i] * value; + } + + acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, buf); + + free(buf); +} + + +static void +accum_return(GLcontext *ctx, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps, + struct pipe_surface *color_ps) +{ + struct pipe_context *pipe = ctx->st->pipe; + const GLubyte *colormask = ctx->Color.ColorMask; + GLfloat *abuf, *cbuf = NULL; + GLint i, ch; + + abuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + acc_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, abuf); + + if (!colormask[0] || !colormask[1] || !colormask[2] || !colormask[3]) { + cbuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, cbuf); + } + + for (i = 0; i < width * height; i++) { + for (ch = 0; ch < 4; ch++) { + if (colormask[ch]) { + GLfloat val = abuf[i * 4 + ch] * value; + abuf[i * 4 + ch] = CLAMP(val, 0.0, 1.0); + } + else { + abuf[i * 4 + ch] = cbuf[i * 4 + ch]; + } + } + } + + pipe_put_tile_rgba(pipe, color_ps, xpos, ypos, width, height, abuf); + + free(abuf); + if (cbuf) + free(cbuf); +} + + +static void +st_Accum(GLcontext *ctx, GLenum op, GLfloat value) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = st->pipe; + struct st_renderbuffer *acc_strb + = st_renderbuffer(ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); + struct st_renderbuffer *color_strb + = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + struct pipe_surface *acc_ps = acc_strb->surface; + struct pipe_surface *color_ps = color_strb->surface; + + const GLint xpos = ctx->DrawBuffer->_Xmin; + const GLint ypos = ctx->DrawBuffer->_Ymin; + const GLint width = ctx->DrawBuffer->_Xmax - xpos; + const GLint height = ctx->DrawBuffer->_Ymax - ypos; + + /* make sure color bufs aren't cached */ + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + + switch (op) { + case GL_ADD: + if (value != 0.0F) { + accum_mad(pipe, 1.0, value, xpos, ypos, width, height, acc_ps); + } + break; + case GL_MULT: + if (value != 1.0F) { + accum_mad(pipe, value, 0.0, xpos, ypos, width, height, acc_ps); + } + break; + case GL_ACCUM: + if (value != 0.0F) { + accum_accum(pipe, value, xpos, ypos, width, height, acc_ps, color_ps); + } + break; + case GL_LOAD: + accum_load(pipe, value, xpos, ypos, width, height, acc_ps, color_ps); + break; + case GL_RETURN: + accum_return(ctx, value, xpos, ypos, width, height, acc_ps, color_ps); + break; + default: + assert(0); + } +} + + + +void st_init_accum_functions(struct dd_function_table *functions) +{ + functions->Accum = st_Accum; +} diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index a992e08ff68..cf3a99e7e9d 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -42,7 +42,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index d5696a909f7..a0c305d66ff 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -50,7 +50,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_draw_quad.h" #include "util/u_simple_shaders.h" #include "shader/prog_instruction.h" diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 0c5e21d4ff0..4ec7c752dfe 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -55,7 +55,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_draw_quad.h" #include "shader/prog_instruction.h" #include "cso_cache/cso_context.h" diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 39f5856f94e..c8015327885 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -41,7 +41,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "st_context.h" #include "st_cb_bitmap.h" #include "st_cb_readpixels.h" diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 6177ac63f03..16bbf3d80f2 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -51,7 +51,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_blit.h" diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 325d95e8658..936a6e32ea1 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -55,7 +55,7 @@ #define TGSI_DEBUG 0 -/** XXX we should use the version of this from p_util.h but including +/** XXX we should use the version of this from u_memory.h but including * that header causes symbol collisions. */ static INLINE void * diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 63046a0ecce..73cebff33f7 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -36,7 +36,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "util/u_rect.h" -- cgit v1.2.3 From 2c4661f8fce8a27f2082c6ac498f9fb188878476 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Mon, 25 Aug 2008 10:42:00 +0200 Subject: gallium: Add missing includes. --- src/gallium/auxiliary/util/u_memory.h | 2 +- src/gallium/drivers/i915simple/i915_fpc_emit.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index 148a5cb9972..ec32d60fbe7 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -51,7 +51,7 @@ /* memory debugging */ -#include "p_debug.h" +#include "pipe/p_debug.h" #define MALLOC( _size ) \ debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c index 4bdeefb449b..b054ce41d39 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_emit.c +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -28,6 +28,7 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_fpc.h" +#include "util/u_math.h" #define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) -- cgit v1.2.3 From 6ba9fb9b6693904054ad4e1506ba42e324334b0a Mon Sep 17 00:00:00 2001 From: Brian <brian.paul@tungstengraphics.com> Date: Mon, 25 Aug 2008 11:31:59 -0600 Subject: cell: asst fixes to get driver building/running again. Note that SPU vertex transformation is disabled at this time. --- src/gallium/drivers/cell/ppu/cell_clear.c | 4 +- src/gallium/drivers/cell/ppu/cell_context.c | 4 ++ src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 35 ++++++++++-- src/gallium/drivers/cell/ppu/cell_draw_arrays.h | 24 ++++++-- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 6 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 14 ++--- src/gallium/drivers/cell/ppu/cell_state_shader.c | 4 +- src/gallium/drivers/cell/ppu/cell_surface.c | 11 ++-- src/gallium/drivers/cell/ppu/cell_texture.c | 69 ++++++++++++++++++++--- src/gallium/drivers/cell/ppu/cell_texture.h | 1 + src/gallium/drivers/cell/ppu/cell_vbuf.c | 1 + src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 4 ++ src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 5 ++ src/gallium/drivers/cell/spu/spu_exec.h | 2 +- src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_tri.c | 1 + src/gallium/drivers/cell/spu/spu_util.c | 2 + src/gallium/drivers/cell/spu/spu_vertex_shader.c | 26 ++++++++- 18 files changed, 176 insertions(+), 39 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index cee0917b631..a421c95c8e8 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -48,6 +48,7 @@ void cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { + struct pipe_screen *screen = pipe->screen; struct cell_context *cell = cell_context(pipe); uint surfIndex; @@ -56,7 +57,8 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, if (!cell->cbuf_map[0]) - cell->cbuf_map[0] = pipe_surface_map(ps); + cell->cbuf_map[0] = screen->surface_map(screen, ps, + PIPE_BUFFER_USAGE_GPU_WRITE); if (ps == cell->framebuffer.zsbuf) { surfIndex = 1; diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 5af95a3c103..9ff4e86943b 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -73,11 +73,13 @@ cell_draw_create(struct cell_context *cell) { struct draw_context *draw = draw_create(); +#if 0 /* broken */ if (getenv("GALLIUM_CELL_VS")) { /* plug in SPU-based vertex transformation code */ draw->shader_queue_flush = cell_vertex_shader_queue_flush; draw->driver_private = cell; } +#endif return draw; } @@ -108,6 +110,8 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.draw_arrays = cell_draw_arrays; cell->pipe.draw_elements = cell_draw_elements; + cell->pipe.draw_range_elements = cell_draw_range_elements; + cell->pipe.set_edgeflags = cell_set_edgeflags; cell->pipe.clear = cell_clear_surface; cell->pipe.flush = cell_flush; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 6e08cf6fe88..f02dffe1245 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -59,7 +59,8 @@ cell_map_constant_buffers(struct cell_context *sp) } draw_set_mapped_constant_buffer(sp->draw, - sp->mapped_constants[PIPE_SHADER_VERTEX]); + sp->mapped_constants[PIPE_SHADER_VERTEX], + sp->constants[PIPE_SHADER_VERTEX].size); } static void @@ -92,10 +93,12 @@ cell_draw_arrays(struct pipe_context *pipe, unsigned mode, * XXX should the element buffer be specified/bound with a separate function? */ boolean -cell_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count) +cell_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) { struct cell_context *sp = cell_context(pipe); struct draw_context *draw = sp->draw; @@ -152,3 +155,25 @@ cell_draw_elements(struct pipe_context *pipe, return TRUE; } + + +boolean +cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + return cell_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); +} + + + +void +cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) +{ + struct cell_context *cell = cell_context(pipe); + draw_set_edgeflags(cell->draw, edgeflags); +} diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h index d5df4aa05f8..cd35ec17b4e 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -29,14 +29,26 @@ #define CELL_DRAW_ARRAYS_H -boolean cell_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); +extern boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); -boolean cell_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); +extern boolean +cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); +extern boolean +cell_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count); + +extern void +cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); #endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 971d65d09e4..fe5437023b9 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -294,6 +294,8 @@ cell_set_framebuffer_state(struct pipe_context *pipe, struct pipe_surface *csurf = fb->cbufs[0]; struct pipe_surface *zsurf = fb->zsbuf; uint i; + uint flags = (PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ); /* unmap old surfaces */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { @@ -313,10 +315,10 @@ cell_set_framebuffer_state(struct pipe_context *pipe, /* map new surfaces */ if (csurf) - cell->cbuf_map[0] = pipe_surface_map(csurf); + cell->cbuf_map[0] = pipe_surface_map(csurf, flags); if (zsurf) - cell->zsbuf_map = pipe_surface_map(zsurf); + cell->zsbuf_map = pipe_surface_map(zsurf, flags); cell->dirty |= CELL_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 3646a0ee4f3..9d88c1cf3d2 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -162,13 +162,13 @@ cell_emit_state(struct cell_context *cell) const struct draw_context *const draw = cell->draw; struct cell_shader_info info; - info.num_outputs = draw->num_vs_outputs; - info.declarations = (uintptr_t) draw->machine.Declarations; - info.num_declarations = draw->machine.NumDeclarations; - info.instructions = (uintptr_t) draw->machine.Instructions; - info.num_instructions = draw->machine.NumInstructions; - info.immediates = (uintptr_t) draw->machine.Imms; - info.num_immediates = draw->machine.ImmLimit / 4; + info.num_outputs = draw_num_vs_outputs(draw); + info.declarations = (uintptr_t) draw->vs.machine.Declarations; + info.num_declarations = draw->vs.machine.NumDeclarations; + info.instructions = (uintptr_t) draw->vs.machine.Instructions; + info.num_instructions = draw->vs.machine.NumInstructions; + info.immediates = (uintptr_t) draw->vs.machine.Imms; + info.num_immediates = draw->vs.machine.ImmLimit / 4; emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, & info, sizeof(info)); diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index cd96b317faa..86bcad05e9e 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -41,7 +41,7 @@ static INLINE struct cell_fragment_shader_state * cell_fragment_shader_state(void *shader) { - return (struct pipe_shader_state *) shader; + return (struct cell_fragment_shader_state *) shader; } @@ -49,7 +49,7 @@ cell_fragment_shader_state(void *shader) static INLINE struct cell_vertex_shader_state * cell_vertex_shader_state(void *shader) { - return (struct pipe_shader_state *) shader; + return (struct cell_vertex_shader_state *) shader; } diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 2d31ad89a66..d9e3b510dc0 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -26,11 +26,12 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_memory.h" #include "util/u_rect.h" +#include "util/u_tile.h" + #include "cell_context.h" #include "cell_surface.h" @@ -46,12 +47,12 @@ cell_surface_copy(struct pipe_context *pipe, { assert( dst->cpp == src->cpp ); - pipe_copy_rect(pipe_surface_map(dst), + pipe_copy_rect(pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE), &dst->block, dst->stride, dstx, dsty, width, height, - pipe_surface_map(src), + pipe_surface_map(src, PIPE_BUFFER_USAGE_CPU_READ), do_flip ? -src->stride : src->stride, srcx, do_flip ? height - 1 - srcy : srcy); @@ -81,7 +82,7 @@ cell_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { unsigned i, j; - void *dst_map = pipe_surface_map(dst); + void *dst_map = pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE); assert(dst->stride > 0); diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 1add81373db..5a0942bbd6e 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -33,8 +33,9 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/u_memory.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "cell_context.h" #include "cell_state.h" @@ -68,11 +69,13 @@ cell_texture_layout(struct cell_texture * spt) pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + spt->stride[level] = pt->nblocksx[level] * pt->block.size; + spt->level_offset[level] = spt->buffer_size; spt->buffer_size += (pt->nblocksy[level] * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - pt->nblocksx[level] * pt->block.size; + pt->nblocksx[level] * pt->block.size); width = minify(width); height = minify(height); @@ -147,7 +150,8 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, static struct pipe_surface * cell_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) + unsigned face, unsigned level, unsigned zslice, + unsigned usage) { struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = cell_texture(pt); @@ -166,6 +170,10 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, ps->nblocksy = pt->nblocksy[level]; ps->stride = spt->stride[level]; ps->offset = spt->level_offset[level]; + ps->usage = usage; + + /* XXX may need to override usage flags (see sp_texture.c) */ + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * @@ -228,10 +236,11 @@ cell_tile_texture(struct cell_context *cell, assert(w % TILE_SIZE == 0); assert(h % TILE_SIZE == 0); - surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice); + surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); ASSERT(surf); - src = (const uint *) pipe_surface_map(surf); + src = (const uint *) pipe_surface_map(surf, PIPE_BUFFER_USAGE_CPU_WRITE); if (texture->tiled_data) { align_free(texture->tiled_data); @@ -246,11 +255,12 @@ cell_tile_texture(struct cell_context *cell, } - void cell_update_texture_mapping(struct cell_context *cell) { +#if 0 uint face = 0, level = 0, zslice = 0; +#endif uint i; for (i = 0; i < CELL_MAX_SAMPLERS; i++) { @@ -275,10 +285,52 @@ cell_update_texture_mapping(struct cell_context *cell) } +static void * +cell_surface_map( struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags ) +{ + ubyte *map; + + if (flags & ~surface->usage) { + assert(0); + return NULL; + } + + map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + if (map == NULL) + return NULL; + + /* May want to different things here depending on read/write nature + * of the map: + */ + if (surface->texture && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) + { + /* Do something to notify sharing contexts of a texture change. + * In softpipe, that would mean flushing the texture cache. + */ +#if 0 + cell_screen(screen)->timestamp++; +#endif + } + + return map + surface->offset; +} + + +static void +cell_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); +} + + void cell_init_texture_functions(struct cell_context *cell) { - cell->pipe.texture_update = cell_texture_update; + /*cell->pipe.texture_update = cell_texture_update;*/ } void @@ -287,4 +339,7 @@ cell_init_screen_texture_funcs(struct pipe_screen *screen) screen->texture_create = cell_texture_create_screen; screen->texture_release = cell_texture_release_screen; screen->get_tex_surface = cell_get_tex_surface_screen; + + screen->surface_map = cell_surface_map; + screen->surface_unmap = cell_surface_unmap; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index fcee069d055..6d37e95ebce 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -41,6 +41,7 @@ struct cell_texture struct pipe_texture base; unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; /* The data is held here: */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index 3a181b585c5..e4230c7a5ff 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -37,6 +37,7 @@ #include "cell_spu.h" #include "cell_vbuf.h" #include "draw/draw_vbuf.h" +#include "util/u_memory.h" /** Allow vertex data to be inlined after RENDER command */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 49d5443cde7..2ece0250f6f 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -260,6 +260,7 @@ emit_fetch(struct spe_function *p, void cell_update_vertex_fetch(struct draw_context *draw) { +#if 0 struct cell_context *const cell = (struct cell_context *) draw->driver_private; struct spe_function *p = &cell->attrib_fetch; @@ -337,4 +338,7 @@ void cell_update_vertex_fetch(struct draw_context *draw) cell->attrib_fetch_offsets[function_index[i]]; } } +#else + assert(0); +#endif } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index f753960a0fb..3658947715f 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -50,6 +51,7 @@ void cell_vertex_shader_queue_flush(struct draw_context *draw) { +#if 0 struct cell_context *const cell = (struct cell_context *) draw->driver_private; struct cell_command_vs *const vs = &cell_global.command[0].vs; @@ -138,4 +140,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) draw->vs.post_nr = draw->vs.queue_nr; draw->vs.queue_nr = 0; +#else + assert(0); +#endif } diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index c68f78f59b6..86056799405 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -99,7 +99,7 @@ struct spu_exec_machine * 1 address */ struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_ADDRS + 1] + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] ALIGN16_ATTRIB; struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 6df59abd36d..305dc988810 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -35,7 +35,7 @@ #include "spu_tri.h" #include "spu_tile.h" #include "cell/common.h" - +#include "util/u_memory.h" /** diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 8944ef171eb..2a4e0b423ca 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -32,6 +32,7 @@ #include <transpose_matrix4x4.h> #include "pipe/p_compiler.h" #include "pipe/p_format.h" +#include "util/u_math.h" #include "spu_colorpack.h" #include "spu_main.h" #include "spu_texture.h" diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index dbcf4b0eb93..b25ca4eafc0 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,4 +1,6 @@ + #include "pipe/p_shader_tokens.h" +#include "pipe/p_debug.h" #include "tgsi/tgsi_parse.h" //#include "tgsi_build.h" #include "tgsi/tgsi_util.h" diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index a1e81975e65..f81d19fea1c 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -36,13 +36,35 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "spu_vertex_shader.h" -#include "spu_exec.h" +#include "util/u_math.h" #include "draw/draw_private.h" #include "draw/draw_context.h" #include "cell/common.h" +#include "spu_vertex_shader.h" +#include "spu_exec.h" #include "spu_main.h" + +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +#define CLIP_RIGHT_BIT 0x01 +#define CLIP_LEFT_BIT 0x02 +#define CLIP_TOP_BIT 0x04 +#define CLIP_BOTTOM_BIT 0x08 +#define CLIP_FAR_BIT 0x10 +#define CLIP_NEAR_BIT 0x20 + + +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} + static INLINE unsigned compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) { -- cgit v1.2.3 From 248831e741602450fa957d7f63b1ff1e1395c412 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sat, 30 Aug 2008 22:30:03 +0200 Subject: nv30: activate fp texture units when needed, to get texturing --- src/gallium/drivers/nv30/nv30_state_emit.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 40fed621b24..9c96085408e 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -71,6 +71,10 @@ nv30_state_emit(struct nv30_context *nv30) state->dirty = 0; + /* FIXME/TODO: Try to find a way to reemit only when changed */ + BEGIN_RING(rankine, NV34TCL_TX_UNITS_ENABLE, 1); + OUT_RING(state->fp_samplers); + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]); for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { if (!(samplers & (1 << i))) -- cgit v1.2.3 From 0c47bd0374e533f614ca3013f429fc32946e5be7 Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sun, 31 Aug 2008 11:10:08 +0200 Subject: nv30: set fp samplers with fragprog generation --- src/gallium/drivers/nv30/nv30_fragprog.c | 4 +++- src/gallium/drivers/nv30/nv30_state_emit.c | 4 ---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 02f30ad9bad..320ba3f4bf4 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -852,7 +852,7 @@ nv30_fragprog_validate(struct nv30_context *nv30) fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); nv30_fragprog_upload(nv30, fp); - so = so_new(6, 1); + so = so_new(8, 1); so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, @@ -861,6 +861,8 @@ nv30_fragprog_validate(struct nv30_context *nv30) so_data (so, fp->fp_control); so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); so_data (so, fp->fp_reg_control); + so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1); + so_data (so, fp->samplers); so_ref(so, &fp->so); update_constants: diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 9c96085408e..40fed621b24 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -71,10 +71,6 @@ nv30_state_emit(struct nv30_context *nv30) state->dirty = 0; - /* FIXME/TODO: Try to find a way to reemit only when changed */ - BEGIN_RING(rankine, NV34TCL_TX_UNITS_ENABLE, 1); - OUT_RING(state->fp_samplers); - so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]); for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { if (!(samplers & (1 << i))) -- cgit v1.2.3 From 4d27c027ab301cff64e7edb8a3eb731c2e8585c5 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz <jakob@aurora.(none)> Date: Tue, 2 Sep 2008 17:28:42 +0200 Subject: i915: Small fixes for tiled textures --- src/gallium/drivers/i915simple/i915_texture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 32344da4d5a..a853a5a3f68 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -80,7 +80,7 @@ static unsigned power_of_two(unsigned x) { unsigned value = 1; - while (value <= x) + while (value < x) value = value << 1; return value; } @@ -207,7 +207,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) unsigned nblocksy = pt->nblocksy[0]; #if 0 /* used for tiled display targets */ - if (pt->last_level == 0 && pt->cpp == 4) + if (pt->last_level == 0 && pt->block.size == 4) if (i915_displaytarget_layout(tex)) return; #endif -- cgit v1.2.3 From f637a96e85a51a66f2c53b91118a6815bb61d6e6 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Wed, 3 Sep 2008 11:48:05 +0900 Subject: gallium: Have pipe_buffer_* receive a pipe_screen instead of a pipe_context. We want to use the pipe_buffer_* inlines everywhere, but a pipe context is not always available nor is it needed. --- src/gallium/auxiliary/util/u_draw_quad.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_shader.c | 2 +- src/gallium/drivers/cell/ppu/cell_texture.c | 4 +-- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- src/gallium/drivers/i915simple/i915_texture.c | 8 ++--- src/gallium/drivers/i965simple/brw_state_pool.c | 2 +- src/gallium/drivers/i965simple/brw_tex_layout.c | 4 +-- src/gallium/drivers/softpipe/sp_context.c | 2 +- src/gallium/drivers/softpipe/sp_state_fs.c | 2 +- src/gallium/drivers/softpipe/sp_texture.c | 6 ++-- src/gallium/include/pipe/p_inlines.h | 28 +++++++-------- src/gallium/state_trackers/python/p_context.i | 2 +- src/gallium/state_trackers/python/st_device.c | 2 +- .../state_trackers/python/st_softpipe_winsys.c | 2 +- src/gallium/winsys/drm/intel/dri/intel_screen.c | 2 +- src/gallium/winsys/egl_xlib/sw_winsys.c | 2 +- src/gallium/winsys/gdi/wmesa.c | 2 +- src/gallium/winsys/xlib/xm_winsys.c | 2 +- src/gallium/winsys/xlib/xm_winsys_aub.c | 2 +- src/mesa/state_tracker/st_atom_constbuf.c | 8 ++--- src/mesa/state_tracker/st_cb_bitmap.c | 8 ++--- src/mesa/state_tracker/st_cb_bufferobjects.c | 18 +++++----- src/mesa/state_tracker/st_cb_clear.c | 8 ++--- src/mesa/state_tracker/st_cb_drawpixels.c | 8 ++--- src/mesa/state_tracker/st_context.c | 2 +- src/mesa/state_tracker/st_draw.c | 40 +++++++++++----------- src/mesa/state_tracker/st_gen_mipmap.c | 8 ++--- 27 files changed, 89 insertions(+), 89 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index bf143815d8b..d643ee9ab75 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -127,6 +127,6 @@ util_draw_texquad(struct pipe_context *pipe, util_draw_vertex_buffer(pipe, vbuf, PIPE_PRIM_TRIANGLE_FAN, 4, 2); } - pipe_buffer_reference(pipe->winsys, &vbuf, NULL); + pipe_buffer_reference(pipe->screen, &vbuf, NULL); } } diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 86bcad05e9e..3d1b887da94 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -166,7 +166,7 @@ cell_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); /* note: reference counting */ - pipe_buffer_reference(ws, + winsys_buffer_reference(ws, &cell->constants[shader].buffer, buf->buffer); cell->constants[shader].size = buf->size; diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 5a0942bbd6e..5c01aa21b85 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -130,7 +130,7 @@ cell_texture_release_screen(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); */ - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen, &spt->buffer, NULL); FREE(spt); } @@ -161,7 +161,7 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); + winsys_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index e4ece550985..9397a2ca1a8 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -124,7 +124,7 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { } else { i915->vbo_flushed = 0; - pipe_buffer_reference(winsys, &i915_render->vbo, NULL); + winsys_buffer_reference(winsys, &i915_render->vbo, NULL); } if (!i915_render->vbo) { diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index a853a5a3f68..bd87217063c 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -645,7 +645,7 @@ i915_texture_release(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - pipe_buffer_reference(screen->winsys, &tex->buffer, NULL); + pipe_buffer_reference(screen, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -684,7 +684,7 @@ i915_get_tex_surface(struct pipe_screen *screen, ps->refcount = 1; ps->winsys = ws; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(ws, &ps->buffer, tex->buffer); + pipe_buffer_reference(screen, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -728,7 +728,7 @@ i915_texture_blanket(struct pipe_screen * screen, i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - pipe_buffer_reference(screen->winsys, &tex->buffer, buffer); + pipe_buffer_reference(screen, &tex->buffer, buffer); return &tex->base; } @@ -756,7 +756,7 @@ i915_tex_surface_release(struct pipe_screen *screen, } pipe_texture_reference(&surf->texture, NULL); - pipe_buffer_reference(screen->winsys, &surf->buffer, NULL); + pipe_buffer_reference(screen, &surf->buffer, NULL); FREE(surf); } diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c index 78d4c0e411b..d0dc1ef74d6 100644 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -103,7 +103,7 @@ static void brw_destroy_pool( struct brw_context *brw, { struct brw_mem_pool *pool = &brw->pool[pool_id]; - pipe_buffer_reference( pool->brw->pipe.winsys, + winsys_buffer_reference( pool->brw->pipe.winsys, &pool->buffer, NULL ); } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 05eda9d1f26..cc0c665e021 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -330,7 +330,7 @@ brw_texture_release_screen(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - pipe_buffer_reference(ws, &tex->buffer, NULL); + winsys_buffer_reference(ws, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -369,7 +369,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { assert(ps->format); assert(ps->refcount); - pipe_buffer_reference(ws, &ps->buffer, tex->buffer); + winsys_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index dda90f760a3..6f12390cf79 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -113,7 +113,7 @@ static void softpipe_destroy( struct pipe_context *pipe ) for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { - pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + winsys_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); } } diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 1be461b3a46..e5b609cf6c9 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -152,7 +152,7 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); /* note: reference counting */ - pipe_buffer_reference(ws, + winsys_buffer_reference(ws, &softpipe->constants[shader].buffer, buf ? buf->buffer : NULL); softpipe->constants[shader].size = buf ? buf->size : 0; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 3a737d6f722..c283e3e4108 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -192,7 +192,7 @@ softpipe_texture_blanket(struct pipe_screen * screen, spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); spt->stride[0] = stride[0]; - pipe_buffer_reference(screen->winsys, &spt->buffer, buffer); + pipe_buffer_reference(screen, &spt->buffer, buffer); return &spt->base; } @@ -208,7 +208,7 @@ softpipe_texture_release(struct pipe_screen *screen, if (--(*pt)->refcount <= 0) { struct softpipe_texture *spt = softpipe_texture(*pt); - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen, &spt->buffer, NULL); FREE(spt); } *pt = NULL; @@ -231,7 +231,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen, if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); + pipe_buffer_reference(screen, &ps->buffer, spt->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 1e4b98edb48..d70de8e3011 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -109,7 +109,7 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) /* XXX: thread safety issues! */ static INLINE void -pipe_buffer_reference(struct pipe_winsys *winsys, +winsys_buffer_reference(struct pipe_winsys *winsys, struct pipe_buffer **ptr, struct pipe_buffer *buf) { @@ -164,48 +164,48 @@ pipe_texture_release(struct pipe_texture **ptr) */ static INLINE struct pipe_buffer * -pipe_buffer_create( struct pipe_context *pipe, +pipe_buffer_create( struct pipe_screen *screen, unsigned alignment, unsigned usage, unsigned size ) { - return pipe->winsys->buffer_create(pipe->winsys, alignment, usage, size); + return screen->winsys->buffer_create(screen->winsys, alignment, usage, size); } static INLINE struct pipe_buffer * -pipe_user_buffer_create( struct pipe_context *pipe, void *ptr, unsigned size ) +pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size ) { - return pipe->winsys->user_buffer_create(pipe->winsys, ptr, size); + return screen->winsys->user_buffer_create(screen->winsys, ptr, size); } static INLINE void -pipe_buffer_destroy( struct pipe_context *pipe, struct pipe_buffer *buf ) +pipe_buffer_destroy( struct pipe_screen *screen, struct pipe_buffer *buf ) { - pipe->winsys->buffer_destroy(pipe->winsys, buf); + screen->winsys->buffer_destroy(screen->winsys, buf); } static INLINE void * -pipe_buffer_map(struct pipe_context *pipe, +pipe_buffer_map(struct pipe_screen *screen, struct pipe_buffer *buf, unsigned usage) { - return pipe->winsys->buffer_map(pipe->winsys, buf, usage); + return screen->winsys->buffer_map(screen->winsys, buf, usage); } static INLINE void -pipe_buffer_unmap(struct pipe_context *pipe, +pipe_buffer_unmap(struct pipe_screen *screen, struct pipe_buffer *buf) { - pipe->winsys->buffer_unmap(pipe->winsys, buf); + screen->winsys->buffer_unmap(screen->winsys, buf); } /* XXX when we're using this everywhere, get rid of - * pipe_buffer_reference() above. + * winsys_buffer_reference() above. */ static INLINE void -pipe_reference_buffer(struct pipe_context *pipe, +pipe_buffer_reference(struct pipe_screen *screen, struct pipe_buffer **ptr, struct pipe_buffer *buf) { - pipe_buffer_reference(pipe->winsys, ptr, buf); + winsys_buffer_reference(screen->winsys, ptr, buf); } diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index 0b2621f7c31..231e07cd637 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -248,7 +248,7 @@ struct st_context { util_draw_vertex_buffer(pipe, vbuf, prim, num_verts, num_attribs); error2: - pipe_buffer_reference(pipe->winsys, &vbuf, NULL); + pipe_buffer_reference(pipe->screen, &vbuf, NULL); error1: ; } diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index f71d85dd9b4..bd71755f0b1 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -293,7 +293,7 @@ st_buffer_destroy(struct st_buffer *st_buf) { if(st_buf) { struct pipe_winsys *winsys = st_buf->st_dev->screen->winsys; - pipe_buffer_reference(winsys, &st_buf->buffer, NULL); + pipe_buffer_reference(pipe->screen, &st_buf->buffer, NULL); FREE(st_buf); } } diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index 2d4f5434b35..f62113a4691 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -221,7 +221,7 @@ st_softpipe_surface_release(struct pipe_winsys *winsys, surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); + winsys_buffer_reference(winsys, &surf->buffer, NULL); free(surf); } *s = NULL; diff --git a/src/gallium/winsys/drm/intel/dri/intel_screen.c b/src/gallium/winsys/drm/intel/dri/intel_screen.c index 46d4861e77c..3a486481f56 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_screen.c +++ b/src/gallium/winsys/drm/intel/dri/intel_screen.c @@ -83,7 +83,7 @@ intelCreateSurface(struct intel_screen *intelScreen, struct pipe_winsys *winsys, buffer); /* Unref the buffer we don't need it anyways */ - pipe_buffer_reference(screen->winsys, &buffer, NULL); + pipe_buffer_reference(screen, &buffer, NULL); surface = screen->get_tex_surface(screen, texture, diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c index ae81d7f801b..2fd190da52e 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -216,7 +216,7 @@ surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); + winsys_buffer_reference(winsys, &surf->buffer, NULL); free(surf); } *s = NULL; diff --git a/src/gallium/winsys/gdi/wmesa.c b/src/gallium/winsys/gdi/wmesa.c index 730fb1b5417..ed3dd2b9277 100644 --- a/src/gallium/winsys/gdi/wmesa.c +++ b/src/gallium/winsys/gdi/wmesa.c @@ -463,7 +463,7 @@ wm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); + winsys_buffer_reference(winsys, &surf->buffer, NULL); free(surf); } *s = NULL; diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 68ead7f528e..70f01e0ef83 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -543,7 +543,7 @@ xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); + winsys_buffer_reference(winsys, &surf->buffer, NULL); free(surf); } *s = NULL; diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index 35c4ebc4ba3..b7c10b6bcae 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -308,7 +308,7 @@ aub_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); + winsys_buffer_reference(winsys, &surf->buffer, NULL); free(surf); } *s = NULL; diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index d3aadf5074d..d02e51cb9a5 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -73,8 +73,8 @@ void st_upload_constants( struct st_context *st, /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ - pipe_reference_buffer(pipe, &cbuf->buffer, NULL ); - cbuf->buffer = pipe_buffer_create(pipe, 16, PIPE_BUFFER_USAGE_CONSTANT, + pipe_buffer_reference(pipe->screen, &cbuf->buffer, NULL ); + cbuf->buffer = pipe_buffer_create(pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, paramBytes ); if (0) @@ -86,10 +86,10 @@ void st_upload_constants( struct st_context *st, /* load Mesa constants into the constant buffer */ if (cbuf->buffer) { - void *map = pipe_buffer_map(pipe, cbuf->buffer, + void *map = pipe_buffer_map(pipe->screen, cbuf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(map, params->ParameterValues, paramBytes); - pipe_buffer_unmap(pipe, cbuf->buffer); + pipe_buffer_unmap(pipe->screen, cbuf->buffer); } cbuf->size = paramBytes; diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index a0c305d66ff..694104f9cfb 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -378,7 +378,7 @@ setup_bitmap_vertex_data(struct st_context *st, void *buf; if (!st->bitmap.vbuf) { - st->bitmap.vbuf = pipe_buffer_create(pipe, 32, PIPE_BUFFER_USAGE_VERTEX, + st->bitmap.vbuf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, sizeof(st->bitmap.vertices)); } @@ -418,9 +418,9 @@ setup_bitmap_vertex_data(struct st_context *st, } /* put vertex data into vbuf */ - buf = pipe_buffer_map(pipe, st->bitmap.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); + buf = pipe_buffer_map(pipe->screen, st->bitmap.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(buf, st->bitmap.vertices, sizeof(st->bitmap.vertices)); - pipe_buffer_unmap(pipe, st->bitmap.vbuf); + pipe_buffer_unmap(pipe->screen, st->bitmap.vbuf); } @@ -779,7 +779,7 @@ st_destroy_bitmap(struct st_context *st) } if (st->bitmap.vbuf) { - pipe_buffer_destroy(pipe, st->bitmap.vbuf); + pipe_buffer_destroy(pipe->screen, st->bitmap.vbuf); st->bitmap.vbuf = NULL; } diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index af79aefa968..07fa2afce05 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -78,7 +78,7 @@ st_bufferobj_free(GLcontext *ctx, struct gl_buffer_object *obj) struct st_buffer_object *st_obj = st_buffer_object(obj); if (st_obj->buffer) - pipe_reference_buffer(pipe, &st_obj->buffer, NULL); + pipe_buffer_reference(pipe->screen, &st_obj->buffer, NULL); free(st_obj); } @@ -105,9 +105,9 @@ st_bufferobj_subdata(GLcontext *ctx, if (offset >= st_obj->size || size > (st_obj->size - offset)) return; - map = pipe_buffer_map(pipe, st_obj->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + map = pipe_buffer_map(pipe->screen, st_obj->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(map + offset, data, size); - pipe_buffer_unmap(pipe, st_obj->buffer); + pipe_buffer_unmap(pipe->screen, st_obj->buffer); } @@ -128,9 +128,9 @@ st_bufferobj_get_subdata(GLcontext *ctx, if (offset >= st_obj->size || size > (st_obj->size - offset)) return; - map = pipe_buffer_map(pipe, st_obj->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = pipe_buffer_map(pipe->screen, st_obj->buffer, PIPE_BUFFER_USAGE_CPU_READ); memcpy(data, map + offset, size); - pipe_buffer_unmap(pipe, st_obj->buffer); + pipe_buffer_unmap(pipe->screen, st_obj->buffer); } @@ -171,9 +171,9 @@ st_bufferobj_data(GLcontext *ctx, buffer_usage = 0; } - pipe_reference_buffer( pipe, &st_obj->buffer, NULL ); + pipe_buffer_reference( pipe->screen, &st_obj->buffer, NULL ); - st_obj->buffer = pipe_buffer_create( pipe, 32, buffer_usage, size ); + st_obj->buffer = pipe_buffer_create( pipe->screen, 32, buffer_usage, size ); st_obj->size = size; @@ -207,7 +207,7 @@ st_bufferobj_map(GLcontext *ctx, GLenum target, GLenum access, break; } - obj->Pointer = pipe_buffer_map(pipe, st_obj->buffer, flags); + obj->Pointer = pipe_buffer_map(pipe->screen, st_obj->buffer, flags); return obj->Pointer; } @@ -221,7 +221,7 @@ st_bufferobj_unmap(GLcontext *ctx, GLenum target, struct gl_buffer_object *obj) struct pipe_context *pipe = st_context(ctx)->pipe; struct st_buffer_object *st_obj = st_buffer_object(obj); - pipe_buffer_unmap(pipe, st_obj->buffer); + pipe_buffer_unmap(pipe->screen, st_obj->buffer); obj->Pointer = NULL; return GL_TRUE; } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index e475f022d34..013b9a9c9cc 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -116,7 +116,7 @@ st_destroy_clear(struct st_context *st) st->clear.vs = NULL; } if (st->clear.vbuf) { - pipe_buffer_destroy(pipe, st->clear.vbuf); + pipe_buffer_destroy(pipe->screen, st->clear.vbuf); st->clear.vbuf = NULL; } } @@ -152,7 +152,7 @@ draw_quad(GLcontext *ctx, void *buf; if (!st->clear.vbuf) { - st->clear.vbuf = pipe_buffer_create(pipe, 32, PIPE_BUFFER_USAGE_VERTEX, + st->clear.vbuf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, sizeof(st->clear.vertices)); } @@ -180,9 +180,9 @@ draw_quad(GLcontext *ctx, } /* put vertex data into vbuf */ - buf = pipe_buffer_map(pipe, st->clear.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); + buf = pipe_buffer_map(pipe->screen, st->clear.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(buf, st->clear.vertices, sizeof(st->clear.vertices)); - pipe_buffer_unmap(pipe, st->clear.vbuf); + pipe_buffer_unmap(pipe->screen, st->clear.vbuf); /* draw */ util_draw_vertex_buffer(pipe, st->clear.vbuf, diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 4ec7c752dfe..00bbcae32ae 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -487,17 +487,17 @@ draw_quad(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, ubyte *map; /* allocate/load buffer object with vertex data */ - buf = pipe_buffer_create(pipe, 32, PIPE_BUFFER_USAGE_VERTEX, + buf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, sizeof(verts)); - map = pipe_buffer_map(pipe, buf, PIPE_BUFFER_USAGE_CPU_WRITE); + map = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(map, verts, sizeof(verts)); - pipe_buffer_unmap(pipe, buf); + pipe_buffer_unmap(pipe->screen, buf); util_draw_vertex_buffer(pipe, buf, PIPE_PRIM_QUADS, 4, /* verts */ 3); /* attribs/vert */ - pipe_buffer_reference(pipe->winsys, &buf, NULL); + pipe_buffer_reference(pipe->screen, &buf, NULL); } } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 83b0be06daa..08d4db7f7f4 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -196,7 +196,7 @@ static void st_destroy_context_priv( struct st_context *st ) for (i = 0; i < Elements(st->state.constants); i++) { if (st->state.constants[i].buffer) { - pipe_reference_buffer(st->pipe, &st->state.constants[i].buffer, NULL); + pipe_buffer_reference(st->pipe->screen, &st->state.constants[i].buffer, NULL); } } diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 2c807011864..bdf8648ef7c 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -229,7 +229,7 @@ setup_edgeflags(GLcontext *ctx, GLenum primMode, GLint start, GLint count, if (!vec) return NULL; - map = pipe_buffer_map(pipe, stobj->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = pipe_buffer_map(pipe->screen, stobj->buffer, PIPE_BUFFER_USAGE_CPU_READ); map = ADD_POINTERS(map, array->Ptr); for (i = 0; i < count; i++) { @@ -239,7 +239,7 @@ setup_edgeflags(GLcontext *ctx, GLenum primMode, GLint start, GLint count, map += array->StrideB; } - pipe_buffer_unmap(pipe, stobj->buffer); + pipe_buffer_unmap(pipe->screen, stobj->buffer); pipe->set_edgeflags(pipe, vec); @@ -373,13 +373,13 @@ setup_interleaved_attribs(GLcontext *ctx, get_user_arrays_bounds(vp, arrays, max_index, &low, &high); /*printf("user buffer range: %p %p %d\n", low, high, high-low);*/ vbuffer->buffer = - pipe_user_buffer_create(pipe, (void *) low, high - low); + pipe_user_buffer_create(pipe->screen, (void *) low, high - low); vbuffer->buffer_offset = 0; offset0 = low; } else { vbuffer->buffer = NULL; - pipe_reference_buffer(pipe, &vbuffer->buffer, stobj->buffer); + pipe_buffer_reference(pipe->screen, &vbuffer->buffer, stobj->buffer); vbuffer->buffer_offset = (unsigned) arrays[mesaAttr]->Ptr; offset0 = arrays[mesaAttr]->Ptr; } @@ -432,7 +432,7 @@ setup_non_interleaved_attribs(GLcontext *ctx, /*printf("stobj %u = %p\n", attr, (void*) stobj);*/ vbuffer[attr].buffer = NULL; - pipe_reference_buffer(pipe, &vbuffer[attr].buffer, stobj->buffer); + pipe_buffer_reference(pipe->screen, &vbuffer[attr].buffer, stobj->buffer); vbuffer[attr].buffer_offset = (unsigned) arrays[mesaAttr]->Ptr; velements[attr].src_offset = 0; } @@ -451,13 +451,13 @@ setup_non_interleaved_attribs(GLcontext *ctx, bytes = arrays[mesaAttr]->Size * _mesa_sizeof_type(arrays[mesaAttr]->Type); } - vbuffer[attr].buffer = pipe_user_buffer_create(pipe, + vbuffer[attr].buffer = pipe_user_buffer_create(pipe->screen, (void *) arrays[mesaAttr]->Ptr, bytes); } else { /* no array, use ctx->Current.Attrib[] value */ bytes = sizeof(ctx->Current.Attrib[0]); - vbuffer[attr].buffer = pipe_user_buffer_create(pipe, + vbuffer[attr].buffer = pipe_user_buffer_create(pipe->screen, (void *) ctx->Current.Attrib[mesaAttr], bytes); stride = 0; } @@ -581,12 +581,12 @@ st_draw_vbo(GLcontext *ctx, if (bufobj && bufobj->Name) { /* elements/indexes are in a real VBO */ struct st_buffer_object *stobj = st_buffer_object(bufobj); - pipe_reference_buffer(pipe, &indexBuf, stobj->buffer); + pipe_buffer_reference(pipe->screen, &indexBuf, stobj->buffer); indexOffset = (unsigned) ib->ptr / indexSize; } else { /* element/indicies are in user space memory */ - indexBuf = pipe_user_buffer_create(pipe, (void *) ib->ptr, + indexBuf = pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, ib->count * indexSize); indexOffset = 0; } @@ -621,7 +621,7 @@ st_draw_vbo(GLcontext *ctx, } } - pipe_reference_buffer(pipe, &indexBuf, NULL); + pipe_buffer_reference(pipe->screen, &indexBuf, NULL); } else { /* non-indexed */ @@ -637,7 +637,7 @@ st_draw_vbo(GLcontext *ctx, /* unreference buffers (frees wrapped user-space buffer objects) */ for (attr = 0; attr < num_vbuffers; attr++) { - pipe_reference_buffer(pipe, &vbuffer[attr].buffer, NULL); + pipe_buffer_reference(pipe->screen, &vbuffer[attr].buffer, NULL); assert(!vbuffer[attr].buffer); } pipe->set_vertex_buffers(pipe, vp->num_inputs, vbuffer); @@ -750,7 +750,7 @@ st_feedback_draw_vbo(GLcontext *ctx, assert(stobj->buffer); vbuffers[attr].buffer = NULL; - pipe_reference_buffer(pipe, &vbuffers[attr].buffer, stobj->buffer); + pipe_buffer_reference(pipe->screen, &vbuffers[attr].buffer, stobj->buffer); vbuffers[attr].buffer_offset = (unsigned) arrays[0]->Ptr;/* in bytes */ velements[attr].src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; } @@ -762,7 +762,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* wrap user data */ vbuffers[attr].buffer - = pipe_user_buffer_create(pipe, (void *) arrays[mesaAttr]->Ptr, + = pipe_user_buffer_create(pipe->screen, (void *) arrays[mesaAttr]->Ptr, bytes); vbuffers[attr].buffer_offset = 0; velements[attr].src_offset = 0; @@ -784,7 +784,7 @@ st_feedback_draw_vbo(GLcontext *ctx, #endif /* map the attrib buffer */ - map = pipe_buffer_map(pipe, vbuffers[attr].buffer, + map = pipe_buffer_map(pipe->screen, vbuffers[attr].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, attr, map); } @@ -812,7 +812,7 @@ st_feedback_draw_vbo(GLcontext *ctx, return; } - map = pipe_buffer_map(pipe, index_buffer_handle, + map = pipe_buffer_map(pipe->screen, index_buffer_handle, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, map); } @@ -823,7 +823,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* map constant buffers */ - mapped_constants = pipe_buffer_map(pipe, + mapped_constants = pipe_buffer_map(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_constant_buffer(st->draw, mapped_constants, @@ -837,20 +837,20 @@ st_feedback_draw_vbo(GLcontext *ctx, /* unmap constant buffers */ - pipe_buffer_unmap(pipe, st->state.constants[PIPE_SHADER_VERTEX].buffer); + pipe_buffer_unmap(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX].buffer); /* * unmap vertex/index buffers */ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (draw->pt.vertex_buffer[i].buffer) { - pipe_buffer_unmap(pipe, draw->pt.vertex_buffer[i].buffer); - pipe_reference_buffer(pipe, &draw->pt.vertex_buffer[i].buffer, NULL); + pipe_buffer_unmap(pipe->screen, draw->pt.vertex_buffer[i].buffer); + pipe_buffer_reference(pipe->screen, &draw->pt.vertex_buffer[i].buffer, NULL); draw_set_mapped_vertex_buffer(draw, i, NULL); } } if (ib) { - pipe_buffer_unmap(pipe, index_buffer_handle); + pipe_buffer_unmap(pipe->screen, index_buffer_handle); draw_set_mapped_element_buffer(draw, 0, NULL); } } diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 6db9bc0dd58..b9d114b1c97 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -128,10 +128,10 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcData = (ubyte *) pipe_buffer_map(pipe, srcSurf->buffer, + srcData = (ubyte *) pipe_buffer_map(pipe->screen, srcSurf->buffer, PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset; - dstData = (ubyte *) pipe_buffer_map(pipe, dstSurf->buffer, + dstData = (ubyte *) pipe_buffer_map(pipe->screen, dstSurf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset; @@ -144,8 +144,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, dstSurf->stride, /* stride in bytes */ dstData); - pipe_buffer_unmap(pipe, srcSurf->buffer); - pipe_buffer_unmap(pipe, dstSurf->buffer); + pipe_buffer_unmap(pipe->screen, srcSurf->buffer); + pipe_buffer_unmap(pipe->screen, dstSurf->buffer); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); -- cgit v1.2.3 From 01a76f4fee3dda55447dc93e6198ede105bdaef0 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 27 Aug 2008 16:45:41 -0600 Subject: cell: add missing cell_tex_surface_release() --- src/gallium/drivers/cell/ppu/cell_texture.c | 39 +++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 5c01aa21b85..486e6cfb97a 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -85,8 +85,8 @@ cell_texture_layout(struct cell_texture * spt) static struct pipe_texture * -cell_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) +cell_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) { struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = CALLOC_STRUCT(cell_texture); @@ -113,8 +113,8 @@ cell_texture_create_screen(struct pipe_screen *screen, static void -cell_texture_release_screen(struct pipe_screen *screen, - struct pipe_texture **pt) +cell_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -148,10 +148,10 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, static struct pipe_surface * -cell_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned usage) +cell_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned usage) { struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = cell_texture(pt); @@ -285,6 +285,21 @@ cell_update_texture_mapping(struct cell_context *cell) } +static void +cell_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **s) +{ + /* Effectively do the texture_update work here - if texture images + * needed post-processing to put them into hardware layout, this is + * where it would happen. For softpipe, nothing to do. + */ + assert ((*s)->texture); + pipe_texture_reference(&(*s)->texture, NULL); + + screen->winsys->surface_release(screen->winsys, s); +} + + static void * cell_surface_map( struct pipe_screen *screen, struct pipe_surface *surface, @@ -336,9 +351,11 @@ cell_init_texture_functions(struct cell_context *cell) void cell_init_screen_texture_funcs(struct pipe_screen *screen) { - screen->texture_create = cell_texture_create_screen; - screen->texture_release = cell_texture_release_screen; - screen->get_tex_surface = cell_get_tex_surface_screen; + screen->texture_create = cell_texture_create; + screen->texture_release = cell_texture_release; + + screen->get_tex_surface = cell_get_tex_surface; + screen->tex_surface_release = cell_tex_surface_release; screen->surface_map = cell_surface_map; screen->surface_unmap = cell_surface_unmap; -- cgit v1.2.3 From fafc36920eb79ddbe7049f6bbce18bcc279982d0 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 3 Sep 2008 09:31:36 -0600 Subject: cell: add -DDEBUG flag, fixes to Cell Makefiles --- configs/linux-cell-debug | 2 +- src/gallium/drivers/cell/ppu/Makefile | 2 +- src/gallium/drivers/cell/spu/Makefile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/configs/linux-cell-debug b/configs/linux-cell-debug index ba333bba686..42f3245edc9 100644 --- a/configs/linux-cell-debug +++ b/configs/linux-cell-debug @@ -6,5 +6,5 @@ include $(TOP)/configs/linux-cell CONFIG_NAME = linux-cell-debug -OPT_FLAGS = -g +OPT_FLAGS = -g -DDEBUG diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 0389a9554cf..25473e200cf 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -5,7 +5,7 @@ TOP = ../../../../.. -include $(TOP)/configs/linux-cell +include $(TOP)/configs/current # This is the "top-level" cell PPU driver code, will get pulled into libGL.so diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index 8e83610790e..d49abb2e825 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -5,7 +5,7 @@ TOP = ../../../../.. -include $(TOP)/configs/linux-cell +include $(TOP)/configs/current PROG = g3d -- cgit v1.2.3 From 8b8952aa69bbaa0f87526cd08aaca7659595a675 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 3 Sep 2008 11:43:11 -0600 Subject: cell: fix texture/surface allocation to allocate by multiple of tile size This fixes the garbage blocks/pixels seen along the bottom of some windows. --- src/gallium/drivers/cell/ppu/cell_texture.c | 32 +++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 486e6cfb97a..452ff132693 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -63,19 +63,30 @@ cell_texture_layout(struct cell_texture * spt) spt->buffer_size = 0; for ( level = 0 ; level <= pt->last_level ; level++ ) { + unsigned size; + unsigned w_tile, h_tile; + + /* width, height, rounded up to tile size */ + w_tile = align(width, TILE_SIZE); + h_tile = align(height, TILE_SIZE); + pt->width[level] = width; pt->height[level] = height; pt->depth[level] = depth; - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); spt->stride[level] = pt->nblocksx[level] * pt->block.size; spt->level_offset[level] = spt->buffer_size; - spt->buffer_size += (pt->nblocksy[level] * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - pt->nblocksx[level] * pt->block.size); + size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; + if (pt->target == PIPE_TEXTURE_CUBE) + size *= 6; + else + size *= depth; + + spt->buffer_size += size; width = minify(width); height = minify(height); @@ -138,6 +149,7 @@ cell_texture_release(struct pipe_screen *screen, } +#if 0 static void cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, uint face, uint levelsMask) @@ -145,6 +157,7 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, /* XXX TO DO: re-tile the texture data ... */ } +#endif static struct pipe_surface * @@ -179,7 +192,8 @@ cell_get_tex_surface(struct pipe_screen *screen, ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * ps->nblocksy * ps->stride; - } else { + } + else { assert(face == 0); assert(zslice == 0); } @@ -189,6 +203,11 @@ cell_get_tex_surface(struct pipe_screen *screen, +/** + * Copy tile data from linear layout to tiled layout. + * XXX this should be rolled into the future surface-creation code. + * XXX also need "untile" code... + */ static void tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) { @@ -219,6 +238,7 @@ tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) /** * Convert linear texture image data to tiled format for SPU usage. + * XXX recast this in terms of pipe_surfaces (aka texture views). */ static void cell_tile_texture(struct cell_context *cell, -- cgit v1.2.3 From ba2812f23e05c63e0ea2a042dcb788c30fcbc37f Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 3 Sep 2008 11:45:28 -0600 Subject: cell: comments --- src/gallium/drivers/cell/spu/spu_tile.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c index 12dc2463283..216a33126b7 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.c +++ b/src/gallium/drivers/cell/spu/spu_tile.c @@ -31,6 +31,9 @@ #include "spu_main.h" +/** + * Get tile of color or Z values from main memory, put into SPU memory. + */ void get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) { @@ -56,6 +59,9 @@ get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) } +/** + * Move tile of color or Z values from SPU memory to main memory. + */ void put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) { -- cgit v1.2.3 From b035c85b3e3eaae071f1401f23be40b16cdee34f Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 3 Sep 2008 13:30:46 -0600 Subject: cell: updated assertion --- src/gallium/drivers/cell/ppu/cell_surface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index d9e3b510dc0..995ae341d70 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -45,7 +45,7 @@ cell_surface_copy(struct pipe_context *pipe, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - assert( dst->cpp == src->cpp ); + assert( dst->format == src->format ); pipe_copy_rect(pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE), &dst->block, -- cgit v1.2.3 From 439dca49920018e557d70b828f10aa815a8a9066 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 3 Sep 2008 13:31:14 -0600 Subject: cell: add pipe_texture_reference() call in cell_get_tex_surface() --- src/gallium/drivers/cell/ppu/cell_texture.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 452ff132693..0fe525170be 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -187,6 +187,10 @@ cell_get_tex_surface(struct pipe_screen *screen, /* XXX may need to override usage flags (see sp_texture.c) */ + pipe_texture_reference(&ps->texture, pt); + ps->face = face; + ps->level = level; + ps->zslice = zslice; if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * -- cgit v1.2.3 From 6c84652dc3877593b8b151366521289833707b40 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 3 Sep 2008 13:31:51 -0600 Subject: cell: replace assert() with special spu ASSERT() macro --- src/gallium/drivers/cell/spu/spu_exec.c | 150 ++++++++++----------- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 4 +- src/gallium/drivers/cell/spu/spu_util.c | 21 +-- src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 2 +- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 2 +- 5 files changed, 90 insertions(+), 89 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 89c61136a4c..e27df2dfb38 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -382,10 +382,10 @@ fetch_src_file_channel( break; case TGSI_FILE_IMMEDIATE: - assert( index->i[0] < (int) mach->ImmLimit ); - assert( index->i[1] < (int) mach->ImmLimit ); - assert( index->i[2] < (int) mach->ImmLimit ); - assert( index->i[3] < (int) mach->ImmLimit ); + ASSERT( index->i[0] < (int) mach->ImmLimit ); + ASSERT( index->i[1] < (int) mach->ImmLimit ); + ASSERT( index->i[2] < (int) mach->ImmLimit ); + ASSERT( index->i[3] < (int) mach->ImmLimit ); chan->f[0] = mach->Imms[index->i[0]][swizzle]; chan->f[1] = mach->Imms[index->i[1]][swizzle]; @@ -409,7 +409,7 @@ fetch_src_file_channel( break; default: - assert( 0 ); + ASSERT( 0 ); } break; @@ -422,7 +422,7 @@ fetch_src_file_channel( break; default: - assert( 0 ); + ASSERT( 0 ); } } @@ -471,7 +471,7 @@ fetch_source( index.q = si_shli(index.q, 12); break; default: - assert( 0 ); + ASSERT( 0 ); } index.i[0] += reg->SrcRegisterDim.Index; @@ -558,7 +558,7 @@ store_dest( break; default: - assert( 0 ); + ASSERT( 0 ); return; } @@ -582,11 +582,11 @@ store_dest( break; case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); + ASSERT( 0 ); break; default: - assert( 0 ); + ASSERT( 0 ); } } @@ -769,7 +769,7 @@ exec_tex(struct spu_exec_machine *mach, break; default: - assert (0); + ASSERT (0); } FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { @@ -861,7 +861,7 @@ exec_declaration(struct spu_exec_machine *mach, break; default: - assert( 0 ); + ASSERT( 0 ); } if( mask == TGSI_WRITEMASK_XYZW ) { @@ -971,11 +971,11 @@ exec_instruction( break; case TGSI_OPCODE_EXP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_LOG: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_MUL: @@ -1151,24 +1151,24 @@ exec_instruction( break; case TGSI_OPCODE_CND: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_CND0: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_DOT2ADD: /* TGSI_OPCODE_DP2A */ - assert (0); + ASSERT (0); break; case TGSI_OPCODE_INDEX: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_NEGATE: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_FRAC: @@ -1181,7 +1181,7 @@ exec_instruction( break; case TGSI_OPCODE_CLAMP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_FLOOR: @@ -1276,7 +1276,7 @@ exec_instruction( break; case TGSI_OPCODE_MULTIPLYMATRIX: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ABS: @@ -1290,7 +1290,7 @@ exec_instruction( break; case TGSI_OPCODE_RCC: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_DPH: @@ -1353,23 +1353,23 @@ exec_instruction( break; case TGSI_OPCODE_PK2H: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PK2US: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PK4B: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PK4UB: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_RFL: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_SEQ: @@ -1384,7 +1384,7 @@ exec_instruction( break; case TGSI_OPCODE_SFL: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_SGT: @@ -1429,7 +1429,7 @@ exec_instruction( break; case TGSI_OPCODE_STR: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TEX: @@ -1452,7 +1452,7 @@ exec_instruction( /* src[1] = d[strq]/dx */ /* src[2] = d[strq]/dy */ /* src[3] = sampler unit */ - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TXL: @@ -1470,35 +1470,35 @@ exec_instruction( break; case TGSI_OPCODE_UP2H: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_UP2US: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_UP4B: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_UP4UB: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_X2D: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ARA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ARR: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_BRA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_CAL: @@ -1507,14 +1507,14 @@ exec_instruction( /* do the call */ /* push the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* note that PC was already incremented above */ @@ -1538,13 +1538,13 @@ exec_instruction( *pc = mach->CallStack[--mach->CallStackTop]; /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); + ASSERT(mach->CondStackTop > 0); mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); + ASSERT(mach->LoopStackTop > 0); mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); + ASSERT(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->FuncStackTop > 0); + ASSERT(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; UPDATE_EXEC_MASK(mach); @@ -1552,7 +1552,7 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_CMP: @@ -1592,11 +1592,11 @@ exec_instruction( break; case TGSI_OPCODE_NRM: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_DIV: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_DP2: @@ -1615,7 +1615,7 @@ exec_instruction( case TGSI_OPCODE_IF: /* push CondMask */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; FETCH( &r[0], 0, CHAN_X ); /* update CondMask */ @@ -1639,7 +1639,7 @@ exec_instruction( /* invert CondMask wrt previous mask */ { uint prevMask; - assert(mach->CondStackTop > 0); + ASSERT(mach->CondStackTop > 0); prevMask = mach->CondStack[mach->CondStackTop - 1]; mach->CondMask = ~mach->CondMask & prevMask; UPDATE_EXEC_MASK(mach); @@ -1649,7 +1649,7 @@ exec_instruction( case TGSI_OPCODE_ENDIF: /* pop CondMask */ - assert(mach->CondStackTop > 0); + ASSERT(mach->CondStackTop > 0); mach->CondMask = mach->CondStack[--mach->CondStackTop]; UPDATE_EXEC_MASK(mach); break; @@ -1660,19 +1660,19 @@ exec_instruction( break; case TGSI_OPCODE_REP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ENDREP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PUSHA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_POPA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_CEIL: @@ -1746,7 +1746,7 @@ exec_instruction( break; case TGSI_OPCODE_MOD: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_XOR: @@ -1759,15 +1759,15 @@ exec_instruction( break; case TGSI_OPCODE_SAD: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TXF: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TXQ: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_EMIT: @@ -1784,9 +1784,9 @@ exec_instruction( /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP2: /* push LoopMask and ContMasks */ - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; @@ -1794,7 +1794,7 @@ exec_instruction( /* fall-through (for now at least) */ case TGSI_OPCODE_ENDLOOP2: /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); + ASSERT(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; if (mach->LoopMask) { /* repeat loop: jump to instruction just past BGNLOOP */ @@ -1802,10 +1802,10 @@ exec_instruction( } else { /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); + ASSERT(mach->LoopStackTop > 0); mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; /* pop ContMask */ - assert(mach->ContStackTop > 0); + ASSERT(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; } UPDATE_EXEC_MASK(mach); @@ -1834,26 +1834,26 @@ exec_instruction( break; case TGSI_OPCODE_NOISE1: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOISE2: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOISE3: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOISE4: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOP: break; default: - assert( 0 ); + ASSERT( 0 ); } } @@ -1874,11 +1874,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; - mach->CondStackTop = 0; /* temporarily subvert this assertion */ - assert(mach->CondStackTop == 0); - assert(mach->LoopStackTop == 0); - assert(mach->ContStackTop == 0); - assert(mach->CallStackTop == 0); + mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ + ASSERT(mach->CondStackTop == 0); + ASSERT(mach->LoopStackTop == 0); + ASSERT(mach->ContStackTop == 0); + ASSERT(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index b4cffeeb32a..c0a729b3d2c 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -95,7 +95,7 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y, default: - assert(0); + ASSERT(0); break; } } @@ -153,7 +153,7 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y, default: - assert(0); + ASSERT(0); break; } } diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index b25ca4eafc0..b8a0d4a265f 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,4 +1,5 @@ +#include "cell/common.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_debug.h" #include "tgsi/tgsi_parse.h" @@ -20,7 +21,7 @@ tgsi_util_get_src_register_swizzle( case 3: return reg->SwizzleW; default: - assert( 0 ); + ASSERT( 0 ); } return 0; } @@ -40,7 +41,7 @@ tgsi_util_get_src_register_extswizzle( case 3: return reg->ExtSwizzleW; default: - assert( 0 ); + ASSERT( 0 ); } return 0; } @@ -60,12 +61,12 @@ tgsi_util_get_full_src_register_extswizzle( ®->SrcRegisterExtSwz, component ); - assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); - assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); - assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); - assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); /* * Second, calculate the simple swizzle for the unswizzled channel index. @@ -95,7 +96,7 @@ tgsi_util_get_src_register_extnegate( case 3: return reg->NegateW; default: - assert( 0 ); + ASSERT( 0 ); } return 0; } @@ -120,7 +121,7 @@ tgsi_util_set_src_register_extnegate( reg->NegateW = negate; break; default: - assert( 0 ); + ASSERT( 0 ); } } diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 26f23637492..03375d84a57 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -92,7 +92,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned nr_attrs = draw->vertex_fetch.nr_attrs; unsigned attr; - assert(count <= 4); + ASSERT(count <= 4); #if DRAW_DBG printf("SPU: %s count = %u, nr_attrs = %u\n", diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index f81d19fea1c..fbe5b34d397 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -112,7 +112,7 @@ run_vertex_program(struct spu_vs_context *draw, const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; - assert(count <= 4); + ASSERT(count <= 4); machine->Processor = TGSI_PROCESSOR_VERTEX; -- cgit v1.2.3 From f89b74d97eff6c9a9875475af34fb15974ad75a2 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 3 Sep 2008 14:31:11 -0600 Subject: cell: change context type passed to cell_flush_int() --- src/gallium/drivers/cell/ppu/cell_flush.c | 5 ++--- src/gallium/drivers/cell/ppu/cell_flush.h | 2 +- src/gallium/drivers/cell/ppu/cell_vbuf.c | 7 ++++--- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index 3aaf3de6684..ff0d61764bc 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -50,16 +50,15 @@ cell_flush(struct pipe_context *pipe, unsigned flags, flags |= CELL_FLUSH_WAIT; draw_flush( cell->draw ); - cell_flush_int(pipe, flags); + cell_flush_int(cell, flags); } /** internal flush */ void -cell_flush_int(struct pipe_context *pipe, unsigned flags) +cell_flush_int(struct cell_context *cell, unsigned flags) { static boolean flushing = FALSE; /* recursion catcher */ - struct cell_context *cell = cell_context(pipe); uint i; ASSERT(!flushing); diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h index 8f0645c4293..509ae6239ac 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -36,7 +36,7 @@ cell_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence); extern void -cell_flush_int(struct pipe_context *pipe, unsigned flags); +cell_flush_int(struct cell_context *cell, unsigned flags); extern void cell_flush_buffer_range(struct cell_context *cell, void *ptr, diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index e4230c7a5ff..1bf0a24bccc 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -113,7 +113,7 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, } cvbr->vertex_buf = ~0; - cell_flush_int(&cell->pipe, 0x0); + cell_flush_int(cell, 0x0); assert(vertices == cvbr->vertex_buffer); cvbr->vertex_buffer = NULL; @@ -121,12 +121,13 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, -static void +static boolean cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); cvbr->prim = prim; /*printf("cell_set_prim %u\n", prim);*/ + return TRUE; } @@ -244,7 +245,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, #if 0 /* helpful for debug */ - cell_flush_int(&cell->pipe, CELL_FLUSH_WAIT); + cell_flush_int(cell, CELL_FLUSH_WAIT); #endif } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 3658947715f..2b10c116fa3 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -135,7 +135,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) vs->num_elts = n; send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); - cell_flush_int(& cell->pipe, CELL_FLUSH_WAIT); + cell_flush_int(cell, CELL_FLUSH_WAIT); } draw->vs.post_nr = draw->vs.queue_nr; -- cgit v1.2.3 From e082298e3115d12e2acc80763a49acfb762072ae Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 3 Sep 2008 14:36:56 -0600 Subject: cell: update comments, fix typos --- src/gallium/drivers/cell/ppu/cell_batch.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c index f45e5f25b64..5d007d4c8ce 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -93,11 +93,11 @@ cell_batch_flush(struct cell_context *cell) /* printf("cell_batch_dispatch: buf %u at %p, size %u\n", - batch, &cell->batch_buffer[batch][0], size); + batch, &cell->buffer[batch][0], size); */ /* - * Build "BATCH" command and sent to all SPUs. + * Build "BATCH" command and send to all SPUs. */ cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); @@ -130,6 +130,8 @@ cell_batch_free_space(const struct cell_context *cell) /** * Append data to current batch. + * \param data address of block of bytes to append + * \param bytes size of block of bytes */ void cell_batch_append(struct cell_context *cell, const void *data, uint bytes) @@ -165,6 +167,10 @@ cell_batch_append(struct cell_context *cell, const void *data, uint bytes) } +/** + * Allocate space in the current batch buffer for 'bytes' space. + * \return address in batch buffer to put data + */ void * cell_batch_alloc(struct cell_context *cell, uint bytes) { @@ -172,6 +178,10 @@ cell_batch_alloc(struct cell_context *cell, uint bytes) } +/** + * Same as \sa cell_batch_alloc, but return an address at a particular + * alignment. + */ void * cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, uint alignment) -- cgit v1.2.3 From e5085b83d0e3e16fcdc67bdab5cf7678d35984d4 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 3 Sep 2008 14:38:02 -0600 Subject: cell: flush rendering to current surfaces before installing new ones This fixes crashes when resizing windows. --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index fe5437023b9..a11ffac3932 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -34,6 +34,7 @@ #include "pipe/p_inlines.h" #include "draw/draw_context.h" #include "cell_context.h" +#include "cell_flush.h" #include "cell_state.h" #include "cell_texture.h" #include "cell_state_per_fragment.h" @@ -310,8 +311,21 @@ cell_set_framebuffer_state(struct pipe_context *pipe, cell->zsbuf_map = NULL; } - /* update my state */ - cell->framebuffer = *fb; + /* Finish any pending rendering to the current surface before + * installing a new surface! + */ + cell_flush_int(cell, CELL_FLUSH_WAIT); + + /* update my state + * (this is also where old surfaces will finally get freed) + */ + cell->framebuffer.width = fb->width; + cell->framebuffer.height = fb->height; + cell->framebuffer.num_cbufs = fb->num_cbufs; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); + } + pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf); /* map new surfaces */ if (csurf) -- cgit v1.2.3 From c93fff86ea5f8fb175159652e5b3ffa3882c2c2b Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 4 Sep 2008 09:33:27 +0900 Subject: softpipe: Use pipe_buffer_* inlines as much as possible. --- src/gallium/drivers/softpipe/sp_draw_arrays.c | 8 ++++---- src/gallium/drivers/softpipe/sp_texture.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 12b44a82118..54f7bfa88af 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -135,7 +135,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe, */ for (i = 0; i < sp->num_vertex_buffers; i++) { void *buf - = pipe->winsys->buffer_map(pipe->winsys, + = pipe_buffer_map(pipe->screen, sp->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); @@ -143,7 +143,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes - = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + = pipe_buffer_map(pipe->screen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(draw, indexSize, min_index, @@ -164,11 +164,11 @@ softpipe_draw_range_elements(struct pipe_context *pipe, */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); } if (indexBuffer) { draw_set_mapped_element_buffer(draw, 0, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + pipe_buffer_unmap(pipe->screen, indexBuffer); } diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index c283e3e4108..cb48035771b 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -307,7 +307,7 @@ softpipe_surface_map( struct pipe_screen *screen, return NULL; } - map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + map = pipe_buffer_map( screen, surface->buffer, flags ); if (map == NULL) return NULL; @@ -331,7 +331,7 @@ static void softpipe_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); + pipe_buffer_unmap( screen, surface->buffer ); } -- cgit v1.2.3 From 65a094101f8463cd0d26104a25a77f24bc2f6949 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 4 Sep 2008 09:33:47 +0900 Subject: i965: Use pipe_buffer_* inlines as much as possible. --- src/gallium/drivers/i965simple/brw_state_pool.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c index d0dc1ef74d6..007dc8f9deb 100644 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -92,10 +92,10 @@ static void brw_init_pool( struct brw_context *brw, pool->size = size; pool->brw = brw; - pool->buffer = brw->pipe.winsys->buffer_create(brw->pipe.winsys, - 4096, - 0 /* DRM_BO_FLAG_MEM_TT */, - size); + pool->buffer = pipe_buffer_create(brw->pipe.screen, + 4096, + 0 /* DRM_BO_FLAG_MEM_TT */, + size); } static void brw_destroy_pool( struct brw_context *brw, @@ -103,7 +103,7 @@ static void brw_destroy_pool( struct brw_context *brw, { struct brw_mem_pool *pool = &brw->pool[pool_id]; - winsys_buffer_reference( pool->brw->pipe.winsys, + pipe_buffer_reference( pool->brw->pipe.screen, &pool->buffer, NULL ); } -- cgit v1.2.3 From 34cc7f5f1990e8e84628c24677a6bc715aeaf661 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 4 Sep 2008 09:34:04 +0900 Subject: i915: Use pipe_buffer_* inlines as much as possible. --- src/gallium/drivers/i915simple/i915_context.c | 8 +++--- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 38 ++++++++++++------------- src/gallium/drivers/i915simple/i915_screen.c | 5 ++-- 3 files changed, 26 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index c6776716a2f..4d9f2f030d7 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -72,7 +72,7 @@ i915_draw_range_elements(struct pipe_context *pipe, */ for (i = 0; i < i915->num_vertex_buffers; i++) { void *buf - = pipe->winsys->buffer_map(pipe->winsys, + = pipe_buffer_map(pipe->screen, i915->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); @@ -80,7 +80,7 @@ i915_draw_range_elements(struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes - = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + = pipe_buffer_map(pipe->screen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(draw, indexSize, min_index, @@ -105,11 +105,11 @@ i915_draw_range_elements(struct pipe_context *pipe, * unmap vertex/index buffers */ for (i = 0; i < i915->num_vertex_buffers; i++) { - pipe->winsys->buffer_unmap(pipe->winsys, i915->vertex_buffer[i].buffer); + pipe_buffer_unmap(pipe->screen, i915->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); } if (indexBuffer) { - pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 9397a2ca1a8..4fda1ab64f5 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -115,7 +115,7 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct pipe_winsys *winsys = i915->pipe.winsys; + struct pipe_screen *screen = i915->pipe.screen; size_t size = (size_t)vertex_size * (size_t)nr_vertices; /* FIXME: handle failure */ @@ -124,20 +124,20 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { } else { i915->vbo_flushed = 0; - winsys_buffer_reference(winsys, &i915_render->vbo, NULL); + pipe_buffer_reference(screen, &i915_render->vbo, NULL); } if (!i915_render->vbo) { i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); i915_render->vbo_offset = 0; - i915_render->vbo = winsys->buffer_create(winsys, - 64, - I915_BUFFER_USAGE_LIT_VERTEX, - i915_render->vbo_size); - i915_render->vbo_ptr = winsys->buffer_map(winsys, - i915_render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); - winsys->buffer_unmap(winsys, i915_render->vbo); + i915_render->vbo = pipe_buffer_create(screen, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = pipe_buffer_map(screen, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_buffer_unmap(screen, i915_render->vbo); } i915->vbo = i915_render->vbo; @@ -488,7 +488,7 @@ static struct vbuf_render * i915_vbuf_render_create( struct i915_context *i915 ) { struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); - struct pipe_winsys *winsys = i915->pipe.winsys; + struct pipe_screen *screen = i915->pipe.screen; i915_render->i915 = i915; @@ -510,14 +510,14 @@ i915_vbuf_render_create( struct i915_context *i915 ) i915_render->vbo_alloc_size = 128 * 4096; i915_render->vbo_size = i915_render->vbo_alloc_size; i915_render->vbo_offset = 0; - i915_render->vbo = winsys->buffer_create(winsys, - 64, - I915_BUFFER_USAGE_LIT_VERTEX, - i915_render->vbo_size); - i915_render->vbo_ptr = winsys->buffer_map(winsys, - i915_render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); - winsys->buffer_unmap(winsys, i915_render->vbo); + i915_render->vbo = pipe_buffer_create(screen, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = pipe_buffer_map(screen, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_buffer_unmap(screen, i915_render->vbo); return &i915_render->base; } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index e9e40c3f0b2..1c976082df7 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -28,6 +28,7 @@ #include "util/u_memory.h" #include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" #include "util/u_string.h" #include "i915_reg.h" @@ -207,7 +208,7 @@ i915_surface_map( struct pipe_screen *screen, struct pipe_surface *surface, unsigned flags ) { - char *map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + char *map = pipe_buffer_map( screen, surface->buffer, flags ); if (map == NULL) return NULL; @@ -226,7 +227,7 @@ static void i915_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); + pipe_buffer_unmap( screen, surface->buffer ); } -- cgit v1.2.3 From dffad1751e953c10742d5aee191d6f07482cdeea Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 4 Sep 2008 09:34:12 +0900 Subject: cell: Use pipe_buffer_* inlines as much as possible. --- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 8 ++++---- src/gallium/drivers/cell/ppu/cell_texture.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index f02dffe1245..41c85ecd6c8 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -116,7 +116,7 @@ cell_draw_range_elements(struct pipe_context *pipe, * Map vertex buffers */ for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf = pipe->winsys->buffer_map(pipe->winsys, + void *buf = pipe_buffer_map(pipe->screen, sp->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); @@ -124,7 +124,7 @@ cell_draw_range_elements(struct pipe_context *pipe, } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, + void *mapped_indexes = pipe_buffer_map(pipe->screen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); @@ -143,11 +143,11 @@ cell_draw_range_elements(struct pipe_context *pipe, */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); } if (indexBuffer) { draw_set_mapped_element_buffer(draw, 0, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + pipe_buffer_unmap(pipe->screen, indexBuffer); } /* Note: leave drawing surfaces mapped */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 0fe525170be..34d002c3dc3 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -336,7 +336,7 @@ cell_surface_map( struct pipe_screen *screen, return NULL; } - map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + map = pipe_buffer_map( screen, surface->buffer, flags ); if (map == NULL) return NULL; @@ -362,7 +362,7 @@ static void cell_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); + pipe_buffer_unmap( screen, surface->buffer ); } -- cgit v1.2.3 From f754b45befee2e1c6650ed7c7ad13c82fc7b598b Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 4 Sep 2008 11:14:59 +0900 Subject: i915: Add missing include. --- src/gallium/drivers/i915simple/i915_context.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 4d9f2f030d7..6dd3eda85dc 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -35,6 +35,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" #include "util/u_memory.h" #include "pipe/p_screen.h" -- cgit v1.2.3 From 1d7a213dad0fcc9aa16cdd36eff64d79ec37ae85 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Thu, 4 Sep 2008 11:15:08 +0900 Subject: softpipe: Add missing include. --- src/gallium/drivers/softpipe/sp_draw_arrays.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 54f7bfa88af..424bd568460 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" #include "sp_context.h" #include "sp_state.h" -- cgit v1.2.3 From dd82c06ca371bacfda77d0eece5e29adee47e058 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 4 Sep 2008 10:32:29 -0600 Subject: softpipe: use the new util_surface_copy/fill() functions --- src/gallium/drivers/softpipe/sp_surface.c | 122 +----------------------------- 1 file changed, 2 insertions(+), 120 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 389aceb27ce..6ade7326982 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -25,132 +25,14 @@ * **************************************************************************/ -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" -#include "util/u_tile.h" #include "util/u_rect.h" #include "sp_context.h" -#include "sp_surface.h" -/** - * Copy a rectangular region from one surface to another. - * Surfaces must have same bpp. - * - * Note that it's always the case that Y=0=top of the raster. - * If do_flip is non-zero, the region being copied will be flipped vertically. - * - * Assumes all values are within bounds -- no checking at this level - - * do it higher up if required. - */ -static void -sp_surface_copy(struct pipe_context *pipe, - boolean do_flip, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, unsigned width, unsigned height) -{ - void *dst_map = pipe->screen->surface_map( pipe->screen, - dst, - PIPE_BUFFER_USAGE_CPU_WRITE ); - - const void *src_map = pipe->screen->surface_map( pipe->screen, - src, - PIPE_BUFFER_USAGE_CPU_READ ); - - assert(dst->block.size == src->block.size); - assert(dst->block.width == src->block.width); - assert(dst->block.height == src->block.height); - assert(src_map); - assert(dst_map); - - /* If do_flip, invert src_y position and pass negative src stride */ - pipe_copy_rect(dst_map, - &dst->block, - dst->stride, - dstx, dsty, - width, height, - src_map, - do_flip ? -(int) src->stride : src->stride, - srcx, srcy); - - pipe->screen->surface_unmap(pipe->screen, src); - pipe->screen->surface_unmap(pipe->screen, dst); -} - - -static void * -get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) -{ - return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size; -} - - -#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) - - -/** - * Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -static void -sp_surface_fill(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, unsigned value) -{ - unsigned i, j; - void *dst_map = pipe->screen->surface_map( pipe->screen, - dst, - PIPE_BUFFER_USAGE_CPU_WRITE ); - - assert(dst->stride > 0); - - - switch (dst->block.size) { - case 1: - case 2: - case 4: - pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); - break; - case 8: - { - /* expand the 4-byte clear value to an 8-byte value */ - ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); - ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); - ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); - ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); - ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); - val0 = (val0 << 8) | val0; - val1 = (val1 << 8) | val1; - val2 = (val2 << 8) | val2; - val3 = (val3 << 8) | val3; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - row[j*4+0] = val0; - row[j*4+1] = val1; - row[j*4+2] = val2; - row[j*4+3] = val3; - } - row += dst->stride/2; - } - } - break; - default: - assert(0); - break; - } - - pipe->screen->surface_unmap(pipe->screen, dst); -} - - void sp_init_surface_functions(struct softpipe_context *sp) { - sp->pipe.surface_copy = sp_surface_copy; - sp->pipe.surface_fill = sp_surface_fill; + sp->pipe.surface_copy = util_surface_copy; + sp->pipe.surface_fill = util_surface_fill; } -- cgit v1.2.3 From 93ad2d5a5e812e3a80f8157b4371df2f17510621 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 4 Sep 2008 10:37:30 -0600 Subject: cell: include p_inlines.h --- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 41c85ecd6c8..9736f445cbf 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" #include "cell_context.h" #include "cell_draw_arrays.h" -- cgit v1.2.3 From 27ae1bcabfd441c13f52f96a72f54ea70452781c Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 4 Sep 2008 10:38:00 -0600 Subject: cell: use util_surface_copy/fill() --- src/gallium/drivers/cell/ppu/cell_surface.c | 99 +---------------------------- 1 file changed, 2 insertions(+), 97 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 995ae341d70..732c64082ef 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -25,108 +25,13 @@ * **************************************************************************/ -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" -#include "util/u_memory.h" #include "util/u_rect.h" -#include "util/u_tile.h" - #include "cell_context.h" -#include "cell_surface.h" - - -static void -cell_surface_copy(struct pipe_context *pipe, - boolean do_flip, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - assert( dst->format == src->format ); - - pipe_copy_rect(pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE), - &dst->block, - dst->stride, - dstx, dsty, - width, height, - pipe_surface_map(src, PIPE_BUFFER_USAGE_CPU_READ), - do_flip ? -src->stride : src->stride, - srcx, do_flip ? height - 1 - srcy : srcy); - - pipe_surface_unmap(src); - pipe_surface_unmap(dst); -} - - -static void * -get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) -{ - return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size; -} - - -#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) - - -/** - * Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -static void -cell_surface_fill(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, unsigned value) -{ - unsigned i, j; - void *dst_map = pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE); - - assert(dst->stride > 0); - - switch (dst->block.size) { - case 1: - case 2: - case 4: - pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); - break; - case 8: - { - /* expand the 4-byte clear value to an 8-byte value */ - ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); - ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); - ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); - ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); - ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); - val0 = (val0 << 8) | val0; - val1 = (val1 << 8) | val1; - val2 = (val2 << 8) | val2; - val3 = (val3 << 8) | val3; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - row[j*4+0] = val0; - row[j*4+1] = val1; - row[j*4+2] = val2; - row[j*4+3] = val3; - } - row += dst->stride/2; - } - } - break; - default: - assert(0); - break; - } - - pipe_surface_unmap( dst ); -} void cell_init_surface_functions(struct cell_context *cell) { - cell->pipe.surface_copy = cell_surface_copy; - cell->pipe.surface_fill = cell_surface_fill; + cell->pipe.surface_copy = util_surface_copy; + cell->pipe.surface_fill = util_surface_fill; } -- cgit v1.2.3 From dc248fc2881f4443bdc20a3b53b2ad24fee430ec Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 4 Sep 2008 12:36:20 -0600 Subject: cell: assorted comments, clean-ups, etc. --- src/gallium/drivers/cell/ppu/cell_batch.c | 17 ++++++++++++- src/gallium/drivers/cell/ppu/cell_clear.c | 6 +++++ src/gallium/drivers/cell/ppu/cell_context.h | 27 +++++++++++++++++---- src/gallium/drivers/cell/ppu/cell_flush.c | 9 ++++++- src/gallium/drivers/cell/ppu/cell_spu.c | 18 +++++++++++--- src/gallium/drivers/cell/ppu/cell_state_derived.c | 29 +++++++---------------- src/gallium/drivers/cell/ppu/cell_state_emit.c | 18 +++++++------- src/gallium/drivers/cell/ppu/cell_state_shader.c | 24 ++++++++++++++++++- src/gallium/drivers/cell/ppu/cell_texture.c | 1 + src/gallium/drivers/cell/ppu/cell_vbuf.c | 5 ++++ 10 files changed, 115 insertions(+), 39 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c index 5d007d4c8ce..a32a8c5633a 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -32,6 +32,13 @@ +/** + * Search the buffer pool for an empty/free buffer and return its index. + * Buffers are used for storing vertex data, state and commands which + * will be sent to the SPUs. + * If no empty buffers are available, wait for one. + * \return buffer index in [0, CELL_NUM_BUFFERS-1] + */ uint cell_get_empty_buffer(struct cell_context *cell) { @@ -74,6 +81,11 @@ cell_get_empty_buffer(struct cell_context *cell) } +/** + * Flush the current batch buffer to the SPUs. + * An empty buffer will be found and set as the new current batch buffer + * for subsequent commands/data. + */ void cell_batch_flush(struct cell_context *cell) { @@ -120,6 +132,9 @@ cell_batch_flush(struct cell_context *cell) } +/** + * Return the number of bytes free in the current batch buffer. + */ uint cell_batch_free_space(const struct cell_context *cell) { @@ -129,7 +144,7 @@ cell_batch_free_space(const struct cell_context *cell) /** - * Append data to current batch. + * Append data to the current batch buffer. * \param data address of block of bytes to append * \param bytes size of block of bytes */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index a421c95c8e8..207c96b9f6d 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -44,6 +44,9 @@ #include "cell_state.h" +/** + * Called via pipe->clear() + */ void cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) @@ -61,13 +64,16 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, PIPE_BUFFER_USAGE_GPU_WRITE); if (ps == cell->framebuffer.zsbuf) { + /* clear z/stencil buffer */ surfIndex = 1; } else { + /* clear color buffer */ surfIndex = 0; } + /* Build a CLEAR command and place it in the current batch buffer */ { struct cell_command_clear_surface *clr = (struct cell_command_clear_surface *) diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index f1d1ca89a97..9ca153d52f2 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -39,8 +39,13 @@ #include "rtasm/rtasm_ppc_spe.h" #include "tgsi/tgsi_scan.h" + struct cell_vbuf_render; + +/** + * Cell vertex shader state, subclass of pipe_shader_state. + */ struct cell_vertex_shader_state { struct pipe_shader_state shader; @@ -49,6 +54,9 @@ struct cell_vertex_shader_state }; +/** + * Cell fragment shader state, subclass of pipe_shader_state. + */ struct cell_fragment_shader_state { struct pipe_shader_state shader; @@ -57,7 +65,11 @@ struct cell_fragment_shader_state }; -struct cell_blend_state { +/** + * Cell blend state atom, subclass of pipe_blend_state. + */ +struct cell_blend_state +{ struct pipe_blend_state base; /** @@ -67,17 +79,24 @@ struct cell_blend_state { }; -struct cell_depth_stencil_alpha_state { - struct pipe_depth_stencil_alpha_state base; +/** + * Cell depth/stencil/alpha state atom, subclass of + * pipe_depth_stencil_alpha_state. + */ +struct cell_depth_stencil_alpha_state +{ + struct pipe_depth_stencil_alpha_state base; /** * Generated code to perform alpha, stencil, and depth testing on the SPE */ struct spe_function code; - }; +/** + * Per-context state, subclass of pipe_context. + */ struct cell_context { struct pipe_context pipe; diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index ff0d61764bc..6596b720101 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -34,6 +34,9 @@ #include "draw/draw_context.h" +/** + * Called via pipe->flush() + */ void cell_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) @@ -54,7 +57,11 @@ cell_flush(struct pipe_context *pipe, unsigned flags, } -/** internal flush */ +/** + * Cell internal flush function. Send the current batch buffer to all SPUs. + * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle. + * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero + */ void cell_flush_int(struct cell_context *cell, unsigned flags) { diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 973c0b1aa12..5e75f409a3b 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -26,6 +26,11 @@ **************************************************************************/ +/** + * Utility/wrappers for communicating with the SPUs. + */ + + #include <pthread.h> #include "cell_spu.h" @@ -74,7 +79,11 @@ wait_mbox_message(spe_context_ptr_t ctx) } -static void *cell_thread_function(void *arg) +/** + * Called by pthread_create() to spawn an SPU thread. + */ +static void * +cell_thread_function(void *arg) { struct cell_init_info *init = (struct cell_init_info *) arg; unsigned entry = SPE_DEFAULT_ENTRY; @@ -92,7 +101,10 @@ static void *cell_thread_function(void *arg) /** - * Create the SPU threads + * Create the SPU threads. This is done once during driver initialization. + * This involves setting the the "init" message which is sent to each SPU. + * The init message specifies an SPU id, total number of SPUs, location + * and number of batch buffers, etc. */ void cell_start_spus(struct cell_context *cell) @@ -100,7 +112,6 @@ cell_start_spus(struct cell_context *cell) static boolean one_time_init = FALSE; uint i, j; - if (one_time_init) { fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " "on Cell.\n"); @@ -145,6 +156,7 @@ cell_start_spus(struct cell_context *cell) /** * Tell all the SPUs to stop/exit. + * This is done when the driver's exiting / cleaning up. */ void cell_spu_exit(struct cell_context *cell) diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 8ab938a02aa..efc4f78364b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -35,21 +35,6 @@ #include "cell_state_emit.h" -static int -find_vs_output(const struct cell_vertex_shader_state *vs, - uint semantic_name, - uint semantic_index) -{ - uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == semantic_name && - vs->info.output_semantic_index[i] == semantic_index) - return i; - } - return -1; -} - - /** * Determine how to map vertex program outputs to fragment program inputs. * Basically, this will be used when computing the triangle interpolation @@ -58,7 +43,6 @@ find_vs_output(const struct cell_vertex_shader_state *vs, static void calculate_vertex_layout( struct cell_context *cell ) { - const struct cell_vertex_shader_state *vs = cell->vs; const struct cell_fragment_shader_state *fs = cell->fs; const enum interp_mode colorInterp = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; @@ -82,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell ) vinfo->num_attribs = 0; /* we always want to emit vertex pos */ - src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); @@ -98,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell ) break; case TGSI_SEMANTIC_COLOR: - src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, - fs->info.input_semantic_index[i]); + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, + fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); #if 1 if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ src = 0; @@ -116,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); @@ -163,6 +147,9 @@ compute_cliprect(struct cell_context *sp) +/** + * Update derived state, send current state to SPUs prior to rendering. + */ void cell_update_derived( struct cell_context *cell ) { if (cell->dirty & (CELL_NEW_RASTERIZER | diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 9d88c1cf3d2..f2feaa329ab 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -47,7 +47,10 @@ emit_state_cmd(struct cell_context *cell, uint cmd, } - +/** + * For state marked as 'dirty', construct a state-update command block + * and insert it into the current batch buffer. + */ void cell_emit_state(struct cell_context *cell) { @@ -90,7 +93,8 @@ cell_emit_state(struct cell_context *cell) blend.size = (char *) cell->blend->code.csr - (char *) cell->blend->code.store; blend.read_fb = TRUE; - } else { + } + else { blend.base = 0; blend.size = 0; blend.read_fb = FALSE; @@ -101,7 +105,6 @@ cell_emit_state(struct cell_context *cell) if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { struct cell_command_depth_stencil_alpha_test dsat; - if (cell->depth_stencil != NULL) { dsat.base = (intptr_t) cell->depth_stencil->code.store; @@ -109,15 +112,15 @@ cell_emit_state(struct cell_context *cell) - (char *) cell->depth_stencil->code.store; dsat.read_depth = TRUE; dsat.read_stencil = FALSE; - } else { + } + else { dsat.base = 0; dsat.size = 0; dsat.read_depth = FALSE; dsat.read_stencil = FALSE; } - emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, - sizeof(dsat)); + emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, sizeof(dsat)); } if (cell->dirty & CELL_NEW_SAMPLER) { @@ -170,7 +173,6 @@ cell_emit_state(struct cell_context *cell) info.immediates = (uintptr_t) draw->vs.machine.Imms; info.num_immediates = draw->vs.machine.ImmLimit / 4; - emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, - & info, sizeof(info)); + emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); } } diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 3d1b887da94..97e44eeb1a4 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -53,7 +53,10 @@ cell_vertex_shader_state(void *shader) } - +/** + * Create fragment shader state. + * Called via pipe->create_fs_state() + */ static void * cell_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) @@ -77,6 +80,9 @@ cell_create_fs_state(struct pipe_context *pipe, } +/** + * Called via pipe->bind_fs_state() + */ static void cell_bind_fs_state(struct pipe_context *pipe, void *fs) { @@ -88,6 +94,9 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs) } +/** + * Called via pipe->delete_fs_state() + */ static void cell_delete_fs_state(struct pipe_context *pipe, void *fs) { @@ -98,6 +107,10 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs) } +/** + * Create vertex shader state. + * Called via pipe->create_vs_state() + */ static void * cell_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) @@ -128,6 +141,9 @@ cell_create_vs_state(struct pipe_context *pipe, } +/** + * Called via pipe->bind_vs_state() + */ static void cell_bind_vs_state(struct pipe_context *pipe, void *vs) { @@ -142,6 +158,9 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs) } +/** + * Called via pipe->delete_vs_state() + */ static void cell_delete_vs_state(struct pipe_context *pipe, void *vs) { @@ -154,6 +173,9 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) } +/** + * Called via pipe->set_constant_buffer() + */ static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 34d002c3dc3..b6590dfb86e 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -372,6 +372,7 @@ cell_init_texture_functions(struct cell_context *cell) /*cell->pipe.texture_update = cell_texture_update;*/ } + void cell_init_screen_texture_funcs(struct pipe_screen *screen) { diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index 1bf0a24bccc..aa63435b934 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -26,6 +26,11 @@ **************************************************************************/ /** + * Vertex buffer code. The draw module transforms vertices to window + * coords, etc. and emits the vertices into buffer supplied by this module. + * When a vertex buffer is full, or we flush, we'll send the vertex data + * to the SPUs. + * * Authors * Brian Paul */ -- cgit v1.2.3 From 5cf2e226548f08c4b79a4eb289fd636a00079fb3 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 4 Sep 2008 18:36:22 -0600 Subject: cell: implement CELL_DEBUG env/options var Options so far: "checker" module tile clear color by SPU ID to see where the tiles are "sync" to do synchronous DMA (only partially implemented) --- src/gallium/drivers/cell/common.h | 4 ++ src/gallium/drivers/cell/ppu/cell_context.c | 13 ++++++ src/gallium/drivers/cell/ppu/cell_context.h | 2 + src/gallium/drivers/cell/ppu/cell_spu.c | 1 + src/gallium/drivers/cell/spu/spu_main.c | 71 +++++++++++++++++++---------- 5 files changed, 66 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 6bace0bb11a..c0ca201e1d1 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -106,6 +106,9 @@ #define CELL_BUFFER_STATUS_USED 20 +#define CELL_DEBUG_CHECKER (1 << 0) +#define CELL_DEBUG_SYNC (1 << 1) + /** */ @@ -263,6 +266,7 @@ struct cell_init_info { unsigned id; unsigned num_spus; + unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ struct cell_command *cmd; /** Buffers for command batches, vertex/index data */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 9ff4e86943b..c8828e644c9 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -85,6 +85,15 @@ cell_draw_create(struct cell_context *cell) } +#ifdef DEBUG +static const struct debug_named_value cell_debug_flags[] = { + {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ + {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ + {NULL, 0} +}; +#endif + + struct pipe_context * cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws) @@ -136,6 +145,10 @@ cell_create_context(struct pipe_screen *screen, draw_wide_point_threshold(cell->draw, 0.0); draw_wide_line_threshold(cell->draw, 0.0); + cell->debug_flags = debug_get_flags_option("CELL_DEBUG", + cell_debug_flags, + 0 ); + /* * SPU stuff */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 9ca153d52f2..8cec9f45b2e 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -163,6 +163,8 @@ struct cell_context struct spe_function attrib_fetch; unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; + + unsigned debug_flags; }; diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 5e75f409a3b..2df90fdcb77 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -131,6 +131,7 @@ cell_start_spus(struct cell_context *cell) for (i = 0; i < cell->num_spus; i++) { cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; + cell_global.inits[i].debug_flags = cell->debug_flags; cell_global.inits[i].cmd = &cell_global.command[i]; for (j = 0; j < CELL_NUM_BUFFERS; j++) { cell_global.inits[i].buffers[j] = cell->buffer[j]; diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index e04ffeb9b16..0d4cdfae85a 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -136,54 +136,75 @@ really_clear_tiles(uint surfaceIndex) static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - if (Debug) printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, clear->surface, clear->value); -#define CLEAR_OPT 1 -#if CLEAR_OPT - /* set all tile's status to CLEAR */ if (clear->surface == 0) { - memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); spu.fb.color_clear_value = clear->value; + if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { + uint x = (spu.init.id << 4) | (spu.init.id << 12) | + (spu.init.id << 20) | (spu.init.id << 28); + spu.fb.color_clear_value ^= x; + } } else { - memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); spu.fb.depth_clear_value = clear->value; } - return; -#endif +#define CLEAR_OPT 1 +#if CLEAR_OPT + + /* Simply set all tiles' status to CLEAR. + * When we actually begin rendering into a tile, we'll initialize it to + * the clear value. If any tiles go untouched during the frame, + * really_clear_tiles() will set them to the clear value. + */ if (clear->surface == 0) { - spu.fb.color_clear_value = clear->value; - clear_c_tile(&spu.ctile); + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); } else { - spu.fb.depth_clear_value = clear->value; - clear_z_tile(&spu.ztile); + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); } +#else + + /* + * This path clears the whole framebuffer to the clear color right now. + */ + /* printf("SPU: %s num=%d w=%d h=%d\n", __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); */ - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (clear->surface == 0) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - else - put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); - /* XXX we don't want this here, but it fixes bad tile results */ + /* init a single tile to the clear value */ + if (clear->surface == 0) { + clear_c_tile(&spu.ctile); + } + else { + clear_z_tile(&spu.ztile); } -#if 0 - wait_on_mask(1 << TAG_SURFACE_CLEAR); -#endif + /* walk over my tiles, writing the 'clear' tile's data */ + { + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (clear->surface == 0) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + else + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + } + } + + if (spu.init.debug_flags & CELL_DEBUG_SYNC) { + wait_on_mask(1 << TAG_SURFACE_CLEAR); + } + +#endif /* CLEAR_OPT */ if (Debug) printf("SPU %u: CLEAR SURF done\n", spu.init.id); -- cgit v1.2.3 From 2ebeab0422e6f492e9f40eebf2be92067de9d6f1 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 4 Sep 2008 19:00:56 -0600 Subject: cell: more cell_init_*_functions() --- src/gallium/drivers/cell/ppu/cell_context.c | 11 ++------ src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 33 ++++++++++++++++-------- src/gallium/drivers/cell/ppu/cell_draw_arrays.h | 20 +------------- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 5 ++-- src/gallium/drivers/cell/ppu/cell_state.h | 16 +++++------- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 12 +++++++-- 6 files changed, 45 insertions(+), 52 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index c8828e644c9..e840b7fa88f 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -113,15 +113,6 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.screen = screen; cell->pipe.destroy = cell_destroy_context; - /* state setters */ - cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; - cell->pipe.set_vertex_elements = cell_set_vertex_elements; - - cell->pipe.draw_arrays = cell_draw_arrays; - cell->pipe.draw_elements = cell_draw_elements; - cell->pipe.draw_range_elements = cell_draw_range_elements; - cell->pipe.set_edgeflags = cell_set_edgeflags; - cell->pipe.clear = cell_clear_surface; cell->pipe.flush = cell_flush; @@ -131,10 +122,12 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.wait_query = cell_wait_query; #endif + cell_init_draw_functions(cell); cell_init_state_functions(cell); cell_init_shader_functions(cell); cell_init_surface_functions(cell); cell_init_texture_functions(cell); + cell_init_vertex_functions(cell); cell->draw = cell_draw_create(cell); diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 9736f445cbf..880d5353207 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -77,14 +77,6 @@ cell_unmap_constant_buffers(struct cell_context *sp) } -boolean -cell_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - return cell_draw_elements(pipe, NULL, 0, mode, start, count); -} - - /** * Draw vertex arrays, with optional indexing. @@ -93,7 +85,7 @@ cell_draw_arrays(struct pipe_context *pipe, unsigned mode, * * XXX should the element buffer be specified/bound with a separate function? */ -boolean +static boolean cell_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -158,7 +150,7 @@ cell_draw_range_elements(struct pipe_context *pipe, } -boolean +static boolean cell_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -171,10 +163,29 @@ cell_draw_elements(struct pipe_context *pipe, } +static boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return cell_draw_elements(pipe, NULL, 0, mode, start, count); +} + -void +static void cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) { struct cell_context *cell = cell_context(pipe); draw_set_edgeflags(cell->draw, edgeflags); } + + + +void +cell_init_draw_functions(struct cell_context *cell) +{ + cell->pipe.draw_arrays = cell_draw_arrays; + cell->pipe.draw_elements = cell_draw_elements; + cell->pipe.draw_range_elements = cell_draw_range_elements; + cell->pipe.set_edgeflags = cell_set_edgeflags; +} + diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h index cd35ec17b4e..148873aa675 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -29,26 +29,8 @@ #define CELL_DRAW_ARRAYS_H -extern boolean -cell_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -extern boolean -cell_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); - -extern boolean -cell_draw_range_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count); - extern void -cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); +cell_init_draw_functions(struct cell_context *cell); #endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index a11ffac3932..e04cf5f274a 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -131,8 +131,9 @@ cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) } -static void cell_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) +static void +cell_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) { struct cell_context *cell = cell_context(pipe); diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index 82580ea35ab..a7771a55a31 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -48,19 +48,17 @@ #define CELL_NEW_VERTEX_INFO 0x8000 -void cell_set_vertex_elements(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); +extern void +cell_update_derived( struct cell_context *softpipe ); -void cell_set_vertex_buffers(struct pipe_context *, - unsigned count, - const struct pipe_vertex_buffer *); -void cell_update_derived( struct cell_context *softpipe ); +extern void +cell_init_shader_functions(struct cell_context *cell); -void -cell_init_shader_functions(struct cell_context *cell); +extern void +cell_init_vertex_functions(struct cell_context *cell); + #endif /* CELL_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 114684c2a33..fbe55c84721 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -35,7 +35,7 @@ #include "draw/draw_context.h" -void +static void cell_set_vertex_elements(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *elements) @@ -53,7 +53,7 @@ cell_set_vertex_elements(struct pipe_context *pipe, } -void +static void cell_set_vertex_buffers(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_buffer *buffers) @@ -69,3 +69,11 @@ cell_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(cell->draw, count, buffers); } + + +void +cell_init_vertex_functions(struct cell_context *cell) +{ + cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; + cell->pipe.set_vertex_elements = cell_set_vertex_elements; +} -- cgit v1.2.3 From c9cd0f46282fb2b42b238aa0c267ad0585783040 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 4 Sep 2008 19:09:50 -0600 Subject: cell: comments --- src/gallium/drivers/cell/ppu/cell_spu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 2df90fdcb77..9508227e298 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -45,6 +45,9 @@ helpful headers: */ +/** + * Cell/SPU info that's not per-context. + */ struct cell_global_info cell_global; @@ -149,8 +152,10 @@ cell_start_spus(struct cell_context *cell) exit(1); } - pthread_create(&cell_global.spe_threads[i], NULL, &cell_thread_function, - &cell_global.inits[i]); + pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ + NULL, /* pthread attribs */ + &cell_thread_function, /* start routine */ + &cell_global.inits[i]); /* thread argument */ } } -- cgit v1.2.3 From 2b53512073c0ecad66cfe1415cd19079ef2d4634 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 4 Sep 2008 19:10:05 -0600 Subject: cell: move batch buffer init code --- src/gallium/drivers/cell/ppu/cell_batch.c | 25 +++++++++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_batch.h | 3 +++ src/gallium/drivers/cell/ppu/cell_context.c | 20 ++++---------------- 3 files changed, 32 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c index a32a8c5633a..16882c01295 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -240,3 +240,28 @@ cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, return pos; } + + +/** + * One-time init of batch buffers. + */ +void +cell_init_batch_buffers(struct cell_context *cell) +{ + uint spu, buf; + + /* init command, vertex/index buffer info */ + for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { + cell->buffer_size[buf] = 0; + + /* init batch buffer status values, + * mark 0th buffer as used, rest as free. + */ + for (spu = 0; spu < cell->num_spus; spu++) { + if (buf == 0) + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + else + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; + } + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h index a6eee0a8b18..f74dd600791 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.h +++ b/src/gallium/drivers/cell/ppu/cell_batch.h @@ -54,5 +54,8 @@ extern void * cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, uint alignment); +extern void +cell_init_batch_buffers(struct cell_context *cell); + #endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index e840b7fa88f..71f1a3049d1 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -43,11 +43,11 @@ #include "draw/draw_private.h" #include "cell/common.h" +#include "cell_batch.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" #include "cell_flush.h" -#include "cell_render.h" #include "cell_state.h" #include "cell_surface.h" #include "cell_spu.h" @@ -99,7 +99,6 @@ cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws) { struct cell_context *cell; - uint spu, buf; /* some fields need to be 16-byte aligned, so align the whole object */ cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); @@ -132,12 +131,14 @@ cell_create_context(struct pipe_screen *screen, cell->draw = cell_draw_create(cell); cell_init_vbuf(cell); + draw_set_rasterize_stage(cell->draw, cell->vbuf); /* convert all points/lines to tris for the time being */ draw_wide_point_threshold(cell->draw, 0.0); draw_wide_line_threshold(cell->draw, 0.0); + /* get env vars or read config file to get debug flags */ cell->debug_flags = debug_get_flags_option("CELL_DEBUG", cell_debug_flags, 0 ); @@ -152,20 +153,7 @@ cell_create_context(struct pipe_screen *screen, cell_start_spus(cell); - /* init command, vertex/index buffer info */ - for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { - cell->buffer_size[buf] = 0; - - /* init batch buffer status values, - * mark 0th buffer as used, rest as free. - */ - for (spu = 0; spu < cell->num_spus; spu++) { - if (buf == 0) - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; - else - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; - } - } + cell_init_batch_buffers(cell); return &cell->pipe; } -- cgit v1.2.3 From a1886d539158fcc692d0d45985465be8e4bbe077 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 5 Sep 2008 10:11:22 -0600 Subject: softpipe: convert clear color to surface format if needed --- src/gallium/drivers/softpipe/sp_clear.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index 12367068917..dfa46c9fb70 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -31,6 +31,7 @@ #include "pipe/p_defines.h" +#include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_surface.h" @@ -38,9 +39,29 @@ #include "sp_tile_cache.h" +/** + * Convert packed pixel from one format to another. + */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, + enum pipe_format dstFormat) +{ + ubyte r, g, b, a; + unsigned dstColor; + + util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); + util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + + return dstColor; +} + + + /** * Clear the given surface to the specified value. * No masking, no scissor (clear entire buffer). + * Note: when clearing a color buffer, the clearValue is always + * encoded as PIPE_FORMAT_A8R8G8B8_UNORM. */ void softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, @@ -66,7 +87,15 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) { - sp_tile_cache_clear(softpipe->cbuf_cache[i], clearValue); + unsigned cv; + if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { + cv = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, + ps->format); + } + else { + cv = clearValue; + } + sp_tile_cache_clear(softpipe->cbuf_cache[i], cv); softpipe->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_CLEAR; } } -- cgit v1.2.3 From a267934b23aca1c39d228c23392862f8068c2968 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 5 Sep 2008 10:16:27 -0600 Subject: cell: convert clear color if needed --- src/gallium/drivers/cell/ppu/cell_clear.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 207c96b9f6d..c9c0c721bbe 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -35,6 +35,7 @@ #include <stdint.h> #include "pipe/p_inlines.h" #include "util/u_memory.h" +#include "util/u_pack_color.h" #include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" @@ -44,6 +45,24 @@ #include "cell_state.h" +/** + * Convert packed pixel from one format to another. + */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, + enum pipe_format dstFormat) +{ + ubyte r, g, b, a; + unsigned dstColor; + + util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); + util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + + return dstColor; +} + + + /** * Called via pipe->clear() */ @@ -70,6 +89,11 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, else { /* clear color buffer */ surfIndex = 0; + + if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { + clearValue = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, + ps->format); + } } -- cgit v1.2.3 From 8af4794afcfa04351d4131d826daeb1312634f82 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 5 Sep 2008 10:18:00 -0600 Subject: cell: code clean-up, comments --- src/gallium/drivers/cell/spu/spu_main.c | 67 +++++++++++++++++++-------------- src/gallium/drivers/cell/spu/spu_main.h | 8 ++-- 2 files changed, 43 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 0d4cdfae85a..d223f32d941 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -55,6 +55,10 @@ struct spu_global spu; struct spu_vs_context draw; + +/** + * Buffers containing dynamically generated SPU code: + */ static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] ALIGN16_ATTRIB; @@ -332,6 +336,21 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat } +static void +cmd_state_logicop(const struct cell_command_logicop * code) +{ + mfc_get(logicop_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + spu.logicop = (logicop_func) logicop_code_buffer; +} + + static void cmd_state_sampler(const struct cell_command_sampler *sampler) { @@ -401,6 +420,21 @@ cmd_state_vs_array_info(const struct cell_array_info *vs_info) } +static void +cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) +{ + mfc_get(attribute_fetch_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + draw.vertex_fetch.code = attribute_fetch_code_buffer; +} + + static void cmd_finish(void) { @@ -539,38 +573,15 @@ cmd_batch(uint opcode) (struct cell_shader_info *) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); break; - case CELL_CMD_STATE_ATTRIB_FETCH: { - struct cell_attribute_fetch_code *code = - (struct cell_attribute_fetch_code *) &buffer[pos+1]; - - mfc_get(attribute_fetch_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - draw.vertex_fetch.code = attribute_fetch_code_buffer; + case CELL_CMD_STATE_ATTRIB_FETCH: + cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) + &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); break; - } - case CELL_CMD_STATE_LOGICOP: { - struct cell_command_logicop *code = - (struct cell_command_logicop *) &buffer[pos+1]; - - mfc_get(logicop_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - spu.logicop = (logicop_func) logicop_code_buffer; + case CELL_CMD_STATE_LOGICOP: + cmd_state_logicop((struct cell_command_logicop *) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8); break; - } case CELL_CMD_FLUSH_BUFFER_RANGE: { struct cell_buffer_range *br = (struct cell_buffer_range *) &buffer[pos+1]; diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index e962e1426c6..4879f8c9c8d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -124,13 +124,13 @@ struct spu_global struct spu_framebuffer fb; boolean read_depth; boolean read_stencil; - frag_test_func frag_test; + frag_test_func frag_test; /**< Current depth/stencil test code */ - boolean read_fb; - blend_func blend; + boolean read_fb; /**< Does current blend mode require framebuffer read? */ + blend_func blend; /**< Current blend code */ qword const_blend_color[4] ALIGN16_ATTRIB; - logicop_func logicop; + logicop_func logicop; /**< Current logicop code **/ struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct spu_texture texture[PIPE_MAX_SAMPLERS]; -- cgit v1.2.3 From 2444c0c81acae9e2162a20002f8f72335133ead0 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 8 Sep 2008 11:09:48 +0900 Subject: trace: Use util's stream. --- src/gallium/drivers/trace/SConscript | 2 - src/gallium/drivers/trace/tr_dump.c | 12 +- src/gallium/drivers/trace/tr_stream.h | 59 ---------- src/gallium/drivers/trace/tr_stream_stdc.c | 104 ---------------- src/gallium/drivers/trace/tr_stream_wd.c | 183 ----------------------------- 5 files changed, 6 insertions(+), 354 deletions(-) delete mode 100644 src/gallium/drivers/trace/tr_stream.h delete mode 100644 src/gallium/drivers/trace/tr_stream_stdc.c delete mode 100644 src/gallium/drivers/trace/tr_stream_wd.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript index 5c49468c4eb..0a6bfb8f4c7 100644 --- a/src/gallium/drivers/trace/SConscript +++ b/src/gallium/drivers/trace/SConscript @@ -9,8 +9,6 @@ trace = env.ConvenienceLibrary( 'tr_dump.c', 'tr_screen.c', 'tr_state.c', - 'tr_stream_stdc.c', - 'tr_stream_wd.c', 'tr_texture.c', 'tr_winsys.c', ]) diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 48032c1617f..0a42aacaecc 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -48,12 +48,12 @@ #include "pipe/p_debug.h" #include "util/u_memory.h" #include "util/u_string.h" +#include "util/u_stream.h" -#include "tr_stream.h" #include "tr_dump.h" -static struct trace_stream *stream = NULL; +static struct util_stream *stream = NULL; static unsigned refcount = 0; @@ -61,7 +61,7 @@ static INLINE void trace_dump_write(const char *buf, size_t size) { if(stream) - trace_stream_write(stream, buf, size); + util_stream_write(stream, buf, size); } @@ -212,7 +212,7 @@ trace_dump_trace_close(void) { if(stream) { trace_dump_writes("</trace>\n"); - trace_stream_close(stream); + util_stream_close(stream); stream = NULL; refcount = 0; } @@ -228,7 +228,7 @@ boolean trace_dump_trace_begin() if(!stream) { - stream = trace_stream_create(filename); + stream = util_stream_create(filename); if(!stream) return FALSE; @@ -272,7 +272,7 @@ void trace_dump_call_end(void) trace_dump_indent(1); trace_dump_tag_end("call"); trace_dump_newline(); - trace_stream_flush(stream); + util_stream_flush(stream); } void trace_dump_arg_begin(const char *name) diff --git a/src/gallium/drivers/trace/tr_stream.h b/src/gallium/drivers/trace/tr_stream.h deleted file mode 100644 index 6111174d6a8..00000000000 --- a/src/gallium/drivers/trace/tr_stream.h +++ /dev/null @@ -1,59 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Cross-platform sequential access stream abstraction. - * - * These are really general purpose file access functions, and might one day - * be moved into the util module. - */ - -#ifndef TR_STREAM_H -#define TR_STREAM_H - - -#include "pipe/p_compiler.h" - - -struct trace_stream; - - -struct trace_stream * -trace_stream_create(const char *filename); - -boolean -trace_stream_write(struct trace_stream *stream, const void *data, size_t size); - -void -trace_stream_flush(struct trace_stream *stream); - -void -trace_stream_close(struct trace_stream *stream); - - -#endif /* TR_STREAM_H */ diff --git a/src/gallium/drivers/trace/tr_stream_stdc.c b/src/gallium/drivers/trace/tr_stream_stdc.c deleted file mode 100644 index 4c19ec0b243..00000000000 --- a/src/gallium/drivers/trace/tr_stream_stdc.c +++ /dev/null @@ -1,104 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Stream implementation based on the Standard C Library. - */ - -#include "pipe/p_config.h" - -#if defined(PIPE_OS_LINUX) - -#include <stdio.h> - -#include "util/u_memory.h" - -#include "tr_stream.h" - - -struct trace_stream -{ - FILE *file; -}; - - -struct trace_stream * -trace_stream_create(const char *filename) -{ - struct trace_stream *stream; - - stream = CALLOC_STRUCT(trace_stream); - if(!stream) - goto error1; - - stream->file = fopen(filename, "w"); - if(!stream->file) - goto error2; - - return stream; - -error2: - FREE(stream); -error1: - return NULL; -} - - -boolean -trace_stream_write(struct trace_stream *stream, const void *data, size_t size) -{ - if(!stream) - return FALSE; - - return fwrite(data, size, 1, stream->file) == size ? TRUE : FALSE; -} - - -void -trace_stream_flush(struct trace_stream *stream) -{ - if(!stream) - return; - - fflush(stream->file); -} - - -void -trace_stream_close(struct trace_stream *stream) -{ - if(!stream) - return; - - fclose(stream->file); - - FREE(stream); -} - - -#endif diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c deleted file mode 100644 index 704eb15bd71..00000000000 --- a/src/gallium/drivers/trace/tr_stream_wd.c +++ /dev/null @@ -1,183 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Stream implementation for the Windows Display driver. - */ - -#include "pipe/p_config.h" - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - -#include <windows.h> -#include <winddi.h> - -#include "util/u_memory.h" -#include "util/u_string.h" - -#include "tr_stream.h" - - -#define MAP_FILE_SIZE (4*1024*1024) - - -struct trace_stream -{ - char filename[MAX_PATH + 1]; - WCHAR wFileName[MAX_PATH + 1]; - ULONG_PTR iFile; - char *pMap; - size_t written; - unsigned suffix; -}; - - -static INLINE boolean -trace_stream_map(struct trace_stream *stream) -{ - ULONG BytesInUnicodeString; - static char filename[MAX_PATH + 1]; - unsigned filename_len; - - filename_len = util_snprintf(filename, - sizeof(filename), - "\\??\\%s.%04x", - stream->filename, - stream->suffix++); - - EngMultiByteToUnicodeN( - stream->wFileName, - sizeof(stream->wFileName), - &BytesInUnicodeString, - filename, - filename_len); - - stream->pMap = EngMapFile(stream->wFileName, MAP_FILE_SIZE, &stream->iFile); - if(!stream->pMap) - return FALSE; - - memset(stream->pMap, 0, MAP_FILE_SIZE); - stream->written = 0; - - return TRUE; -} - - -static INLINE void -trace_stream_unmap(struct trace_stream *stream) -{ - EngUnmapFile(stream->iFile); - if(stream->written < MAP_FILE_SIZE) { - /* Truncate file size */ - stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile); - if(stream->pMap) - EngUnmapFile(stream->iFile); - } - - stream->pMap = NULL; -} - - -struct trace_stream * -trace_stream_create(const char *filename) -{ - struct trace_stream *stream; - - stream = CALLOC_STRUCT(trace_stream); - if(!stream) - goto error1; - - strncpy(stream->filename, filename, sizeof(stream->filename)); - - if(!trace_stream_map(stream)) - goto error2; - - return stream; - -error2: - FREE(stream); -error1: - return NULL; -} - - -static INLINE void -trace_stream_copy(struct trace_stream *stream, const char *data, size_t size) -{ - assert(stream->written + size <= MAP_FILE_SIZE); - memcpy(stream->pMap + stream->written, data, size); - stream->written += size; -} - - -boolean -trace_stream_write(struct trace_stream *stream, const void *data, size_t size) -{ - if(!stream) - return FALSE; - - if(!stream->pMap) - return FALSE; - - while(stream->written + size > MAP_FILE_SIZE) { - size_t step = MAP_FILE_SIZE - stream->written; - trace_stream_copy(stream, data, step); - data = (const char *)data + step; - size -= step; - - trace_stream_unmap(stream); - if(!trace_stream_map(stream)) - return FALSE; - } - - trace_stream_copy(stream, data, size); - - return TRUE; -} - - -void -trace_stream_flush(struct trace_stream *stream) -{ - (void)stream; -} - - -void -trace_stream_close(struct trace_stream *stream) -{ - if(!stream) - return; - - trace_stream_unmap(stream); - - FREE(stream); -} - - -#endif -- cgit v1.2.3 From d25611ede005adddfd9c004b037d9202d94df69e Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Mon, 8 Sep 2008 22:57:01 +0900 Subject: trace: Request a growable file. --- src/gallium/drivers/trace/tr_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 0a42aacaecc..a0ead0ded33 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -228,7 +228,7 @@ boolean trace_dump_trace_begin() if(!stream) { - stream = util_stream_create(filename); + stream = util_stream_create(filename, 0); if(!stream) return FALSE; -- cgit v1.2.3 From ebe6160d7c9ccbddd8b1cc4b0e25b3d61c54293d Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@ubuntu-vbox.(none)> Date: Fri, 5 Sep 2008 23:21:08 +0200 Subject: softpipe: First attempts at multithreaded softpipe. Configured for 2 cores. --- src/gallium/drivers/softpipe/sp_context.c | 49 ++++---- src/gallium/drivers/softpipe/sp_context.h | 10 +- src/gallium/drivers/softpipe/sp_quad.c | 102 ++++++++------- src/gallium/drivers/softpipe/sp_setup.c | 200 +++++++++++++++++++++++++----- 4 files changed, 254 insertions(+), 107 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 6f12390cf79..cd1e6663d86 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -92,17 +92,19 @@ static void softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); - softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); - softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); - softpipe->quad.coverage->destroy( softpipe->quad.coverage ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); - softpipe->quad.colormask->destroy( softpipe->quad.colormask ); - softpipe->quad.output->destroy( softpipe->quad.output ); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple ); + softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz ); + softpipe->quad[i].shade->destroy( softpipe->quad[i].shade ); + softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test ); + softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test ); + softpipe->quad[i].stencil_test->destroy( softpipe->quad[i].stencil_test ); + softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion ); + softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage ); + softpipe->quad[i].blend->destroy( softpipe->quad[i].blend ); + softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask ); + softpipe->quad[i].output->destroy( softpipe->quad[i].output ); + } for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) sp_destroy_tile_cache(softpipe->cbuf_cache[i]); @@ -205,17 +207,19 @@ softpipe_create( struct pipe_screen *screen, /* setup quad rendering stages */ - softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); - softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); - softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); - softpipe->quad.blend = sp_quad_blend_stage(softpipe); - softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); - softpipe->quad.output = sp_quad_output_stage(softpipe); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad[i].shade = sp_quad_shade_stage(softpipe); + softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad[i].blend = sp_quad_blend_stage(softpipe); + softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad[i].output = sp_quad_output_stage(softpipe); + } /* * Create drawing context and plug our rendering stage into it. @@ -257,3 +261,4 @@ softpipe_create( struct pipe_screen *screen, softpipe_destroy(&softpipe->pipe); return NULL; } + diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 078886f93c9..e58f7c8b6fd 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -45,6 +45,10 @@ */ #define USE_DRAW_STAGE_PSTIPPLE 1 +/* Number of threads working on individual quads. + * Setting to 1 disables this feature. + */ +#define SP_NUM_QUAD_THREADS 2 struct softpipe_winsys; struct softpipe_vbuf_render; @@ -133,7 +137,7 @@ struct softpipe_context { struct quad_stage *output; struct quad_stage *first; /**< points to one of the above stages */ - } quad; + } quad[SP_NUM_QUAD_THREADS]; /** The primitive drawing context */ struct draw_context *draw; @@ -151,13 +155,11 @@ struct softpipe_context { }; - - static INLINE struct softpipe_context * softpipe_context( struct pipe_context *pipe ) { return (struct softpipe_context *)pipe; } - #endif /* SP_CONTEXT_H */ + diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index bc83d78ea16..892ef87ee9f 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -33,29 +33,33 @@ static void sp_push_quad_first( struct softpipe_context *sp, - struct quad_stage *quad ) + struct quad_stage *quad, + uint i ) { - quad->next = sp->quad.first; - sp->quad.first = quad; + quad->next = sp->quad[i].first; + sp->quad[i].first = quad; } static void sp_build_depth_stencil( - struct softpipe_context *sp ) + struct softpipe_context *sp, + uint i ) { if (sp->depth_stencil->stencil[0].enabled || sp->depth_stencil->stencil[1].enabled) { - sp_push_quad_first( sp, sp->quad.stencil_test ); + sp_push_quad_first( sp, sp->quad[i].stencil_test, i ); } else if (sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf) { - sp_push_quad_first( sp, sp->quad.depth_test ); + sp_push_quad_first( sp, sp->quad[i].depth_test, i ); } } void sp_build_quad_pipeline(struct softpipe_context *sp) { + uint i; + boolean early_depth_test = sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && @@ -64,49 +68,51 @@ sp_build_quad_pipeline(struct softpipe_context *sp) !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ - - sp->quad.first = sp->quad.output; - - if (sp->blend->colormask != 0xf) { - sp_push_quad_first( sp, sp->quad.colormask ); - } - - if (sp->blend->blend_enable || - sp->blend->logicop_enable) { - sp_push_quad_first( sp, sp->quad.blend ); - } - - if (sp->depth_stencil->depth.occlusion_count) { - sp_push_quad_first( sp, sp->quad.occlusion ); - } - - if (sp->rasterizer->poly_smooth || - sp->rasterizer->line_smooth || - sp->rasterizer->point_smooth) { - sp_push_quad_first( sp, sp->quad.coverage ); - } - - if (!early_depth_test) { - sp_build_depth_stencil( sp ); - } - - if (sp->depth_stencil->alpha.enabled) { - sp_push_quad_first( sp, sp->quad.alpha_test ); - } - - /* XXX always enable shader? */ - if (1) { - sp_push_quad_first( sp, sp->quad.shade ); - } - - if (early_depth_test) { - sp_build_depth_stencil( sp ); - sp_push_quad_first( sp, sp->quad.earlyz ); - } + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + sp->quad[i].first = sp->quad[i].output; + + if (sp->blend->colormask != 0xf) { + sp_push_quad_first( sp, sp->quad[i].colormask, i ); + } + + if (sp->blend->blend_enable || + sp->blend->logicop_enable) { + sp_push_quad_first( sp, sp->quad[i].blend, i ); + } + + if (sp->depth_stencil->depth.occlusion_count) { + sp_push_quad_first( sp, sp->quad[i].occlusion, i ); + } + + if (sp->rasterizer->poly_smooth || + sp->rasterizer->line_smooth || + sp->rasterizer->point_smooth) { + sp_push_quad_first( sp, sp->quad[i].coverage, i ); + } + + if (!early_depth_test) { + sp_build_depth_stencil( sp, i ); + } + + if (sp->depth_stencil->alpha.enabled) { + sp_push_quad_first( sp, sp->quad[i].alpha_test, i ); + } + + /* XXX always enable shader? */ + if (1) { + sp_push_quad_first( sp, sp->quad[i].shade, i ); + } + + if (early_depth_test) { + sp_build_depth_stencil( sp, i ); + sp_push_quad_first( sp, sp->quad[i].earlyz, i ); + } #if !USE_DRAW_STAGE_PSTIPPLE - if (sp->rasterizer->poly_stipple_enable) { - sp_push_quad_first( sp, sp->quad.polygon_stipple ); - } + if (sp->rasterizer->poly_stipple_enable) { + sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i ); + } #endif + } } + diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 87336ab6e31..706a412baf2 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -43,6 +43,7 @@ #include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -61,6 +62,51 @@ struct edge { int lines; /**< number of lines on this edge */ }; +#if SP_NUM_QUAD_THREADS > 1 + +struct thread_info +{ + struct setup_context *setup; + uint id; + pipe_thread handle; +}; + +struct quad_job; + +typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job ); + +struct quad_job +{ + int x, y; + unsigned mask; + struct quad_header quad; + quad_job_routine routine; +}; + +#define NUM_QUAD_JOBS 64 + +struct quad_job_que +{ + struct quad_job jobs[NUM_QUAD_JOBS]; + uint first; + uint last; + pipe_mutex mutex; +}; + +static void +add_quad_job( struct quad_job_que *que, int x, int y, unsigned mask, struct quad_header *quad, quad_job_routine routine ) +{ + while ((que->last + 1) % NUM_QUAD_JOBS == que->first) + usleep( 10 ); + que->jobs[que->last].x = x; + que->jobs[que->last].y = y; + que->jobs[que->last].mask = mask; + que->jobs[que->last].quad = *quad; + que->jobs[que->last].routine = routine; + que->last = (que->last + 1) % NUM_QUAD_JOBS; +} + +#endif /** * Triangle setup info (derived from draw_stage). @@ -88,6 +134,12 @@ struct setup_context { struct tgsi_interp_coef posCoef; /* For Z, W */ struct quad_header quad; +#if SP_NUM_QUAD_THREADS > 1 + struct quad_job_que que; + struct thread_info threads[SP_NUM_QUAD_THREADS]; + +#endif + struct { int left[2]; /**< [0] = row0, [1] = row1 */ int right[2]; @@ -104,7 +156,36 @@ struct setup_context { unsigned winding; /* which winding to cull */ }; +#if SP_NUM_QUAD_THREADS > 1 +static PIPE_THREAD_ROUTINE( quad_thread, param ) +{ + struct thread_info *info = (struct thread_info *) param; + + for (;;) { + struct quad_job *job; + + while (info->setup->que.last == info->setup->que.first) + usleep( 10 ); + pipe_mutex_lock( info->setup->que.mutex ); + job = &info->setup->que.jobs[info->setup->que.first]; + info->setup->que.first = (info->setup->que.first + 1) % NUM_QUAD_JOBS; + job->routine( info->setup, info->id, job ); + pipe_mutex_unlock( info->setup->que.mutex ); + } +} + +#define WAIT_FOR_COMPLETION(setup) \ + do {\ + while (setup->que.last != setup->que.first)\ + usleep( 10 );\ + } while (0) + +#else + +#define WAIT_FOR_COMPLETION(setup) ((void) 0) + +#endif /** * Test if x is NaN or +/- infinity. @@ -143,7 +224,7 @@ static boolean cull_tri( struct setup_context *setup, * Clip setup->quad against the scissor/surface bounds. */ static INLINE void -quad_clip(struct setup_context *setup) +quad_clip( struct setup_context *setup, struct quad_header *quad ) { const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; const int minx = (int) cliprect->minx; @@ -151,22 +232,22 @@ quad_clip(struct setup_context *setup) const int miny = (int) cliprect->miny; const int maxy = (int) cliprect->maxy; - if (setup->quad.x0 >= maxx || - setup->quad.y0 >= maxy || - setup->quad.x0 + 1 < minx || - setup->quad.y0 + 1 < miny) { + if (quad->x0 >= maxx || + quad->y0 >= maxy || + quad->x0 + 1 < minx || + quad->y0 + 1 < miny) { /* totally clipped */ - setup->quad.mask = 0x0; + quad->mask = 0x0; return; } - if (setup->quad.x0 < minx) - setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup->quad.y0 < miny) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup->quad.x0 == maxx - 1) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup->quad.y0 == maxy - 1) - setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + if (quad->x0 < minx) + quad->mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (quad->y0 < miny) + quad->mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (quad->x0 == maxx - 1) + quad->mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (quad->y0 == maxy - 1) + quad->mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); } @@ -174,35 +255,52 @@ quad_clip(struct setup_context *setup) * Emit a quad (pass to next stage) with clipping. */ static INLINE void -clip_emit_quad(struct setup_context *setup) +clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) { - quad_clip(setup); - if (setup->quad.mask) { + quad_clip( setup, quad ); + if (quad->mask) { struct softpipe_context *sp = setup->softpipe; - sp->quad.first->run(sp->quad.first, &setup->quad); + + sp->quad[thread].first->run( sp->quad[thread].first, quad ); } } +#if SP_NUM_QUAD_THREADS > 1 + +static void +clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ + clip_emit_quad( setup, &job->quad, thread ); +} + +#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, 0, 0, 0, &setup->quad, clip_emit_quad_job ) + +#else + +#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 ) + +#endif /** * Emit a quad (pass to next stage). No clipping is done. */ static INLINE void -emit_quad( struct setup_context *setup, int x, int y, unsigned mask ) +emit_quad( struct setup_context *setup, int x, int y, unsigned mask, struct quad_header *quad, uint thread ) { struct softpipe_context *sp = setup->softpipe; - setup->quad.x0 = x; - setup->quad.y0 = y; - setup->quad.mask = mask; + + quad->x0 = x; + quad->y0 = y; + quad->mask = mask; #if DEBUG_FRAGS if (mask & 1) setup->numFragsEmitted++; if (mask & 2) setup->numFragsEmitted++; if (mask & 4) setup->numFragsEmitted++; if (mask & 8) setup->numFragsEmitted++; #endif - sp->quad.first->run(sp->quad.first, &setup->quad); + sp->quad[thread].first->run( sp->quad[thread].first, quad ); #if DEBUG_FRAGS - mask = setup->quad.mask; + mask = quad->mask; if (mask & 1) setup->numFragsWritten++; if (mask & 2) setup->numFragsWritten++; if (mask & 4) setup->numFragsWritten++; @@ -210,6 +308,21 @@ emit_quad( struct setup_context *setup, int x, int y, unsigned mask ) #endif } +#if SP_NUM_QUAD_THREADS > 1 + +static void +emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ + emit_quad( setup, job->x, job->y, job->mask, &job->quad, thread ); +} + +#define EMIT_QUAD(setup,x,y,mask) add_quad_job( &setup->que, x, y, mask, &setup->quad, emit_quad_job ) + +#else + +#define EMIT_QUAD(setup,x,y,mask) emit_quad( setup, x, y, mask, &setup->quad, 0 ) + +#endif /** * Given an X or Y coordinate, return the block/quad coordinate that it @@ -249,7 +362,7 @@ static void flush_spans( struct setup_context *setup ) mask |= MASK_TOP_RIGHT; if (x+1 >= xleft1 && x+1 < xright1) mask |= MASK_BOTTOM_RIGHT; - emit_quad( setup, x, setup->span.y, mask ); + EMIT_QUAD( setup, x, setup->span.y, mask ); } break; @@ -263,7 +376,7 @@ static void flush_spans( struct setup_context *setup ) mask |= MASK_TOP_LEFT; if (x+1 >= xleft0 && x+1 < xright0) mask |= MASK_TOP_RIGHT; - emit_quad( setup, x, setup->span.y, mask ); + EMIT_QUAD( setup, x, setup->span.y, mask ); } break; @@ -277,7 +390,7 @@ static void flush_spans( struct setup_context *setup ) mask |= MASK_BOTTOM_LEFT; if (x+1 >= xleft1 && x+1 < xright1) mask |= MASK_BOTTOM_RIGHT; - emit_quad( setup, x, setup->span.y, mask ); + EMIT_QUAD( setup, x, setup->span.y, mask ); } break; @@ -790,6 +903,8 @@ void setup_tri( struct setup_context *setup, flush_spans( setup ); + WAIT_FOR_COMPLETION(setup); + #if DEBUG_FRAGS printf("Tri: %u frags emitted, %u written\n", setup->numFragsEmitted, @@ -931,7 +1046,7 @@ plot(struct setup_context *setup, int x, int y) /* flush prev quad, start new quad */ if (setup->quad.x0 != -1) - clip_emit_quad(setup); + CLIP_EMIT_QUAD(setup); setup->quad.x0 = quadX; setup->quad.y0 = quadY; @@ -1053,8 +1168,10 @@ setup_line(struct setup_context *setup, /* draw final quad */ if (setup->quad.mask) { - clip_emit_quad(setup); + CLIP_EMIT_QUAD(setup); } + + WAIT_FOR_COMPLETION(setup); } @@ -1163,7 +1280,7 @@ setup_point( struct setup_context *setup, setup->quad.x0 = (int) x - ix; setup->quad.y0 = (int) y - iy; setup->quad.mask = (1 << ix) << (2 * iy); - clip_emit_quad(setup); + CLIP_EMIT_QUAD(setup); } else { if (round) { @@ -1224,7 +1341,7 @@ setup_point( struct setup_context *setup, if (setup->quad.mask) { setup->quad.x0 = ix; setup->quad.y0 = iy; - clip_emit_quad(setup); + CLIP_EMIT_QUAD(setup); } } } @@ -1271,11 +1388,13 @@ setup_point( struct setup_context *setup, setup->quad.mask = mask; setup->quad.x0 = ix; setup->quad.y0 = iy; - clip_emit_quad(setup); + CLIP_EMIT_QUAD(setup); } } } } + + WAIT_FOR_COMPLETION(setup); } void setup_prepare( struct setup_context *setup ) @@ -1300,7 +1419,9 @@ void setup_prepare( struct setup_context *setup ) /* Note: nr_attrs is only used for debugging (vertex printing) */ setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw); - sp->quad.first->begin(sp->quad.first); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + sp->quad[i].first->begin( sp->quad[i].first ); + } if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && @@ -1328,11 +1449,24 @@ void setup_destroy_context( struct setup_context *setup ) struct setup_context *setup_create_context( struct softpipe_context *softpipe ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); + uint i; setup->softpipe = softpipe; setup->quad.coef = setup->coef; setup->quad.posCoef = &setup->posCoef; +#if SP_NUM_QUAD_THREADS > 1 + setup->que.first = 0; + setup->que.last = 0; + pipe_mutex_init( setup->que.mutex ); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + setup->threads[i].setup = setup; + setup->threads[i].id = i; + setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] ); + } +#endif + return setup; } + -- cgit v1.2.3 From 84cde72b3e5fa6e39c0df30fdb7985c0745816ef Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@ubuntu-vbox.(none)> Date: Sat, 6 Sep 2008 15:19:02 +0200 Subject: softpipe: Improve multithreaded softpipe. Use condition vars to communicate between threads instead of stalling. --- src/gallium/drivers/softpipe/sp_setup.c | 103 +++++++++++++++++++++++++++----- 1 file changed, 87 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 706a412baf2..72298dfbb0c 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -64,6 +64,11 @@ struct edge { #if SP_NUM_QUAD_THREADS > 1 +/* Set to 1 if you want other threads to be instantly + * notified of pending jobs. + */ +#define INSTANT_NOTEMPTY_NOTIFY 0 + struct thread_info { struct setup_context *setup; @@ -90,20 +95,51 @@ struct quad_job_que struct quad_job jobs[NUM_QUAD_JOBS]; uint first; uint last; - pipe_mutex mutex; + pipe_mutex que_mutex; + pipe_condvar que_notfull_condvar; + pipe_condvar que_notempty_condvar; + uint jobs_added; + uint jobs_done; + pipe_condvar que_done_condvar; }; static void add_quad_job( struct quad_job_que *que, int x, int y, unsigned mask, struct quad_header *quad, quad_job_routine routine ) { - while ((que->last + 1) % NUM_QUAD_JOBS == que->first) - usleep( 10 ); +#if INSTANT_NOTEMPTY_NOTIFY + boolean empty; +#endif + + /* Wait for empty slot, see if the que is empty. + */ + pipe_mutex_lock( que->que_mutex ); + while ((que->last + 1) % NUM_QUAD_JOBS == que->first) { +#if !INSTANT_NOTEMPTY_NOTIFY + pipe_condvar_broadcast( que->que_notempty_condvar ); +#endif + pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex ); + } +#if INSTANT_NOTEMPTY_NOTIFY + empty = que->last == que->first; +#endif + pipe_mutex_unlock( que->que_mutex ); + + /* Submit new job. + */ que->jobs[que->last].x = x; que->jobs[que->last].y = y; que->jobs[que->last].mask = mask; que->jobs[que->last].quad = *quad; que->jobs[que->last].routine = routine; que->last = (que->last + 1) % NUM_QUAD_JOBS; + que->jobs_added++; + +#if INSTANT_NOTEMPTY_NOTIFY + /* If the que was empty, notify consumers there's a job to be done. + */ + if (empty) + pipe_condvar_broadcast( que->que_notempty_condvar ); +#endif } #endif @@ -137,7 +173,6 @@ struct setup_context { #if SP_NUM_QUAD_THREADS > 1 struct quad_job_que que; struct thread_info threads[SP_NUM_QUAD_THREADS]; - #endif struct { @@ -161,24 +196,55 @@ struct setup_context { static PIPE_THREAD_ROUTINE( quad_thread, param ) { struct thread_info *info = (struct thread_info *) param; + struct quad_job_que *que = &info->setup->que; for (;;) { - struct quad_job *job; - - while (info->setup->que.last == info->setup->que.first) - usleep( 10 ); - pipe_mutex_lock( info->setup->que.mutex ); - job = &info->setup->que.jobs[info->setup->que.first]; - info->setup->que.first = (info->setup->que.first + 1) % NUM_QUAD_JOBS; - job->routine( info->setup, info->id, job ); - pipe_mutex_unlock( info->setup->que.mutex ); + struct quad_job job; + boolean full; + + /* Wait for an available job. + */ + pipe_mutex_lock( que->que_mutex ); + while (que->last == que->first) + pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex ); + + /* See if the que is full. + */ + full = (que->last + 1) % NUM_QUAD_JOBS == que->first; + + /* Take a job and remove it from que. + */ + job = que->jobs[que->first]; + que->first = (que->first + 1) % NUM_QUAD_JOBS; + pipe_mutex_unlock( que->que_mutex ); + + /* Notify the producer if the que is not full. + */ + if (full) + pipe_condvar_signal( que->que_notfull_condvar ); + + job.routine( info->setup, info->id, &job ); + + /* Notify the producer if that's the last finished job. + */ + pipe_mutex_lock( que->que_mutex ); + que->jobs_done++; + if (que->jobs_added == que->jobs_done) + pipe_condvar_signal( que->que_done_condvar ); + pipe_mutex_unlock( que->que_mutex ); } + + return NULL; } #define WAIT_FOR_COMPLETION(setup) \ do {\ - while (setup->que.last != setup->que.first)\ - usleep( 10 );\ + if (!INSTANT_NOTEMPTY_NOTIFY)\ + pipe_condvar_broadcast( setup->que.que_notempty_condvar );\ + pipe_mutex_lock( setup->que.que_mutex );\ + while (setup->que.jobs_added != setup->que.jobs_done)\ + pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\ + pipe_mutex_unlock( setup->que.que_mutex );\ } while (0) #else @@ -1459,7 +1525,12 @@ struct setup_context *setup_create_context( struct softpipe_context *softpipe ) #if SP_NUM_QUAD_THREADS > 1 setup->que.first = 0; setup->que.last = 0; - pipe_mutex_init( setup->que.mutex ); + pipe_mutex_init( setup->que.que_mutex ); + pipe_condvar_init( setup->que.que_notfull_condvar ); + pipe_condvar_init( setup->que.que_notempty_condvar ); + setup->que.jobs_added = 0; + setup->que.jobs_done = 0; + pipe_condvar_init( setup->que.que_done_condvar ); for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { setup->threads[i].setup = setup; setup->threads[i].id = i; -- cgit v1.2.3 From 01f9e5120395f88bba8321e8639cac0bb9c85296 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@ubuntu-vbox.(none)> Date: Sat, 6 Sep 2008 16:02:24 +0200 Subject: softpipe: Split changing fields of quad_header into input, inout and output parts. --- src/gallium/drivers/softpipe/sp_fs_exec.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/gallium/drivers/softpipe/sp_headers.h | 29 ++++-- src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 7 +- src/gallium/drivers/softpipe/sp_quad_blend.c | 16 +-- src/gallium/drivers/softpipe/sp_quad_colormask.c | 8 +- src/gallium/drivers/softpipe/sp_quad_coverage.c | 14 +-- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 52 +++++----- src/gallium/drivers/softpipe/sp_quad_earlyz.c | 12 +-- src/gallium/drivers/softpipe/sp_quad_fs.c | 22 ++--- src/gallium/drivers/softpipe/sp_quad_occlusion.c | 2 +- src/gallium/drivers/softpipe/sp_quad_output.c | 10 +- src/gallium/drivers/softpipe/sp_quad_stencil.c | 44 ++++----- src/gallium/drivers/softpipe/sp_quad_stipple.c | 22 ++--- src/gallium/drivers/softpipe/sp_setup.c | 114 +++++++++++----------- 15 files changed, 183 insertions(+), 173 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index d0456731bea..701ee4c72f2 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -106,7 +106,7 @@ exec_run( const struct sp_fragment_shader *base, /* Compute X, Y, Z, W vals for this quad */ sp_setup_pos_vector(quad->posCoef, - (float)quad->x0, (float)quad->y0, + (float)quad->input.x0, (float)quad->input.y0, &machine->QuadPos); return tgsi_exec_machine_run( machine ); diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 35653a8e48c..496ed43df26 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -88,7 +88,7 @@ fs_sse_run( const struct sp_fragment_shader *base, /* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */ sp_setup_pos_vector(quad->posCoef, - (float)quad->x0, (float)quad->y0, + (float)quad->input.x0, (float)quad->input.y0, machine->Temps); /* init kill mask */ diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h index ae2ee210fc9..4a42cb3c192 100644 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -59,20 +59,31 @@ * Encodes everything we need to know about a 2x2 pixel block. Uses * "Channel-Serial" or "SoA" layout. */ -struct quad_header { +struct quad_header_input +{ int x0; int y0; - unsigned mask:4; + float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ unsigned facing:1; /**< Front (0) or back (1) facing? */ unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ +}; + +struct quad_header_inout +{ + unsigned mask:4; +}; - struct { - /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; - float depth[QUAD_SIZE]; - } outputs; +struct quad_header_output +{ + /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ + float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + float depth[QUAD_SIZE]; +}; - float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ +struct quad_header { + struct quad_header_input input; + struct quad_header_inout inout; + struct quad_header_output output; const struct tgsi_interp_coef *coef; const struct tgsi_interp_coef *posCoef; @@ -80,5 +91,5 @@ struct quad_header { unsigned nr_attrs; }; - #endif /* SP_HEADERS_H */ + diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 7d3580fb4f2..5bebd141e92 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -17,11 +17,10 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) const float ref = softpipe->depth_stencil->alpha.ref; unsigned passMask = 0x0, j; const uint cbuf = 0; /* only output[0].alpha is tested */ - const float *aaaa = quad->outputs.color[cbuf][3]; + const float *aaaa = quad->output.color[cbuf][3]; switch (softpipe->depth_stencil->alpha.func) { case PIPE_FUNC_NEVER: - quad->mask = 0x0; break; case PIPE_FUNC_LESS: /* @@ -76,9 +75,9 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } - quad->mask &= passMask; + quad->inout.mask &= passMask; - if (quad->mask) + if (quad->inout.mask) qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index a834accb863..6f64c6e584c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -114,14 +114,14 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_cached_tile * tile = sp_get_cached_tile(softpipe, softpipe->cbuf_cache[cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color[cbuf]; + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); for (i = 0; i < 4; i++) { dest[i][j] = tile->data.color[y][x][i]; } @@ -244,14 +244,14 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, softpipe->cbuf_cache[cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color[cbuf]; + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); for (i = 0; i < 4; i++) { dest[i][j] = tile->data.color[y][x][i]; } diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index f72f31db973..f32bdfab784 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -56,14 +56,14 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, softpipe->cbuf_cache[cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color[cbuf]; + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); for (i = 0; i < 4; i++) { dest[i][j] = tile->data.color[y][x][i]; } diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index ad907ec25fe..ee29aa7dfea 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -47,19 +47,19 @@ coverage_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - if ((softpipe->rasterizer->poly_smooth && quad->prim == PRIM_TRI) || - (softpipe->rasterizer->line_smooth && quad->prim == PRIM_LINE) || - (softpipe->rasterizer->point_smooth && quad->prim == PRIM_POINT)) { + if ((softpipe->rasterizer->poly_smooth && quad->input.prim == PRIM_TRI) || + (softpipe->rasterizer->line_smooth && quad->input.prim == PRIM_LINE) || + (softpipe->rasterizer->point_smooth && quad->input.prim == PRIM_POINT)) { uint cbuf; /* loop over colorbuffer outputs */ for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { - float (*quadColor)[4] = quad->outputs.color[cbuf]; + float (*quadColor)[4] = quad->output.color[cbuf]; unsigned j; for (j = 0; j < QUAD_SIZE; j++) { - assert(quad->coverage[j] >= 0.0); - assert(quad->coverage[j] <= 1.0); - quadColor[3][j] *= quad->coverage[j]; + assert(quad->input.coverage[j] >= 0.0); + assert(quad->input.coverage[j] <= 1.0); + quadColor[3][j] *= quad->input.coverage[j]; } } } diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 227cb2014e1..523bd3e0801 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -60,7 +60,7 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) unsigned zmask = 0; unsigned j; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->x0, quad->y0); + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); assert(ps); /* shouldn't get here if there's no zbuffer */ @@ -79,12 +79,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) float scale = 65535.0; for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); bzzzz[j] = tile->data.depth16[y][x]; } } @@ -94,12 +94,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) double scale = (double) (uint) ~0UL; for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); bzzzz[j] = tile->data.depth32[y][x]; } } @@ -111,12 +111,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) float scale = (float) ((1 << 24) - 1); for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; } } @@ -128,12 +128,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) float scale = (float) ((1 << 24) - 1); for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); bzzzz[j] = tile->data.depth32[y][x] >> 8; } } @@ -192,14 +192,14 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } - quad->mask &= zmask; + quad->inout.mask &= zmask; if (softpipe->depth_stencil->depth.writemask) { /* This is also efficient with sse / spe instructions: */ for (j = 0; j < QUAD_SIZE; j++) { - if (quad->mask & (1 << j)) { + if (quad->inout.mask & (1 << j)) { bzzzz[j] = qzzzz[j]; } } @@ -208,8 +208,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) switch (format) { case PIPE_FORMAT_Z16_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth16[y][x] = (ushort) bzzzz[j]; } break; @@ -218,15 +218,15 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) /* (yes, this falls through to a different case than above) */ case PIPE_FORMAT_Z32_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth32[y][x] = bzzzz[j]; } break; case PIPE_FORMAT_S8Z24_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); uint s8z24 = tile->data.depth32[y][x]; s8z24 = (s8z24 & 0xff000000) | bzzzz[j]; tile->data.depth32[y][x] = s8z24; @@ -234,8 +234,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) break; case PIPE_FORMAT_Z24S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); uint z24s8 = tile->data.depth32[y][x]; z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8); tile->data.depth32[y][x] = z24s8; @@ -243,8 +243,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) break; case PIPE_FORMAT_Z24X8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth32[y][x] = bzzzz[j] << 8; } break; @@ -260,7 +260,7 @@ depth_test_quad(struct quad_stage *qs, struct quad_header *quad) { sp_depth_test_quad(qs, quad); - if (quad->mask) + if (quad->inout.mask) qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c index 5a66a866993..6e2dde304ea 100644 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -45,16 +45,16 @@ earlyz_quad( struct quad_stage *qs, struct quad_header *quad ) { - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; const float dzdx = quad->posCoef->dadx[2]; const float dzdy = quad->posCoef->dady[2]; const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - quad->outputs.depth[0] = z0; - quad->outputs.depth[1] = z0 + dzdx; - quad->outputs.depth[2] = z0 + dzdy; - quad->outputs.depth[3] = z0 + dzdx + dzdy; + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; qs->next->run( qs->next, quad ); } diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 5499ba5361f..1f0cb3e0355 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -85,7 +85,7 @@ shade_quad( machine->InterpCoefs = quad->coef; /* run shader */ - quad->mask &= softpipe->fs->run( softpipe->fs, + quad->inout.mask &= softpipe->fs->run( softpipe->fs, &qss->machine, quad ); @@ -101,16 +101,16 @@ shade_quad( case TGSI_SEMANTIC_COLOR: { uint cbuf = sem_index[i]; - memcpy(quad->outputs.color[cbuf], + memcpy(quad->output.color[cbuf], &machine->Outputs[i].xyzw[0].f[0], - sizeof(quad->outputs.color[0]) ); + sizeof(quad->output.color[0]) ); } break; case TGSI_SEMANTIC_POSITION: { uint j; for (j = 0; j < 4; j++) { - quad->outputs.depth[j] = machine->Outputs[0].xyzw[2].f[j]; + quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; } z_written = TRUE; } @@ -122,20 +122,20 @@ shade_quad( if (!z_written) { /* compute Z values now, as in the quad earlyz stage */ /* XXX we should really only do this if the earlyz stage is not used */ - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; const float dzdx = quad->posCoef->dadx[2]; const float dzdy = quad->posCoef->dady[2]; const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - quad->outputs.depth[0] = z0; - quad->outputs.depth[1] = z0 + dzdx; - quad->outputs.depth[2] = z0 + dzdy; - quad->outputs.depth[3] = z0 + dzdx + dzdy; + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; } /* shader may cull fragments */ - if( quad->mask ) { + if( quad->inout.mask ) { qs->next->run( qs->next, quad ); } } diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c index db13e73ae35..169bd82876d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -54,7 +54,7 @@ occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - softpipe->occlusion_count += count_bits(quad->mask); + softpipe->occlusion_count += count_bits(quad->inout.mask); qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index b64646a449f..d05e12d1d95 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -41,8 +41,8 @@ static void output_quad(struct quad_stage *qs, struct quad_header *quad) { /* in-tile pos: */ - const int itx = quad->x0 % TILE_SIZE; - const int ity = quad->y0 % TILE_SIZE; + const int itx = quad->input.x0 % TILE_SIZE; + const int ity = quad->input.y0 % TILE_SIZE; struct softpipe_context *softpipe = qs->softpipe; uint cbuf; @@ -52,13 +52,13 @@ output_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, softpipe->cbuf_cache[cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color[cbuf]; + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; int i, j; /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - if (quad->mask & (1 << j)) { + if (quad->inout.mask & (1 << j)) { int x = itx + (j & 1); int y = ity + (j >> 1); for (i = 0; i < 4; i++) { /* loop over color chans */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index ce9562e07c6..abb54877487 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -206,9 +206,9 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) ubyte ref, wrtMask, valMask; ubyte stencilVals[QUAD_SIZE]; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->x0, quad->y0); + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); uint j; - uint face = quad->facing; + uint face = quad->input.facing; if (!softpipe->depth_stencil->stencil[1].enabled) { /* single-sided stencil test, use front (face=0) state */ @@ -231,22 +231,22 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) switch (ps->format) { case PIPE_FORMAT_S8Z24_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); stencilVals[j] = tile->data.depth32[y][x] >> 24; } break; case PIPE_FORMAT_Z24S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); stencilVals[j] = tile->data.depth32[y][x] & 0xff; } break; case PIPE_FORMAT_S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); stencilVals[j] = tile->data.stencil8[y][x]; } break; @@ -258,35 +258,35 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) { unsigned passMask, failMask; passMask = do_stencil_test(stencilVals, func, ref, valMask); - failMask = quad->mask & ~passMask; - quad->mask &= passMask; + failMask = quad->inout.mask & ~passMask; + quad->inout.mask &= passMask; if (failOp != PIPE_STENCIL_OP_KEEP) { apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); } } - if (quad->mask) { + if (quad->inout.mask) { /* now the pixels that passed the stencil test are depth tested */ if (softpipe->depth_stencil->depth.enabled) { - const unsigned origMask = quad->mask; + const unsigned origMask = quad->inout.mask; sp_depth_test_quad(qs, quad); /* quad->mask is updated */ /* update stencil buffer values according to z pass/fail result */ if (zFailOp != PIPE_STENCIL_OP_KEEP) { - const unsigned failMask = origMask & ~quad->mask; + const unsigned failMask = origMask & ~quad->inout.mask; apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); } if (zPassOp != PIPE_STENCIL_OP_KEEP) { - const unsigned passMask = origMask & quad->mask; + const unsigned passMask = origMask & quad->inout.mask; apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); } } else { /* no depth test, apply Zpass operator to stencil buffer values */ - apply_stencil_op(stencilVals, quad->mask, zPassOp, ref, wrtMask); + apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); } } @@ -295,8 +295,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) switch (ps->format) { case PIPE_FORMAT_S8Z24_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); uint s8z24 = tile->data.depth32[y][x]; s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); tile->data.depth32[y][x] = s8z24; @@ -304,8 +304,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) break; case PIPE_FORMAT_Z24S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); uint z24s8 = tile->data.depth32[y][x]; z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; tile->data.depth32[y][x] = z24s8; @@ -313,8 +313,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) break; case PIPE_FORMAT_S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.stencil8[y][x] = stencilVals[j]; } break; @@ -322,7 +322,7 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } - if (quad->mask) + if (quad->inout.mask) qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index a39ecc2e9d4..ccf37f6be59 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -19,17 +19,17 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) static const uint bit31 = 1 << 31; static const uint bit30 = 1 << 30; - if (quad->prim == PRIM_TRI) { + if (quad->input.prim == PRIM_TRI) { struct softpipe_context *softpipe = qs->softpipe; /* need to invert Y to index into OpenGL's stipple pattern */ int y0, y1; uint stipple0, stipple1; if (softpipe->rasterizer->origin_lower_left) { - y0 = softpipe->framebuffer.height - 1 - quad->y0; + y0 = softpipe->framebuffer.height - 1 - quad->input.y0; y1 = y0 - 1; } else { - y0 = quad->y0; + y0 = quad->input.y0; y1 = y0 + 1; } stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; @@ -37,18 +37,18 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) #if 1 { - const int col0 = quad->x0 % 32; + const int col0 = quad->input.x0 % 32; if ((stipple0 & (bit31 >> col0)) == 0) - quad->mask &= ~MASK_TOP_LEFT; + quad->inout.mask &= ~MASK_TOP_LEFT; if ((stipple0 & (bit30 >> col0)) == 0) - quad->mask &= ~MASK_TOP_RIGHT; + quad->inout.mask &= ~MASK_TOP_RIGHT; if ((stipple1 & (bit31 >> col0)) == 0) - quad->mask &= ~MASK_BOTTOM_LEFT; + quad->inout.mask &= ~MASK_BOTTOM_LEFT; if ((stipple1 & (bit30 >> col0)) == 0) - quad->mask &= ~MASK_BOTTOM_RIGHT; + quad->inout.mask &= ~MASK_BOTTOM_RIGHT; } #else /* We'd like to use this code, but we'd need to redefine @@ -56,11 +56,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) * and similarly for the BOTTOM bits. But that may have undesirable * side effects elsewhere. */ - const int col0 = 30 - (quad->x0 % 32); - quad->mask &= (((stipple0 >> col0) & 0x3) | + const int col0 = 30 - (quad->input.x0 % 32); + quad->inout.mask &= (((stipple0 >> col0) & 0x3) | (((stipple1 >> col0) & 0x3) << 2)); #endif - if (!quad->mask) + if (!quad->inout.mask) return; } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 72298dfbb0c..965d62b29ef 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -298,22 +298,22 @@ quad_clip( struct setup_context *setup, struct quad_header *quad ) const int miny = (int) cliprect->miny; const int maxy = (int) cliprect->maxy; - if (quad->x0 >= maxx || - quad->y0 >= maxy || - quad->x0 + 1 < minx || - quad->y0 + 1 < miny) { + if (quad->input.x0 >= maxx || + quad->input.y0 >= maxy || + quad->input.x0 + 1 < minx || + quad->input.y0 + 1 < miny) { /* totally clipped */ - quad->mask = 0x0; + quad->inout.mask = 0x0; return; } - if (quad->x0 < minx) - quad->mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (quad->y0 < miny) - quad->mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (quad->x0 == maxx - 1) - quad->mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (quad->y0 == maxy - 1) - quad->mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + if (quad->input.x0 < minx) + quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (quad->input.y0 < miny) + quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (quad->input.x0 == maxx - 1) + quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (quad->input.y0 == maxy - 1) + quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); } @@ -324,7 +324,7 @@ static INLINE void clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) { quad_clip( setup, quad ); - if (quad->mask) { + if (quad->inout.mask) { struct softpipe_context *sp = setup->softpipe; sp->quad[thread].first->run( sp->quad[thread].first, quad ); @@ -355,9 +355,9 @@ emit_quad( struct setup_context *setup, int x, int y, unsigned mask, struct quad { struct softpipe_context *sp = setup->softpipe; - quad->x0 = x; - quad->y0 = y; - quad->mask = mask; + quad->input.x0 = x; + quad->input.y0 = y; + quad->inout.mask = mask; #if DEBUG_FRAGS if (mask & 1) setup->numFragsEmitted++; if (mask & 2) setup->numFragsEmitted++; @@ -366,7 +366,7 @@ emit_quad( struct setup_context *setup, int x, int y, unsigned mask, struct quad #endif sp->quad[thread].first->run( sp->quad[thread].first, quad ); #if DEBUG_FRAGS - mask = quad->mask; + mask = quad->inout.mask; if (mask & 1) setup->numFragsWritten++; if (mask & 2) setup->numFragsWritten++; if (mask & 4) setup->numFragsWritten++; @@ -577,7 +577,7 @@ static boolean setup_sort_vertices( struct setup_context *setup, * - the GLSL gl_FrontFacing fragment attribute (bool) * - two-sided stencil test */ - setup->quad.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); return TRUE; } @@ -774,7 +774,7 @@ static void setup_tri_coefficients( struct setup_context *setup ) if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; setup->coef[fragSlot].dadx[1] = 0.0; setup->coef[fragSlot].dady[1] = 0.0; } @@ -944,7 +944,7 @@ void setup_tri( struct setup_context *setup, setup_tri_coefficients( setup ); setup_tri_edges( setup ); - setup->quad.prim = PRIM_TRI; + setup->quad.input.prim = PRIM_TRI; setup->span.y = 0; setup->span.y_flags = 0; @@ -1085,7 +1085,7 @@ setup_line_coefficients(struct setup_context *setup, if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; setup->coef[fragSlot].dadx[1] = 0.0; setup->coef[fragSlot].dady[1] = 0.0; } @@ -1106,20 +1106,20 @@ plot(struct setup_context *setup, int x, int y) const int quadY = y - iy; const int mask = (1 << ix) << (2 * iy); - if (quadX != setup->quad.x0 || - quadY != setup->quad.y0) + if (quadX != setup->quad.input.x0 || + quadY != setup->quad.input.y0) { /* flush prev quad, start new quad */ - if (setup->quad.x0 != -1) + if (setup->quad.input.x0 != -1) CLIP_EMIT_QUAD(setup); - setup->quad.x0 = quadX; - setup->quad.y0 = quadY; - setup->quad.mask = 0x0; + setup->quad.input.x0 = quadX; + setup->quad.input.y0 = quadY; + setup->quad.inout.mask = 0x0; } - setup->quad.mask |= mask; + setup->quad.inout.mask |= mask; } @@ -1180,16 +1180,16 @@ setup_line(struct setup_context *setup, assert(dx >= 0); assert(dy >= 0); - setup->quad.x0 = setup->quad.y0 = -1; - setup->quad.mask = 0x0; - setup->quad.prim = PRIM_LINE; + setup->quad.input.x0 = setup->quad.input.y0 = -1; + setup->quad.inout.mask = 0x0; + setup->quad.input.prim = PRIM_LINE; /* XXX temporary: set coverage to 1.0 so the line appears * if AA mode happens to be enabled. */ - setup->quad.coverage[0] = - setup->quad.coverage[1] = - setup->quad.coverage[2] = - setup->quad.coverage[3] = 1.0; + setup->quad.input.coverage[0] = + setup->quad.input.coverage[1] = + setup->quad.input.coverage[2] = + setup->quad.input.coverage[3] = 1.0; if (dx > dy) { /*** X-major line ***/ @@ -1233,7 +1233,7 @@ setup_line(struct setup_context *setup, } /* draw final quad */ - if (setup->quad.mask) { + if (setup->quad.inout.mask) { CLIP_EMIT_QUAD(setup); } @@ -1331,21 +1331,21 @@ setup_point( struct setup_context *setup, if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; setup->coef[fragSlot].dadx[1] = 0.0; setup->coef[fragSlot].dady[1] = 0.0; } } - setup->quad.prim = PRIM_POINT; + setup->quad.input.prim = PRIM_POINT; if (halfSize <= 0.5 && !round) { /* special case for 1-pixel points */ const int ix = ((int) x) & 1; const int iy = ((int) y) & 1; - setup->quad.x0 = (int) x - ix; - setup->quad.y0 = (int) y - iy; - setup->quad.mask = (1 << ix) << (2 * iy); + setup->quad.input.x0 = (int) x - ix; + setup->quad.input.y0 = (int) y - iy; + setup->quad.inout.mask = (1 << ix) << (2 * iy); CLIP_EMIT_QUAD(setup); } else { @@ -1366,15 +1366,15 @@ setup_point( struct setup_context *setup, for (ix = ixmin; ix <= ixmax; ix += 2) { float dx, dy, dist2, cover; - setup->quad.mask = 0x0; + setup->quad.inout.mask = 0x0; dx = (ix + 0.5f) - x; dy = (iy + 0.5f) - y; dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_TOP_LEFT; + setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_TOP_LEFT; } dx = (ix + 1.5f) - x; @@ -1382,8 +1382,8 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_TOP_RIGHT; + setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_TOP_RIGHT; } dx = (ix + 0.5f) - x; @@ -1391,8 +1391,8 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_BOTTOM_LEFT; + setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_BOTTOM_LEFT; } dx = (ix + 1.5f) - x; @@ -1400,13 +1400,13 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_BOTTOM_RIGHT; + setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_BOTTOM_RIGHT; } - if (setup->quad.mask) { - setup->quad.x0 = ix; - setup->quad.y0 = iy; + if (setup->quad.inout.mask) { + setup->quad.input.x0 = ix; + setup->quad.input.y0 = iy; CLIP_EMIT_QUAD(setup); } } @@ -1451,9 +1451,9 @@ setup_point( struct setup_context *setup, mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); } - setup->quad.mask = mask; - setup->quad.x0 = ix; - setup->quad.y0 = iy; + setup->quad.inout.mask = mask; + setup->quad.input.x0 = ix; + setup->quad.input.y0 = iy; CLIP_EMIT_QUAD(setup); } } -- cgit v1.2.3 From b40732622fe670fbd13ad12b7c1848bd6c73eeb4 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@ubuntu-vbox.(none)> Date: Sat, 6 Sep 2008 16:35:23 +0200 Subject: softpipe: More improvements for multithreaded softpipe. Store only input and inout of a quad_header in job que. --- src/gallium/drivers/softpipe/sp_setup.c | 55 +++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 965d62b29ef..98b46b4552c 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -82,9 +82,8 @@ typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, str struct quad_job { - int x, y; - unsigned mask; - struct quad_header quad; + struct quad_header_input input; + struct quad_header_inout inout; quad_job_routine routine; }; @@ -104,7 +103,7 @@ struct quad_job_que }; static void -add_quad_job( struct quad_job_que *que, int x, int y, unsigned mask, struct quad_header *quad, quad_job_routine routine ) +add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine ) { #if INSTANT_NOTEMPTY_NOTIFY boolean empty; @@ -126,10 +125,8 @@ add_quad_job( struct quad_job_que *que, int x, int y, unsigned mask, struct quad /* Submit new job. */ - que->jobs[que->last].x = x; - que->jobs[que->last].y = y; - que->jobs[que->last].mask = mask; - que->jobs[que->last].quad = *quad; + que->jobs[que->last].input = quad->input; + que->jobs[que->last].inout = quad->inout; que->jobs[que->last].routine = routine; que->last = (que->last + 1) % NUM_QUAD_JOBS; que->jobs_added++; @@ -336,10 +333,17 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thre static void clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) { - clip_emit_quad( setup, &job->quad, thread ); + struct quad_header quad; + + quad.input = job->input; + quad.inout = job->inout; + quad.coef = setup->quad.coef; + quad.posCoef = setup->quad.posCoef; + quad.nr_attrs = setup->quad.nr_attrs; + clip_emit_quad( setup, &quad, thread ); } -#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, 0, 0, 0, &setup->quad, clip_emit_quad_job ) +#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, &setup->quad, clip_emit_quad_job ) #else @@ -351,13 +355,13 @@ clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *j * Emit a quad (pass to next stage). No clipping is done. */ static INLINE void -emit_quad( struct setup_context *setup, int x, int y, unsigned mask, struct quad_header *quad, uint thread ) +emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) { struct softpipe_context *sp = setup->softpipe; +#if DEBUG_FRAGS + uint mask = quad->inout.mask; +#endif - quad->input.x0 = x; - quad->input.y0 = y; - quad->inout.mask = mask; #if DEBUG_FRAGS if (mask & 1) setup->numFragsEmitted++; if (mask & 2) setup->numFragsEmitted++; @@ -379,14 +383,31 @@ emit_quad( struct setup_context *setup, int x, int y, unsigned mask, struct quad static void emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) { - emit_quad( setup, job->x, job->y, job->mask, &job->quad, thread ); + struct quad_header quad; + + quad.input = job->input; + quad.inout = job->inout; + quad.coef = setup->quad.coef; + quad.posCoef = setup->quad.posCoef; + quad.nr_attrs = setup->quad.nr_attrs; + emit_quad( setup, &quad, thread ); } -#define EMIT_QUAD(setup,x,y,mask) add_quad_job( &setup->que, x, y, mask, &setup->quad, emit_quad_job ) +#define EMIT_QUAD(setup,x,y,mask) do {\ + setup->quad.input.x0 = x;\ + setup->quad.input.y0 = y;\ + setup->quad.inout.mask = mask;\ + add_quad_job( &setup->que, &setup->quad, emit_quad_job );\ + } while (0) #else -#define EMIT_QUAD(setup,x,y,mask) emit_quad( setup, x, y, mask, &setup->quad, 0 ) +#define EMIT_QUAD(setup,x,y,mask) do {\ + setup->quad.input.x0 = x;\ + setup->quad.input.y0 = y;\ + setup->quad.inout.mask = mask;\ + emit_quad( setup, &setup->quad, 0 );\ + } while (0) #endif -- cgit v1.2.3 From 68e672a86468be3dfa5b09fa71152d7134d62fb3 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@ubuntu-vbox.(none)> Date: Mon, 8 Sep 2008 17:21:13 +0200 Subject: softpipe: Set SP_NUM_QUAD_THREADS 1 effectively disabling multithreaded softpipe. We want to make it env variable, or even better, autodetect as the feature makes softpipe run slower on a single CPU. --- src/gallium/drivers/softpipe/sp_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index e58f7c8b6fd..2b9a2a8ee52 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -48,7 +48,7 @@ /* Number of threads working on individual quads. * Setting to 1 disables this feature. */ -#define SP_NUM_QUAD_THREADS 2 +#define SP_NUM_QUAD_THREADS 1 struct softpipe_winsys; struct softpipe_vbuf_render; -- cgit v1.2.3 From 56e7c5522e37508d8ca83410479d09f1eddfac15 Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@ubuntu-vbox.(none)> Date: Mon, 8 Sep 2008 23:04:17 +0200 Subject: softpipe: Protect pipe_condvar_signal/broadcast calls with a mutex. --- src/gallium/drivers/softpipe/sp_setup.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 98b46b4552c..fe88d8f1408 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -121,6 +121,7 @@ add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routi #if INSTANT_NOTEMPTY_NOTIFY empty = que->last == que->first; #endif + que->jobs_added++; pipe_mutex_unlock( que->que_mutex ); /* Submit new job. @@ -129,13 +130,15 @@ add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routi que->jobs[que->last].inout = quad->inout; que->jobs[que->last].routine = routine; que->last = (que->last + 1) % NUM_QUAD_JOBS; - que->jobs_added++; #if INSTANT_NOTEMPTY_NOTIFY /* If the que was empty, notify consumers there's a job to be done. */ - if (empty) + if (empty) { + pipe_mutex_lock( que->que_mutex ); pipe_condvar_broadcast( que->que_notempty_condvar ); + pipe_mutex_unlock( que->que_mutex ); + } #endif } @@ -213,12 +216,12 @@ static PIPE_THREAD_ROUTINE( quad_thread, param ) */ job = que->jobs[que->first]; que->first = (que->first + 1) % NUM_QUAD_JOBS; - pipe_mutex_unlock( que->que_mutex ); /* Notify the producer if the que is not full. */ if (full) pipe_condvar_signal( que->que_notfull_condvar ); + pipe_mutex_unlock( que->que_mutex ); job.routine( info->setup, info->id, &job ); @@ -236,9 +239,9 @@ static PIPE_THREAD_ROUTINE( quad_thread, param ) #define WAIT_FOR_COMPLETION(setup) \ do {\ + pipe_mutex_lock( setup->que.que_mutex );\ if (!INSTANT_NOTEMPTY_NOTIFY)\ pipe_condvar_broadcast( setup->que.que_notempty_condvar );\ - pipe_mutex_lock( setup->que.que_mutex );\ while (setup->que.jobs_added != setup->que.jobs_done)\ pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\ pipe_mutex_unlock( setup->que.que_mutex );\ -- cgit v1.2.3 From 05ecd6d4020244b1bb6eb4f06f2333006b1bbfbd Mon Sep 17 00:00:00 2001 From: Michal Krol <michal@tungstengraphics.com> Date: Tue, 9 Sep 2008 11:21:57 +0200 Subject: softpipe: Silence compiler warning on Windows. --- src/gallium/drivers/softpipe/sp_setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index fe88d8f1408..bc8263c33e3 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1539,7 +1539,9 @@ void setup_destroy_context( struct setup_context *setup ) struct setup_context *setup_create_context( struct softpipe_context *softpipe ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); +#if SP_NUM_QUAD_THREADS > 1 uint i; +#endif setup->softpipe = softpipe; -- cgit v1.2.3 From d671cf460f99693ded1eccc6b32816d430098725 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 9 Sep 2008 22:17:31 +0900 Subject: softpipe: Code (commented out) to dump BMPs. --- src/gallium/drivers/softpipe/sp_flush.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index e03994b63b7..401764bb439 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -73,6 +73,19 @@ softpipe_flush( struct pipe_context *pipe, softpipe_unmap_surfaces(softpipe); } + /* Enable to dump BMPs of the color/depth buffers each frame */ +#if 0 + if(flags & PIPE_FLUSH_FRAME) { + static unsigned frame_no = 1; + static char filename[256]; + util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no); + debug_dump_surface_bmp(filename, softpipe->framebuffer.cbufs[0]); + util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); + debug_dump_surface_bmp(filename, softpipe->framebuffer.zsbuf); + ++frame_no; + } +#endif + if (fence) *fence = NULL; } -- cgit v1.2.3 From f302fca5eb63e4bca8af5b35c585451486143e6a Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 11 Sep 2008 06:41:18 +1000 Subject: nouveau: gallium directory structure changed again.. --- src/gallium/drivers/nouveau/nouveau_stateobj.h | 1 - src/gallium/drivers/nv04/nv04_context.c | 1 - src/gallium/drivers/nv04/nv04_context.h | 4 + src/gallium/drivers/nv04/nv04_fragprog.c | 1 - src/gallium/drivers/nv04/nv04_miptree.c | 6 +- src/gallium/drivers/nv04/nv04_prim_vbuf.c | 5 +- src/gallium/drivers/nv04/nv04_screen.c | 1 - src/gallium/drivers/nv04/nv04_state.c | 1 - src/gallium/drivers/nv04/nv04_surface.c | 3 +- src/gallium/drivers/nv04/nv04_vbo.c | 1 - src/gallium/drivers/nv10/nv10_context.c | 1 - src/gallium/drivers/nv10/nv10_context.h | 4 + src/gallium/drivers/nv10/nv10_fragprog.c | 1 - src/gallium/drivers/nv10/nv10_miptree.c | 6 +- src/gallium/drivers/nv10/nv10_prim_vbuf.c | 6 +- src/gallium/drivers/nv10/nv10_screen.c | 1 - src/gallium/drivers/nv10/nv10_state.c | 1 - src/gallium/drivers/nv10/nv10_state_emit.c | 2 - src/gallium/drivers/nv10/nv10_surface.c | 3 +- src/gallium/drivers/nv10/nv10_vbo.c | 1 - src/gallium/drivers/nv30/nv30_context.c | 1 - src/gallium/drivers/nv30/nv30_context.h | 4 + src/gallium/drivers/nv30/nv30_draw.c | 1 - src/gallium/drivers/nv30/nv30_miptree.c | 9 +- src/gallium/drivers/nv30/nv30_screen.c | 1 - src/gallium/drivers/nv30/nv30_state.c | 1 - src/gallium/drivers/nv30/nv30_surface.c | 4 +- src/gallium/drivers/nv30/nv30_vbo.c | 1 - src/gallium/drivers/nv40/nv40_context.c | 1 - src/gallium/drivers/nv40/nv40_context.h | 4 + src/gallium/drivers/nv40/nv40_draw.c | 3 +- src/gallium/drivers/nv40/nv40_miptree.c | 11 +- src/gallium/drivers/nv40/nv40_screen.c | 1 - src/gallium/drivers/nv40/nv40_state.c | 1 - src/gallium/drivers/nv40/nv40_surface.c | 4 +- src/gallium/drivers/nv40/nv40_vbo.c | 1 - src/gallium/drivers/nv50/nv50_context.c | 1 - src/gallium/drivers/nv50/nv50_context.h | 4 + src/gallium/drivers/nv50/nv50_draw.c | 1 - src/gallium/drivers/nv50/nv50_miptree.c | 12 +- src/gallium/drivers/nv50/nv50_screen.c | 1 - src/gallium/drivers/nv50/nv50_state.c | 1 - src/gallium/drivers/nv50/nv50_surface.c | 12 +- src/gallium/drivers/nv50/nv50_vbo.c | 1 - src/gallium/winsys/dri/nouveau/Makefile | 45 -- src/gallium/winsys/dri/nouveau/nouveau_bo.c | 470 --------------------- src/gallium/winsys/dri/nouveau/nouveau_channel.c | 126 ------ src/gallium/winsys/dri/nouveau/nouveau_context.c | 346 --------------- src/gallium/winsys/dri/nouveau/nouveau_context.h | 113 ----- src/gallium/winsys/dri/nouveau/nouveau_device.c | 159 ------- src/gallium/winsys/dri/nouveau/nouveau_dma.c | 219 ---------- src/gallium/winsys/dri/nouveau/nouveau_dma.h | 143 ------- src/gallium/winsys/dri/nouveau/nouveau_dri.h | 28 -- src/gallium/winsys/dri/nouveau/nouveau_drmif.h | 310 -------------- src/gallium/winsys/dri/nouveau/nouveau_fence.c | 214 ---------- src/gallium/winsys/dri/nouveau/nouveau_grobj.c | 107 ----- src/gallium/winsys/dri/nouveau/nouveau_local.h | 117 ----- src/gallium/winsys/dri/nouveau/nouveau_lock.c | 94 ----- src/gallium/winsys/dri/nouveau/nouveau_notifier.c | 137 ------ src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c | 271 ------------ src/gallium/winsys/dri/nouveau/nouveau_resource.c | 116 ----- src/gallium/winsys/dri/nouveau/nouveau_screen.c | 310 -------------- src/gallium/winsys/dri/nouveau/nouveau_screen.h | 20 - .../winsys/dri/nouveau/nouveau_swapbuffers.c | 86 ---- .../winsys/dri/nouveau/nouveau_swapbuffers.h | 10 - src/gallium/winsys/dri/nouveau/nouveau_winsys.c | 158 ------- .../winsys/dri/nouveau/nouveau_winsys_pipe.c | 205 --------- .../winsys/dri/nouveau/nouveau_winsys_pipe.h | 34 -- .../winsys/dri/nouveau/nouveau_winsys_softpipe.c | 85 ---- src/gallium/winsys/dri/nouveau/nv04_surface.c | 314 -------------- src/gallium/winsys/dri/nouveau/nv50_surface.c | 194 --------- src/gallium/winsys/drm/nouveau/Makefile | 45 ++ src/gallium/winsys/drm/nouveau/nouveau_bo.c | 470 +++++++++++++++++++++ src/gallium/winsys/drm/nouveau/nouveau_channel.c | 126 ++++++ src/gallium/winsys/drm/nouveau/nouveau_context.c | 346 +++++++++++++++ src/gallium/winsys/drm/nouveau/nouveau_context.h | 113 +++++ src/gallium/winsys/drm/nouveau/nouveau_device.c | 159 +++++++ src/gallium/winsys/drm/nouveau/nouveau_dma.c | 219 ++++++++++ src/gallium/winsys/drm/nouveau/nouveau_dma.h | 143 +++++++ src/gallium/winsys/drm/nouveau/nouveau_dri.h | 28 ++ src/gallium/winsys/drm/nouveau/nouveau_drmif.h | 310 ++++++++++++++ src/gallium/winsys/drm/nouveau/nouveau_fence.c | 214 ++++++++++ src/gallium/winsys/drm/nouveau/nouveau_grobj.c | 107 +++++ src/gallium/winsys/drm/nouveau/nouveau_local.h | 117 +++++ src/gallium/winsys/drm/nouveau/nouveau_lock.c | 94 +++++ src/gallium/winsys/drm/nouveau/nouveau_notifier.c | 137 ++++++ src/gallium/winsys/drm/nouveau/nouveau_pushbuf.c | 271 ++++++++++++ src/gallium/winsys/drm/nouveau/nouveau_resource.c | 116 +++++ src/gallium/winsys/drm/nouveau/nouveau_screen.c | 310 ++++++++++++++ src/gallium/winsys/drm/nouveau/nouveau_screen.h | 20 + .../winsys/drm/nouveau/nouveau_swapbuffers.c | 86 ++++ .../winsys/drm/nouveau/nouveau_swapbuffers.h | 10 + src/gallium/winsys/drm/nouveau/nouveau_winsys.c | 158 +++++++ .../winsys/drm/nouveau/nouveau_winsys_pipe.c | 206 +++++++++ .../winsys/drm/nouveau/nouveau_winsys_pipe.h | 34 ++ .../winsys/drm/nouveau/nouveau_winsys_softpipe.c | 85 ++++ src/gallium/winsys/drm/nouveau/nv04_surface.c | 314 ++++++++++++++ src/gallium/winsys/drm/nouveau/nv50_surface.c | 194 +++++++++ 98 files changed, 4487 insertions(+), 4507 deletions(-) delete mode 100644 src/gallium/winsys/dri/nouveau/Makefile delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_bo.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_channel.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_context.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_context.h delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_device.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_dma.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_dma.h delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_dri.h delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_drmif.h delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_fence.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_grobj.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_local.h delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_lock.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_notifier.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_resource.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_screen.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_screen.h delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.h delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_winsys.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.h delete mode 100644 src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c delete mode 100644 src/gallium/winsys/dri/nouveau/nv04_surface.c delete mode 100644 src/gallium/winsys/dri/nouveau/nv50_surface.c create mode 100644 src/gallium/winsys/drm/nouveau/Makefile create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_bo.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_channel.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_context.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_context.h create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_device.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_dma.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_dma.h create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_dri.h create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_drmif.h create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_fence.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_grobj.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_local.h create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_lock.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_notifier.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_pushbuf.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_resource.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_screen.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_screen.h create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.h create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_winsys.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.c create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.h create mode 100644 src/gallium/winsys/drm/nouveau/nouveau_winsys_softpipe.c create mode 100644 src/gallium/winsys/drm/nouveau/nv04_surface.c create mode 100644 src/gallium/winsys/drm/nouveau/nv50_surface.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index 998ec2d4ad4..729988b095e 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -1,7 +1,6 @@ #ifndef __NOUVEAU_STATEOBJ_H__ #define __NOUVEAU_STATEOBJ_H__ -#include "pipe/p_util.h" #include "pipe/p_debug.h" struct nouveau_stateobj_reloc { diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 852a8edf5ff..9f75253363f 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -1,7 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" #include "nv04_context.h" #include "nv04_screen.h" diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h index 5ba1d4ecdc1..3e6a0852702 100644 --- a/src/gallium/drivers/nv04/nv04_context.h +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -4,6 +4,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c index 215974eec00..8a2af41fe06 100644 --- a/src/gallium/drivers/nv04/nv04_fragprog.c +++ b/src/gallium/drivers/nv04/nv04_fragprog.c @@ -1,7 +1,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 97f679731ea..02f7d210e31 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -1,6 +1,5 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "nv04_context.h" @@ -72,7 +71,6 @@ nv04_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) static void nv04_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) { - struct pipe_winsys *ws = screen->winsys; struct pipe_texture *mt = *pt; *pt = NULL; @@ -80,7 +78,7 @@ nv04_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) struct nv04_miptree *nv04mt = (struct nv04_miptree *)mt; int l; - pipe_buffer_reference(ws, &nv04mt->buffer, NULL); + pipe_buffer_reference(screen, &nv04mt->buffer, NULL); for (l = 0; l <= mt->last_level; l++) { if (nv04mt->level[l].image_offset) FREE(nv04mt->level[l].image_offset); @@ -101,7 +99,7 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps = ws->surface_alloc(ws); if (!ps) return NULL; - pipe_buffer_reference(ws, &ps->buffer, nv04mt->buffer); + pipe_buffer_reference(pscreen, &ps->buffer, nv04mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index d3963d1f59d..19979fff795 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -1,9 +1,10 @@ -#include "draw/draw_vbuf.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" +#include "pipe/p_compiler.h" + +#include "draw/draw_vbuf.h" #include "nv04_context.h" #include "nv04_state.h" diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index da09a3a5fe8..3966a29ffa9 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -1,5 +1,4 @@ #include "pipe/p_screen.h" -#include "pipe/p_util.h" #include "nv04_context.h" #include "nv04_screen.h" diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index 668d875671f..ff1933b5508 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -1,7 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/nv04/nv04_surface.c b/src/gallium/drivers/nv04/nv04_surface.c index b13ebf9f9b5..57039483c62 100644 --- a/src/gallium/drivers/nv04/nv04_surface.c +++ b/src/gallium/drivers/nv04/nv04_surface.c @@ -28,10 +28,9 @@ #include "nv04_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" static void nv04_surface_copy(struct pipe_context *pipe, unsigned do_flip, diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c index fbfe0cf406d..91f919d48ec 100644 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -1,7 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "nv04_context.h" #include "nv04_state.h" diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 9fcd0b0fc3a..e9b61daae7f 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -1,7 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" #include "nv10_context.h" #include "nv10_screen.h" diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 2bdba53db8a..f3b56de25a7 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -4,6 +4,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/nv10/nv10_fragprog.c b/src/gallium/drivers/nv10/nv10_fragprog.c index 137de9d53ef..698db5a16a9 100644 --- a/src/gallium/drivers/nv10/nv10_fragprog.c +++ b/src/gallium/drivers/nv10/nv10_fragprog.c @@ -1,7 +1,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index 9a68df29250..ad084e72b81 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -1,6 +1,5 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "nv10_context.h" @@ -79,7 +78,6 @@ nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) static void nv10_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) { - struct pipe_winsys *ws = screen->winsys; struct pipe_texture *mt = *pt; *pt = NULL; @@ -87,7 +85,7 @@ nv10_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) struct nv10_miptree *nv10mt = (struct nv10_miptree *)mt; int l; - pipe_buffer_reference(ws, &nv10mt->buffer, NULL); + pipe_buffer_reference(screen, &nv10mt->buffer, NULL); for (l = 0; l <= mt->last_level; l++) { if (nv10mt->level[l].image_offset) FREE(nv10mt->level[l].image_offset); @@ -115,7 +113,7 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, ps = ws->surface_alloc(ws); if (!ps) return NULL; - pipe_buffer_reference(ws, &ps->buffer, nv10mt->buffer); + pipe_buffer_reference(screen, &ps->buffer, nv10mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index 930536b9468..62a8f6d89da 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -38,15 +38,14 @@ */ -#include "draw/draw_vbuf.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "nv10_context.h" #include "nv10_state.h" +#include "draw/draw_vbuf.h" /** * Primitive renderer for nv10. @@ -180,10 +179,11 @@ nv10_vbuf_render_release_vertices( struct vbuf_render *render, struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); struct nv10_context *nv10 = nv10_render->nv10; struct pipe_winsys *winsys = nv10->pipe.winsys; + struct pipe_screen *pscreen = &nv10->screen->pipe; assert(nv10_render->buffer); winsys->buffer_unmap(winsys, nv10_render->buffer); - pipe_buffer_reference(winsys, &nv10_render->buffer, NULL); + pipe_buffer_reference(pscreen, &nv10_render->buffer, NULL); } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 403f7b98cde..27a9edf9bba 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -1,5 +1,4 @@ #include "pipe/p_screen.h" -#include "pipe/p_util.h" #include "nv10_context.h" #include "nv10_screen.h" diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index f902fd54b66..d2375aa2f64 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -1,7 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index d21368d33f3..46c7e1d7536 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -1,5 +1,3 @@ -#include "pipe/p_util.h" - #include "nv10_context.h" #include "nv10_state.h" diff --git a/src/gallium/drivers/nv10/nv10_surface.c b/src/gallium/drivers/nv10/nv10_surface.c index 2e230ebbecc..875e4c58589 100644 --- a/src/gallium/drivers/nv10/nv10_surface.c +++ b/src/gallium/drivers/nv10/nv10_surface.c @@ -28,10 +28,9 @@ #include "nv10_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" static void nv10_surface_copy(struct pipe_context *pipe, unsigned do_flip, diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index f024f534209..d0e788ac036 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -1,7 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "nv10_context.h" #include "nv10_state.h" diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index eefc614e5b5..2bff28aca9c 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -1,7 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" #include "nv30_context.h" #include "nv30_screen.h" diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 823b34a7c3b..b9337697008 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -4,6 +4,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c index aeeaf58f20a..74fc138c051 100644 --- a/src/gallium/drivers/nv30/nv30_draw.c +++ b/src/gallium/drivers/nv30/nv30_draw.c @@ -1,5 +1,4 @@ #include "draw/draw_pipe.h" -#include "pipe/p_util.h" #include "nv30_context.h" diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index a0e488c09b1..5c4f4da9481 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -1,6 +1,5 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "nv30_context.h" @@ -33,7 +32,7 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt) if (swizzled) pitch = pt->nblocksx[l]; - pitch = align_int(pitch, 64); + pitch = align(pitch, 64); nv30mt->level[l].pitch = pitch * pt->block.size; nv30mt->level[l].image_offset = @@ -84,7 +83,6 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) static void nv30_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) { - struct pipe_winsys *ws = pscreen->winsys; struct pipe_texture *mt = *pt; *pt = NULL; @@ -92,7 +90,7 @@ nv30_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) struct nv30_miptree *nv30mt = (struct nv30_miptree *)mt; int l; - pipe_buffer_reference(ws, &nv30mt->buffer, NULL); + pipe_buffer_reference(pscreen, &nv30mt->buffer, NULL); for (l = 0; l <= mt->last_level; l++) { if (nv30mt->level[l].image_offset) FREE(nv30mt->level[l].image_offset); @@ -106,7 +104,6 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { - struct pipe_winsys *ws = pscreen->winsys; struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; struct pipe_surface *ps; @@ -114,7 +111,7 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(ws, &ps->buffer, nv30mt->buffer); + pipe_buffer_reference(pscreen, &ps->buffer, nv30mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index d5514c2aba5..a595e2eb225 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -1,5 +1,4 @@ #include "pipe/p_screen.h" -#include "pipe/p_util.h" #include "nv30_context.h" #include "nv30_screen.h" diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index eceb5353152..fc66075c83f 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -1,6 +1,5 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c index b22211ac86e..36f48877503 100644 --- a/src/gallium/drivers/nv30/nv30_surface.c +++ b/src/gallium/drivers/nv30/nv30_surface.c @@ -28,10 +28,10 @@ #include "nv30_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" + +#include "util/u_tile.h" static void nv30_surface_copy(struct pipe_context *pipe, unsigned do_flip, diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index b1c73793bf2..556f981d4a5 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -1,6 +1,5 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "nv30_context.h" #include "nv30_state.h" diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index a40f14895f4..cc63dd734bc 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -1,7 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" #include "nv40_context.h" #include "nv40_screen.h" diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 8e60a81e68e..adcfbdd85a8 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -4,6 +4,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 2cf58e2950a..8e56cdc2fe0 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -1,6 +1,7 @@ -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_pack_color.h" + #include "draw/draw_context.h" #include "draw/draw_vertex.h" #include "draw/draw_pipe.h" diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 38e1a5f04ca..6c54c37ef71 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -1,6 +1,5 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "nv40_context.h" @@ -33,7 +32,7 @@ nv40_miptree_layout(struct nv40_miptree *nv40mt) if (swizzled) pitch = pt->nblocksx[l]; - pitch = align_int(pitch, 64); + pitch = align(pitch, 64); nv40mt->level[l].pitch = pitch * pt->block.size; nv40mt->level[l].image_offset = @@ -84,7 +83,6 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) static void nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) { - struct pipe_winsys *ws = pscreen->winsys; struct pipe_texture *mt = *pt; *pt = NULL; @@ -92,7 +90,7 @@ nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **pt) struct nv40_miptree *nv40mt = (struct nv40_miptree *)mt; int l; - pipe_buffer_reference(ws, &nv40mt->buffer, NULL); + pipe_buffer_reference(pscreen, &nv40mt->buffer, NULL); for (l = 0; l <= mt->last_level; l++) { if (nv40mt->level[l].image_offset) FREE(nv40mt->level[l].image_offset); @@ -106,7 +104,6 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { - struct pipe_winsys *ws = pscreen->winsys; struct nv40_miptree *nv40mt = (struct nv40_miptree *)pt; struct pipe_surface *ps; @@ -114,7 +111,7 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(ws, &ps->buffer, nv40mt->buffer); + pipe_buffer_reference(pscreen, &ps->buffer, nv40mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -148,7 +145,7 @@ nv40_miptree_surface_del(struct pipe_screen *pscreen, return; pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen->winsys, &ps->buffer, NULL); + pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 0e1df89ee85..ada0238511d 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -1,5 +1,4 @@ #include "pipe/p_screen.h" -#include "pipe/p_util.h" #include "nv40_context.h" #include "nv40_screen.h" diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 63d0ecc9158..255c4b294d1 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -1,6 +1,5 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c index 0916555d564..576af7c59ee 100644 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -28,10 +28,10 @@ #include "nv40_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" + +#include "util/u_tile.h" static void nv40_surface_copy(struct pipe_context *pipe, boolean do_flip, diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 755d5586b7b..09f6e79d32a 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -1,6 +1,5 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "nv40_context.h" #include "nv40_state.h" diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 07987c7d02d..b02c53f2095 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -23,7 +23,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" #include "nv50_context.h" #include "nv50_screen.h" diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 1c069f1625f..5d377f2d067 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -4,6 +4,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c index 4fd81bd27a1..2f6f607261e 100644 --- a/src/gallium/drivers/nv50/nv50_draw.c +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -21,7 +21,6 @@ */ #include "draw/draw_pipe.h" -#include "pipe/p_util.h" #include "nv50_context.h" diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index a02ad41885b..b0e8fe2f0b7 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -22,7 +22,6 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "nv50_context.h" @@ -62,7 +61,6 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) static void nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) { - struct pipe_winsys *ws = pscreen->winsys; struct pipe_texture *pt = *ppt; *ppt = NULL; @@ -70,7 +68,7 @@ nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) if (--pt->refcount <= 0) { struct nv50_miptree *mt = nv50_miptree(pt); - pipe_buffer_reference(ws, &mt->buffer, NULL); + pipe_buffer_reference(pscreen, &mt->buffer, NULL); FREE(mt); } } @@ -80,7 +78,6 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { - struct pipe_winsys *ws = pscreen->winsys; struct nv50_miptree *mt = nv50_miptree(pt); struct nv50_surface *s; struct pipe_surface *ps; @@ -91,7 +88,7 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps = &s->base; ps->refcount = 1; - ps->winsys = ws; + ps->winsys = pscreen->winsys; ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -104,7 +101,7 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->status = PIPE_SURFACE_STATUS_DEFINED; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(ws, &ps->buffer, mt->buffer); + pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer); return ps; } @@ -113,7 +110,6 @@ static void nv50_miptree_surface_del(struct pipe_screen *pscreen, struct pipe_surface **psurface) { - struct pipe_winsys *ws = pscreen->winsys; struct pipe_surface *ps = *psurface; struct nv50_surface *s = nv50_surface(ps); @@ -121,7 +117,7 @@ nv50_miptree_surface_del(struct pipe_screen *pscreen, if (--ps->refcount <= 0) { pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(ws, &ps->buffer, NULL); + pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(s); } } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ec439239298..b5aef7dadd8 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -21,7 +21,6 @@ */ #include "pipe/p_screen.h" -#include "pipe/p_util.h" #include "nv50_context.h" #include "nv50_screen.h" diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 4055527d9f8..95f9d408b5e 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -22,7 +22,6 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index a9daeee3694..5bf97d3a6bf 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -22,10 +22,10 @@ #include "nv50_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" + +#include "util/u_tile.h" static void nv50_surface_copy(struct pipe_context *pipe, boolean flip, @@ -90,10 +90,10 @@ nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps, } static void -nv50_surface_unmap(struct pipe_screen *screen, struct pipe_surface *ps) +nv50_surface_unmap(struct pipe_screen *pscreen, struct pipe_surface *ps) { - struct nouveau_winsys *nvws = nv50_screen(screen)->nvws; - struct pipe_winsys *ws = screen->winsys; + struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws; + struct pipe_winsys *ws = pscreen->winsys; struct nv50_surface *s = nv50_surface(ps); struct nv50_surface m = *s; @@ -104,7 +104,7 @@ nv50_surface_unmap(struct pipe_screen *screen, struct pipe_surface *ps) nvws->surface_copy(nvws, &s->base, 0, 0, &m.base, 0, 0, ps->width, ps->height); - pipe_buffer_reference(ws, &s->untiled, NULL); + pipe_buffer_reference(pscreen, &s->untiled, NULL); } void diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index c94531723bc..584336682e4 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -22,7 +22,6 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "nv50_context.h" diff --git a/src/gallium/winsys/dri/nouveau/Makefile b/src/gallium/winsys/dri/nouveau/Makefile deleted file mode 100644 index be630ff6d16..00000000000 --- a/src/gallium/winsys/dri/nouveau/Makefile +++ /dev/null @@ -1,45 +0,0 @@ - -TOP = ../../../../.. -include $(TOP)/configs/current - -LIBNAME = nouveau_dri.so - -MINIGLX_SOURCES = - -PIPE_DRIVERS = \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/drivers/nv04/libnv04.a \ - $(TOP)/src/gallium/drivers/nv10/libnv10.a \ - $(TOP)/src/gallium/drivers/nv30/libnv30.a \ - $(TOP)/src/gallium/drivers/nv40/libnv40.a \ - $(TOP)/src/gallium/drivers/nv50/libnv50.a - -DRIVER_SOURCES = \ - nouveau_bo.c \ - nouveau_channel.c \ - nouveau_context.c \ - nouveau_device.c \ - nouveau_dma.c \ - nouveau_fence.c \ - nouveau_grobj.c \ - nouveau_lock.c \ - nouveau_notifier.c \ - nouveau_pushbuf.c \ - nouveau_resource.c \ - nouveau_screen.c \ - nouveau_swapbuffers.c \ - nouveau_winsys.c \ - nouveau_winsys_pipe.c \ - nouveau_winsys_softpipe.c \ - nv04_surface.c \ - nv50_surface.c - -C_SOURCES = \ - $(COMMON_GALLIUM_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -include ../Makefile.template - -symlinks: diff --git a/src/gallium/winsys/dri/nouveau/nouveau_bo.c b/src/gallium/winsys/dri/nouveau/nouveau_bo.c deleted file mode 100644 index b5942994d94..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_bo.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdint.h> -#include <stdlib.h> -#include <errno.h> -#include <assert.h> - -#include "nouveau_drmif.h" -#include "nouveau_dma.h" -#include "nouveau_local.h" - -static void -nouveau_mem_free(struct nouveau_device *dev, struct drm_nouveau_mem_alloc *ma, - void **map) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - struct drm_nouveau_mem_free mf; - - if (map && *map) { - drmUnmap(*map, ma->size); - *map = NULL; - } - - if (ma->size) { - mf.offset = ma->offset; - mf.flags = ma->flags; - drmCommandWrite(nvdev->fd, DRM_NOUVEAU_MEM_FREE, - &mf, sizeof(mf)); - ma->size = 0; - } -} - -static int -nouveau_mem_alloc(struct nouveau_device *dev, unsigned size, unsigned align, - uint32_t flags, struct drm_nouveau_mem_alloc *ma, void **map) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - int ret; - - ma->alignment = align; - ma->size = size; - ma->flags = flags; - if (map) - ma->flags |= NOUVEAU_MEM_MAPPED; - ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_MEM_ALLOC, ma, - sizeof(struct drm_nouveau_mem_alloc)); - if (ret) - return ret; - - if (map) { - ret = drmMap(nvdev->fd, ma->map_handle, ma->size, map); - if (ret) { - *map = NULL; - nouveau_mem_free(dev, ma, map); - return ret; - } - } - - return 0; -} - -static void -nouveau_bo_tmp_del(void *priv) -{ - struct nouveau_resource *r = priv; - - nouveau_fence_ref(NULL, (struct nouveau_fence **)&r->priv); - nouveau_resource_free(&r); -} - -static unsigned -nouveau_bo_tmp_max(struct nouveau_device_priv *nvdev) -{ - struct nouveau_resource *r = nvdev->sa_heap; - unsigned max = 0; - - while (r) { - if (r->in_use && !nouveau_fence(r->priv)->emitted) { - r = r->next; - continue; - } - - if (max < r->size) - max = r->size; - r = r->next; - } - - return max; -} - -static struct nouveau_resource * -nouveau_bo_tmp(struct nouveau_channel *chan, unsigned size, - struct nouveau_fence *fence) -{ - struct nouveau_device_priv *nvdev = nouveau_device(chan->device); - struct nouveau_resource *r = NULL; - struct nouveau_fence *ref = NULL; - - if (fence) - nouveau_fence_ref(fence, &ref); - else - nouveau_fence_new(chan, &ref); - assert(ref); - - while (nouveau_resource_alloc(nvdev->sa_heap, size, ref, &r)) { - if (nouveau_bo_tmp_max(nvdev) < size) { - nouveau_fence_ref(NULL, &ref); - return NULL; - } - - nouveau_fence_flush(chan); - } - nouveau_fence_signal_cb(ref, nouveau_bo_tmp_del, r); - - return r; -} - -int -nouveau_bo_init(struct nouveau_device *dev) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - int ret; - - ret = nouveau_mem_alloc(dev, 128*1024, 0, NOUVEAU_MEM_AGP | - NOUVEAU_MEM_PCI, &nvdev->sa, &nvdev->sa_map); - if (ret) - return ret; - - ret = nouveau_resource_init(&nvdev->sa_heap, 0, nvdev->sa.size); - if (ret) { - nouveau_mem_free(dev, &nvdev->sa, &nvdev->sa_map); - return ret; - } - - return 0; -} - -void -nouveau_bo_takedown(struct nouveau_device *dev) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - - nouveau_mem_free(dev, &nvdev->sa, &nvdev->sa_map); -} - -int -nouveau_bo_new(struct nouveau_device *dev, uint32_t flags, int align, - int size, struct nouveau_bo **bo) -{ - struct nouveau_bo_priv *nvbo; - int ret; - - if (!dev || !bo || *bo) - return -EINVAL; - - nvbo = calloc(1, sizeof(struct nouveau_bo_priv)); - if (!nvbo) - return -ENOMEM; - nvbo->base.device = dev; - nvbo->base.size = size; - nvbo->base.handle = bo_to_ptr(nvbo); - nvbo->drm.alignment = align; - nvbo->refcount = 1; - - if (flags & NOUVEAU_BO_TILED) { - nvbo->tiled = 1; - if (flags & NOUVEAU_BO_ZTILE) - nvbo->tiled |= 2; - flags &= ~NOUVEAU_BO_TILED; - } - - ret = nouveau_bo_set_status(&nvbo->base, flags); - if (ret) { - free(nvbo); - return ret; - } - - *bo = &nvbo->base; - return 0; -} - -int -nouveau_bo_user(struct nouveau_device *dev, void *ptr, int size, - struct nouveau_bo **bo) -{ - struct nouveau_bo_priv *nvbo; - - if (!dev || !bo || *bo) - return -EINVAL; - - nvbo = calloc(1, sizeof(*nvbo)); - if (!nvbo) - return -ENOMEM; - nvbo->base.device = dev; - - nvbo->sysmem = ptr; - nvbo->user = 1; - - nvbo->base.size = size; - nvbo->base.offset = nvbo->drm.offset; - nvbo->base.handle = bo_to_ptr(nvbo); - nvbo->refcount = 1; - *bo = &nvbo->base; - return 0; -} - -int -nouveau_bo_ref(struct nouveau_device *dev, uint64_t handle, - struct nouveau_bo **bo) -{ - struct nouveau_bo_priv *nvbo = ptr_to_bo(handle); - - if (!dev || !bo || *bo) - return -EINVAL; - - nvbo->refcount++; - *bo = &nvbo->base; - return 0; -} - -static void -nouveau_bo_del_cb(void *priv) -{ - struct nouveau_bo_priv *nvbo = priv; - - nouveau_fence_ref(NULL, &nvbo->fence); - nouveau_mem_free(nvbo->base.device, &nvbo->drm, &nvbo->map); - if (nvbo->sysmem && !nvbo->user) - free(nvbo->sysmem); - free(nvbo); -} - -void -nouveau_bo_del(struct nouveau_bo **bo) -{ - struct nouveau_bo_priv *nvbo; - - if (!bo || !*bo) - return; - nvbo = nouveau_bo(*bo); - *bo = NULL; - - if (--nvbo->refcount) - return; - - if (nvbo->pending) - nouveau_pushbuf_flush(nvbo->pending->channel, 0); - - if (nvbo->fence) - nouveau_fence_signal_cb(nvbo->fence, nouveau_bo_del_cb, nvbo); - else - nouveau_bo_del_cb(nvbo); -} - -int -nouveau_bo_map(struct nouveau_bo *bo, uint32_t flags) -{ - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - - if (!nvbo) - return -EINVAL; - - if (nvbo->pending && - (nvbo->pending->flags & NOUVEAU_BO_WR || flags & NOUVEAU_BO_WR)) { - nouveau_pushbuf_flush(nvbo->pending->channel, 0); - } - - if (flags & NOUVEAU_BO_WR) - nouveau_fence_wait(&nvbo->fence); - else - nouveau_fence_wait(&nvbo->wr_fence); - - if (nvbo->sysmem) - bo->map = nvbo->sysmem; - else - bo->map = nvbo->map; - return 0; -} - -void -nouveau_bo_unmap(struct nouveau_bo *bo) -{ - bo->map = NULL; -} - -static int -nouveau_bo_upload(struct nouveau_bo_priv *nvbo) -{ - if (nvbo->fence) - nouveau_fence_wait(&nvbo->fence); - memcpy(nvbo->map, nvbo->sysmem, nvbo->drm.size); - return 0; -} - -int -nouveau_bo_set_status(struct nouveau_bo *bo, uint32_t flags) -{ - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - struct drm_nouveau_mem_alloc new; - void *new_map = NULL, *new_sysmem = NULL; - unsigned new_flags = 0, ret; - - assert(!bo->map); - - /* Check current memtype vs requested, if they match do nothing */ - if ((nvbo->drm.flags & NOUVEAU_MEM_FB) && (flags & NOUVEAU_BO_VRAM)) - return 0; - if ((nvbo->drm.flags & (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI)) && - (flags & NOUVEAU_BO_GART)) - return 0; - if (nvbo->drm.size == 0 && nvbo->sysmem && (flags & NOUVEAU_BO_LOCAL)) - return 0; - - memset(&new, 0x00, sizeof(new)); - - /* Allocate new memory */ - if (flags & NOUVEAU_BO_VRAM) - new_flags |= NOUVEAU_MEM_FB; - else - if (flags & NOUVEAU_BO_GART) - new_flags |= (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI); - - if (nvbo->tiled && flags) { - new_flags |= NOUVEAU_MEM_TILE; - if (nvbo->tiled & 2) - new_flags |= NOUVEAU_MEM_TILE_ZETA; - } - - if (new_flags) { - ret = nouveau_mem_alloc(bo->device, bo->size, - nvbo->drm.alignment, new_flags, - &new, &new_map); - if (ret) - return ret; - } else - if (!nvbo->user) { - new_sysmem = malloc(bo->size); - } - - /* Copy old -> new */ - /*XXX: use M2MF */ - if (nvbo->sysmem || nvbo->map) { - struct nouveau_pushbuf_bo *pbo = nvbo->pending; - nvbo->pending = NULL; - nouveau_bo_map(bo, NOUVEAU_BO_RD); - memcpy(new_map, bo->map, bo->size); - nouveau_bo_unmap(bo); - nvbo->pending = pbo; - } - - /* Free old memory */ - if (nvbo->fence) - nouveau_fence_wait(&nvbo->fence); - nouveau_mem_free(bo->device, &nvbo->drm, &nvbo->map); - if (nvbo->sysmem && !nvbo->user) - free(nvbo->sysmem); - - nvbo->drm = new; - nvbo->map = new_map; - if (!nvbo->user) - nvbo->sysmem = new_sysmem; - bo->flags = flags; - bo->offset = nvbo->drm.offset; - return 0; -} - -static int -nouveau_bo_validate_user(struct nouveau_channel *chan, struct nouveau_bo *bo, - struct nouveau_fence *fence, uint32_t flags) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_device_priv *nvdev = nouveau_device(chan->device); - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - struct nouveau_resource *r; - - if (nvchan->user_charge + bo->size > nvdev->sa.size) - return 1; - - if (!(flags & NOUVEAU_BO_GART)) - return 1; - - r = nouveau_bo_tmp(chan, bo->size, fence); - if (!r) - return 1; - nvchan->user_charge += bo->size; - - memcpy(nvdev->sa_map + r->start, nvbo->sysmem, bo->size); - - nvbo->offset = nvdev->sa.offset + r->start; - nvbo->flags = NOUVEAU_BO_GART; - return 0; -} - -static int -nouveau_bo_validate_bo(struct nouveau_channel *chan, struct nouveau_bo *bo, - struct nouveau_fence *fence, uint32_t flags) -{ - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - int ret; - - ret = nouveau_bo_set_status(bo, flags); - if (ret) { - nouveau_fence_flush(chan); - - ret = nouveau_bo_set_status(bo, flags); - if (ret) - return ret; - } - - if (nvbo->user) - nouveau_bo_upload(nvbo); - - nvbo->offset = nvbo->drm.offset; - if (nvbo->drm.flags & (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI)) - nvbo->flags = NOUVEAU_BO_GART; - else - nvbo->flags = NOUVEAU_BO_VRAM; - - return 0; -} - -int -nouveau_bo_validate(struct nouveau_channel *chan, struct nouveau_bo *bo, - uint32_t flags) -{ - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - struct nouveau_fence *fence = nouveau_pushbuf(chan->pushbuf)->fence; - int ret; - - assert(bo->map == NULL); - - if (nvbo->user) { - ret = nouveau_bo_validate_user(chan, bo, fence, flags); - if (ret) { - ret = nouveau_bo_validate_bo(chan, bo, fence, flags); - if (ret) - return ret; - } - } else { - ret = nouveau_bo_validate_bo(chan, bo, fence, flags); - if (ret) - return ret; - } - - if (flags & NOUVEAU_BO_WR) - nouveau_fence_ref(fence, &nvbo->wr_fence); - nouveau_fence_ref(fence, &nvbo->fence); - return 0; -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_channel.c b/src/gallium/winsys/dri/nouveau/nouveau_channel.c deleted file mode 100644 index 3b4dcd1ecf2..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_channel.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> - -#include "nouveau_drmif.h" -#include "nouveau_dma.h" - -int -nouveau_channel_alloc(struct nouveau_device *dev, uint32_t fb_ctxdma, - uint32_t tt_ctxdma, struct nouveau_channel **chan) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - struct nouveau_channel_priv *nvchan; - int ret; - - if (!nvdev || !chan || *chan) - return -EINVAL; - - nvchan = calloc(1, sizeof(struct nouveau_channel_priv)); - if (!nvchan) - return -ENOMEM; - nvchan->base.device = dev; - - nvchan->drm.fb_ctxdma_handle = fb_ctxdma; - nvchan->drm.tt_ctxdma_handle = tt_ctxdma; - ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_CHANNEL_ALLOC, - &nvchan->drm, sizeof(nvchan->drm)); - if (ret) { - free(nvchan); - return ret; - } - - nvchan->base.id = nvchan->drm.channel; - if (nouveau_grobj_ref(&nvchan->base, nvchan->drm.fb_ctxdma_handle, - &nvchan->base.vram) || - nouveau_grobj_ref(&nvchan->base, nvchan->drm.tt_ctxdma_handle, - &nvchan->base.gart)) { - nouveau_channel_free((void *)&nvchan); - return -EINVAL; - } - - ret = drmMap(nvdev->fd, nvchan->drm.ctrl, nvchan->drm.ctrl_size, - (void*)&nvchan->user); - if (ret) { - nouveau_channel_free((void *)&nvchan); - return ret; - } - nvchan->put = &nvchan->user[0x40/4]; - nvchan->get = &nvchan->user[0x44/4]; - nvchan->ref_cnt = &nvchan->user[0x48/4]; - - ret = drmMap(nvdev->fd, nvchan->drm.notifier, nvchan->drm.notifier_size, - (drmAddressPtr)&nvchan->notifier_block); - if (ret) { - nouveau_channel_free((void *)&nvchan); - return ret; - } - - ret = drmMap(nvdev->fd, nvchan->drm.cmdbuf, nvchan->drm.cmdbuf_size, - (void*)&nvchan->pushbuf); - if (ret) { - nouveau_channel_free((void *)&nvchan); - return ret; - } - - ret = nouveau_grobj_alloc(&nvchan->base, 0x00000000, 0x0030, - &nvchan->base.nullobj); - if (ret) { - nouveau_channel_free((void *)&nvchan); - return ret; - } - - nouveau_dma_channel_init(&nvchan->base); - nouveau_pushbuf_init(&nvchan->base); - - *chan = &nvchan->base; - return 0; -} - -void -nouveau_channel_free(struct nouveau_channel **chan) -{ - struct nouveau_channel_priv *nvchan; - struct nouveau_device_priv *nvdev; - struct drm_nouveau_channel_free cf; - - if (!chan || !*chan) - return; - nvchan = nouveau_channel(*chan); - *chan = NULL; - nvdev = nouveau_device(nvchan->base.device); - - FIRE_RING_CH(&nvchan->base); - - nouveau_grobj_free(&nvchan->base.vram); - nouveau_grobj_free(&nvchan->base.gart); - nouveau_grobj_free(&nvchan->base.nullobj); - - cf.channel = nvchan->drm.channel; - drmCommandWrite(nvdev->fd, DRM_NOUVEAU_CHANNEL_FREE, &cf, sizeof(cf)); - free(nvchan); -} - - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.c b/src/gallium/winsys/dri/nouveau/nouveau_context.c deleted file mode 100644 index 74413c408f3..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.c +++ /dev/null @@ -1,346 +0,0 @@ -#include "main/glheader.h" -#include "glapi/glthread.h" -#include <GL/internal/glcore.h> -#include "utils.h" - -#include "state_tracker/st_public.h" -#include "state_tracker/st_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "pipe/p_screen.h" - -#include "nouveau_context.h" -#include "nouveau_dri.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" -#include "nouveau_winsys_pipe.h" - -#ifdef DEBUG -static const struct dri_debug_control debug_control[] = { - { "bo", DEBUG_BO }, - { NULL, 0 } -}; -int __nouveau_debug = 0; -#endif - -static void -nouveau_channel_context_destroy(struct nouveau_channel_context *nvc) -{ - nouveau_grobj_free(&nvc->NvCtxSurf2D); - nouveau_grobj_free(&nvc->NvImageBlit); - nouveau_grobj_free(&nvc->NvGdiRect); - nouveau_grobj_free(&nvc->NvM2MF); - nouveau_grobj_free(&nvc->Nv2D); - nouveau_grobj_free(&nvc->NvSwzSurf); - nouveau_grobj_free(&nvc->NvSIFM); - - nouveau_notifier_free(&nvc->sync_notifier); - - nouveau_channel_free(&nvc->channel); - - FREE(nvc); -} - -static struct nouveau_channel_context * -nouveau_channel_context_create(struct nouveau_device *dev) -{ - struct nouveau_channel_context *nvc; - int ret; - - nvc = CALLOC_STRUCT(nouveau_channel_context); - if (!nvc) - return NULL; - - if ((ret = nouveau_channel_alloc(dev, 0x8003d001, 0x8003d002, - &nvc->channel))) { - NOUVEAU_ERR("Error creating GPU channel: %d\n", ret); - nouveau_channel_context_destroy(nvc); - return NULL; - } - - nvc->next_handle = 0x80000000; - - if ((ret = nouveau_notifier_alloc(nvc->channel, nvc->next_handle++, 1, - &nvc->sync_notifier))) { - NOUVEAU_ERR("Error creating channel sync notifier: %d\n", ret); - nouveau_channel_context_destroy(nvc); - return NULL; - } - - switch (dev->chipset & 0xf0) { - case 0x50: - case 0x80: - case 0x90: - ret = nouveau_surface_channel_create_nv50(nvc); - break; - default: - ret = nouveau_surface_channel_create_nv04(nvc); - break; - } - - if (ret) { - NOUVEAU_ERR("Error initialising surface objects: %d\n", ret); - nouveau_channel_context_destroy(nvc); - return NULL; - } - - return nvc; -} - -GLboolean -nouveau_context_create(const __GLcontextModes *glVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate) -{ - __DRIscreenPrivate *driScrnPriv = driContextPriv->driScreenPriv; - struct nouveau_screen *nv_screen = driScrnPriv->private; - struct nouveau_context *nv = CALLOC_STRUCT(nouveau_context); - struct pipe_context *pipe = NULL; - struct st_context *st_share = NULL; - struct nouveau_channel_context *nvc = NULL; - struct nouveau_device *dev = nv_screen->device; - int i; - - if (sharedContextPrivate) { - st_share = ((struct nouveau_context *)sharedContextPrivate)->st; - } - - switch (dev->chipset & 0xf0) { - case 0x10: - case 0x20: - /* NV10 */ - case 0x30: - /* NV30 */ - case 0x40: - case 0x60: - /* NV40 */ - case 0x50: - case 0x80: - case 0x90: - /* G80 */ - break; - default: - NOUVEAU_ERR("Unsupported chipset: NV%02x\n", dev->chipset); - return GL_FALSE; - } - - driContextPriv->driverPrivate = (void *)nv; - nv->nv_screen = nv_screen; - nv->dri_screen = driScrnPriv; - - { - struct nouveau_device_priv *nvdev = nouveau_device(dev); - - nvdev->ctx = driContextPriv->hHWContext; - nvdev->lock = (drmLock *)&driScrnPriv->pSAREA->lock; - } - - driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, - nv->dri_screen->myNum, "nouveau"); -#ifdef DEBUG - __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), - debug_control); -#endif - - /*XXX: Hack up a fake region and buffer object for front buffer. - * This will go away with TTM, replaced with a simple reference - * of the front buffer handle passed to us by the DDX. - */ - { - struct pipe_surface *fb_surf; - struct nouveau_pipe_buffer *fb_buf; - struct nouveau_bo_priv *fb_bo; - - fb_bo = calloc(1, sizeof(struct nouveau_bo_priv)); - fb_bo->drm.offset = nv_screen->front_offset; - fb_bo->drm.flags = NOUVEAU_MEM_FB; - fb_bo->drm.size = nv_screen->front_pitch * - nv_screen->front_height; - fb_bo->refcount = 1; - fb_bo->base.flags = NOUVEAU_BO_PIN | NOUVEAU_BO_VRAM; - fb_bo->base.offset = fb_bo->drm.offset; - fb_bo->base.handle = (unsigned long)fb_bo; - fb_bo->base.size = fb_bo->drm.size; - fb_bo->base.device = nv_screen->device; - - fb_buf = calloc(1, sizeof(struct nouveau_pipe_buffer)); - fb_buf->bo = &fb_bo->base; - - fb_surf = calloc(1, sizeof(struct pipe_surface)); - if (nv_screen->front_cpp == 2) - fb_surf->format = PIPE_FORMAT_R5G6B5_UNORM; - else - fb_surf->format = PIPE_FORMAT_A8R8G8B8_UNORM; - pf_get_block(fb_surf->format, &fb_surf->block); - fb_surf->width = nv_screen->front_pitch / nv_screen->front_cpp; - fb_surf->height = nv_screen->front_height; - fb_surf->stride = fb_surf->width * fb_surf->block.size; - fb_surf->refcount = 1; - fb_surf->buffer = &fb_buf->base; - - nv->frontbuffer = fb_surf; - } - - /* Attempt to share a single channel between multiple contexts from - * a single process. - */ - nvc = nv_screen->nvc; - if (!nvc && st_share) { - struct nouveau_context *snv = st_share->pipe->priv; - if (snv) { - nvc = snv->nvc; - } - } - - /*XXX: temporary - disable multi-context/single-channel on pre-NV4x */ - switch (dev->chipset & 0xf0) { - case 0x40: - case 0x60: - /* NV40 class */ - case 0x50: - case 0x80: - case 0x90: - /* G80 class */ - break; - default: - nvc = NULL; - break; - } - - if (!nvc) { - nvc = nouveau_channel_context_create(dev); - if (!nvc) { - NOUVEAU_ERR("Failed initialising GPU context\n"); - return GL_FALSE; - } - nv_screen->nvc = nvc; - } - - nvc->refcount++; - nv->nvc = nvc; - - /* Find a free slot for a pipe context, allocate a new one if needed */ - nv->pctx_id = -1; - for (i = 0; i < nvc->nr_pctx; i++) { - if (nvc->pctx[i] == NULL) { - nv->pctx_id = i; - break; - } - } - - if (nv->pctx_id < 0) { - nv->pctx_id = nvc->nr_pctx++; - nvc->pctx = - realloc(nvc->pctx, - sizeof(struct pipe_context *) * nvc->nr_pctx); - } - - /* Create pipe */ - switch (dev->chipset & 0xf0) { - case 0x50: - case 0x80: - case 0x90: - if (nouveau_surface_init_nv50(nv)) - return GL_FALSE; - break; - default: - if (nouveau_surface_init_nv04(nv)) - return GL_FALSE; - break; - } - - if (!getenv("NOUVEAU_FORCE_SOFTPIPE")) { - struct pipe_screen *pscreen; - - pipe = nouveau_pipe_create(nv); - if (!pipe) - NOUVEAU_ERR("Couldn't create hw pipe\n"); - pscreen = nvc->pscreen; - - nv->cap.hw_vertex_buffer = - pscreen->get_param(pscreen, NOUVEAU_CAP_HW_VTXBUF); - nv->cap.hw_index_buffer = - pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF); - } - - if (!pipe) { - NOUVEAU_MSG("Using softpipe\n"); - pipe = nouveau_create_softpipe(nv); - if (!pipe) { - NOUVEAU_ERR("Error creating pipe, bailing\n"); - return GL_FALSE; - } - } - - pipe->priv = nv; - nv->st = st_create_context(pipe, glVis, st_share); - return GL_TRUE; -} - -void -nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) -{ - struct nouveau_context *nv = driContextPriv->driverPrivate; - struct nouveau_channel_context *nvc = nv->nvc; - - assert(nv); - - st_finish(nv->st); - st_destroy_context(nv->st); - - if (nv->pctx_id >= 0) { - nvc->pctx[nv->pctx_id] = NULL; - if (--nvc->refcount <= 0) { - nouveau_channel_context_destroy(nvc); - nv->nv_screen->nvc = NULL; - } - } - - free(nv); -} - -GLboolean -nouveau_context_bind(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv) -{ - struct nouveau_context *nv; - struct nouveau_framebuffer *draw, *read; - - if (!driContextPriv) { - st_make_current(NULL, NULL, NULL); - return GL_TRUE; - } - - nv = driContextPriv->driverPrivate; - draw = driDrawPriv->driverPrivate; - read = driReadPriv->driverPrivate; - - st_make_current(nv->st, draw->stfb, read->stfb); - - if ((nv->dri_drawable != driDrawPriv) || - (nv->last_stamp != driDrawPriv->lastStamp)) { - nv->dri_drawable = driDrawPriv; - st_resize_framebuffer(draw->stfb, driDrawPriv->w, - driDrawPriv->h); - nv->last_stamp = driDrawPriv->lastStamp; - } - - if (driDrawPriv != driReadPriv) { - st_resize_framebuffer(read->stfb, driReadPriv->w, - driReadPriv->h); - } - - return GL_TRUE; -} - -GLboolean -nouveau_context_unbind(__DRIcontextPrivate *driContextPriv) -{ - struct nouveau_context *nv = driContextPriv->driverPrivate; - (void)nv; - - st_flush(nv->st, 0, NULL); - return GL_TRUE; -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_context.h b/src/gallium/winsys/dri/nouveau/nouveau_context.h deleted file mode 100644 index 77e2147a2c7..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_context.h +++ /dev/null @@ -1,113 +0,0 @@ -#ifndef __NOUVEAU_CONTEXT_H__ -#define __NOUVEAU_CONTEXT_H__ - -#include "dri_util.h" -#include "xmlconfig.h" - -#include "nouveau/nouveau_winsys.h" -#include "nouveau_drmif.h" -#include "nouveau_dma.h" - -struct nouveau_framebuffer { - struct st_framebuffer *stfb; -}; - -struct nouveau_channel_context { - struct pipe_screen *pscreen; - int refcount; - - unsigned cur_pctx; - unsigned nr_pctx; - struct pipe_context **pctx; - - struct nouveau_channel *channel; - - struct nouveau_notifier *sync_notifier; - - /* Common */ - struct nouveau_grobj *NvM2MF; - /* NV04-NV40 */ - struct nouveau_grobj *NvCtxSurf2D; - struct nouveau_grobj *NvSwzSurf; - struct nouveau_grobj *NvImageBlit; - struct nouveau_grobj *NvGdiRect; - struct nouveau_grobj *NvSIFM; - /* G80 */ - struct nouveau_grobj *Nv2D; - - uint32_t next_handle; - uint32_t next_subchannel; - uint32_t next_sequence; -}; - -struct nouveau_context { - struct st_context *st; - - /* DRI stuff */ - __DRIscreenPrivate *dri_screen; - __DRIdrawablePrivate *dri_drawable; - unsigned int last_stamp; - driOptionCache dri_option_cache; - drm_context_t drm_context; - drmLock drm_lock; - GLboolean locked; - struct nouveau_screen *nv_screen; - struct pipe_surface *frontbuffer; - - struct { - int hw_vertex_buffer; - int hw_index_buffer; - } cap; - - /* Hardware context */ - struct nouveau_channel_context *nvc; - int pctx_id; - - /* pipe_surface accel */ - struct pipe_surface *surf_src, *surf_dst; - unsigned surf_src_offset, surf_dst_offset; - int (*surface_copy_prep)(struct nouveau_context *, - struct pipe_surface *dst, - struct pipe_surface *src); - void (*surface_copy)(struct nouveau_context *, unsigned dx, unsigned dy, - unsigned sx, unsigned sy, unsigned w, unsigned h); - void (*surface_copy_done)(struct nouveau_context *); - int (*surface_fill)(struct nouveau_context *, struct pipe_surface *, - unsigned, unsigned, unsigned, unsigned, unsigned); -}; - -extern GLboolean nouveau_context_create(const __GLcontextModes *, - __DRIcontextPrivate *, void *); -extern void nouveau_context_destroy(__DRIcontextPrivate *); -extern GLboolean nouveau_context_bind(__DRIcontextPrivate *, - __DRIdrawablePrivate *draw, - __DRIdrawablePrivate *read); -extern GLboolean nouveau_context_unbind(__DRIcontextPrivate *); - -#ifdef DEBUG -extern int __nouveau_debug; - -#define DEBUG_BO (1 << 0) - -#define DBG(flag, ...) do { \ - if (__nouveau_debug & (DEBUG_##flag)) \ - NOUVEAU_ERR(__VA_ARGS__); \ -} while(0) -#else -#define DBG(flag, ...) -#endif - -extern void LOCK_HARDWARE(struct nouveau_context *); -extern void UNLOCK_HARDWARE(struct nouveau_context *); - -extern int -nouveau_surface_channel_create_nv04(struct nouveau_channel_context *); -extern int -nouveau_surface_channel_create_nv50(struct nouveau_channel_context *); -extern int nouveau_surface_init_nv04(struct nouveau_context *); -extern int nouveau_surface_init_nv50(struct nouveau_context *); - -extern uint32_t *nouveau_pipe_dma_beginp(struct nouveau_grobj *, int, int); -extern void nouveau_pipe_dma_kickoff(struct nouveau_channel *); - -#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_device.c b/src/gallium/winsys/dri/nouveau/nouveau_device.c deleted file mode 100644 index 0b452fcd02d..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_device.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> - -#include "nouveau_drmif.h" - -int -nouveau_device_open_existing(struct nouveau_device **dev, int close, - int fd, drm_context_t ctx) -{ - struct nouveau_device_priv *nvdev; - int ret; - - if (!dev || *dev) - return -EINVAL; - - nvdev = calloc(1, sizeof(*nvdev)); - if (!nvdev) - return -ENOMEM; - nvdev->fd = fd; - nvdev->ctx = ctx; - nvdev->needs_close = close; - - drmCommandNone(nvdev->fd, DRM_NOUVEAU_CARD_INIT); - - if ((ret = nouveau_bo_init(&nvdev->base))) { - nouveau_device_close((void *)&nvdev); - return ret; - } - - { - uint64_t value; - - ret = nouveau_device_get_param(&nvdev->base, - NOUVEAU_GETPARAM_CHIPSET_ID, - &value); - if (ret) { - nouveau_device_close((void *)&nvdev); - return ret; - } - nvdev->base.chipset = value; - } - - *dev = &nvdev->base; - return 0; -} - -int -nouveau_device_open(struct nouveau_device **dev, const char *busid) -{ - drm_context_t ctx; - int fd, ret; - - if (!dev || *dev) - return -EINVAL; - - fd = drmOpen("nouveau", busid); - if (fd < 0) - return -EINVAL; - - ret = drmCreateContext(fd, &ctx); - if (ret) { - drmClose(fd); - return ret; - } - - ret = nouveau_device_open_existing(dev, 1, fd, ctx); - if (ret) { - drmDestroyContext(fd, ctx); - drmClose(fd); - return ret; - } - - return 0; -} - -void -nouveau_device_close(struct nouveau_device **dev) -{ - struct nouveau_device_priv *nvdev; - - if (dev || !*dev) - return; - nvdev = nouveau_device(*dev); - *dev = NULL; - - nouveau_bo_takedown(&nvdev->base); - - if (nvdev->needs_close) { - drmDestroyContext(nvdev->fd, nvdev->ctx); - drmClose(nvdev->fd); - } - free(nvdev); -} - -int -nouveau_device_get_param(struct nouveau_device *dev, - uint64_t param, uint64_t *value) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - struct drm_nouveau_getparam g; - int ret; - - if (!nvdev || !value) - return -EINVAL; - - g.param = param; - ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_GETPARAM, - &g, sizeof(g)); - if (ret) - return ret; - - *value = g.value; - return 0; -} - -int -nouveau_device_set_param(struct nouveau_device *dev, - uint64_t param, uint64_t value) -{ - struct nouveau_device_priv *nvdev = nouveau_device(dev); - struct drm_nouveau_setparam s; - int ret; - - if (!nvdev) - return -EINVAL; - - s.param = param; - s.value = value; - ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_SETPARAM, - &s, sizeof(s)); - if (ret) - return ret; - - return 0; -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_dma.c b/src/gallium/winsys/dri/nouveau/nouveau_dma.c deleted file mode 100644 index f8a8ba04f6d..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_dma.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdint.h> -#include <assert.h> -#include <errno.h> - -#include "nouveau_drmif.h" -#include "nouveau_dma.h" -#include "nouveau_local.h" - -static inline uint32_t -READ_GET(struct nouveau_channel_priv *nvchan) -{ - return *nvchan->get; -} - -static inline void -WRITE_PUT(struct nouveau_channel_priv *nvchan, uint32_t val) -{ - uint32_t put = ((val << 2) + nvchan->dma->base); - volatile int dum; - - NOUVEAU_DMA_BARRIER; - dum = READ_GET(nvchan); - - *nvchan->put = put; - nvchan->dma->put = val; -#ifdef NOUVEAU_DMA_TRACE - NOUVEAU_MSG("WRITE_PUT %d/0x%08x\n", nvchan->drm.channel, put); -#endif - - NOUVEAU_DMA_BARRIER; -} - -static inline int -LOCAL_GET(struct nouveau_dma_priv *dma, uint32_t *val) -{ - uint32_t get = *val; - - if (get >= dma->base && get <= (dma->base + (dma->max << 2))) { - *val = (get - dma->base) >> 2; - return 1; - } - - return 0; -} - -void -nouveau_dma_channel_init(struct nouveau_channel *chan) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - int i; - - nvchan->dma = &nvchan->dma_master; - nvchan->dma->base = nvchan->drm.put_base; - nvchan->dma->cur = nvchan->dma->put = 0; - nvchan->dma->max = (nvchan->drm.cmdbuf_size >> 2) - 2; - nvchan->dma->free = nvchan->dma->max - nvchan->dma->cur; - - RING_SPACE_CH(chan, RING_SKIPS); - for (i = 0; i < RING_SKIPS; i++) - OUT_RING_CH(chan, 0); -} - -#define CHECK_TIMEOUT() do { \ - if ((NOUVEAU_TIME_MSEC() - t_start) > NOUVEAU_DMA_TIMEOUT) \ - return - EBUSY; \ -} while(0) - -int -nouveau_dma_wait(struct nouveau_channel *chan, int size) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - uint32_t get, t_start; - - FIRE_RING_CH(chan); - - t_start = NOUVEAU_TIME_MSEC(); - while (dma->free < size) { - CHECK_TIMEOUT(); - - get = READ_GET(nvchan); - if (!LOCAL_GET(dma, &get)) - continue; - - if (dma->put >= get) { - dma->free = dma->max - dma->cur; - - if (dma->free < size) { -#ifdef NOUVEAU_DMA_DEBUG - dma->push_free = 1; -#endif - OUT_RING_CH(chan, 0x20000000 | dma->base); - if (get <= RING_SKIPS) { - /*corner case - will be idle*/ - if (dma->put <= RING_SKIPS) - WRITE_PUT(nvchan, - RING_SKIPS + 1); - - do { - CHECK_TIMEOUT(); - get = READ_GET(nvchan); - if (!LOCAL_GET(dma, &get)) - get = 0; - } while (get <= RING_SKIPS); - } - - WRITE_PUT(nvchan, RING_SKIPS); - dma->cur = dma->put = RING_SKIPS; - dma->free = get - (RING_SKIPS + 1); - } - } else { - dma->free = get - dma->cur - 1; - } - } - - return 0; -} - -#ifdef NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF -static void -nouveau_dma_parse_pushbuf(struct nouveau_channel *chan, int get, int put) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - unsigned mthd_count = 0; - - while (get != put) { - uint32_t gpuget = (get << 2) + nvchan->drm.put_base; - uint32_t data; - - if (get < 0 || get >= nvchan->drm.cmdbuf_size) { - NOUVEAU_ERR("DMA_PT 0x%08x\n", gpuget); - assert(0); - } - data = nvchan->pushbuf[get++]; - - if (mthd_count) { - NOUVEAU_MSG("0x%08x 0x%08x\n", gpuget, data); - mthd_count--; - continue; - } - - switch (data & 0x60000000) { - case 0x00000000: - mthd_count = (data >> 18) & 0x7ff; - NOUVEAU_MSG("0x%08x 0x%08x MTHD " - "Sc %d Mthd 0x%04x Size %d\n", - gpuget, data, (data>>13) & 7, data & 0x1ffc, - mthd_count); - break; - case 0x20000000: - get = (data & 0x1ffffffc) >> 2; - NOUVEAU_MSG("0x%08x 0x%08x JUMP 0x%08x\n", - gpuget, data, data & 0x1ffffffc); - continue; - case 0x40000000: - mthd_count = (data >> 18) & 0x7ff; - NOUVEAU_MSG("0x%08x 0x%08x NINC " - "Sc %d Mthd 0x%04x Size %d\n", - gpuget, data, (data>>13) & 7, data & 0x1ffc, - mthd_count); - break; - case 0x60000000: - /* DMA_OPCODE_CALL apparently, doesn't seem to work on - * my NV40 at least.. - */ - /* fall-through */ - default: - NOUVEAU_MSG("DMA_PUSHER 0x%08x 0x%08x\n", - gpuget, data); - assert(0); - } - } -} -#endif - -void -nouveau_dma_kickoff(struct nouveau_channel *chan) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - - if (dma->cur == dma->put) - return; - -#ifdef NOUVEAU_DMA_DEBUG - if (dma->push_free) { - NOUVEAU_ERR("Packet incomplete: %d left\n", dma->push_free); - return; - } -#endif - -#ifdef NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF - nouveau_dma_parse_pushbuf(chan, dma->put, dma->cur); -#endif - - WRITE_PUT(nvchan, dma->cur); -} diff --git a/src/gallium/winsys/dri/nouveau/nouveau_dma.h b/src/gallium/winsys/dri/nouveau/nouveau_dma.h deleted file mode 100644 index cfa6d26e828..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_dma.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_DMA_H__ -#define __NOUVEAU_DMA_H__ - -#include <string.h> -#include "nouveau_drmif.h" -#include "nouveau_local.h" - -#define RING_SKIPS 8 - -extern int nouveau_dma_wait(struct nouveau_channel *chan, int size); -extern void nouveau_dma_subc_bind(struct nouveau_grobj *); -extern void nouveau_dma_channel_init(struct nouveau_channel *); -extern void nouveau_dma_kickoff(struct nouveau_channel *); - -#ifdef NOUVEAU_DMA_DEBUG -static char faulty[1024]; -#endif - -static inline void -nouveau_dma_out(struct nouveau_channel *chan, uint32_t data) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - -#ifdef NOUVEAU_DMA_DEBUG - if (dma->push_free == 0) { - NOUVEAU_ERR("No space left in packet at %s\n", faulty); - return; - } - dma->push_free--; -#endif -#ifdef NOUVEAU_DMA_TRACE - { - uint32_t offset = (dma->cur << 2) + dma->base; - NOUVEAU_MSG("\tOUT_RING %d/0x%08x -> 0x%08x\n", - nvchan->drm.channel, offset, data); - } -#endif - nvchan->pushbuf[dma->cur + (dma->base - nvchan->drm.put_base)/4] = data; - dma->cur++; -} - -static inline void -nouveau_dma_outp(struct nouveau_channel *chan, uint32_t *ptr, int size) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - (void)dma; - -#ifdef NOUVEAU_DMA_DEBUG - if (dma->push_free < size) { - NOUVEAU_ERR("Packet too small. Free=%d, Need=%d\n", - dma->push_free, size); - return; - } -#endif -#ifdef NOUVEAU_DMA_TRACE - while (size--) { - nouveau_dma_out(chan, *ptr); - ptr++; - } -#else - memcpy(&nvchan->pushbuf[dma->cur], ptr, size << 2); -#ifdef NOUVEAU_DMA_DEBUG - dma->push_free -= size; -#endif - dma->cur += size; -#endif -} - -static inline void -nouveau_dma_space(struct nouveau_channel *chan, int size) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - - if (dma->free < size) { - if (nouveau_dma_wait(chan, size) && chan->hang_notify) - chan->hang_notify(chan); - } - dma->free -= size; -#ifdef NOUVEAU_DMA_DEBUG - dma->push_free = size; -#endif -} - -static inline void -nouveau_dma_begin(struct nouveau_channel *chan, struct nouveau_grobj *grobj, - int method, int size, const char* file, int line) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *dma = nvchan->dma; - (void)dma; - -#ifdef NOUVEAU_DMA_TRACE - NOUVEAU_MSG("BEGIN_RING %d/%08x/%d/0x%04x/%d\n", nvchan->drm.channel, - grobj->handle, grobj->subc, method, size); -#endif - -#ifdef NOUVEAU_DMA_DEBUG - if (dma->push_free) { - NOUVEAU_ERR("Previous packet incomplete: %d left at %s\n", - dma->push_free, faulty); - return; - } - sprintf(faulty,"%s:%d",file,line); -#endif - - nouveau_dma_space(chan, (size + 1)); - nouveau_dma_out(chan, (size << 18) | (grobj->subc << 13) | method); -} - -#define RING_SPACE_CH(ch,sz) nouveau_dma_space((ch), (sz)) -#define BEGIN_RING_CH(ch,gr,m,sz) nouveau_dma_begin((ch), (gr), (m), (sz), __FUNCTION__, __LINE__ ) -#define OUT_RING_CH(ch, data) nouveau_dma_out((ch), (data)) -#define OUT_RINGp_CH(ch,ptr,dwords) nouveau_dma_outp((ch), (void*)(ptr), \ - (dwords)) -#define FIRE_RING_CH(ch) nouveau_dma_kickoff((ch)) -#define WAIT_RING_CH(ch,sz) nouveau_dma_wait((ch), (sz)) - -#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_dri.h b/src/gallium/winsys/dri/nouveau/nouveau_dri.h deleted file mode 100644 index 1207c2d609c..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_dri.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _NOUVEAU_DRI_ -#define _NOUVEAU_DRI_ - -#include "xf86drm.h" -#include "drm.h" -#include "nouveau_drm.h" - -struct nouveau_dri { - uint32_t device_id; /**< \brief PCI device ID */ - uint32_t width; /**< \brief width in pixels of display */ - uint32_t height; /**< \brief height in scanlines of display */ - uint32_t depth; /**< \brief depth of display (8, 15, 16, 24) */ - uint32_t bpp; /**< \brief bit depth of display (8, 16, 24, 32) */ - - uint32_t bus_type; /**< \brief ths bus type */ - uint32_t bus_mode; /**< \brief bus mode (used for AGP, maybe also for PCI-E ?) */ - - uint32_t front_offset; /**< \brief front buffer offset */ - uint32_t front_pitch; /**< \brief front buffer pitch */ - uint32_t back_offset; /**< \brief private back buffer offset */ - uint32_t back_pitch; /**< \brief private back buffer pitch */ - uint32_t depth_offset; /**< \brief private depth buffer offset */ - uint32_t depth_pitch; /**< \brief private depth buffer pitch */ - -}; - -#endif - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_drmif.h b/src/gallium/winsys/dri/nouveau/nouveau_drmif.h deleted file mode 100644 index dcd6a5eb0a4..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_drmif.h +++ /dev/null @@ -1,310 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NOUVEAU_DRMIF_H__ -#define __NOUVEAU_DRMIF_H__ - -#include <stdint.h> -#include <xf86drm.h> -#include <nouveau_drm.h> - -#include "nouveau/nouveau_device.h" -#include "nouveau/nouveau_channel.h" -#include "nouveau/nouveau_grobj.h" -#include "nouveau/nouveau_notifier.h" -#include "nouveau/nouveau_bo.h" -#include "nouveau/nouveau_resource.h" -#include "nouveau/nouveau_pushbuf.h" - -struct nouveau_device_priv { - struct nouveau_device base; - - int fd; - drm_context_t ctx; - drmLock *lock; - int needs_close; - - struct drm_nouveau_mem_alloc sa; - void *sa_map; - struct nouveau_resource *sa_heap; -}; -#define nouveau_device(n) ((struct nouveau_device_priv *)(n)) - -extern int -nouveau_device_open_existing(struct nouveau_device **, int close, - int fd, drm_context_t ctx); - -extern int -nouveau_device_open(struct nouveau_device **, const char *busid); - -extern void -nouveau_device_close(struct nouveau_device **); - -extern int -nouveau_device_get_param(struct nouveau_device *, uint64_t param, uint64_t *v); - -extern int -nouveau_device_set_param(struct nouveau_device *, uint64_t param, uint64_t val); - -struct nouveau_fence { - struct nouveau_channel *channel; -}; - -struct nouveau_fence_cb { - struct nouveau_fence_cb *next; - void (*func)(void *); - void *priv; -}; - -struct nouveau_fence_priv { - struct nouveau_fence base; - int refcount; - - struct nouveau_fence *next; - struct nouveau_fence_cb *signal_cb; - - uint32_t sequence; - int emitted; - int signalled; -}; -#define nouveau_fence(n) ((struct nouveau_fence_priv *)(n)) - -extern int -nouveau_fence_new(struct nouveau_channel *, struct nouveau_fence **); - -extern int -nouveau_fence_ref(struct nouveau_fence *, struct nouveau_fence **); - -extern int -nouveau_fence_signal_cb(struct nouveau_fence *, void (*)(void *), void *); - -extern void -nouveau_fence_emit(struct nouveau_fence *); - -extern int -nouveau_fence_wait(struct nouveau_fence **); - -extern void -nouveau_fence_flush(struct nouveau_channel *); - -struct nouveau_pushbuf_reloc { - struct nouveau_pushbuf_bo *pbbo; - uint32_t *ptr; - uint32_t flags; - uint32_t data; - uint32_t vor; - uint32_t tor; -}; - -struct nouveau_pushbuf_bo { - struct nouveau_channel *channel; - struct nouveau_bo *bo; - unsigned flags; - unsigned handled; -}; - -#define NOUVEAU_PUSHBUF_MAX_BUFFERS 1024 -#define NOUVEAU_PUSHBUF_MAX_RELOCS 1024 -struct nouveau_pushbuf_priv { - struct nouveau_pushbuf base; - - struct nouveau_fence *fence; - - unsigned nop_jump; - unsigned start; - unsigned size; - - struct nouveau_pushbuf_bo *buffers; - unsigned nr_buffers; - struct nouveau_pushbuf_reloc *relocs; - unsigned nr_relocs; -}; -#define nouveau_pushbuf(n) ((struct nouveau_pushbuf_priv *)(n)) - -#define pbbo_to_ptr(o) ((uint64_t)(unsigned long)(o)) -#define ptr_to_pbbo(h) ((struct nouveau_pushbuf_bo *)(unsigned long)(h)) -#define pbrel_to_ptr(o) ((uint64_t)(unsigned long)(o)) -#define ptr_to_pbrel(h) ((struct nouveau_pushbuf_reloc *)(unsigned long)(h)) -#define bo_to_ptr(o) ((uint64_t)(unsigned long)(o)) -#define ptr_to_bo(h) ((struct nouveau_bo_priv *)(unsigned long)(h)) - -extern int -nouveau_pushbuf_init(struct nouveau_channel *); - -extern int -nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min); - -extern int -nouveau_pushbuf_emit_reloc(struct nouveau_channel *, void *ptr, - struct nouveau_bo *, uint32_t data, uint32_t flags, - uint32_t vor, uint32_t tor); - -struct nouveau_dma_priv { - uint32_t base; - uint32_t max; - uint32_t cur; - uint32_t put; - uint32_t free; - - int push_free; -} dma; - -struct nouveau_channel_priv { - struct nouveau_channel base; - - struct drm_nouveau_channel_alloc drm; - - uint32_t *pushbuf; - void *notifier_block; - - volatile uint32_t *user; - volatile uint32_t *put; - volatile uint32_t *get; - volatile uint32_t *ref_cnt; - - struct nouveau_dma_priv dma_master; - struct nouveau_dma_priv dma_bufmgr; - struct nouveau_dma_priv *dma; - - struct nouveau_fence *fence_head; - struct nouveau_fence *fence_tail; - uint32_t fence_sequence; - - struct nouveau_pushbuf_priv pb; - - unsigned user_charge; -}; -#define nouveau_channel(n) ((struct nouveau_channel_priv *)(n)) - -extern int -nouveau_channel_alloc(struct nouveau_device *, uint32_t fb, uint32_t tt, - struct nouveau_channel **); - -extern void -nouveau_channel_free(struct nouveau_channel **); - -struct nouveau_grobj_priv { - struct nouveau_grobj base; -}; -#define nouveau_grobj(n) ((struct nouveau_grobj_priv *)(n)) - -extern int nouveau_grobj_alloc(struct nouveau_channel *, uint32_t handle, - int class, struct nouveau_grobj **); -extern int nouveau_grobj_ref(struct nouveau_channel *, uint32_t handle, - struct nouveau_grobj **); -extern void nouveau_grobj_free(struct nouveau_grobj **); - - -struct nouveau_notifier_priv { - struct nouveau_notifier base; - - struct drm_nouveau_notifierobj_alloc drm; - volatile void *map; -}; -#define nouveau_notifier(n) ((struct nouveau_notifier_priv *)(n)) - -extern int -nouveau_notifier_alloc(struct nouveau_channel *, uint32_t handle, int count, - struct nouveau_notifier **); - -extern void -nouveau_notifier_free(struct nouveau_notifier **); - -extern void -nouveau_notifier_reset(struct nouveau_notifier *, int id); - -extern uint32_t -nouveau_notifier_status(struct nouveau_notifier *, int id); - -extern uint32_t -nouveau_notifier_return_val(struct nouveau_notifier *, int id); - -extern int -nouveau_notifier_wait_status(struct nouveau_notifier *, int id, int status, - int timeout); - -struct nouveau_bo_priv { - struct nouveau_bo base; - - struct nouveau_pushbuf_bo *pending; - struct nouveau_fence *fence; - struct nouveau_fence *wr_fence; - - struct drm_nouveau_mem_alloc drm; - void *map; - - void *sysmem; - int user; - - int refcount; - - uint64_t offset; - uint64_t flags; - int tiled; -}; -#define nouveau_bo(n) ((struct nouveau_bo_priv *)(n)) - -extern int -nouveau_bo_init(struct nouveau_device *); - -extern void -nouveau_bo_takedown(struct nouveau_device *); - -extern int -nouveau_bo_new(struct nouveau_device *, uint32_t flags, int align, int size, - struct nouveau_bo **); - -extern int -nouveau_bo_user(struct nouveau_device *, void *ptr, int size, - struct nouveau_bo **); - -extern int -nouveau_bo_ref(struct nouveau_device *, uint64_t handle, struct nouveau_bo **); - -extern int -nouveau_bo_set_status(struct nouveau_bo *, uint32_t flags); - -extern void -nouveau_bo_del(struct nouveau_bo **); - -extern int -nouveau_bo_map(struct nouveau_bo *, uint32_t flags); - -extern void -nouveau_bo_unmap(struct nouveau_bo *); - -extern int -nouveau_bo_validate(struct nouveau_channel *, struct nouveau_bo *, - uint32_t flags); - -extern int -nouveau_resource_init(struct nouveau_resource **heap, unsigned start, - unsigned size); - -extern int -nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv, - struct nouveau_resource **); - -extern void -nouveau_resource_free(struct nouveau_resource **); - -#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_fence.c b/src/gallium/winsys/dri/nouveau/nouveau_fence.c deleted file mode 100644 index e7b0b4ff079..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_fence.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <errno.h> -#include <assert.h> - -#include "nouveau_drmif.h" -#include "nouveau_dma.h" -#include "nouveau_local.h" - -static void -nouveau_fence_del_unsignalled(struct nouveau_fence *fence) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(fence->channel); - struct nouveau_fence *le; - - if (nvchan->fence_head == fence) { - nvchan->fence_head = nouveau_fence(fence)->next; - if (nvchan->fence_head == NULL) - nvchan->fence_tail = NULL; - return; - } - - le = nvchan->fence_head; - while (le && nouveau_fence(le)->next != fence) - le = nouveau_fence(le)->next; - assert(le && nouveau_fence(le)->next == fence); - nouveau_fence(le)->next = nouveau_fence(fence)->next; - if (nvchan->fence_tail == fence) - nvchan->fence_tail = le; -} - -static void -nouveau_fence_del(struct nouveau_fence **fence) -{ - struct nouveau_fence_priv *nvfence; - - if (!fence || !*fence) - return; - nvfence = nouveau_fence(*fence); - *fence = NULL; - - if (--nvfence->refcount) - return; - - if (nvfence->emitted && !nvfence->signalled) { - if (nvfence->signal_cb) { - nvfence->refcount++; - nouveau_fence_wait((void *)&nvfence); - return; - } - - nouveau_fence_del_unsignalled(&nvfence->base); - } - free(nvfence); -} - -int -nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **fence) -{ - struct nouveau_fence_priv *nvfence; - - if (!chan || !fence || *fence) - return -EINVAL; - - nvfence = calloc(1, sizeof(struct nouveau_fence_priv)); - if (!nvfence) - return -ENOMEM; - nvfence->base.channel = chan; - nvfence->refcount = 1; - - *fence = &nvfence->base; - return 0; -} - -int -nouveau_fence_ref(struct nouveau_fence *ref, struct nouveau_fence **fence) -{ - struct nouveau_fence_priv *nvfence; - - if (!fence) - return -EINVAL; - - if (*fence) { - nouveau_fence_del(fence); - *fence = NULL; - } - - if (ref) { - nvfence = nouveau_fence(ref); - nvfence->refcount++; - *fence = &nvfence->base; - } - - return 0; -} - -int -nouveau_fence_signal_cb(struct nouveau_fence *fence, void (*func)(void *), - void *priv) -{ - struct nouveau_fence_priv *nvfence = nouveau_fence(fence); - struct nouveau_fence_cb *cb; - - if (!nvfence || !func) - return -EINVAL; - - cb = malloc(sizeof(struct nouveau_fence_cb)); - if (!cb) - return -ENOMEM; - - cb->func = func; - cb->priv = priv; - cb->next = nvfence->signal_cb; - nvfence->signal_cb = cb; - return 0; -} - -void -nouveau_fence_emit(struct nouveau_fence *fence) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(fence->channel); - struct nouveau_fence_priv *nvfence = nouveau_fence(fence); - - nvfence->emitted = 1; - nvfence->sequence = ++nvchan->fence_sequence; - if (nvfence->sequence == 0xffffffff) - NOUVEAU_ERR("AII wrap unhandled\n"); - - /*XXX: assumes subc 0 is populated */ - RING_SPACE_CH(fence->channel, 2); - OUT_RING_CH (fence->channel, 0x00040050); - OUT_RING_CH (fence->channel, nvfence->sequence); - - if (nvchan->fence_tail) { - nouveau_fence(nvchan->fence_tail)->next = fence; - } else { - nvchan->fence_head = fence; - } - nvchan->fence_tail = fence; -} - -void -nouveau_fence_flush(struct nouveau_channel *chan) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - uint32_t sequence = *nvchan->ref_cnt; - - while (nvchan->fence_head) { - struct nouveau_fence_priv *nvfence; - - nvfence = nouveau_fence(nvchan->fence_head); - if (nvfence->sequence > sequence) - break; - nouveau_fence_del_unsignalled(&nvfence->base); - nvfence->signalled = 1; - - if (nvfence->signal_cb) { - struct nouveau_fence *fence = NULL; - - nouveau_fence_ref(&nvfence->base, &fence); - - while (nvfence->signal_cb) { - struct nouveau_fence_cb *cb; - - cb = nvfence->signal_cb; - nvfence->signal_cb = cb->next; - cb->func(cb->priv); - free(cb); - } - - nouveau_fence_ref(NULL, &fence); - } - } -} - -int -nouveau_fence_wait(struct nouveau_fence **fence) -{ - struct nouveau_fence_priv *nvfence; - - if (!fence || !*fence) - return -EINVAL; - nvfence = nouveau_fence(*fence); - - if (nvfence->emitted) { - while (!nvfence->signalled) - nouveau_fence_flush(nvfence->base.channel); - } - nouveau_fence_ref(NULL, fence); - - return 0; -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_grobj.c b/src/gallium/winsys/dri/nouveau/nouveau_grobj.c deleted file mode 100644 index 51523897d58..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_grobj.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <errno.h> - -#include "nouveau_drmif.h" - -int -nouveau_grobj_alloc(struct nouveau_channel *chan, uint32_t handle, - int class, struct nouveau_grobj **grobj) -{ - struct nouveau_device_priv *nvdev = nouveau_device(chan->device); - struct nouveau_grobj_priv *nvgrobj; - struct drm_nouveau_grobj_alloc g; - int ret; - - if (!nvdev || !grobj || *grobj) - return -EINVAL; - - nvgrobj = calloc(1, sizeof(*nvgrobj)); - if (!nvgrobj) - return -ENOMEM; - nvgrobj->base.channel = chan; - nvgrobj->base.handle = handle; - nvgrobj->base.grclass = class; - - g.channel = chan->id; - g.handle = handle; - g.class = class; - ret = drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GROBJ_ALLOC, - &g, sizeof(g)); - if (ret) { - nouveau_grobj_free((void *)&nvgrobj); - return ret; - } - - *grobj = &nvgrobj->base; - return 0; -} - -int -nouveau_grobj_ref(struct nouveau_channel *chan, uint32_t handle, - struct nouveau_grobj **grobj) -{ - struct nouveau_grobj_priv *nvgrobj; - - if (!chan || !grobj || *grobj) - return -EINVAL; - - nvgrobj = calloc(1, sizeof(struct nouveau_grobj_priv)); - if (!nvgrobj) - return -ENOMEM; - nvgrobj->base.channel = chan; - nvgrobj->base.handle = handle; - nvgrobj->base.grclass = 0; - - *grobj = &nvgrobj->base; - return 0; -} - -void -nouveau_grobj_free(struct nouveau_grobj **grobj) -{ - struct nouveau_device_priv *nvdev; - struct nouveau_channel_priv *chan; - struct nouveau_grobj_priv *nvgrobj; - - if (!grobj || !*grobj) - return; - nvgrobj = nouveau_grobj(*grobj); - *grobj = NULL; - - - chan = nouveau_channel(nvgrobj->base.channel); - nvdev = nouveau_device(chan->base.device); - - if (nvgrobj->base.grclass) { - struct drm_nouveau_gpuobj_free f; - - f.channel = chan->drm.channel; - f.handle = nvgrobj->base.handle; - drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GPUOBJ_FREE, - &f, sizeof(f)); - } - free(nvgrobj); -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_local.h b/src/gallium/winsys/dri/nouveau/nouveau_local.h deleted file mode 100644 index e878a408037..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_local.h +++ /dev/null @@ -1,117 +0,0 @@ -#ifndef __NOUVEAU_LOCAL_H__ -#define __NOUVEAU_LOCAL_H__ - -#include "pipe/p_compiler.h" -#include "nouveau_winsys_pipe.h" -#include <stdio.h> - -struct pipe_buffer; - -/* Debug output */ -#define NOUVEAU_MSG(fmt, args...) do { \ - fprintf(stdout, "nouveau: "fmt, ##args); \ - fflush(stdout); \ -} while(0) - -#define NOUVEAU_ERR(fmt, args...) do { \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); \ - fflush(stderr); \ -} while(0) - -#define NOUVEAU_TIME_MSEC() 0 - -/* User FIFO control */ -//#define NOUVEAU_DMA_TRACE -//#define NOUVEAU_DMA_DEBUG -//#define NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF -#define NOUVEAU_DMA_BARRIER -#define NOUVEAU_DMA_TIMEOUT 2000 - -/* Push buffer access macros */ -static INLINE void -OUT_RING(struct nouveau_channel *chan, unsigned data) -{ - *(chan->pushbuf->cur++) = (data); -} - -static INLINE void -OUT_RINGp(struct nouveau_channel *chan, uint32_t *data, unsigned size) -{ - memcpy(chan->pushbuf->cur, data, size * 4); - chan->pushbuf->cur += size; -} - -static INLINE void -OUT_RINGf(struct nouveau_channel *chan, float f) -{ - union { uint32_t i; float f; } c; - c.f = f; - OUT_RING(chan, c.i); -} - -static INLINE void -BEGIN_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, - unsigned mthd, unsigned size) -{ - if (chan->pushbuf->remaining < (size + 1)) - nouveau_pushbuf_flush(chan, (size + 1)); - OUT_RING(chan, (gr->subc << 13) | (size << 18) | mthd); - chan->pushbuf->remaining -= (size + 1); -} - -static INLINE void -FIRE_RING(struct nouveau_channel *chan) -{ - nouveau_pushbuf_flush(chan, 0); -} - -static INLINE void -BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned subc) -{ - gr->subc = subc; - BEGIN_RING(chan, gr, 0x0000, 1); - OUT_RING (chan, gr->handle); -} - -static INLINE void -OUT_RELOC(struct nouveau_channel *chan, struct nouveau_bo *bo, - unsigned data, unsigned flags, unsigned vor, unsigned tor) -{ - nouveau_pushbuf_emit_reloc(chan, chan->pushbuf->cur++, bo, - data, flags, vor, tor); -} - -/* Raw data + flags depending on FB/TT buffer */ -static INLINE void -OUT_RELOCd(struct nouveau_channel *chan, struct nouveau_bo *bo, - unsigned data, unsigned flags, unsigned vor, unsigned tor) -{ - OUT_RELOC(chan, bo, data, flags | NOUVEAU_BO_OR, vor, tor); -} - -/* FB/TT object handle */ -static INLINE void -OUT_RELOCo(struct nouveau_channel *chan, struct nouveau_bo *bo, - unsigned flags) -{ - OUT_RELOC(chan, bo, 0, flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); -} - -/* Low 32-bits of offset */ -static INLINE void -OUT_RELOCl(struct nouveau_channel *chan, struct nouveau_bo *bo, - unsigned delta, unsigned flags) -{ - OUT_RELOC(chan, bo, delta, flags | NOUVEAU_BO_LOW, 0, 0); -} - -/* High 32-bits of offset */ -static INLINE void -OUT_RELOCh(struct nouveau_channel *chan, struct nouveau_bo *bo, - unsigned delta, unsigned flags) -{ - OUT_RELOC(chan, bo, delta, flags | NOUVEAU_BO_HIGH, 0, 0); -} - -#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_lock.c b/src/gallium/winsys/dri/nouveau/nouveau_lock.c deleted file mode 100644 index 9adb9ac8547..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_lock.c +++ /dev/null @@ -1,94 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/glheader.h" -#include "glapi/glthread.h" -#include <GL/internal/glcore.h> - -#include "nouveau_context.h" -#include "nouveau_screen.h" - -_glthread_DECLARE_STATIC_MUTEX( lockMutex ); - -static void -nouveau_contended_lock(struct nouveau_context *nv, GLuint flags) -{ - __DRIdrawablePrivate *dPriv = nv->dri_drawable; - __DRIscreenPrivate *sPriv = nv->dri_screen; - struct nouveau_screen *nv_screen = nv->nv_screen; - struct nouveau_device *dev = nv_screen->device; - struct nouveau_device_priv *nvdev = nouveau_device(dev); - - drmGetLock(nvdev->fd, nvdev->ctx, flags); - - /* If the window moved, may need to set a new cliprect now. - * - * NOTE: This releases and regains the hw lock, so all state - * checking must be done *after* this call: - */ - if (dPriv) - DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); -} - -/* Lock the hardware and validate our state. - */ -void -LOCK_HARDWARE(struct nouveau_context *nv) -{ - struct nouveau_screen *nv_screen = nv->nv_screen; - struct nouveau_device *dev = nv_screen->device; - struct nouveau_device_priv *nvdev = nouveau_device(dev); - char __ret=0; - - _glthread_LOCK_MUTEX(lockMutex); - assert(!nv->locked); - - DRM_CAS(nvdev->lock, nvdev->ctx, - (DRM_LOCK_HELD | nvdev->ctx), __ret); - - if (__ret) - nouveau_contended_lock(nv, 0); - nv->locked = GL_TRUE; -} - - - /* Unlock the hardware using the global current context - */ -void -UNLOCK_HARDWARE(struct nouveau_context *nv) -{ - struct nouveau_screen *nv_screen = nv->nv_screen; - struct nouveau_device *dev = nv_screen->device; - struct nouveau_device_priv *nvdev = nouveau_device(dev); - - assert(nv->locked); - nv->locked = GL_FALSE; - - DRM_UNLOCK(nvdev->fd, nvdev->lock, nvdev->ctx); - - _glthread_UNLOCK_MUTEX(lockMutex); -} diff --git a/src/gallium/winsys/dri/nouveau/nouveau_notifier.c b/src/gallium/winsys/dri/nouveau/nouveau_notifier.c deleted file mode 100644 index 01e8f38440e..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_notifier.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <errno.h> - -#include "nouveau_drmif.h" -#include "nouveau_local.h" - -#define NOTIFIER(__v) \ - struct nouveau_notifier_priv *nvnotify = nouveau_notifier(notifier); \ - volatile uint32_t *__v = (void*)nvnotify->map + (id * 32) - -int -nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle, - int count, struct nouveau_notifier **notifier) -{ - struct nouveau_notifier_priv *nvnotify; - int ret; - - if (!chan || !notifier || *notifier) - return -EINVAL; - - nvnotify = calloc(1, sizeof(struct nouveau_notifier_priv)); - if (!nvnotify) - return -ENOMEM; - nvnotify->base.channel = chan; - nvnotify->base.handle = handle; - - nvnotify->drm.channel = chan->id; - nvnotify->drm.handle = handle; - nvnotify->drm.count = count; - if ((ret = drmCommandWriteRead(nouveau_device(chan->device)->fd, - DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, - &nvnotify->drm, - sizeof(nvnotify->drm)))) { - nouveau_notifier_free((void *)&nvnotify); - return ret; - } - - nvnotify->map = (void *)nouveau_channel(chan)->notifier_block + - nvnotify->drm.offset; - *notifier = &nvnotify->base; - return 0; -} - -void -nouveau_notifier_free(struct nouveau_notifier **notifier) -{ - - struct nouveau_notifier_priv *nvnotify; - struct nouveau_channel_priv *nvchan; - struct nouveau_device_priv *nvdev; - struct drm_nouveau_gpuobj_free f; - - if (!notifier || !*notifier) - return; - nvnotify = nouveau_notifier(*notifier); - *notifier = NULL; - - nvchan = nouveau_channel(nvnotify->base.channel); - nvdev = nouveau_device(nvchan->base.device); - - f.channel = nvchan->drm.channel; - f.handle = nvnotify->base.handle; - drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GPUOBJ_FREE, &f, sizeof(f)); - free(nvnotify); -} - -void -nouveau_notifier_reset(struct nouveau_notifier *notifier, int id) -{ - NOTIFIER(n); - - n[NV_NOTIFY_TIME_0 /4] = 0x00000000; - n[NV_NOTIFY_TIME_1 /4] = 0x00000000; - n[NV_NOTIFY_RETURN_VALUE/4] = 0x00000000; - n[NV_NOTIFY_STATE /4] = (NV_NOTIFY_STATE_STATUS_IN_PROCESS << - NV_NOTIFY_STATE_STATUS_SHIFT); -} - -uint32_t -nouveau_notifier_status(struct nouveau_notifier *notifier, int id) -{ - NOTIFIER(n); - - return n[NV_NOTIFY_STATE/4] >> NV_NOTIFY_STATE_STATUS_SHIFT; -} - -uint32_t -nouveau_notifier_return_val(struct nouveau_notifier *notifier, int id) -{ - NOTIFIER(n); - - return n[NV_NOTIFY_RETURN_VALUE/4]; -} - -int -nouveau_notifier_wait_status(struct nouveau_notifier *notifier, int id, - int status, int timeout) -{ - NOTIFIER(n); - uint32_t time = 0, t_start = NOUVEAU_TIME_MSEC(); - - while (time <= timeout) { - uint32_t v; - - v = n[NV_NOTIFY_STATE/4] >> NV_NOTIFY_STATE_STATUS_SHIFT; - if (v == status) - return 0; - - if (timeout) - time = NOUVEAU_TIME_MSEC() - t_start; - } - - return -EBUSY; -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c b/src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c deleted file mode 100644 index 815046ba85f..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_pushbuf.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <errno.h> -#include <assert.h> - -#include "nouveau_drmif.h" -#include "nouveau_dma.h" - -#define PB_BUFMGR_DWORDS (4096 / 2) -#define PB_MIN_USER_DWORDS 2048 - -static int -nouveau_pushbuf_space(struct nouveau_channel *chan, unsigned min) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; - - assert((min + 1) <= nvchan->dma->max); - - /* Wait for enough space in push buffer */ - min = min < PB_MIN_USER_DWORDS ? PB_MIN_USER_DWORDS : min; - min += 1; /* a bit extra for the NOP */ - if (nvchan->dma->free < min) - WAIT_RING_CH(chan, min); - - /* Insert NOP, may turn into a jump later */ - RING_SPACE_CH(chan, 1); - nvpb->nop_jump = nvchan->dma->cur; - OUT_RING_CH(chan, 0); - - /* Any remaining space is available to the user */ - nvpb->start = nvchan->dma->cur; - nvpb->size = nvchan->dma->free; - nvpb->base.channel = chan; - nvpb->base.remaining = nvpb->size; - nvpb->base.cur = &nvchan->pushbuf[nvpb->start]; - - /* Create a new fence object for this "frame" */ - nouveau_fence_ref(NULL, &nvpb->fence); - nouveau_fence_new(chan, &nvpb->fence); - - return 0; -} - -int -nouveau_pushbuf_init(struct nouveau_channel *chan) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_dma_priv *m = &nvchan->dma_master; - struct nouveau_dma_priv *b = &nvchan->dma_bufmgr; - int i; - - if (!nvchan) - return -EINVAL; - - /* Reassign last bit of push buffer for a "separate" bufmgr - * ring buffer - */ - m->max -= PB_BUFMGR_DWORDS; - m->free -= PB_BUFMGR_DWORDS; - - b->base = m->base + ((m->max + 2) << 2); - b->max = PB_BUFMGR_DWORDS - 2; - b->cur = b->put = 0; - b->free = b->max - b->cur; - - /* Some NOPs just to be safe - *XXX: RING_SKIPS - */ - nvchan->dma = b; - RING_SPACE_CH(chan, 8); - for (i = 0; i < 8; i++) - OUT_RING_CH(chan, 0); - nvchan->dma = m; - - nouveau_pushbuf_space(chan, 0); - chan->pushbuf = &nvchan->pb.base; - - nvchan->pb.buffers = calloc(NOUVEAU_PUSHBUF_MAX_BUFFERS, - sizeof(struct nouveau_pushbuf_bo)); - nvchan->pb.relocs = calloc(NOUVEAU_PUSHBUF_MAX_RELOCS, - sizeof(struct nouveau_pushbuf_reloc)); - return 0; -} - -static uint32_t -nouveau_pushbuf_calc_reloc(struct nouveau_bo *bo, - struct nouveau_pushbuf_reloc *r) -{ - uint32_t push; - - if (r->flags & NOUVEAU_BO_LOW) { - push = bo->offset + r->data; - } else - if (r->flags & NOUVEAU_BO_HIGH) { - push = (bo->offset + r->data) >> 32; - } else { - push = r->data; - } - - if (r->flags & NOUVEAU_BO_OR) { - if (bo->flags & NOUVEAU_BO_VRAM) - push |= r->vor; - else - push |= r->tor; - } - - return push; -} - -/* This would be our TTM "superioctl" */ -int -nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min) -{ - struct nouveau_channel_priv *nvchan = nouveau_channel(chan); - struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; - int ret, i; - - if (nvpb->base.remaining == nvpb->size) - return 0; - - nouveau_fence_flush(chan); - - nvpb->size -= nvpb->base.remaining; - nvchan->dma->cur += nvpb->size; - nvchan->dma->free -= nvpb->size; - assert(nvchan->dma->cur <= nvchan->dma->max); - - nvchan->dma = &nvchan->dma_bufmgr; - nvchan->pushbuf[nvpb->nop_jump] = 0x20000000 | - (nvchan->dma->base + (nvchan->dma->cur << 2)); - - /* Validate buffers + apply relocations */ - nvchan->user_charge = 0; - for (i = 0; i < nvpb->nr_relocs; i++) { - struct nouveau_pushbuf_reloc *r = &nvpb->relocs[i]; - struct nouveau_pushbuf_bo *pbbo = r->pbbo; - struct nouveau_bo *bo = pbbo->bo; - - /* Validated, mem matches presumed, no relocation necessary */ - if (pbbo->handled & 2) { - if (!(pbbo->handled & 1)) - assert(0); - continue; - } - - /* Not yet validated, do it now */ - if (!(pbbo->handled & 1)) { - ret = nouveau_bo_validate(chan, bo, pbbo->flags); - if (ret) { - assert(0); - return ret; - } - pbbo->handled |= 1; - - if (bo->offset == nouveau_bo(bo)->offset && - bo->flags == nouveau_bo(bo)->flags) { - pbbo->handled |= 2; - continue; - } - bo->offset = nouveau_bo(bo)->offset; - bo->flags = nouveau_bo(bo)->flags; - } - - /* Apply the relocation */ - *r->ptr = nouveau_pushbuf_calc_reloc(bo, r); - } - nvpb->nr_relocs = 0; - - /* Dereference all buffers on validate list */ - for (i = 0; i < nvpb->nr_buffers; i++) { - struct nouveau_pushbuf_bo *pbbo = &nvpb->buffers[i]; - - nouveau_bo(pbbo->bo)->pending = NULL; - nouveau_bo_del(&pbbo->bo); - } - nvpb->nr_buffers = 0; - - /* Switch back to user's ring */ - RING_SPACE_CH(chan, 1); - OUT_RING_CH(chan, 0x20000000 | ((nvpb->start << 2) + - nvchan->dma_master.base)); - nvchan->dma = &nvchan->dma_master; - - /* Fence + kickoff */ - nouveau_fence_emit(nvpb->fence); - FIRE_RING_CH(chan); - - /* Allocate space for next push buffer */ - ret = nouveau_pushbuf_space(chan, min); - assert(!ret); - - return 0; -} - -static struct nouveau_pushbuf_bo * -nouveau_pushbuf_emit_buffer(struct nouveau_channel *chan, struct nouveau_bo *bo) -{ - struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(chan->pushbuf); - struct nouveau_bo_priv *nvbo = nouveau_bo(bo); - struct nouveau_pushbuf_bo *pbbo; - - if (nvbo->pending) - return nvbo->pending; - - if (nvpb->nr_buffers >= NOUVEAU_PUSHBUF_MAX_BUFFERS) - return NULL; - pbbo = nvpb->buffers + nvpb->nr_buffers++; - nvbo->pending = pbbo; - - nouveau_bo_ref(bo->device, bo->handle, &pbbo->bo); - pbbo->channel = chan; - pbbo->flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART; - pbbo->handled = 0; - return pbbo; -} - -int -nouveau_pushbuf_emit_reloc(struct nouveau_channel *chan, void *ptr, - struct nouveau_bo *bo, uint32_t data, uint32_t flags, - uint32_t vor, uint32_t tor) -{ - struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(chan->pushbuf); - struct nouveau_pushbuf_bo *pbbo; - struct nouveau_pushbuf_reloc *r; - - if (nvpb->nr_relocs >= NOUVEAU_PUSHBUF_MAX_RELOCS) - return -ENOMEM; - - pbbo = nouveau_pushbuf_emit_buffer(chan, bo); - if (!pbbo) - return -ENOMEM; - pbbo->flags |= (flags & NOUVEAU_BO_RDWR); - pbbo->flags &= (flags | NOUVEAU_BO_RDWR); - - r = nvpb->relocs + nvpb->nr_relocs++; - r->pbbo = pbbo; - r->ptr = ptr; - r->flags = flags; - r->data = data; - r->vor = vor; - r->tor = tor; - - if (flags & NOUVEAU_BO_DUMMY) - *(uint32_t *)ptr = 0; - else - *(uint32_t *)ptr = nouveau_pushbuf_calc_reloc(bo, r); - return 0; -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_resource.c b/src/gallium/winsys/dri/nouveau/nouveau_resource.c deleted file mode 100644 index 3bbcb5c45e0..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_resource.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright 2007 Nouveau Project - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF - * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <errno.h> - -#include "nouveau_drmif.h" -#include "nouveau_local.h" - -int -nouveau_resource_init(struct nouveau_resource **heap, - unsigned start, unsigned size) -{ - struct nouveau_resource *r; - - r = calloc(1, sizeof(struct nouveau_resource)); - if (!r) - return 1; - - r->start = start; - r->size = size; - *heap = r; - return 0; -} - -int -nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv, - struct nouveau_resource **res) -{ - struct nouveau_resource *r; - - if (!heap || !size || !res || *res) - return 1; - - while (heap) { - if (!heap->in_use && heap->size >= size) { - r = calloc(1, sizeof(struct nouveau_resource)); - if (!r) - return 1; - - r->start = (heap->start + heap->size) - size; - r->size = size; - r->in_use = 1; - r->priv = priv; - - heap->size -= size; - - r->next = heap->next; - if (heap->next) - heap->next->prev = r; - r->prev = heap; - heap->next = r; - - *res = r; - return 0; - } - - heap = heap->next; - } - - return 1; -} - -void -nouveau_resource_free(struct nouveau_resource **res) -{ - struct nouveau_resource *r; - - if (!res || !*res) - return; - r = *res; - *res = NULL; - - r->in_use = 0; - - if (r->next && !r->next->in_use) { - struct nouveau_resource *new = r->next; - - new->prev = r->prev; - if (r->prev) - r->prev->next = new; - new->size += r->size; - new->start = r->start; - - free(r); - r = new; - } - - if (r->prev && !r->prev->in_use) { - r->prev->next = r->next; - if (r->next) - r->next->prev = r->prev; - r->prev->size += r->size; - free(r); - } - -} diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.c b/src/gallium/winsys/dri/nouveau/nouveau_screen.c deleted file mode 100644 index df1fe7e69b4..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_screen.c +++ /dev/null @@ -1,310 +0,0 @@ -#include "utils.h" -#include "vblank.h" -#include "xmlpool.h" - -#include "pipe/p_context.h" -#include "state_tracker/st_public.h" -#include "state_tracker/st_cb_fbo.h" - -#include "nouveau_context.h" -#include "nouveau_drm.h" -#include "nouveau_dri.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" -#include "nouveau_swapbuffers.h" - -#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 11 -#error nouveau_drm.h version does not match expected version -#endif - -/* Extension stuff, enabling of extensions handled by Gallium's GL state - * tracker. But, we still need to define the entry points we want. - */ -#define need_GL_ARB_fragment_program -#define need_GL_ARB_multisample -#define need_GL_ARB_occlusion_query -#define need_GL_ARB_point_parameters -#define need_GL_ARB_shader_objects -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_program -#define need_GL_ARB_vertex_shader -#define need_GL_ARB_vertex_buffer_object -#define need_GL_EXT_compiled_vertex_array -#define need_GL_EXT_fog_coord -#define need_GL_EXT_secondary_color -#define need_GL_EXT_framebuffer_object -#define need_GL_VERSION_2_0 -#define need_GL_VERSION_2_1 -#include "extension_helper.h" - -const struct dri_extension card_extensions[] = -{ - { "GL_ARB_multisample", GL_ARB_multisample_functions }, - { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, - { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, - { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, - { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, - { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, - { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, - { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, - { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, - { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, - { "GL_EXT_compiled_vertex_array", GL_EXT_compiled_vertex_array_functions }, - { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, - { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, - { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, - { NULL, 0 } -}; - -PUBLIC const char __driConfigOptions[] = -DRI_CONF_BEGIN -DRI_CONF_END; -static const GLuint __driNConfigOptions = 0; - -extern const struct dri_extension common_extensions[]; -extern const struct dri_extension nv40_extensions[]; - -static GLboolean -nouveau_screen_create(__DRIscreenPrivate *driScrnPriv) -{ - struct nouveau_dri *nv_dri = driScrnPriv->pDevPriv; - struct nouveau_screen *nv_screen; - int ret; - - if (driScrnPriv->devPrivSize != sizeof(struct nouveau_dri)) { - NOUVEAU_ERR("DRI struct mismatch between DDX/DRI\n"); - return GL_FALSE; - } - - nv_screen = CALLOC_STRUCT(nouveau_screen); - if (!nv_screen) - return GL_FALSE; - nv_screen->driScrnPriv = driScrnPriv; - driScrnPriv->private = (void *)nv_screen; - - driParseOptionInfo(&nv_screen->option_cache, - __driConfigOptions, __driNConfigOptions); - - if ((ret = nouveau_device_open_existing(&nv_screen->device, 0, - driScrnPriv->fd, 0))) { - NOUVEAU_ERR("Failed opening nouveau device: %d\n", ret); - return GL_FALSE; - } - - nv_screen->front_offset = nv_dri->front_offset; - nv_screen->front_pitch = nv_dri->front_pitch * (nv_dri->bpp / 8); - nv_screen->front_cpp = nv_dri->bpp / 8; - nv_screen->front_height = nv_dri->height; - - return GL_TRUE; -} - -static void -nouveau_screen_destroy(__DRIscreenPrivate *driScrnPriv) -{ - struct nouveau_screen *nv_screen = driScrnPriv->private; - - driScrnPriv->private = NULL; - FREE(nv_screen); -} - -static GLboolean -nouveau_create_buffer(__DRIscreenPrivate * driScrnPriv, - __DRIdrawablePrivate * driDrawPriv, - const __GLcontextModes *glVis, GLboolean pixmapBuffer) -{ - struct nouveau_framebuffer *nvfb; - enum pipe_format colour, depth, stencil; - - if (pixmapBuffer) - return GL_FALSE; - - nvfb = CALLOC_STRUCT(nouveau_framebuffer); - if (!nvfb) - return GL_FALSE; - - if (glVis->redBits == 5) - colour = PIPE_FORMAT_R5G6B5_UNORM; - else - colour = PIPE_FORMAT_A8R8G8B8_UNORM; - - if (glVis->depthBits == 16) - depth = PIPE_FORMAT_Z16_UNORM; - else if (glVis->depthBits == 24) - depth = PIPE_FORMAT_Z24S8_UNORM; - else - depth = PIPE_FORMAT_NONE; - - if (glVis->stencilBits == 8) - stencil = PIPE_FORMAT_Z24S8_UNORM; - else - stencil = PIPE_FORMAT_NONE; - - nvfb->stfb = st_create_framebuffer(glVis, colour, depth, stencil, - driDrawPriv->w, driDrawPriv->h, - (void*)nvfb); - if (!nvfb->stfb) { - free(nvfb); - return GL_FALSE; - } - - driDrawPriv->driverPrivate = (void *)nvfb; - return GL_TRUE; -} - -static void -nouveau_destroy_buffer(__DRIdrawablePrivate * driDrawPriv) -{ - struct nouveau_framebuffer *nvfb; - - nvfb = (struct nouveau_framebuffer *)driDrawPriv->driverPrivate; - st_unreference_framebuffer(&nvfb->stfb); - free(nvfb); -} - -static struct __DriverAPIRec -nouveau_api = { - .InitDriver = nouveau_screen_create, - .DestroyScreen = nouveau_screen_destroy, - .CreateContext = nouveau_context_create, - .DestroyContext = nouveau_context_destroy, - .CreateBuffer = nouveau_create_buffer, - .DestroyBuffer = nouveau_destroy_buffer, - .SwapBuffers = nouveau_swap_buffers, - .MakeCurrent = nouveau_context_bind, - .UnbindContext = nouveau_context_unbind, - .GetSwapInfo = NULL, - .GetMSC = NULL, - .WaitForMSC = NULL, - .WaitForSBC = NULL, - .SwapBuffersMSC = NULL, - .CopySubBuffer = nouveau_copy_sub_buffer, - .setTexOffset = NULL -}; - -static __GLcontextModes * -nouveau_fill_in_modes(unsigned pixel_bits, unsigned depth_bits, - unsigned stencil_bits, GLboolean have_back_buffer) -{ - __GLcontextModes * modes; - __GLcontextModes * m; - unsigned num_modes; - unsigned depth_buffer_factor; - unsigned back_buffer_factor; - int i; - - static const struct { - GLenum format; - GLenum type; - } fb_format_array[] = { - { GL_RGB , GL_UNSIGNED_SHORT_5_6_5 }, - { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV }, - { GL_BGR , GL_UNSIGNED_INT_8_8_8_8_REV }, - }; - - /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't - * support pageflipping at all. - */ - static const GLenum back_buffer_modes[] = { - GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML - }; - - uint8_t depth_bits_array[4] = { 0, 16, 24, 24 }; - uint8_t stencil_bits_array[4] = { 0, 0, 0, 8 }; - uint8_t msaa_samples_array[1] = { 0 }; - - depth_buffer_factor = 4; - back_buffer_factor = (have_back_buffer) ? 3 : 1; - - num_modes = ((pixel_bits==16) ? 1 : 2) * - depth_buffer_factor * back_buffer_factor * 4; - modes = (*dri_interface->createContextModes)(num_modes, - sizeof(__GLcontextModes)); - m = modes; - - for (i=((pixel_bits==16)?0:1);i<((pixel_bits==16)?1:3);i++) { - if (!driFillInModes(&m, fb_format_array[i].format, - fb_format_array[i].type, - depth_bits_array, - stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, - back_buffer_factor, - msaa_samples_array, 1, - GLX_TRUE_COLOR)) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - - if (!driFillInModes(&m, fb_format_array[i].format, - fb_format_array[i].type, - depth_bits_array, - stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, - back_buffer_factor, - msaa_samples_array, 1, - GLX_DIRECT_COLOR)) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - } - - return modes; -} -PUBLIC void * -__driCreateNewScreen_20050727(__DRInativeDisplay *dpy, int scrn, - __DRIscreen *psc, const __GLcontextModes * modes, - const __DRIversion * ddx_version, - const __DRIversion * dri_version, - const __DRIversion * drm_version, - const __DRIframebuffer * frame_buffer, - void * pSAREA, int fd, int internal_api_version, - const __DRIinterfaceMethods * interface, - __GLcontextModes ** driver_modes) -{ - __DRIscreenPrivate *psp; - static const __DRIversion ddx_expected = - { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; - static const __DRIversion dri_expected = { 4, 0, 0 }; - static const __DRIversion drm_expected = - { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; - struct nouveau_dri *nv_dri = NULL; - - dri_interface = interface; - - if (!driCheckDriDdxDrmVersions2("nouveau", - dri_version, &dri_expected, - ddx_version, &ddx_expected, - drm_version, &drm_expected)) { - return NULL; - } - - if (drm_expected.patch != drm_version->patch) { - fprintf(stderr, "Incompatible DRM patch level.\n" - "Expected: %d\n" "Current : %d\n", - drm_expected.patch, drm_version->patch); - return NULL; - } - - psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, - ddx_version, dri_version, drm_version, - frame_buffer, pSAREA, fd, - internal_api_version, - &nouveau_api); - if (psp == NULL) - return NULL; - nv_dri = psp->pDevPriv; - - *driver_modes = nouveau_fill_in_modes(nv_dri->bpp, - (nv_dri->bpp == 16) ? 16 : 24, - (nv_dri->bpp == 16) ? 0 : 8, - 1); - - driInitExtensions(NULL, card_extensions, GL_FALSE); - - return (void *)psp; -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_screen.h b/src/gallium/winsys/dri/nouveau/nouveau_screen.h deleted file mode 100644 index 388d6be9bbc..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_screen.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef __NOUVEAU_SCREEN_H__ -#define __NOUVEAU_SCREEN_H__ - -#include "xmlconfig.h" - -struct nouveau_screen { - __DRIscreenPrivate *driScrnPriv; - driOptionCache option_cache; - - struct nouveau_device *device; - - uint32_t front_offset; - uint32_t front_pitch; - uint32_t front_cpp; - uint32_t front_height; - - void *nvc; -}; - -#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c b/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c deleted file mode 100644 index 70e0104e83b..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.c +++ /dev/null @@ -1,86 +0,0 @@ -#include "main/glheader.h" -#include "glapi/glthread.h" -#include <GL/internal/glcore.h> - -#include "pipe/p_context.h" -#include "state_tracker/st_public.h" -#include "state_tracker/st_context.h" -#include "state_tracker/st_cb_fbo.h" - -#include "nouveau_context.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" -#include "nouveau_swapbuffers.h" - -void -nouveau_copy_buffer(__DRIdrawablePrivate *dPriv, struct pipe_surface *surf, - const drm_clip_rect_t *rect) -{ - struct nouveau_context *nv = dPriv->driContextPriv->driverPrivate; - drm_clip_rect_t *pbox; - int nbox, i; - - LOCK_HARDWARE(nv); - if (!dPriv->numClipRects) { - UNLOCK_HARDWARE(nv); - return; - } - pbox = dPriv->pClipRects; - nbox = dPriv->numClipRects; - - nv->surface_copy_prep(nv, nv->frontbuffer, surf); - for (i = 0; i < nbox; i++, pbox++) { - int sx, sy, dx, dy, w, h; - - sx = pbox->x1 - dPriv->x; - sy = pbox->y1 - dPriv->y; - dx = pbox->x1; - dy = pbox->y1; - w = pbox->x2 - pbox->x1; - h = pbox->y2 - pbox->y1; - - nv->surface_copy(nv, dx, dy, sx, sy, w, h); - } - - FIRE_RING(nv->nvc->channel); - UNLOCK_HARDWARE(nv); - - if (nv->last_stamp != dPriv->lastStamp) { - struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; - st_resize_framebuffer(nvfb->stfb, dPriv->w, dPriv->h); - nv->last_stamp = dPriv->lastStamp; - } -} - -void -nouveau_copy_sub_buffer(__DRIdrawablePrivate *dPriv, int x, int y, int w, int h) -{ - struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; - struct pipe_surface *surf; - - surf = st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT); - if (surf) { - drm_clip_rect_t rect; - rect.x1 = x; - rect.y1 = y; - rect.x2 = x + w; - rect.y2 = y + h; - - st_notify_swapbuffers(nvfb->stfb); - nouveau_copy_buffer(dPriv, surf, &rect); - } -} - -void -nouveau_swap_buffers(__DRIdrawablePrivate *dPriv) -{ - struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; - struct pipe_surface *surf; - - surf = st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT); - if (surf) { - st_notify_swapbuffers(nvfb->stfb); - nouveau_copy_buffer(dPriv, surf, NULL); - } -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.h b/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.h deleted file mode 100644 index 825d3da6da5..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_swapbuffers.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __NOUVEAU_SWAPBUFFERS_H__ -#define __NOUVEAU_SWAPBUFFERS_H__ - -extern void nouveau_copy_buffer(__DRIdrawablePrivate *, struct pipe_surface *, - const drm_clip_rect_t *); -extern void nouveau_copy_sub_buffer(__DRIdrawablePrivate *, - int x, int y, int w, int h); -extern void nouveau_swap_buffers(__DRIdrawablePrivate *); - -#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys.c deleted file mode 100644 index 5eabbc88939..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys.c +++ /dev/null @@ -1,158 +0,0 @@ -#include "pipe/p_util.h" - -#include "nouveau_context.h" -#include "nouveau_screen.h" -#include "nouveau_winsys_pipe.h" - -#include "nouveau/nouveau_winsys.h" - -static int -nouveau_pipe_notifier_alloc(struct nouveau_winsys *nvws, int count, - struct nouveau_notifier **notify) -{ - struct nouveau_context *nv = nvws->nv; - - return nouveau_notifier_alloc(nv->nvc->channel, nv->nvc->next_handle++, - count, notify); -} - -static int -nouveau_pipe_grobj_alloc(struct nouveau_winsys *nvws, int grclass, - struct nouveau_grobj **grobj) -{ - struct nouveau_context *nv = nvws->nv; - struct nouveau_channel *chan = nv->nvc->channel; - int ret; - - ret = nouveau_grobj_alloc(chan, nv->nvc->next_handle++, - grclass, grobj); - if (ret) - return ret; - - assert(nv->nvc->next_subchannel < 7); - BIND_RING(chan, *grobj, nv->nvc->next_subchannel++); - return 0; -} - -static int -nouveau_pipe_surface_copy(struct nouveau_winsys *nvws, struct pipe_surface *dst, - unsigned dx, unsigned dy, struct pipe_surface *src, - unsigned sx, unsigned sy, unsigned w, unsigned h) -{ - struct nouveau_context *nv = nvws->nv; - - if (nv->surface_copy_prep(nv, dst, src)) - return 1; - nv->surface_copy(nv, dx, dy, sx, sy, w, h); - nv->surface_copy_done(nv); - - return 0; -} - -static int -nouveau_pipe_surface_fill(struct nouveau_winsys *nvws, struct pipe_surface *dst, - unsigned dx, unsigned dy, unsigned w, unsigned h, - unsigned value) -{ - if (nvws->nv->surface_fill(nvws->nv, dst, dx, dy, w, h, value)) - return 1; - return 0; -} - -static int -nouveau_pipe_push_reloc(struct nouveau_winsys *nvws, void *ptr, - struct pipe_buffer *buf, uint32_t data, - uint32_t flags, uint32_t vor, uint32_t tor) -{ - return nouveau_pushbuf_emit_reloc(nvws->channel, ptr, - nouveau_buffer(buf)->bo, - data, flags, vor, tor); -} - -static int -nouveau_pipe_push_flush(struct nouveau_winsys *nvws, unsigned size, - struct pipe_fence_handle **fence) -{ - if (fence) { - struct nouveau_pushbuf *pb = nvws->channel->pushbuf; - struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(pb); - struct nouveau_fence *ref = NULL; - - nouveau_fence_ref(nvpb->fence, &ref); - *fence = (struct pipe_fence_handle *)ref; - } - - return nouveau_pushbuf_flush(nvws->channel, size); -} - -struct pipe_context * -nouveau_pipe_create(struct nouveau_context *nv) -{ - struct nouveau_channel_context *nvc = nv->nvc; - struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys); - struct pipe_screen *(*hws_create)(struct pipe_winsys *, - struct nouveau_winsys *); - struct pipe_context *(*hw_create)(struct pipe_screen *, unsigned); - struct pipe_winsys *ws; - unsigned chipset = nv->nv_screen->device->chipset; - - if (!nvws) - return NULL; - - switch (chipset & 0xf0) { - case 0x10: - case 0x20: - hws_create = nv10_screen_create; - hw_create = nv10_create; - break; - case 0x30: - hws_create = nv30_screen_create; - hw_create = nv30_create; - break; - case 0x40: - case 0x60: - hws_create = nv40_screen_create; - hw_create = nv40_create; - break; - case 0x50: - case 0x80: - case 0x90: - hws_create = nv50_screen_create; - hw_create = nv50_create; - break; - default: - NOUVEAU_ERR("Unknown chipset NV%02x\n", chipset); - return NULL; - } - - nvws->nv = nv; - nvws->channel = nv->nvc->channel; - - nvws->res_init = nouveau_resource_init; - nvws->res_alloc = nouveau_resource_alloc; - nvws->res_free = nouveau_resource_free; - - nvws->push_reloc = nouveau_pipe_push_reloc; - nvws->push_flush = nouveau_pipe_push_flush; - - nvws->grobj_alloc = nouveau_pipe_grobj_alloc; - nvws->grobj_free = nouveau_grobj_free; - - nvws->notifier_alloc = nouveau_pipe_notifier_alloc; - nvws->notifier_free = nouveau_notifier_free; - nvws->notifier_reset = nouveau_notifier_reset; - nvws->notifier_status = nouveau_notifier_status; - nvws->notifier_retval = nouveau_notifier_return_val; - nvws->notifier_wait = nouveau_notifier_wait_status; - - nvws->surface_copy = nouveau_pipe_surface_copy; - nvws->surface_fill = nouveau_pipe_surface_fill; - - ws = nouveau_create_pipe_winsys(nv); - - if (!nvc->pscreen) - nvc->pscreen = hws_create(ws, nvws); - nvc->pctx[nv->pctx_id] = hw_create(nvc->pscreen, nv->pctx_id); - return nvc->pctx[nv->pctx_id]; -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c deleted file mode 100644 index 8a2870a2ff2..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.c +++ /dev/null @@ -1,205 +0,0 @@ -#include "pipe/p_winsys.h" -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" - -#include "nouveau_context.h" -#include "nouveau_local.h" -#include "nouveau_screen.h" -#include "nouveau_swapbuffers.h" -#include "nouveau_winsys_pipe.h" - -static void -nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, - void *context_private) -{ - struct nouveau_context *nv = context_private; - __DRIdrawablePrivate *dPriv = nv->dri_drawable; - - nouveau_copy_buffer(dPriv, surf, NULL); -} - -static const char * -nouveau_get_name(struct pipe_winsys *pws) -{ - return "Nouveau/DRI"; -} - -static struct pipe_buffer * -nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, - unsigned usage, unsigned size) -{ - struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; - struct nouveau_context *nv = nvpws->nv; - struct nouveau_device *dev = nv->nv_screen->device; - struct nouveau_pipe_buffer *nvbuf; - uint32_t flags; - - nvbuf = calloc(1, sizeof(*nvbuf)); - if (!nvbuf) - return NULL; - nvbuf->base.refcount = 1; - nvbuf->base.alignment = alignment; - nvbuf->base.usage = usage; - nvbuf->base.size = size; - - flags = NOUVEAU_BO_LOCAL; - - if (usage & PIPE_BUFFER_USAGE_PIXEL) { - if (usage & NOUVEAU_BUFFER_USAGE_TEXTURE) - flags |= NOUVEAU_BO_GART; - flags |= NOUVEAU_BO_VRAM; - - switch (dev->chipset & 0xf0) { - case 0x50: - case 0x80: - case 0x90: - flags |= NOUVEAU_BO_TILED; - if (usage & NOUVEAU_BUFFER_USAGE_ZETA) - flags |= NOUVEAU_BO_ZTILE; - break; - default: - break; - } - } - - if (usage & PIPE_BUFFER_USAGE_VERTEX) { - if (nv->cap.hw_vertex_buffer) - flags |= NOUVEAU_BO_GART; - } - - if (usage & PIPE_BUFFER_USAGE_INDEX) { - if (nv->cap.hw_index_buffer) - flags |= NOUVEAU_BO_GART; - } - - if (nouveau_bo_new(dev, flags, alignment, size, &nvbuf->bo)) { - free(nvbuf); - return NULL; - } - - return &nvbuf->base; -} - -static struct pipe_buffer * -nouveau_pipe_bo_user_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) -{ - struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; - struct nouveau_device *dev = nvpws->nv->nv_screen->device; - struct nouveau_pipe_buffer *nvbuf; - - nvbuf = calloc(1, sizeof(*nvbuf)); - if (!nvbuf) - return NULL; - nvbuf->base.refcount = 1; - nvbuf->base.size = bytes; - - if (nouveau_bo_user(dev, ptr, bytes, &nvbuf->bo)) { - free(nvbuf); - return NULL; - } - - return &nvbuf->base; -} - -static void -nouveau_pipe_bo_del(struct pipe_winsys *ws, struct pipe_buffer *buf) -{ - struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); - - nouveau_bo_del(&nvbuf->bo); - free(nvbuf); -} - -static void * -nouveau_pipe_bo_map(struct pipe_winsys *pws, struct pipe_buffer *buf, - unsigned flags) -{ - struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); - uint32_t map_flags = 0; - - if (flags & PIPE_BUFFER_USAGE_CPU_READ) - map_flags |= NOUVEAU_BO_RD; - if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) - map_flags |= NOUVEAU_BO_WR; - - if (nouveau_bo_map(nvbuf->bo, map_flags)) - return NULL; - return nvbuf->bo->map; -} - -static void -nouveau_pipe_bo_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) -{ - struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); - - nouveau_bo_unmap(nvbuf->bo); -} - -static INLINE struct nouveau_fence * -nouveau_pipe_fence(struct pipe_fence_handle *pfence) -{ - return (struct nouveau_fence *)pfence; -} - -static void -nouveau_pipe_fence_reference(struct pipe_winsys *ws, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *pfence) -{ - nouveau_fence_ref((void *)pfence, (void *)ptr); -} - -static int -nouveau_pipe_fence_signalled(struct pipe_winsys *ws, - struct pipe_fence_handle *pfence, unsigned flag) -{ - struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)ws; - struct nouveau_fence *fence = nouveau_pipe_fence(pfence); - - if (nouveau_fence(fence)->signalled == 0) - nouveau_fence_flush(nvpws->nv->nvc->channel); - - return !nouveau_fence(fence)->signalled; -} - -static int -nouveau_pipe_fence_finish(struct pipe_winsys *ws, - struct pipe_fence_handle *pfence, unsigned flag) -{ - struct nouveau_fence *fence = nouveau_pipe_fence(pfence); - struct nouveau_fence *ref = NULL; - - nouveau_fence_ref(fence, &ref); - return nouveau_fence_wait(&ref); -} - -struct pipe_winsys * -nouveau_create_pipe_winsys(struct nouveau_context *nv) -{ - struct nouveau_pipe_winsys *nvpws; - struct pipe_winsys *pws; - - nvpws = CALLOC_STRUCT(nouveau_pipe_winsys); - if (!nvpws) - return NULL; - nvpws->nv = nv; - pws = &nvpws->pws; - - pws->flush_frontbuffer = nouveau_flush_frontbuffer; - - pws->buffer_create = nouveau_pipe_bo_create; - pws->buffer_destroy = nouveau_pipe_bo_del; - pws->user_buffer_create = nouveau_pipe_bo_user_create; - pws->buffer_map = nouveau_pipe_bo_map; - pws->buffer_unmap = nouveau_pipe_bo_unmap; - - pws->fence_reference = nouveau_pipe_fence_reference; - pws->fence_signalled = nouveau_pipe_fence_signalled; - pws->fence_finish = nouveau_pipe_fence_finish; - - pws->get_name = nouveau_get_name; - - return &nvpws->pws; -} - diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.h b/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.h deleted file mode 100644 index 6a03ac0d773..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys_pipe.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef NOUVEAU_PIPE_WINSYS_H -#define NOUVEAU_PIPE_WINSYS_H - -#include "pipe/p_context.h" -#include "pipe/p_winsys.h" -#include "nouveau_context.h" - -struct nouveau_pipe_buffer { - struct pipe_buffer base; - struct nouveau_bo *bo; -}; - -static inline struct nouveau_pipe_buffer * -nouveau_buffer(struct pipe_buffer *buf) -{ - return (struct nouveau_pipe_buffer *)buf; -} - -struct nouveau_pipe_winsys { - struct pipe_winsys pws; - - struct nouveau_context *nv; -}; - -extern struct pipe_winsys * -nouveau_create_pipe_winsys(struct nouveau_context *nv); - -struct pipe_context * -nouveau_create_softpipe(struct nouveau_context *nv); - -struct pipe_context * -nouveau_pipe_create(struct nouveau_context *nv); - -#endif diff --git a/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c b/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c deleted file mode 100644 index 704f6c77506..00000000000 --- a/src/gallium/winsys/dri/nouveau/nouveau_winsys_softpipe.c +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> - */ - -#include "imports.h" - -#include "pipe/p_defines.h" -#include "pipe/p_format.h" -#include "softpipe/sp_winsys.h" - -#include "nouveau_context.h" -#include "nouveau_winsys_pipe.h" - -struct nouveau_softpipe_winsys { - struct softpipe_winsys sws; - struct nouveau_context *nv; -}; - -/** - * Return list of surface formats supported by this driver. - */ -static boolean -nouveau_is_format_supported(struct softpipe_winsys *sws, uint format) -{ - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return TRUE; - default: - break; - }; - - return FALSE; -} - -struct pipe_context * -nouveau_create_softpipe(struct nouveau_context *nv) -{ - struct nouveau_softpipe_winsys *nvsws; - struct pipe_screen *pscreen; - struct pipe_winsys *ws; - - ws = nouveau_create_pipe_winsys(nv); - if (!ws) - return NULL; - pscreen = softpipe_create_screen(ws); - - nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys); - if (!nvsws) - return NULL; - - nvsws->sws.is_format_supported = nouveau_is_format_supported; - nvsws->nv = nv; - - return softpipe_create(pscreen, ws, &nvsws->sws); -} - diff --git a/src/gallium/winsys/dri/nouveau/nv04_surface.c b/src/gallium/winsys/dri/nouveau/nv04_surface.c deleted file mode 100644 index 8fa3d106c8c..00000000000 --- a/src/gallium/winsys/dri/nouveau/nv04_surface.c +++ /dev/null @@ -1,314 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_format.h" - -#include "nouveau_context.h" - -static INLINE int -nv04_surface_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; - case PIPE_FORMAT_R5G6B5_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; - default: - return -1; - } -} - -static INLINE int -nv04_rect_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8_UNORM: - return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; - case PIPE_FORMAT_R5G6B5_UNORM: - return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; - default: - return -1; - } -} - -static void -nv04_surface_copy_m2mf(struct nouveau_context *nv, unsigned dx, unsigned dy, - unsigned sx, unsigned sy, unsigned w, unsigned h) -{ - struct nouveau_channel *chan = nv->nvc->channel; - struct pipe_surface *dst = nv->surf_dst; - struct pipe_surface *src = nv->surf_src; - unsigned dst_offset, src_offset; - - dst_offset = dst->offset + (dy * dst->stride) + (dx * dst->block.size); - src_offset = src->offset + (sy * src->stride) + (sx * src->block.size); - - while (h) { - int count = (h > 2047) ? 2047 : h; - - BEGIN_RING(chan, nv->nvc->NvM2MF, - NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); - OUT_RELOCl(chan, nouveau_buffer(src->buffer)->bo, src_offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst_offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, src->stride); - OUT_RING (chan, dst->stride); - OUT_RING (chan, w * src->block.size); - OUT_RING (chan, count); - OUT_RING (chan, 0x0101); - OUT_RING (chan, 0); - - h -= count; - src_offset += src->stride * count; - dst_offset += dst->stride * count; - } -} - -static void -nv04_surface_copy_blit(struct nouveau_context *nv, unsigned dx, unsigned dy, - unsigned sx, unsigned sy, unsigned w, unsigned h) -{ - struct nouveau_channel *chan = nv->nvc->channel; - - BEGIN_RING(chan, nv->nvc->NvImageBlit, 0x0300, 3); - OUT_RING (chan, (sy << 16) | sx); - OUT_RING (chan, (dy << 16) | dx); - OUT_RING (chan, ( h << 16) | w); -} - -static int -nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, - struct pipe_surface *src) -{ - struct nouveau_channel *chan = nv->nvc->channel; - int format; - - if (src->format != dst->format) - return 1; - - /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback - * to NV_MEMORY_TO_MEMORY_FORMAT in this case. - */ - if ((src->offset & 63) || (dst->offset & 63)) { - BEGIN_RING(nv->nvc->channel, nv->nvc->NvM2MF, - NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); - OUT_RELOCo(chan, nouveau_buffer(src->buffer)->bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - nv->surface_copy = nv04_surface_copy_m2mf; - nv->surf_dst = dst; - nv->surf_src = src; - return 0; - - } - - if ((format = nv04_surface_format(dst->format)) < 0) { - NOUVEAU_ERR("Bad surface format 0x%x\n", dst->format); - return 1; - } - nv->surface_copy = nv04_surface_copy_blit; - - BEGIN_RING(chan, nv->nvc->NvCtxSurf2D, - NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RELOCo(chan, nouveau_buffer(src->buffer)->bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, nv->nvc->NvCtxSurf2D, - NV04_CONTEXT_SURFACES_2D_FORMAT, 4); - OUT_RING (chan, format); - OUT_RING (chan, (dst->stride << 16) | src->stride); - OUT_RELOCl(chan, nouveau_buffer(src->buffer)->bo, src->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - return 0; -} - -static void -nv04_surface_copy_done(struct nouveau_context *nv) -{ - FIRE_RING(nv->nvc->channel); -} - -static int -nv04_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, - unsigned dx, unsigned dy, unsigned w, unsigned h, - unsigned value) -{ - struct nouveau_channel *chan = nv->nvc->channel; - struct nouveau_grobj *surf2d = nv->nvc->NvCtxSurf2D; - struct nouveau_grobj *rect = nv->nvc->NvGdiRect; - int cs2d_format, gdirect_format; - - if ((cs2d_format = nv04_surface_format(dst->format)) < 0) { - NOUVEAU_ERR("Bad format = %d\n", dst->format); - return 1; - } - - if ((gdirect_format = nv04_rect_format(dst->format)) < 0) { - NOUVEAU_ERR("Bad format = %d\n", dst->format); - return 1; - } - - BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); - OUT_RING (chan, cs2d_format); - OUT_RING (chan, (dst->stride << 16) | dst->stride); - OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); - OUT_RING (chan, gdirect_format); - BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); - OUT_RING (chan, value); - BEGIN_RING(chan, rect, - NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); - OUT_RING (chan, (dx << 16) | dy); - OUT_RING (chan, ( w << 16) | h); - - FIRE_RING(chan); - return 0; -} - -int -nouveau_surface_channel_create_nv04(struct nouveau_channel_context *nvc) -{ - struct nouveau_channel *chan = nvc->channel; - unsigned chipset = nvc->channel->device->chipset, class; - int ret; - - if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, 0x39, - &nvc->NvM2MF))) { - NOUVEAU_ERR("Error creating m2mf object: %d\n", ret); - return 1; - } - BIND_RING (chan, nvc->NvM2MF, nvc->next_subchannel++); - BEGIN_RING(chan, nvc->NvM2MF, - NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); - OUT_RING (chan, nvc->sync_notifier->handle); - - class = chipset < 0x10 ? NV04_CONTEXT_SURFACES_2D : - NV10_CONTEXT_SURFACES_2D; - if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, - &nvc->NvCtxSurf2D))) { - NOUVEAU_ERR("Error creating 2D surface object: %d\n", ret); - return 1; - } - BIND_RING (chan, nvc->NvCtxSurf2D, nvc->next_subchannel++); - BEGIN_RING(chan, nvc->NvCtxSurf2D, - NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RING (chan, nvc->channel->vram->handle); - OUT_RING (chan, nvc->channel->vram->handle); - - class = chipset < 0x10 ? NV04_IMAGE_BLIT : NV12_IMAGE_BLIT; - if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, - &nvc->NvImageBlit))) { - NOUVEAU_ERR("Error creating blit object: %d\n", ret); - return 1; - } - BIND_RING (chan, nvc->NvImageBlit, nvc->next_subchannel++); - BEGIN_RING(chan, nvc->NvImageBlit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1); - OUT_RING (chan, nvc->sync_notifier->handle); - BEGIN_RING(chan, nvc->NvImageBlit, NV04_IMAGE_BLIT_SURFACE, 1); - OUT_RING (chan, nvc->NvCtxSurf2D->handle); - BEGIN_RING(chan, nvc->NvImageBlit, NV04_IMAGE_BLIT_OPERATION, 1); - OUT_RING (chan, NV04_IMAGE_BLIT_OPERATION_SRCCOPY); - - class = NV04_GDI_RECTANGLE_TEXT; - if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, - &nvc->NvGdiRect))) { - NOUVEAU_ERR("Error creating rect object: %d\n", ret); - return 1; - } - BIND_RING (chan, nvc->NvGdiRect, nvc->next_subchannel++); - BEGIN_RING(chan, nvc->NvGdiRect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); - OUT_RING (chan, nvc->sync_notifier->handle); - BEGIN_RING(chan, nvc->NvGdiRect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); - OUT_RING (chan, nvc->NvCtxSurf2D->handle); - BEGIN_RING(chan, nvc->NvGdiRect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); - OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); - BEGIN_RING(chan, nvc->NvGdiRect, - NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); - OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); - - switch (chipset & 0xf0) { - case 0x00: - case 0x10: - class = NV04_SWIZZLED_SURFACE; - break; - case 0x20: - class = NV20_SWIZZLED_SURFACE; - break; - case 0x30: - class = NV30_SWIZZLED_SURFACE; - break; - case 0x40: - case 0x60: - class = NV40_SWIZZLED_SURFACE; - break; - default: - /* Famous last words: this really can't happen.. */ - assert(0); - break; - } - - ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, - &nvc->NvSwzSurf); - if (ret) { - NOUVEAU_ERR("Error creating swizzled surface: %d\n", ret); - return 1; - } - - BIND_RING (chan, nvc->NvSwzSurf, nvc->next_subchannel++); - BEGIN_RING(chan, nvc->NvSwzSurf, NV04_SWIZZLED_SURFACE_DMA_NOTIFY, 1); - OUT_RING (chan, nvc->sync_notifier->handle); - BEGIN_RING(chan, nvc->NvSwzSurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); - OUT_RING (chan, nvc->channel->vram->handle); - - if (chipset < 0x10) { - class = NV04_SCALED_IMAGE_FROM_MEMORY; - } else - if (chipset < 0x40) { - class = NV10_SCALED_IMAGE_FROM_MEMORY; - } else { - class = NV40_SCALED_IMAGE_FROM_MEMORY; - } - - ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, - &nvc->NvSIFM); - if (ret) { - NOUVEAU_ERR("Error creating scaled image object: %d\n", ret); - return 1; - } - - BIND_RING (chan, nvc->NvSIFM, nvc->next_subchannel++); - - return 0; -} - -int -nouveau_surface_init_nv04(struct nouveau_context *nv) -{ - nv->surface_copy_prep = nv04_surface_copy_prep; - nv->surface_copy = nv04_surface_copy_blit; - nv->surface_copy_done = nv04_surface_copy_done; - nv->surface_fill = nv04_surface_fill; - return 0; -} - diff --git a/src/gallium/winsys/dri/nouveau/nv50_surface.c b/src/gallium/winsys/dri/nouveau/nv50_surface.c deleted file mode 100644 index c8ab7f690f3..00000000000 --- a/src/gallium/winsys/dri/nouveau/nv50_surface.c +++ /dev/null @@ -1,194 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_format.h" - -#include "nouveau_context.h" - -static INLINE int -nv50_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return NV50_2D_DST_FORMAT_32BPP; - case PIPE_FORMAT_X8R8G8B8_UNORM: - return NV50_2D_DST_FORMAT_24BPP; - case PIPE_FORMAT_R5G6B5_UNORM: - return NV50_2D_DST_FORMAT_16BPP; - case PIPE_FORMAT_A8_UNORM: - return NV50_2D_DST_FORMAT_8BPP; - default: - return -1; - } -} - -static int -nv50_surface_set(struct nouveau_context *nv, struct pipe_surface *surf, int dst) -{ - struct nouveau_channel *chan = nv->nvc->channel; - struct nouveau_grobj *eng2d = nv->nvc->Nv2D; - struct nouveau_bo *bo = nouveau_buffer(surf->buffer)->bo; - int surf_format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; - int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); - - surf_format = nv50_format(surf->format); - if (surf_format < 0) - return 1; - - if (!nouveau_bo(bo)->tiled) { - BEGIN_RING(chan, eng2d, mthd, 2); - OUT_RING (chan, surf_format); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng2d, mthd + 0x14, 5); - OUT_RING (chan, surf->stride); - OUT_RING (chan, surf->width); - OUT_RING (chan, surf->height); - OUT_RELOCh(chan, bo, surf->offset, flags); - OUT_RELOCl(chan, bo, surf->offset, flags); - } else { - BEGIN_RING(chan, eng2d, mthd, 5); - OUT_RING (chan, surf_format); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, eng2d, mthd + 0x18, 4); - OUT_RING (chan, surf->width); - OUT_RING (chan, surf->height); - OUT_RELOCh(chan, bo, surf->offset, flags); - OUT_RELOCl(chan, bo, surf->offset, flags); - } - -#if 0 - if (dst) { - BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); - OUT_RING (chan, 0); - OUT_RING (chan, 0); - OUT_RING (chan, surf->width); - OUT_RING (chan, surf->height); - } -#endif - - return 0; -} - -static int -nv50_surface_copy_prep(struct nouveau_context *nv, - struct pipe_surface *dst, struct pipe_surface *src) -{ - int ret; - - assert(src->format == dst->format); - - ret = nv50_surface_set(nv, dst, 1); - if (ret) - return ret; - - ret = nv50_surface_set(nv, src, 0); - if (ret) - return ret; - - return 0; -} - -static void -nv50_surface_copy(struct nouveau_context *nv, unsigned dx, unsigned dy, - unsigned sx, unsigned sy, unsigned w, unsigned h) -{ - struct nouveau_channel *chan = nv->nvc->channel; - struct nouveau_grobj *eng2d = nv->nvc->Nv2D; - - BEGIN_RING(chan, eng2d, 0x088c, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 4); - OUT_RING (chan, dx); - OUT_RING (chan, dy); - OUT_RING (chan, w); - OUT_RING (chan, h); - BEGIN_RING(chan, eng2d, 0x08c0, 4); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - OUT_RING (chan, 0); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng2d, 0x08d0, 4); - OUT_RING (chan, 0); - OUT_RING (chan, sx); - OUT_RING (chan, 0); - OUT_RING (chan, sy); -} - -static void -nv50_surface_copy_done(struct nouveau_context *nv) -{ - FIRE_RING(nv->nvc->channel); -} - -static int -nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, - unsigned dx, unsigned dy, unsigned w, unsigned h, - unsigned value) -{ - struct nouveau_channel *chan = nv->nvc->channel; - struct nouveau_grobj *eng2d = nv->nvc->Nv2D; - int rect_format, ret; - - rect_format = nv50_format(dst->format); - if (rect_format < 0) - return 1; - - ret = nv50_surface_set(nv, dst, 1); - if (ret) - return ret; - - BEGIN_RING(chan, eng2d, 0x0580, 3); - OUT_RING (chan, 4); - OUT_RING (chan, rect_format); - OUT_RING (chan, value); - - BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); - OUT_RING (chan, dx); - OUT_RING (chan, dy); - OUT_RING (chan, dx + w); - OUT_RING (chan, dy + h); - - FIRE_RING(chan); - return 0; -} - -int -nouveau_surface_channel_create_nv50(struct nouveau_channel_context *nvc) -{ - struct nouveau_channel *chan = nvc->channel; - struct nouveau_grobj *eng2d = NULL; - int ret; - - ret = nouveau_grobj_alloc(chan, nvc->next_handle++, NV50_2D, &eng2d); - if (ret) - return ret; - nvc->Nv2D = eng2d; - - BIND_RING (chan, eng2d, nvc->next_subchannel++); - BEGIN_RING(chan, eng2d, NV50_2D_DMA_NOTIFY, 4); - OUT_RING (chan, nvc->sync_notifier->handle); - OUT_RING (chan, chan->vram->handle); - OUT_RING (chan, chan->vram->handle); - OUT_RING (chan, chan->vram->handle); - BEGIN_RING(chan, eng2d, NV50_2D_OPERATION, 1); - OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY); - BEGIN_RING(chan, eng2d, 0x0290, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, eng2d, 0x0888, 1); - OUT_RING (chan, 1); - - return 0; -} - -int -nouveau_surface_init_nv50(struct nouveau_context *nv) -{ - nv->surface_copy_prep = nv50_surface_copy_prep; - nv->surface_copy = nv50_surface_copy; - nv->surface_copy_done = nv50_surface_copy_done; - nv->surface_fill = nv50_surface_fill; - return 0; -} - diff --git a/src/gallium/winsys/drm/nouveau/Makefile b/src/gallium/winsys/drm/nouveau/Makefile new file mode 100644 index 00000000000..be630ff6d16 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/Makefile @@ -0,0 +1,45 @@ + +TOP = ../../../../.. +include $(TOP)/configs/current + +LIBNAME = nouveau_dri.so + +MINIGLX_SOURCES = + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/nv04/libnv04.a \ + $(TOP)/src/gallium/drivers/nv10/libnv10.a \ + $(TOP)/src/gallium/drivers/nv30/libnv30.a \ + $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nv50/libnv50.a + +DRIVER_SOURCES = \ + nouveau_bo.c \ + nouveau_channel.c \ + nouveau_context.c \ + nouveau_device.c \ + nouveau_dma.c \ + nouveau_fence.c \ + nouveau_grobj.c \ + nouveau_lock.c \ + nouveau_notifier.c \ + nouveau_pushbuf.c \ + nouveau_resource.c \ + nouveau_screen.c \ + nouveau_swapbuffers.c \ + nouveau_winsys.c \ + nouveau_winsys_pipe.c \ + nouveau_winsys_softpipe.c \ + nv04_surface.c \ + nv50_surface.c + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../Makefile.template + +symlinks: diff --git a/src/gallium/winsys/drm/nouveau/nouveau_bo.c b/src/gallium/winsys/drm/nouveau/nouveau_bo.c new file mode 100644 index 00000000000..b5942994d94 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_bo.c @@ -0,0 +1,470 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <errno.h> +#include <assert.h> + +#include "nouveau_drmif.h" +#include "nouveau_dma.h" +#include "nouveau_local.h" + +static void +nouveau_mem_free(struct nouveau_device *dev, struct drm_nouveau_mem_alloc *ma, + void **map) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + struct drm_nouveau_mem_free mf; + + if (map && *map) { + drmUnmap(*map, ma->size); + *map = NULL; + } + + if (ma->size) { + mf.offset = ma->offset; + mf.flags = ma->flags; + drmCommandWrite(nvdev->fd, DRM_NOUVEAU_MEM_FREE, + &mf, sizeof(mf)); + ma->size = 0; + } +} + +static int +nouveau_mem_alloc(struct nouveau_device *dev, unsigned size, unsigned align, + uint32_t flags, struct drm_nouveau_mem_alloc *ma, void **map) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + int ret; + + ma->alignment = align; + ma->size = size; + ma->flags = flags; + if (map) + ma->flags |= NOUVEAU_MEM_MAPPED; + ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_MEM_ALLOC, ma, + sizeof(struct drm_nouveau_mem_alloc)); + if (ret) + return ret; + + if (map) { + ret = drmMap(nvdev->fd, ma->map_handle, ma->size, map); + if (ret) { + *map = NULL; + nouveau_mem_free(dev, ma, map); + return ret; + } + } + + return 0; +} + +static void +nouveau_bo_tmp_del(void *priv) +{ + struct nouveau_resource *r = priv; + + nouveau_fence_ref(NULL, (struct nouveau_fence **)&r->priv); + nouveau_resource_free(&r); +} + +static unsigned +nouveau_bo_tmp_max(struct nouveau_device_priv *nvdev) +{ + struct nouveau_resource *r = nvdev->sa_heap; + unsigned max = 0; + + while (r) { + if (r->in_use && !nouveau_fence(r->priv)->emitted) { + r = r->next; + continue; + } + + if (max < r->size) + max = r->size; + r = r->next; + } + + return max; +} + +static struct nouveau_resource * +nouveau_bo_tmp(struct nouveau_channel *chan, unsigned size, + struct nouveau_fence *fence) +{ + struct nouveau_device_priv *nvdev = nouveau_device(chan->device); + struct nouveau_resource *r = NULL; + struct nouveau_fence *ref = NULL; + + if (fence) + nouveau_fence_ref(fence, &ref); + else + nouveau_fence_new(chan, &ref); + assert(ref); + + while (nouveau_resource_alloc(nvdev->sa_heap, size, ref, &r)) { + if (nouveau_bo_tmp_max(nvdev) < size) { + nouveau_fence_ref(NULL, &ref); + return NULL; + } + + nouveau_fence_flush(chan); + } + nouveau_fence_signal_cb(ref, nouveau_bo_tmp_del, r); + + return r; +} + +int +nouveau_bo_init(struct nouveau_device *dev) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + int ret; + + ret = nouveau_mem_alloc(dev, 128*1024, 0, NOUVEAU_MEM_AGP | + NOUVEAU_MEM_PCI, &nvdev->sa, &nvdev->sa_map); + if (ret) + return ret; + + ret = nouveau_resource_init(&nvdev->sa_heap, 0, nvdev->sa.size); + if (ret) { + nouveau_mem_free(dev, &nvdev->sa, &nvdev->sa_map); + return ret; + } + + return 0; +} + +void +nouveau_bo_takedown(struct nouveau_device *dev) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + nouveau_mem_free(dev, &nvdev->sa, &nvdev->sa_map); +} + +int +nouveau_bo_new(struct nouveau_device *dev, uint32_t flags, int align, + int size, struct nouveau_bo **bo) +{ + struct nouveau_bo_priv *nvbo; + int ret; + + if (!dev || !bo || *bo) + return -EINVAL; + + nvbo = calloc(1, sizeof(struct nouveau_bo_priv)); + if (!nvbo) + return -ENOMEM; + nvbo->base.device = dev; + nvbo->base.size = size; + nvbo->base.handle = bo_to_ptr(nvbo); + nvbo->drm.alignment = align; + nvbo->refcount = 1; + + if (flags & NOUVEAU_BO_TILED) { + nvbo->tiled = 1; + if (flags & NOUVEAU_BO_ZTILE) + nvbo->tiled |= 2; + flags &= ~NOUVEAU_BO_TILED; + } + + ret = nouveau_bo_set_status(&nvbo->base, flags); + if (ret) { + free(nvbo); + return ret; + } + + *bo = &nvbo->base; + return 0; +} + +int +nouveau_bo_user(struct nouveau_device *dev, void *ptr, int size, + struct nouveau_bo **bo) +{ + struct nouveau_bo_priv *nvbo; + + if (!dev || !bo || *bo) + return -EINVAL; + + nvbo = calloc(1, sizeof(*nvbo)); + if (!nvbo) + return -ENOMEM; + nvbo->base.device = dev; + + nvbo->sysmem = ptr; + nvbo->user = 1; + + nvbo->base.size = size; + nvbo->base.offset = nvbo->drm.offset; + nvbo->base.handle = bo_to_ptr(nvbo); + nvbo->refcount = 1; + *bo = &nvbo->base; + return 0; +} + +int +nouveau_bo_ref(struct nouveau_device *dev, uint64_t handle, + struct nouveau_bo **bo) +{ + struct nouveau_bo_priv *nvbo = ptr_to_bo(handle); + + if (!dev || !bo || *bo) + return -EINVAL; + + nvbo->refcount++; + *bo = &nvbo->base; + return 0; +} + +static void +nouveau_bo_del_cb(void *priv) +{ + struct nouveau_bo_priv *nvbo = priv; + + nouveau_fence_ref(NULL, &nvbo->fence); + nouveau_mem_free(nvbo->base.device, &nvbo->drm, &nvbo->map); + if (nvbo->sysmem && !nvbo->user) + free(nvbo->sysmem); + free(nvbo); +} + +void +nouveau_bo_del(struct nouveau_bo **bo) +{ + struct nouveau_bo_priv *nvbo; + + if (!bo || !*bo) + return; + nvbo = nouveau_bo(*bo); + *bo = NULL; + + if (--nvbo->refcount) + return; + + if (nvbo->pending) + nouveau_pushbuf_flush(nvbo->pending->channel, 0); + + if (nvbo->fence) + nouveau_fence_signal_cb(nvbo->fence, nouveau_bo_del_cb, nvbo); + else + nouveau_bo_del_cb(nvbo); +} + +int +nouveau_bo_map(struct nouveau_bo *bo, uint32_t flags) +{ + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + + if (!nvbo) + return -EINVAL; + + if (nvbo->pending && + (nvbo->pending->flags & NOUVEAU_BO_WR || flags & NOUVEAU_BO_WR)) { + nouveau_pushbuf_flush(nvbo->pending->channel, 0); + } + + if (flags & NOUVEAU_BO_WR) + nouveau_fence_wait(&nvbo->fence); + else + nouveau_fence_wait(&nvbo->wr_fence); + + if (nvbo->sysmem) + bo->map = nvbo->sysmem; + else + bo->map = nvbo->map; + return 0; +} + +void +nouveau_bo_unmap(struct nouveau_bo *bo) +{ + bo->map = NULL; +} + +static int +nouveau_bo_upload(struct nouveau_bo_priv *nvbo) +{ + if (nvbo->fence) + nouveau_fence_wait(&nvbo->fence); + memcpy(nvbo->map, nvbo->sysmem, nvbo->drm.size); + return 0; +} + +int +nouveau_bo_set_status(struct nouveau_bo *bo, uint32_t flags) +{ + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + struct drm_nouveau_mem_alloc new; + void *new_map = NULL, *new_sysmem = NULL; + unsigned new_flags = 0, ret; + + assert(!bo->map); + + /* Check current memtype vs requested, if they match do nothing */ + if ((nvbo->drm.flags & NOUVEAU_MEM_FB) && (flags & NOUVEAU_BO_VRAM)) + return 0; + if ((nvbo->drm.flags & (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI)) && + (flags & NOUVEAU_BO_GART)) + return 0; + if (nvbo->drm.size == 0 && nvbo->sysmem && (flags & NOUVEAU_BO_LOCAL)) + return 0; + + memset(&new, 0x00, sizeof(new)); + + /* Allocate new memory */ + if (flags & NOUVEAU_BO_VRAM) + new_flags |= NOUVEAU_MEM_FB; + else + if (flags & NOUVEAU_BO_GART) + new_flags |= (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI); + + if (nvbo->tiled && flags) { + new_flags |= NOUVEAU_MEM_TILE; + if (nvbo->tiled & 2) + new_flags |= NOUVEAU_MEM_TILE_ZETA; + } + + if (new_flags) { + ret = nouveau_mem_alloc(bo->device, bo->size, + nvbo->drm.alignment, new_flags, + &new, &new_map); + if (ret) + return ret; + } else + if (!nvbo->user) { + new_sysmem = malloc(bo->size); + } + + /* Copy old -> new */ + /*XXX: use M2MF */ + if (nvbo->sysmem || nvbo->map) { + struct nouveau_pushbuf_bo *pbo = nvbo->pending; + nvbo->pending = NULL; + nouveau_bo_map(bo, NOUVEAU_BO_RD); + memcpy(new_map, bo->map, bo->size); + nouveau_bo_unmap(bo); + nvbo->pending = pbo; + } + + /* Free old memory */ + if (nvbo->fence) + nouveau_fence_wait(&nvbo->fence); + nouveau_mem_free(bo->device, &nvbo->drm, &nvbo->map); + if (nvbo->sysmem && !nvbo->user) + free(nvbo->sysmem); + + nvbo->drm = new; + nvbo->map = new_map; + if (!nvbo->user) + nvbo->sysmem = new_sysmem; + bo->flags = flags; + bo->offset = nvbo->drm.offset; + return 0; +} + +static int +nouveau_bo_validate_user(struct nouveau_channel *chan, struct nouveau_bo *bo, + struct nouveau_fence *fence, uint32_t flags) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_device_priv *nvdev = nouveau_device(chan->device); + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + struct nouveau_resource *r; + + if (nvchan->user_charge + bo->size > nvdev->sa.size) + return 1; + + if (!(flags & NOUVEAU_BO_GART)) + return 1; + + r = nouveau_bo_tmp(chan, bo->size, fence); + if (!r) + return 1; + nvchan->user_charge += bo->size; + + memcpy(nvdev->sa_map + r->start, nvbo->sysmem, bo->size); + + nvbo->offset = nvdev->sa.offset + r->start; + nvbo->flags = NOUVEAU_BO_GART; + return 0; +} + +static int +nouveau_bo_validate_bo(struct nouveau_channel *chan, struct nouveau_bo *bo, + struct nouveau_fence *fence, uint32_t flags) +{ + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + int ret; + + ret = nouveau_bo_set_status(bo, flags); + if (ret) { + nouveau_fence_flush(chan); + + ret = nouveau_bo_set_status(bo, flags); + if (ret) + return ret; + } + + if (nvbo->user) + nouveau_bo_upload(nvbo); + + nvbo->offset = nvbo->drm.offset; + if (nvbo->drm.flags & (NOUVEAU_MEM_AGP | NOUVEAU_MEM_PCI)) + nvbo->flags = NOUVEAU_BO_GART; + else + nvbo->flags = NOUVEAU_BO_VRAM; + + return 0; +} + +int +nouveau_bo_validate(struct nouveau_channel *chan, struct nouveau_bo *bo, + uint32_t flags) +{ + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + struct nouveau_fence *fence = nouveau_pushbuf(chan->pushbuf)->fence; + int ret; + + assert(bo->map == NULL); + + if (nvbo->user) { + ret = nouveau_bo_validate_user(chan, bo, fence, flags); + if (ret) { + ret = nouveau_bo_validate_bo(chan, bo, fence, flags); + if (ret) + return ret; + } + } else { + ret = nouveau_bo_validate_bo(chan, bo, fence, flags); + if (ret) + return ret; + } + + if (flags & NOUVEAU_BO_WR) + nouveau_fence_ref(fence, &nvbo->wr_fence); + nouveau_fence_ref(fence, &nvbo->fence); + return 0; +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_channel.c b/src/gallium/winsys/drm/nouveau/nouveau_channel.c new file mode 100644 index 00000000000..3b4dcd1ecf2 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_channel.c @@ -0,0 +1,126 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "nouveau_drmif.h" +#include "nouveau_dma.h" + +int +nouveau_channel_alloc(struct nouveau_device *dev, uint32_t fb_ctxdma, + uint32_t tt_ctxdma, struct nouveau_channel **chan) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + struct nouveau_channel_priv *nvchan; + int ret; + + if (!nvdev || !chan || *chan) + return -EINVAL; + + nvchan = calloc(1, sizeof(struct nouveau_channel_priv)); + if (!nvchan) + return -ENOMEM; + nvchan->base.device = dev; + + nvchan->drm.fb_ctxdma_handle = fb_ctxdma; + nvchan->drm.tt_ctxdma_handle = tt_ctxdma; + ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_CHANNEL_ALLOC, + &nvchan->drm, sizeof(nvchan->drm)); + if (ret) { + free(nvchan); + return ret; + } + + nvchan->base.id = nvchan->drm.channel; + if (nouveau_grobj_ref(&nvchan->base, nvchan->drm.fb_ctxdma_handle, + &nvchan->base.vram) || + nouveau_grobj_ref(&nvchan->base, nvchan->drm.tt_ctxdma_handle, + &nvchan->base.gart)) { + nouveau_channel_free((void *)&nvchan); + return -EINVAL; + } + + ret = drmMap(nvdev->fd, nvchan->drm.ctrl, nvchan->drm.ctrl_size, + (void*)&nvchan->user); + if (ret) { + nouveau_channel_free((void *)&nvchan); + return ret; + } + nvchan->put = &nvchan->user[0x40/4]; + nvchan->get = &nvchan->user[0x44/4]; + nvchan->ref_cnt = &nvchan->user[0x48/4]; + + ret = drmMap(nvdev->fd, nvchan->drm.notifier, nvchan->drm.notifier_size, + (drmAddressPtr)&nvchan->notifier_block); + if (ret) { + nouveau_channel_free((void *)&nvchan); + return ret; + } + + ret = drmMap(nvdev->fd, nvchan->drm.cmdbuf, nvchan->drm.cmdbuf_size, + (void*)&nvchan->pushbuf); + if (ret) { + nouveau_channel_free((void *)&nvchan); + return ret; + } + + ret = nouveau_grobj_alloc(&nvchan->base, 0x00000000, 0x0030, + &nvchan->base.nullobj); + if (ret) { + nouveau_channel_free((void *)&nvchan); + return ret; + } + + nouveau_dma_channel_init(&nvchan->base); + nouveau_pushbuf_init(&nvchan->base); + + *chan = &nvchan->base; + return 0; +} + +void +nouveau_channel_free(struct nouveau_channel **chan) +{ + struct nouveau_channel_priv *nvchan; + struct nouveau_device_priv *nvdev; + struct drm_nouveau_channel_free cf; + + if (!chan || !*chan) + return; + nvchan = nouveau_channel(*chan); + *chan = NULL; + nvdev = nouveau_device(nvchan->base.device); + + FIRE_RING_CH(&nvchan->base); + + nouveau_grobj_free(&nvchan->base.vram); + nouveau_grobj_free(&nvchan->base.gart); + nouveau_grobj_free(&nvchan->base.nullobj); + + cf.channel = nvchan->drm.channel; + drmCommandWrite(nvdev->fd, DRM_NOUVEAU_CHANNEL_FREE, &cf, sizeof(cf)); + free(nvchan); +} + + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_context.c b/src/gallium/winsys/drm/nouveau/nouveau_context.c new file mode 100644 index 00000000000..74413c408f3 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_context.c @@ -0,0 +1,346 @@ +#include "main/glheader.h" +#include "glapi/glthread.h" +#include <GL/internal/glcore.h> +#include "utils.h" + +#include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" + +#include "nouveau_context.h" +#include "nouveau_dri.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_winsys_pipe.h" + +#ifdef DEBUG +static const struct dri_debug_control debug_control[] = { + { "bo", DEBUG_BO }, + { NULL, 0 } +}; +int __nouveau_debug = 0; +#endif + +static void +nouveau_channel_context_destroy(struct nouveau_channel_context *nvc) +{ + nouveau_grobj_free(&nvc->NvCtxSurf2D); + nouveau_grobj_free(&nvc->NvImageBlit); + nouveau_grobj_free(&nvc->NvGdiRect); + nouveau_grobj_free(&nvc->NvM2MF); + nouveau_grobj_free(&nvc->Nv2D); + nouveau_grobj_free(&nvc->NvSwzSurf); + nouveau_grobj_free(&nvc->NvSIFM); + + nouveau_notifier_free(&nvc->sync_notifier); + + nouveau_channel_free(&nvc->channel); + + FREE(nvc); +} + +static struct nouveau_channel_context * +nouveau_channel_context_create(struct nouveau_device *dev) +{ + struct nouveau_channel_context *nvc; + int ret; + + nvc = CALLOC_STRUCT(nouveau_channel_context); + if (!nvc) + return NULL; + + if ((ret = nouveau_channel_alloc(dev, 0x8003d001, 0x8003d002, + &nvc->channel))) { + NOUVEAU_ERR("Error creating GPU channel: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + + nvc->next_handle = 0x80000000; + + if ((ret = nouveau_notifier_alloc(nvc->channel, nvc->next_handle++, 1, + &nvc->sync_notifier))) { + NOUVEAU_ERR("Error creating channel sync notifier: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + ret = nouveau_surface_channel_create_nv50(nvc); + break; + default: + ret = nouveau_surface_channel_create_nv04(nvc); + break; + } + + if (ret) { + NOUVEAU_ERR("Error initialising surface objects: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + + return nvc; +} + +GLboolean +nouveau_context_create(const __GLcontextModes *glVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *driScrnPriv = driContextPriv->driScreenPriv; + struct nouveau_screen *nv_screen = driScrnPriv->private; + struct nouveau_context *nv = CALLOC_STRUCT(nouveau_context); + struct pipe_context *pipe = NULL; + struct st_context *st_share = NULL; + struct nouveau_channel_context *nvc = NULL; + struct nouveau_device *dev = nv_screen->device; + int i; + + if (sharedContextPrivate) { + st_share = ((struct nouveau_context *)sharedContextPrivate)->st; + } + + switch (dev->chipset & 0xf0) { + case 0x10: + case 0x20: + /* NV10 */ + case 0x30: + /* NV30 */ + case 0x40: + case 0x60: + /* NV40 */ + case 0x50: + case 0x80: + case 0x90: + /* G80 */ + break; + default: + NOUVEAU_ERR("Unsupported chipset: NV%02x\n", dev->chipset); + return GL_FALSE; + } + + driContextPriv->driverPrivate = (void *)nv; + nv->nv_screen = nv_screen; + nv->dri_screen = driScrnPriv; + + { + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + nvdev->ctx = driContextPriv->hHWContext; + nvdev->lock = (drmLock *)&driScrnPriv->pSAREA->lock; + } + + driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, + nv->dri_screen->myNum, "nouveau"); +#ifdef DEBUG + __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), + debug_control); +#endif + + /*XXX: Hack up a fake region and buffer object for front buffer. + * This will go away with TTM, replaced with a simple reference + * of the front buffer handle passed to us by the DDX. + */ + { + struct pipe_surface *fb_surf; + struct nouveau_pipe_buffer *fb_buf; + struct nouveau_bo_priv *fb_bo; + + fb_bo = calloc(1, sizeof(struct nouveau_bo_priv)); + fb_bo->drm.offset = nv_screen->front_offset; + fb_bo->drm.flags = NOUVEAU_MEM_FB; + fb_bo->drm.size = nv_screen->front_pitch * + nv_screen->front_height; + fb_bo->refcount = 1; + fb_bo->base.flags = NOUVEAU_BO_PIN | NOUVEAU_BO_VRAM; + fb_bo->base.offset = fb_bo->drm.offset; + fb_bo->base.handle = (unsigned long)fb_bo; + fb_bo->base.size = fb_bo->drm.size; + fb_bo->base.device = nv_screen->device; + + fb_buf = calloc(1, sizeof(struct nouveau_pipe_buffer)); + fb_buf->bo = &fb_bo->base; + + fb_surf = calloc(1, sizeof(struct pipe_surface)); + if (nv_screen->front_cpp == 2) + fb_surf->format = PIPE_FORMAT_R5G6B5_UNORM; + else + fb_surf->format = PIPE_FORMAT_A8R8G8B8_UNORM; + pf_get_block(fb_surf->format, &fb_surf->block); + fb_surf->width = nv_screen->front_pitch / nv_screen->front_cpp; + fb_surf->height = nv_screen->front_height; + fb_surf->stride = fb_surf->width * fb_surf->block.size; + fb_surf->refcount = 1; + fb_surf->buffer = &fb_buf->base; + + nv->frontbuffer = fb_surf; + } + + /* Attempt to share a single channel between multiple contexts from + * a single process. + */ + nvc = nv_screen->nvc; + if (!nvc && st_share) { + struct nouveau_context *snv = st_share->pipe->priv; + if (snv) { + nvc = snv->nvc; + } + } + + /*XXX: temporary - disable multi-context/single-channel on pre-NV4x */ + switch (dev->chipset & 0xf0) { + case 0x40: + case 0x60: + /* NV40 class */ + case 0x50: + case 0x80: + case 0x90: + /* G80 class */ + break; + default: + nvc = NULL; + break; + } + + if (!nvc) { + nvc = nouveau_channel_context_create(dev); + if (!nvc) { + NOUVEAU_ERR("Failed initialising GPU context\n"); + return GL_FALSE; + } + nv_screen->nvc = nvc; + } + + nvc->refcount++; + nv->nvc = nvc; + + /* Find a free slot for a pipe context, allocate a new one if needed */ + nv->pctx_id = -1; + for (i = 0; i < nvc->nr_pctx; i++) { + if (nvc->pctx[i] == NULL) { + nv->pctx_id = i; + break; + } + } + + if (nv->pctx_id < 0) { + nv->pctx_id = nvc->nr_pctx++; + nvc->pctx = + realloc(nvc->pctx, + sizeof(struct pipe_context *) * nvc->nr_pctx); + } + + /* Create pipe */ + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + if (nouveau_surface_init_nv50(nv)) + return GL_FALSE; + break; + default: + if (nouveau_surface_init_nv04(nv)) + return GL_FALSE; + break; + } + + if (!getenv("NOUVEAU_FORCE_SOFTPIPE")) { + struct pipe_screen *pscreen; + + pipe = nouveau_pipe_create(nv); + if (!pipe) + NOUVEAU_ERR("Couldn't create hw pipe\n"); + pscreen = nvc->pscreen; + + nv->cap.hw_vertex_buffer = + pscreen->get_param(pscreen, NOUVEAU_CAP_HW_VTXBUF); + nv->cap.hw_index_buffer = + pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF); + } + + if (!pipe) { + NOUVEAU_MSG("Using softpipe\n"); + pipe = nouveau_create_softpipe(nv); + if (!pipe) { + NOUVEAU_ERR("Error creating pipe, bailing\n"); + return GL_FALSE; + } + } + + pipe->priv = nv; + nv->st = st_create_context(pipe, glVis, st_share); + return GL_TRUE; +} + +void +nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) +{ + struct nouveau_context *nv = driContextPriv->driverPrivate; + struct nouveau_channel_context *nvc = nv->nvc; + + assert(nv); + + st_finish(nv->st); + st_destroy_context(nv->st); + + if (nv->pctx_id >= 0) { + nvc->pctx[nv->pctx_id] = NULL; + if (--nvc->refcount <= 0) { + nouveau_channel_context_destroy(nvc); + nv->nv_screen->nvc = NULL; + } + } + + free(nv); +} + +GLboolean +nouveau_context_bind(__DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv) +{ + struct nouveau_context *nv; + struct nouveau_framebuffer *draw, *read; + + if (!driContextPriv) { + st_make_current(NULL, NULL, NULL); + return GL_TRUE; + } + + nv = driContextPriv->driverPrivate; + draw = driDrawPriv->driverPrivate; + read = driReadPriv->driverPrivate; + + st_make_current(nv->st, draw->stfb, read->stfb); + + if ((nv->dri_drawable != driDrawPriv) || + (nv->last_stamp != driDrawPriv->lastStamp)) { + nv->dri_drawable = driDrawPriv; + st_resize_framebuffer(draw->stfb, driDrawPriv->w, + driDrawPriv->h); + nv->last_stamp = driDrawPriv->lastStamp; + } + + if (driDrawPriv != driReadPriv) { + st_resize_framebuffer(read->stfb, driReadPriv->w, + driReadPriv->h); + } + + return GL_TRUE; +} + +GLboolean +nouveau_context_unbind(__DRIcontextPrivate *driContextPriv) +{ + struct nouveau_context *nv = driContextPriv->driverPrivate; + (void)nv; + + st_flush(nv->st, 0, NULL); + return GL_TRUE; +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_context.h b/src/gallium/winsys/drm/nouveau/nouveau_context.h new file mode 100644 index 00000000000..77e2147a2c7 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_context.h @@ -0,0 +1,113 @@ +#ifndef __NOUVEAU_CONTEXT_H__ +#define __NOUVEAU_CONTEXT_H__ + +#include "dri_util.h" +#include "xmlconfig.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau_drmif.h" +#include "nouveau_dma.h" + +struct nouveau_framebuffer { + struct st_framebuffer *stfb; +}; + +struct nouveau_channel_context { + struct pipe_screen *pscreen; + int refcount; + + unsigned cur_pctx; + unsigned nr_pctx; + struct pipe_context **pctx; + + struct nouveau_channel *channel; + + struct nouveau_notifier *sync_notifier; + + /* Common */ + struct nouveau_grobj *NvM2MF; + /* NV04-NV40 */ + struct nouveau_grobj *NvCtxSurf2D; + struct nouveau_grobj *NvSwzSurf; + struct nouveau_grobj *NvImageBlit; + struct nouveau_grobj *NvGdiRect; + struct nouveau_grobj *NvSIFM; + /* G80 */ + struct nouveau_grobj *Nv2D; + + uint32_t next_handle; + uint32_t next_subchannel; + uint32_t next_sequence; +}; + +struct nouveau_context { + struct st_context *st; + + /* DRI stuff */ + __DRIscreenPrivate *dri_screen; + __DRIdrawablePrivate *dri_drawable; + unsigned int last_stamp; + driOptionCache dri_option_cache; + drm_context_t drm_context; + drmLock drm_lock; + GLboolean locked; + struct nouveau_screen *nv_screen; + struct pipe_surface *frontbuffer; + + struct { + int hw_vertex_buffer; + int hw_index_buffer; + } cap; + + /* Hardware context */ + struct nouveau_channel_context *nvc; + int pctx_id; + + /* pipe_surface accel */ + struct pipe_surface *surf_src, *surf_dst; + unsigned surf_src_offset, surf_dst_offset; + int (*surface_copy_prep)(struct nouveau_context *, + struct pipe_surface *dst, + struct pipe_surface *src); + void (*surface_copy)(struct nouveau_context *, unsigned dx, unsigned dy, + unsigned sx, unsigned sy, unsigned w, unsigned h); + void (*surface_copy_done)(struct nouveau_context *); + int (*surface_fill)(struct nouveau_context *, struct pipe_surface *, + unsigned, unsigned, unsigned, unsigned, unsigned); +}; + +extern GLboolean nouveau_context_create(const __GLcontextModes *, + __DRIcontextPrivate *, void *); +extern void nouveau_context_destroy(__DRIcontextPrivate *); +extern GLboolean nouveau_context_bind(__DRIcontextPrivate *, + __DRIdrawablePrivate *draw, + __DRIdrawablePrivate *read); +extern GLboolean nouveau_context_unbind(__DRIcontextPrivate *); + +#ifdef DEBUG +extern int __nouveau_debug; + +#define DEBUG_BO (1 << 0) + +#define DBG(flag, ...) do { \ + if (__nouveau_debug & (DEBUG_##flag)) \ + NOUVEAU_ERR(__VA_ARGS__); \ +} while(0) +#else +#define DBG(flag, ...) +#endif + +extern void LOCK_HARDWARE(struct nouveau_context *); +extern void UNLOCK_HARDWARE(struct nouveau_context *); + +extern int +nouveau_surface_channel_create_nv04(struct nouveau_channel_context *); +extern int +nouveau_surface_channel_create_nv50(struct nouveau_channel_context *); +extern int nouveau_surface_init_nv04(struct nouveau_context *); +extern int nouveau_surface_init_nv50(struct nouveau_context *); + +extern uint32_t *nouveau_pipe_dma_beginp(struct nouveau_grobj *, int, int); +extern void nouveau_pipe_dma_kickoff(struct nouveau_channel *); + +#endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_device.c b/src/gallium/winsys/drm/nouveau/nouveau_device.c new file mode 100644 index 00000000000..0b452fcd02d --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_device.c @@ -0,0 +1,159 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> + +#include "nouveau_drmif.h" + +int +nouveau_device_open_existing(struct nouveau_device **dev, int close, + int fd, drm_context_t ctx) +{ + struct nouveau_device_priv *nvdev; + int ret; + + if (!dev || *dev) + return -EINVAL; + + nvdev = calloc(1, sizeof(*nvdev)); + if (!nvdev) + return -ENOMEM; + nvdev->fd = fd; + nvdev->ctx = ctx; + nvdev->needs_close = close; + + drmCommandNone(nvdev->fd, DRM_NOUVEAU_CARD_INIT); + + if ((ret = nouveau_bo_init(&nvdev->base))) { + nouveau_device_close((void *)&nvdev); + return ret; + } + + { + uint64_t value; + + ret = nouveau_device_get_param(&nvdev->base, + NOUVEAU_GETPARAM_CHIPSET_ID, + &value); + if (ret) { + nouveau_device_close((void *)&nvdev); + return ret; + } + nvdev->base.chipset = value; + } + + *dev = &nvdev->base; + return 0; +} + +int +nouveau_device_open(struct nouveau_device **dev, const char *busid) +{ + drm_context_t ctx; + int fd, ret; + + if (!dev || *dev) + return -EINVAL; + + fd = drmOpen("nouveau", busid); + if (fd < 0) + return -EINVAL; + + ret = drmCreateContext(fd, &ctx); + if (ret) { + drmClose(fd); + return ret; + } + + ret = nouveau_device_open_existing(dev, 1, fd, ctx); + if (ret) { + drmDestroyContext(fd, ctx); + drmClose(fd); + return ret; + } + + return 0; +} + +void +nouveau_device_close(struct nouveau_device **dev) +{ + struct nouveau_device_priv *nvdev; + + if (dev || !*dev) + return; + nvdev = nouveau_device(*dev); + *dev = NULL; + + nouveau_bo_takedown(&nvdev->base); + + if (nvdev->needs_close) { + drmDestroyContext(nvdev->fd, nvdev->ctx); + drmClose(nvdev->fd); + } + free(nvdev); +} + +int +nouveau_device_get_param(struct nouveau_device *dev, + uint64_t param, uint64_t *value) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + struct drm_nouveau_getparam g; + int ret; + + if (!nvdev || !value) + return -EINVAL; + + g.param = param; + ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_GETPARAM, + &g, sizeof(g)); + if (ret) + return ret; + + *value = g.value; + return 0; +} + +int +nouveau_device_set_param(struct nouveau_device *dev, + uint64_t param, uint64_t value) +{ + struct nouveau_device_priv *nvdev = nouveau_device(dev); + struct drm_nouveau_setparam s; + int ret; + + if (!nvdev) + return -EINVAL; + + s.param = param; + s.value = value; + ret = drmCommandWriteRead(nvdev->fd, DRM_NOUVEAU_SETPARAM, + &s, sizeof(s)); + if (ret) + return ret; + + return 0; +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_dma.c b/src/gallium/winsys/drm/nouveau/nouveau_dma.c new file mode 100644 index 00000000000..f8a8ba04f6d --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_dma.c @@ -0,0 +1,219 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdint.h> +#include <assert.h> +#include <errno.h> + +#include "nouveau_drmif.h" +#include "nouveau_dma.h" +#include "nouveau_local.h" + +static inline uint32_t +READ_GET(struct nouveau_channel_priv *nvchan) +{ + return *nvchan->get; +} + +static inline void +WRITE_PUT(struct nouveau_channel_priv *nvchan, uint32_t val) +{ + uint32_t put = ((val << 2) + nvchan->dma->base); + volatile int dum; + + NOUVEAU_DMA_BARRIER; + dum = READ_GET(nvchan); + + *nvchan->put = put; + nvchan->dma->put = val; +#ifdef NOUVEAU_DMA_TRACE + NOUVEAU_MSG("WRITE_PUT %d/0x%08x\n", nvchan->drm.channel, put); +#endif + + NOUVEAU_DMA_BARRIER; +} + +static inline int +LOCAL_GET(struct nouveau_dma_priv *dma, uint32_t *val) +{ + uint32_t get = *val; + + if (get >= dma->base && get <= (dma->base + (dma->max << 2))) { + *val = (get - dma->base) >> 2; + return 1; + } + + return 0; +} + +void +nouveau_dma_channel_init(struct nouveau_channel *chan) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + int i; + + nvchan->dma = &nvchan->dma_master; + nvchan->dma->base = nvchan->drm.put_base; + nvchan->dma->cur = nvchan->dma->put = 0; + nvchan->dma->max = (nvchan->drm.cmdbuf_size >> 2) - 2; + nvchan->dma->free = nvchan->dma->max - nvchan->dma->cur; + + RING_SPACE_CH(chan, RING_SKIPS); + for (i = 0; i < RING_SKIPS; i++) + OUT_RING_CH(chan, 0); +} + +#define CHECK_TIMEOUT() do { \ + if ((NOUVEAU_TIME_MSEC() - t_start) > NOUVEAU_DMA_TIMEOUT) \ + return - EBUSY; \ +} while(0) + +int +nouveau_dma_wait(struct nouveau_channel *chan, int size) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + uint32_t get, t_start; + + FIRE_RING_CH(chan); + + t_start = NOUVEAU_TIME_MSEC(); + while (dma->free < size) { + CHECK_TIMEOUT(); + + get = READ_GET(nvchan); + if (!LOCAL_GET(dma, &get)) + continue; + + if (dma->put >= get) { + dma->free = dma->max - dma->cur; + + if (dma->free < size) { +#ifdef NOUVEAU_DMA_DEBUG + dma->push_free = 1; +#endif + OUT_RING_CH(chan, 0x20000000 | dma->base); + if (get <= RING_SKIPS) { + /*corner case - will be idle*/ + if (dma->put <= RING_SKIPS) + WRITE_PUT(nvchan, + RING_SKIPS + 1); + + do { + CHECK_TIMEOUT(); + get = READ_GET(nvchan); + if (!LOCAL_GET(dma, &get)) + get = 0; + } while (get <= RING_SKIPS); + } + + WRITE_PUT(nvchan, RING_SKIPS); + dma->cur = dma->put = RING_SKIPS; + dma->free = get - (RING_SKIPS + 1); + } + } else { + dma->free = get - dma->cur - 1; + } + } + + return 0; +} + +#ifdef NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF +static void +nouveau_dma_parse_pushbuf(struct nouveau_channel *chan, int get, int put) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + unsigned mthd_count = 0; + + while (get != put) { + uint32_t gpuget = (get << 2) + nvchan->drm.put_base; + uint32_t data; + + if (get < 0 || get >= nvchan->drm.cmdbuf_size) { + NOUVEAU_ERR("DMA_PT 0x%08x\n", gpuget); + assert(0); + } + data = nvchan->pushbuf[get++]; + + if (mthd_count) { + NOUVEAU_MSG("0x%08x 0x%08x\n", gpuget, data); + mthd_count--; + continue; + } + + switch (data & 0x60000000) { + case 0x00000000: + mthd_count = (data >> 18) & 0x7ff; + NOUVEAU_MSG("0x%08x 0x%08x MTHD " + "Sc %d Mthd 0x%04x Size %d\n", + gpuget, data, (data>>13) & 7, data & 0x1ffc, + mthd_count); + break; + case 0x20000000: + get = (data & 0x1ffffffc) >> 2; + NOUVEAU_MSG("0x%08x 0x%08x JUMP 0x%08x\n", + gpuget, data, data & 0x1ffffffc); + continue; + case 0x40000000: + mthd_count = (data >> 18) & 0x7ff; + NOUVEAU_MSG("0x%08x 0x%08x NINC " + "Sc %d Mthd 0x%04x Size %d\n", + gpuget, data, (data>>13) & 7, data & 0x1ffc, + mthd_count); + break; + case 0x60000000: + /* DMA_OPCODE_CALL apparently, doesn't seem to work on + * my NV40 at least.. + */ + /* fall-through */ + default: + NOUVEAU_MSG("DMA_PUSHER 0x%08x 0x%08x\n", + gpuget, data); + assert(0); + } + } +} +#endif + +void +nouveau_dma_kickoff(struct nouveau_channel *chan) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + + if (dma->cur == dma->put) + return; + +#ifdef NOUVEAU_DMA_DEBUG + if (dma->push_free) { + NOUVEAU_ERR("Packet incomplete: %d left\n", dma->push_free); + return; + } +#endif + +#ifdef NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF + nouveau_dma_parse_pushbuf(chan, dma->put, dma->cur); +#endif + + WRITE_PUT(nvchan, dma->cur); +} diff --git a/src/gallium/winsys/drm/nouveau/nouveau_dma.h b/src/gallium/winsys/drm/nouveau/nouveau_dma.h new file mode 100644 index 00000000000..cfa6d26e828 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_dma.h @@ -0,0 +1,143 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_DMA_H__ +#define __NOUVEAU_DMA_H__ + +#include <string.h> +#include "nouveau_drmif.h" +#include "nouveau_local.h" + +#define RING_SKIPS 8 + +extern int nouveau_dma_wait(struct nouveau_channel *chan, int size); +extern void nouveau_dma_subc_bind(struct nouveau_grobj *); +extern void nouveau_dma_channel_init(struct nouveau_channel *); +extern void nouveau_dma_kickoff(struct nouveau_channel *); + +#ifdef NOUVEAU_DMA_DEBUG +static char faulty[1024]; +#endif + +static inline void +nouveau_dma_out(struct nouveau_channel *chan, uint32_t data) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + +#ifdef NOUVEAU_DMA_DEBUG + if (dma->push_free == 0) { + NOUVEAU_ERR("No space left in packet at %s\n", faulty); + return; + } + dma->push_free--; +#endif +#ifdef NOUVEAU_DMA_TRACE + { + uint32_t offset = (dma->cur << 2) + dma->base; + NOUVEAU_MSG("\tOUT_RING %d/0x%08x -> 0x%08x\n", + nvchan->drm.channel, offset, data); + } +#endif + nvchan->pushbuf[dma->cur + (dma->base - nvchan->drm.put_base)/4] = data; + dma->cur++; +} + +static inline void +nouveau_dma_outp(struct nouveau_channel *chan, uint32_t *ptr, int size) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + (void)dma; + +#ifdef NOUVEAU_DMA_DEBUG + if (dma->push_free < size) { + NOUVEAU_ERR("Packet too small. Free=%d, Need=%d\n", + dma->push_free, size); + return; + } +#endif +#ifdef NOUVEAU_DMA_TRACE + while (size--) { + nouveau_dma_out(chan, *ptr); + ptr++; + } +#else + memcpy(&nvchan->pushbuf[dma->cur], ptr, size << 2); +#ifdef NOUVEAU_DMA_DEBUG + dma->push_free -= size; +#endif + dma->cur += size; +#endif +} + +static inline void +nouveau_dma_space(struct nouveau_channel *chan, int size) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + + if (dma->free < size) { + if (nouveau_dma_wait(chan, size) && chan->hang_notify) + chan->hang_notify(chan); + } + dma->free -= size; +#ifdef NOUVEAU_DMA_DEBUG + dma->push_free = size; +#endif +} + +static inline void +nouveau_dma_begin(struct nouveau_channel *chan, struct nouveau_grobj *grobj, + int method, int size, const char* file, int line) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *dma = nvchan->dma; + (void)dma; + +#ifdef NOUVEAU_DMA_TRACE + NOUVEAU_MSG("BEGIN_RING %d/%08x/%d/0x%04x/%d\n", nvchan->drm.channel, + grobj->handle, grobj->subc, method, size); +#endif + +#ifdef NOUVEAU_DMA_DEBUG + if (dma->push_free) { + NOUVEAU_ERR("Previous packet incomplete: %d left at %s\n", + dma->push_free, faulty); + return; + } + sprintf(faulty,"%s:%d",file,line); +#endif + + nouveau_dma_space(chan, (size + 1)); + nouveau_dma_out(chan, (size << 18) | (grobj->subc << 13) | method); +} + +#define RING_SPACE_CH(ch,sz) nouveau_dma_space((ch), (sz)) +#define BEGIN_RING_CH(ch,gr,m,sz) nouveau_dma_begin((ch), (gr), (m), (sz), __FUNCTION__, __LINE__ ) +#define OUT_RING_CH(ch, data) nouveau_dma_out((ch), (data)) +#define OUT_RINGp_CH(ch,ptr,dwords) nouveau_dma_outp((ch), (void*)(ptr), \ + (dwords)) +#define FIRE_RING_CH(ch) nouveau_dma_kickoff((ch)) +#define WAIT_RING_CH(ch,sz) nouveau_dma_wait((ch), (sz)) + +#endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_dri.h b/src/gallium/winsys/drm/nouveau/nouveau_dri.h new file mode 100644 index 00000000000..1207c2d609c --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_dri.h @@ -0,0 +1,28 @@ +#ifndef _NOUVEAU_DRI_ +#define _NOUVEAU_DRI_ + +#include "xf86drm.h" +#include "drm.h" +#include "nouveau_drm.h" + +struct nouveau_dri { + uint32_t device_id; /**< \brief PCI device ID */ + uint32_t width; /**< \brief width in pixels of display */ + uint32_t height; /**< \brief height in scanlines of display */ + uint32_t depth; /**< \brief depth of display (8, 15, 16, 24) */ + uint32_t bpp; /**< \brief bit depth of display (8, 16, 24, 32) */ + + uint32_t bus_type; /**< \brief ths bus type */ + uint32_t bus_mode; /**< \brief bus mode (used for AGP, maybe also for PCI-E ?) */ + + uint32_t front_offset; /**< \brief front buffer offset */ + uint32_t front_pitch; /**< \brief front buffer pitch */ + uint32_t back_offset; /**< \brief private back buffer offset */ + uint32_t back_pitch; /**< \brief private back buffer pitch */ + uint32_t depth_offset; /**< \brief private depth buffer offset */ + uint32_t depth_pitch; /**< \brief private depth buffer pitch */ + +}; + +#endif + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_drmif.h b/src/gallium/winsys/drm/nouveau/nouveau_drmif.h new file mode 100644 index 00000000000..dcd6a5eb0a4 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_drmif.h @@ -0,0 +1,310 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_DRMIF_H__ +#define __NOUVEAU_DRMIF_H__ + +#include <stdint.h> +#include <xf86drm.h> +#include <nouveau_drm.h> + +#include "nouveau/nouveau_device.h" +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_grobj.h" +#include "nouveau/nouveau_notifier.h" +#include "nouveau/nouveau_bo.h" +#include "nouveau/nouveau_resource.h" +#include "nouveau/nouveau_pushbuf.h" + +struct nouveau_device_priv { + struct nouveau_device base; + + int fd; + drm_context_t ctx; + drmLock *lock; + int needs_close; + + struct drm_nouveau_mem_alloc sa; + void *sa_map; + struct nouveau_resource *sa_heap; +}; +#define nouveau_device(n) ((struct nouveau_device_priv *)(n)) + +extern int +nouveau_device_open_existing(struct nouveau_device **, int close, + int fd, drm_context_t ctx); + +extern int +nouveau_device_open(struct nouveau_device **, const char *busid); + +extern void +nouveau_device_close(struct nouveau_device **); + +extern int +nouveau_device_get_param(struct nouveau_device *, uint64_t param, uint64_t *v); + +extern int +nouveau_device_set_param(struct nouveau_device *, uint64_t param, uint64_t val); + +struct nouveau_fence { + struct nouveau_channel *channel; +}; + +struct nouveau_fence_cb { + struct nouveau_fence_cb *next; + void (*func)(void *); + void *priv; +}; + +struct nouveau_fence_priv { + struct nouveau_fence base; + int refcount; + + struct nouveau_fence *next; + struct nouveau_fence_cb *signal_cb; + + uint32_t sequence; + int emitted; + int signalled; +}; +#define nouveau_fence(n) ((struct nouveau_fence_priv *)(n)) + +extern int +nouveau_fence_new(struct nouveau_channel *, struct nouveau_fence **); + +extern int +nouveau_fence_ref(struct nouveau_fence *, struct nouveau_fence **); + +extern int +nouveau_fence_signal_cb(struct nouveau_fence *, void (*)(void *), void *); + +extern void +nouveau_fence_emit(struct nouveau_fence *); + +extern int +nouveau_fence_wait(struct nouveau_fence **); + +extern void +nouveau_fence_flush(struct nouveau_channel *); + +struct nouveau_pushbuf_reloc { + struct nouveau_pushbuf_bo *pbbo; + uint32_t *ptr; + uint32_t flags; + uint32_t data; + uint32_t vor; + uint32_t tor; +}; + +struct nouveau_pushbuf_bo { + struct nouveau_channel *channel; + struct nouveau_bo *bo; + unsigned flags; + unsigned handled; +}; + +#define NOUVEAU_PUSHBUF_MAX_BUFFERS 1024 +#define NOUVEAU_PUSHBUF_MAX_RELOCS 1024 +struct nouveau_pushbuf_priv { + struct nouveau_pushbuf base; + + struct nouveau_fence *fence; + + unsigned nop_jump; + unsigned start; + unsigned size; + + struct nouveau_pushbuf_bo *buffers; + unsigned nr_buffers; + struct nouveau_pushbuf_reloc *relocs; + unsigned nr_relocs; +}; +#define nouveau_pushbuf(n) ((struct nouveau_pushbuf_priv *)(n)) + +#define pbbo_to_ptr(o) ((uint64_t)(unsigned long)(o)) +#define ptr_to_pbbo(h) ((struct nouveau_pushbuf_bo *)(unsigned long)(h)) +#define pbrel_to_ptr(o) ((uint64_t)(unsigned long)(o)) +#define ptr_to_pbrel(h) ((struct nouveau_pushbuf_reloc *)(unsigned long)(h)) +#define bo_to_ptr(o) ((uint64_t)(unsigned long)(o)) +#define ptr_to_bo(h) ((struct nouveau_bo_priv *)(unsigned long)(h)) + +extern int +nouveau_pushbuf_init(struct nouveau_channel *); + +extern int +nouveau_pushbuf_flush(struct nouveau_channel *, unsigned min); + +extern int +nouveau_pushbuf_emit_reloc(struct nouveau_channel *, void *ptr, + struct nouveau_bo *, uint32_t data, uint32_t flags, + uint32_t vor, uint32_t tor); + +struct nouveau_dma_priv { + uint32_t base; + uint32_t max; + uint32_t cur; + uint32_t put; + uint32_t free; + + int push_free; +} dma; + +struct nouveau_channel_priv { + struct nouveau_channel base; + + struct drm_nouveau_channel_alloc drm; + + uint32_t *pushbuf; + void *notifier_block; + + volatile uint32_t *user; + volatile uint32_t *put; + volatile uint32_t *get; + volatile uint32_t *ref_cnt; + + struct nouveau_dma_priv dma_master; + struct nouveau_dma_priv dma_bufmgr; + struct nouveau_dma_priv *dma; + + struct nouveau_fence *fence_head; + struct nouveau_fence *fence_tail; + uint32_t fence_sequence; + + struct nouveau_pushbuf_priv pb; + + unsigned user_charge; +}; +#define nouveau_channel(n) ((struct nouveau_channel_priv *)(n)) + +extern int +nouveau_channel_alloc(struct nouveau_device *, uint32_t fb, uint32_t tt, + struct nouveau_channel **); + +extern void +nouveau_channel_free(struct nouveau_channel **); + +struct nouveau_grobj_priv { + struct nouveau_grobj base; +}; +#define nouveau_grobj(n) ((struct nouveau_grobj_priv *)(n)) + +extern int nouveau_grobj_alloc(struct nouveau_channel *, uint32_t handle, + int class, struct nouveau_grobj **); +extern int nouveau_grobj_ref(struct nouveau_channel *, uint32_t handle, + struct nouveau_grobj **); +extern void nouveau_grobj_free(struct nouveau_grobj **); + + +struct nouveau_notifier_priv { + struct nouveau_notifier base; + + struct drm_nouveau_notifierobj_alloc drm; + volatile void *map; +}; +#define nouveau_notifier(n) ((struct nouveau_notifier_priv *)(n)) + +extern int +nouveau_notifier_alloc(struct nouveau_channel *, uint32_t handle, int count, + struct nouveau_notifier **); + +extern void +nouveau_notifier_free(struct nouveau_notifier **); + +extern void +nouveau_notifier_reset(struct nouveau_notifier *, int id); + +extern uint32_t +nouveau_notifier_status(struct nouveau_notifier *, int id); + +extern uint32_t +nouveau_notifier_return_val(struct nouveau_notifier *, int id); + +extern int +nouveau_notifier_wait_status(struct nouveau_notifier *, int id, int status, + int timeout); + +struct nouveau_bo_priv { + struct nouveau_bo base; + + struct nouveau_pushbuf_bo *pending; + struct nouveau_fence *fence; + struct nouveau_fence *wr_fence; + + struct drm_nouveau_mem_alloc drm; + void *map; + + void *sysmem; + int user; + + int refcount; + + uint64_t offset; + uint64_t flags; + int tiled; +}; +#define nouveau_bo(n) ((struct nouveau_bo_priv *)(n)) + +extern int +nouveau_bo_init(struct nouveau_device *); + +extern void +nouveau_bo_takedown(struct nouveau_device *); + +extern int +nouveau_bo_new(struct nouveau_device *, uint32_t flags, int align, int size, + struct nouveau_bo **); + +extern int +nouveau_bo_user(struct nouveau_device *, void *ptr, int size, + struct nouveau_bo **); + +extern int +nouveau_bo_ref(struct nouveau_device *, uint64_t handle, struct nouveau_bo **); + +extern int +nouveau_bo_set_status(struct nouveau_bo *, uint32_t flags); + +extern void +nouveau_bo_del(struct nouveau_bo **); + +extern int +nouveau_bo_map(struct nouveau_bo *, uint32_t flags); + +extern void +nouveau_bo_unmap(struct nouveau_bo *); + +extern int +nouveau_bo_validate(struct nouveau_channel *, struct nouveau_bo *, + uint32_t flags); + +extern int +nouveau_resource_init(struct nouveau_resource **heap, unsigned start, + unsigned size); + +extern int +nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv, + struct nouveau_resource **); + +extern void +nouveau_resource_free(struct nouveau_resource **); + +#endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_fence.c b/src/gallium/winsys/drm/nouveau/nouveau_fence.c new file mode 100644 index 00000000000..e7b0b4ff079 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_fence.c @@ -0,0 +1,214 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> +#include <assert.h> + +#include "nouveau_drmif.h" +#include "nouveau_dma.h" +#include "nouveau_local.h" + +static void +nouveau_fence_del_unsignalled(struct nouveau_fence *fence) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(fence->channel); + struct nouveau_fence *le; + + if (nvchan->fence_head == fence) { + nvchan->fence_head = nouveau_fence(fence)->next; + if (nvchan->fence_head == NULL) + nvchan->fence_tail = NULL; + return; + } + + le = nvchan->fence_head; + while (le && nouveau_fence(le)->next != fence) + le = nouveau_fence(le)->next; + assert(le && nouveau_fence(le)->next == fence); + nouveau_fence(le)->next = nouveau_fence(fence)->next; + if (nvchan->fence_tail == fence) + nvchan->fence_tail = le; +} + +static void +nouveau_fence_del(struct nouveau_fence **fence) +{ + struct nouveau_fence_priv *nvfence; + + if (!fence || !*fence) + return; + nvfence = nouveau_fence(*fence); + *fence = NULL; + + if (--nvfence->refcount) + return; + + if (nvfence->emitted && !nvfence->signalled) { + if (nvfence->signal_cb) { + nvfence->refcount++; + nouveau_fence_wait((void *)&nvfence); + return; + } + + nouveau_fence_del_unsignalled(&nvfence->base); + } + free(nvfence); +} + +int +nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **fence) +{ + struct nouveau_fence_priv *nvfence; + + if (!chan || !fence || *fence) + return -EINVAL; + + nvfence = calloc(1, sizeof(struct nouveau_fence_priv)); + if (!nvfence) + return -ENOMEM; + nvfence->base.channel = chan; + nvfence->refcount = 1; + + *fence = &nvfence->base; + return 0; +} + +int +nouveau_fence_ref(struct nouveau_fence *ref, struct nouveau_fence **fence) +{ + struct nouveau_fence_priv *nvfence; + + if (!fence) + return -EINVAL; + + if (*fence) { + nouveau_fence_del(fence); + *fence = NULL; + } + + if (ref) { + nvfence = nouveau_fence(ref); + nvfence->refcount++; + *fence = &nvfence->base; + } + + return 0; +} + +int +nouveau_fence_signal_cb(struct nouveau_fence *fence, void (*func)(void *), + void *priv) +{ + struct nouveau_fence_priv *nvfence = nouveau_fence(fence); + struct nouveau_fence_cb *cb; + + if (!nvfence || !func) + return -EINVAL; + + cb = malloc(sizeof(struct nouveau_fence_cb)); + if (!cb) + return -ENOMEM; + + cb->func = func; + cb->priv = priv; + cb->next = nvfence->signal_cb; + nvfence->signal_cb = cb; + return 0; +} + +void +nouveau_fence_emit(struct nouveau_fence *fence) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(fence->channel); + struct nouveau_fence_priv *nvfence = nouveau_fence(fence); + + nvfence->emitted = 1; + nvfence->sequence = ++nvchan->fence_sequence; + if (nvfence->sequence == 0xffffffff) + NOUVEAU_ERR("AII wrap unhandled\n"); + + /*XXX: assumes subc 0 is populated */ + RING_SPACE_CH(fence->channel, 2); + OUT_RING_CH (fence->channel, 0x00040050); + OUT_RING_CH (fence->channel, nvfence->sequence); + + if (nvchan->fence_tail) { + nouveau_fence(nvchan->fence_tail)->next = fence; + } else { + nvchan->fence_head = fence; + } + nvchan->fence_tail = fence; +} + +void +nouveau_fence_flush(struct nouveau_channel *chan) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + uint32_t sequence = *nvchan->ref_cnt; + + while (nvchan->fence_head) { + struct nouveau_fence_priv *nvfence; + + nvfence = nouveau_fence(nvchan->fence_head); + if (nvfence->sequence > sequence) + break; + nouveau_fence_del_unsignalled(&nvfence->base); + nvfence->signalled = 1; + + if (nvfence->signal_cb) { + struct nouveau_fence *fence = NULL; + + nouveau_fence_ref(&nvfence->base, &fence); + + while (nvfence->signal_cb) { + struct nouveau_fence_cb *cb; + + cb = nvfence->signal_cb; + nvfence->signal_cb = cb->next; + cb->func(cb->priv); + free(cb); + } + + nouveau_fence_ref(NULL, &fence); + } + } +} + +int +nouveau_fence_wait(struct nouveau_fence **fence) +{ + struct nouveau_fence_priv *nvfence; + + if (!fence || !*fence) + return -EINVAL; + nvfence = nouveau_fence(*fence); + + if (nvfence->emitted) { + while (!nvfence->signalled) + nouveau_fence_flush(nvfence->base.channel); + } + nouveau_fence_ref(NULL, fence); + + return 0; +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_grobj.c b/src/gallium/winsys/drm/nouveau/nouveau_grobj.c new file mode 100644 index 00000000000..51523897d58 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_grobj.c @@ -0,0 +1,107 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> + +#include "nouveau_drmif.h" + +int +nouveau_grobj_alloc(struct nouveau_channel *chan, uint32_t handle, + int class, struct nouveau_grobj **grobj) +{ + struct nouveau_device_priv *nvdev = nouveau_device(chan->device); + struct nouveau_grobj_priv *nvgrobj; + struct drm_nouveau_grobj_alloc g; + int ret; + + if (!nvdev || !grobj || *grobj) + return -EINVAL; + + nvgrobj = calloc(1, sizeof(*nvgrobj)); + if (!nvgrobj) + return -ENOMEM; + nvgrobj->base.channel = chan; + nvgrobj->base.handle = handle; + nvgrobj->base.grclass = class; + + g.channel = chan->id; + g.handle = handle; + g.class = class; + ret = drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GROBJ_ALLOC, + &g, sizeof(g)); + if (ret) { + nouveau_grobj_free((void *)&nvgrobj); + return ret; + } + + *grobj = &nvgrobj->base; + return 0; +} + +int +nouveau_grobj_ref(struct nouveau_channel *chan, uint32_t handle, + struct nouveau_grobj **grobj) +{ + struct nouveau_grobj_priv *nvgrobj; + + if (!chan || !grobj || *grobj) + return -EINVAL; + + nvgrobj = calloc(1, sizeof(struct nouveau_grobj_priv)); + if (!nvgrobj) + return -ENOMEM; + nvgrobj->base.channel = chan; + nvgrobj->base.handle = handle; + nvgrobj->base.grclass = 0; + + *grobj = &nvgrobj->base; + return 0; +} + +void +nouveau_grobj_free(struct nouveau_grobj **grobj) +{ + struct nouveau_device_priv *nvdev; + struct nouveau_channel_priv *chan; + struct nouveau_grobj_priv *nvgrobj; + + if (!grobj || !*grobj) + return; + nvgrobj = nouveau_grobj(*grobj); + *grobj = NULL; + + + chan = nouveau_channel(nvgrobj->base.channel); + nvdev = nouveau_device(chan->base.device); + + if (nvgrobj->base.grclass) { + struct drm_nouveau_gpuobj_free f; + + f.channel = chan->drm.channel; + f.handle = nvgrobj->base.handle; + drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GPUOBJ_FREE, + &f, sizeof(f)); + } + free(nvgrobj); +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_local.h b/src/gallium/winsys/drm/nouveau/nouveau_local.h new file mode 100644 index 00000000000..e878a408037 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_local.h @@ -0,0 +1,117 @@ +#ifndef __NOUVEAU_LOCAL_H__ +#define __NOUVEAU_LOCAL_H__ + +#include "pipe/p_compiler.h" +#include "nouveau_winsys_pipe.h" +#include <stdio.h> + +struct pipe_buffer; + +/* Debug output */ +#define NOUVEAU_MSG(fmt, args...) do { \ + fprintf(stdout, "nouveau: "fmt, ##args); \ + fflush(stdout); \ +} while(0) + +#define NOUVEAU_ERR(fmt, args...) do { \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); \ + fflush(stderr); \ +} while(0) + +#define NOUVEAU_TIME_MSEC() 0 + +/* User FIFO control */ +//#define NOUVEAU_DMA_TRACE +//#define NOUVEAU_DMA_DEBUG +//#define NOUVEAU_DMA_DUMP_POSTRELOC_PUSHBUF +#define NOUVEAU_DMA_BARRIER +#define NOUVEAU_DMA_TIMEOUT 2000 + +/* Push buffer access macros */ +static INLINE void +OUT_RING(struct nouveau_channel *chan, unsigned data) +{ + *(chan->pushbuf->cur++) = (data); +} + +static INLINE void +OUT_RINGp(struct nouveau_channel *chan, uint32_t *data, unsigned size) +{ + memcpy(chan->pushbuf->cur, data, size * 4); + chan->pushbuf->cur += size; +} + +static INLINE void +OUT_RINGf(struct nouveau_channel *chan, float f) +{ + union { uint32_t i; float f; } c; + c.f = f; + OUT_RING(chan, c.i); +} + +static INLINE void +BEGIN_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, + unsigned mthd, unsigned size) +{ + if (chan->pushbuf->remaining < (size + 1)) + nouveau_pushbuf_flush(chan, (size + 1)); + OUT_RING(chan, (gr->subc << 13) | (size << 18) | mthd); + chan->pushbuf->remaining -= (size + 1); +} + +static INLINE void +FIRE_RING(struct nouveau_channel *chan) +{ + nouveau_pushbuf_flush(chan, 0); +} + +static INLINE void +BIND_RING(struct nouveau_channel *chan, struct nouveau_grobj *gr, unsigned subc) +{ + gr->subc = subc; + BEGIN_RING(chan, gr, 0x0000, 1); + OUT_RING (chan, gr->handle); +} + +static INLINE void +OUT_RELOC(struct nouveau_channel *chan, struct nouveau_bo *bo, + unsigned data, unsigned flags, unsigned vor, unsigned tor) +{ + nouveau_pushbuf_emit_reloc(chan, chan->pushbuf->cur++, bo, + data, flags, vor, tor); +} + +/* Raw data + flags depending on FB/TT buffer */ +static INLINE void +OUT_RELOCd(struct nouveau_channel *chan, struct nouveau_bo *bo, + unsigned data, unsigned flags, unsigned vor, unsigned tor) +{ + OUT_RELOC(chan, bo, data, flags | NOUVEAU_BO_OR, vor, tor); +} + +/* FB/TT object handle */ +static INLINE void +OUT_RELOCo(struct nouveau_channel *chan, struct nouveau_bo *bo, + unsigned flags) +{ + OUT_RELOC(chan, bo, 0, flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); +} + +/* Low 32-bits of offset */ +static INLINE void +OUT_RELOCl(struct nouveau_channel *chan, struct nouveau_bo *bo, + unsigned delta, unsigned flags) +{ + OUT_RELOC(chan, bo, delta, flags | NOUVEAU_BO_LOW, 0, 0); +} + +/* High 32-bits of offset */ +static INLINE void +OUT_RELOCh(struct nouveau_channel *chan, struct nouveau_bo *bo, + unsigned delta, unsigned flags) +{ + OUT_RELOC(chan, bo, delta, flags | NOUVEAU_BO_HIGH, 0, 0); +} + +#endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_lock.c b/src/gallium/winsys/drm/nouveau/nouveau_lock.c new file mode 100644 index 00000000000..9adb9ac8547 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_lock.c @@ -0,0 +1,94 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/glheader.h" +#include "glapi/glthread.h" +#include <GL/internal/glcore.h> + +#include "nouveau_context.h" +#include "nouveau_screen.h" + +_glthread_DECLARE_STATIC_MUTEX( lockMutex ); + +static void +nouveau_contended_lock(struct nouveau_context *nv, GLuint flags) +{ + __DRIdrawablePrivate *dPriv = nv->dri_drawable; + __DRIscreenPrivate *sPriv = nv->dri_screen; + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + drmGetLock(nvdev->fd, nvdev->ctx, flags); + + /* If the window moved, may need to set a new cliprect now. + * + * NOTE: This releases and regains the hw lock, so all state + * checking must be done *after* this call: + */ + if (dPriv) + DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); +} + +/* Lock the hardware and validate our state. + */ +void +LOCK_HARDWARE(struct nouveau_context *nv) +{ + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + char __ret=0; + + _glthread_LOCK_MUTEX(lockMutex); + assert(!nv->locked); + + DRM_CAS(nvdev->lock, nvdev->ctx, + (DRM_LOCK_HELD | nvdev->ctx), __ret); + + if (__ret) + nouveau_contended_lock(nv, 0); + nv->locked = GL_TRUE; +} + + + /* Unlock the hardware using the global current context + */ +void +UNLOCK_HARDWARE(struct nouveau_context *nv) +{ + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + assert(nv->locked); + nv->locked = GL_FALSE; + + DRM_UNLOCK(nvdev->fd, nvdev->lock, nvdev->ctx); + + _glthread_UNLOCK_MUTEX(lockMutex); +} diff --git a/src/gallium/winsys/drm/nouveau/nouveau_notifier.c b/src/gallium/winsys/drm/nouveau/nouveau_notifier.c new file mode 100644 index 00000000000..01e8f38440e --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_notifier.c @@ -0,0 +1,137 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> + +#include "nouveau_drmif.h" +#include "nouveau_local.h" + +#define NOTIFIER(__v) \ + struct nouveau_notifier_priv *nvnotify = nouveau_notifier(notifier); \ + volatile uint32_t *__v = (void*)nvnotify->map + (id * 32) + +int +nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle, + int count, struct nouveau_notifier **notifier) +{ + struct nouveau_notifier_priv *nvnotify; + int ret; + + if (!chan || !notifier || *notifier) + return -EINVAL; + + nvnotify = calloc(1, sizeof(struct nouveau_notifier_priv)); + if (!nvnotify) + return -ENOMEM; + nvnotify->base.channel = chan; + nvnotify->base.handle = handle; + + nvnotify->drm.channel = chan->id; + nvnotify->drm.handle = handle; + nvnotify->drm.count = count; + if ((ret = drmCommandWriteRead(nouveau_device(chan->device)->fd, + DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, + &nvnotify->drm, + sizeof(nvnotify->drm)))) { + nouveau_notifier_free((void *)&nvnotify); + return ret; + } + + nvnotify->map = (void *)nouveau_channel(chan)->notifier_block + + nvnotify->drm.offset; + *notifier = &nvnotify->base; + return 0; +} + +void +nouveau_notifier_free(struct nouveau_notifier **notifier) +{ + + struct nouveau_notifier_priv *nvnotify; + struct nouveau_channel_priv *nvchan; + struct nouveau_device_priv *nvdev; + struct drm_nouveau_gpuobj_free f; + + if (!notifier || !*notifier) + return; + nvnotify = nouveau_notifier(*notifier); + *notifier = NULL; + + nvchan = nouveau_channel(nvnotify->base.channel); + nvdev = nouveau_device(nvchan->base.device); + + f.channel = nvchan->drm.channel; + f.handle = nvnotify->base.handle; + drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GPUOBJ_FREE, &f, sizeof(f)); + free(nvnotify); +} + +void +nouveau_notifier_reset(struct nouveau_notifier *notifier, int id) +{ + NOTIFIER(n); + + n[NV_NOTIFY_TIME_0 /4] = 0x00000000; + n[NV_NOTIFY_TIME_1 /4] = 0x00000000; + n[NV_NOTIFY_RETURN_VALUE/4] = 0x00000000; + n[NV_NOTIFY_STATE /4] = (NV_NOTIFY_STATE_STATUS_IN_PROCESS << + NV_NOTIFY_STATE_STATUS_SHIFT); +} + +uint32_t +nouveau_notifier_status(struct nouveau_notifier *notifier, int id) +{ + NOTIFIER(n); + + return n[NV_NOTIFY_STATE/4] >> NV_NOTIFY_STATE_STATUS_SHIFT; +} + +uint32_t +nouveau_notifier_return_val(struct nouveau_notifier *notifier, int id) +{ + NOTIFIER(n); + + return n[NV_NOTIFY_RETURN_VALUE/4]; +} + +int +nouveau_notifier_wait_status(struct nouveau_notifier *notifier, int id, + int status, int timeout) +{ + NOTIFIER(n); + uint32_t time = 0, t_start = NOUVEAU_TIME_MSEC(); + + while (time <= timeout) { + uint32_t v; + + v = n[NV_NOTIFY_STATE/4] >> NV_NOTIFY_STATE_STATUS_SHIFT; + if (v == status) + return 0; + + if (timeout) + time = NOUVEAU_TIME_MSEC() - t_start; + } + + return -EBUSY; +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_pushbuf.c b/src/gallium/winsys/drm/nouveau/nouveau_pushbuf.c new file mode 100644 index 00000000000..815046ba85f --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_pushbuf.c @@ -0,0 +1,271 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> +#include <assert.h> + +#include "nouveau_drmif.h" +#include "nouveau_dma.h" + +#define PB_BUFMGR_DWORDS (4096 / 2) +#define PB_MIN_USER_DWORDS 2048 + +static int +nouveau_pushbuf_space(struct nouveau_channel *chan, unsigned min) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; + + assert((min + 1) <= nvchan->dma->max); + + /* Wait for enough space in push buffer */ + min = min < PB_MIN_USER_DWORDS ? PB_MIN_USER_DWORDS : min; + min += 1; /* a bit extra for the NOP */ + if (nvchan->dma->free < min) + WAIT_RING_CH(chan, min); + + /* Insert NOP, may turn into a jump later */ + RING_SPACE_CH(chan, 1); + nvpb->nop_jump = nvchan->dma->cur; + OUT_RING_CH(chan, 0); + + /* Any remaining space is available to the user */ + nvpb->start = nvchan->dma->cur; + nvpb->size = nvchan->dma->free; + nvpb->base.channel = chan; + nvpb->base.remaining = nvpb->size; + nvpb->base.cur = &nvchan->pushbuf[nvpb->start]; + + /* Create a new fence object for this "frame" */ + nouveau_fence_ref(NULL, &nvpb->fence); + nouveau_fence_new(chan, &nvpb->fence); + + return 0; +} + +int +nouveau_pushbuf_init(struct nouveau_channel *chan) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_dma_priv *m = &nvchan->dma_master; + struct nouveau_dma_priv *b = &nvchan->dma_bufmgr; + int i; + + if (!nvchan) + return -EINVAL; + + /* Reassign last bit of push buffer for a "separate" bufmgr + * ring buffer + */ + m->max -= PB_BUFMGR_DWORDS; + m->free -= PB_BUFMGR_DWORDS; + + b->base = m->base + ((m->max + 2) << 2); + b->max = PB_BUFMGR_DWORDS - 2; + b->cur = b->put = 0; + b->free = b->max - b->cur; + + /* Some NOPs just to be safe + *XXX: RING_SKIPS + */ + nvchan->dma = b; + RING_SPACE_CH(chan, 8); + for (i = 0; i < 8; i++) + OUT_RING_CH(chan, 0); + nvchan->dma = m; + + nouveau_pushbuf_space(chan, 0); + chan->pushbuf = &nvchan->pb.base; + + nvchan->pb.buffers = calloc(NOUVEAU_PUSHBUF_MAX_BUFFERS, + sizeof(struct nouveau_pushbuf_bo)); + nvchan->pb.relocs = calloc(NOUVEAU_PUSHBUF_MAX_RELOCS, + sizeof(struct nouveau_pushbuf_reloc)); + return 0; +} + +static uint32_t +nouveau_pushbuf_calc_reloc(struct nouveau_bo *bo, + struct nouveau_pushbuf_reloc *r) +{ + uint32_t push; + + if (r->flags & NOUVEAU_BO_LOW) { + push = bo->offset + r->data; + } else + if (r->flags & NOUVEAU_BO_HIGH) { + push = (bo->offset + r->data) >> 32; + } else { + push = r->data; + } + + if (r->flags & NOUVEAU_BO_OR) { + if (bo->flags & NOUVEAU_BO_VRAM) + push |= r->vor; + else + push |= r->tor; + } + + return push; +} + +/* This would be our TTM "superioctl" */ +int +nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; + int ret, i; + + if (nvpb->base.remaining == nvpb->size) + return 0; + + nouveau_fence_flush(chan); + + nvpb->size -= nvpb->base.remaining; + nvchan->dma->cur += nvpb->size; + nvchan->dma->free -= nvpb->size; + assert(nvchan->dma->cur <= nvchan->dma->max); + + nvchan->dma = &nvchan->dma_bufmgr; + nvchan->pushbuf[nvpb->nop_jump] = 0x20000000 | + (nvchan->dma->base + (nvchan->dma->cur << 2)); + + /* Validate buffers + apply relocations */ + nvchan->user_charge = 0; + for (i = 0; i < nvpb->nr_relocs; i++) { + struct nouveau_pushbuf_reloc *r = &nvpb->relocs[i]; + struct nouveau_pushbuf_bo *pbbo = r->pbbo; + struct nouveau_bo *bo = pbbo->bo; + + /* Validated, mem matches presumed, no relocation necessary */ + if (pbbo->handled & 2) { + if (!(pbbo->handled & 1)) + assert(0); + continue; + } + + /* Not yet validated, do it now */ + if (!(pbbo->handled & 1)) { + ret = nouveau_bo_validate(chan, bo, pbbo->flags); + if (ret) { + assert(0); + return ret; + } + pbbo->handled |= 1; + + if (bo->offset == nouveau_bo(bo)->offset && + bo->flags == nouveau_bo(bo)->flags) { + pbbo->handled |= 2; + continue; + } + bo->offset = nouveau_bo(bo)->offset; + bo->flags = nouveau_bo(bo)->flags; + } + + /* Apply the relocation */ + *r->ptr = nouveau_pushbuf_calc_reloc(bo, r); + } + nvpb->nr_relocs = 0; + + /* Dereference all buffers on validate list */ + for (i = 0; i < nvpb->nr_buffers; i++) { + struct nouveau_pushbuf_bo *pbbo = &nvpb->buffers[i]; + + nouveau_bo(pbbo->bo)->pending = NULL; + nouveau_bo_del(&pbbo->bo); + } + nvpb->nr_buffers = 0; + + /* Switch back to user's ring */ + RING_SPACE_CH(chan, 1); + OUT_RING_CH(chan, 0x20000000 | ((nvpb->start << 2) + + nvchan->dma_master.base)); + nvchan->dma = &nvchan->dma_master; + + /* Fence + kickoff */ + nouveau_fence_emit(nvpb->fence); + FIRE_RING_CH(chan); + + /* Allocate space for next push buffer */ + ret = nouveau_pushbuf_space(chan, min); + assert(!ret); + + return 0; +} + +static struct nouveau_pushbuf_bo * +nouveau_pushbuf_emit_buffer(struct nouveau_channel *chan, struct nouveau_bo *bo) +{ + struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(chan->pushbuf); + struct nouveau_bo_priv *nvbo = nouveau_bo(bo); + struct nouveau_pushbuf_bo *pbbo; + + if (nvbo->pending) + return nvbo->pending; + + if (nvpb->nr_buffers >= NOUVEAU_PUSHBUF_MAX_BUFFERS) + return NULL; + pbbo = nvpb->buffers + nvpb->nr_buffers++; + nvbo->pending = pbbo; + + nouveau_bo_ref(bo->device, bo->handle, &pbbo->bo); + pbbo->channel = chan; + pbbo->flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART; + pbbo->handled = 0; + return pbbo; +} + +int +nouveau_pushbuf_emit_reloc(struct nouveau_channel *chan, void *ptr, + struct nouveau_bo *bo, uint32_t data, uint32_t flags, + uint32_t vor, uint32_t tor) +{ + struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(chan->pushbuf); + struct nouveau_pushbuf_bo *pbbo; + struct nouveau_pushbuf_reloc *r; + + if (nvpb->nr_relocs >= NOUVEAU_PUSHBUF_MAX_RELOCS) + return -ENOMEM; + + pbbo = nouveau_pushbuf_emit_buffer(chan, bo); + if (!pbbo) + return -ENOMEM; + pbbo->flags |= (flags & NOUVEAU_BO_RDWR); + pbbo->flags &= (flags | NOUVEAU_BO_RDWR); + + r = nvpb->relocs + nvpb->nr_relocs++; + r->pbbo = pbbo; + r->ptr = ptr; + r->flags = flags; + r->data = data; + r->vor = vor; + r->tor = tor; + + if (flags & NOUVEAU_BO_DUMMY) + *(uint32_t *)ptr = 0; + else + *(uint32_t *)ptr = nouveau_pushbuf_calc_reloc(bo, r); + return 0; +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_resource.c b/src/gallium/winsys/drm/nouveau/nouveau_resource.c new file mode 100644 index 00000000000..3bbcb5c45e0 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_resource.c @@ -0,0 +1,116 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> + +#include "nouveau_drmif.h" +#include "nouveau_local.h" + +int +nouveau_resource_init(struct nouveau_resource **heap, + unsigned start, unsigned size) +{ + struct nouveau_resource *r; + + r = calloc(1, sizeof(struct nouveau_resource)); + if (!r) + return 1; + + r->start = start; + r->size = size; + *heap = r; + return 0; +} + +int +nouveau_resource_alloc(struct nouveau_resource *heap, int size, void *priv, + struct nouveau_resource **res) +{ + struct nouveau_resource *r; + + if (!heap || !size || !res || *res) + return 1; + + while (heap) { + if (!heap->in_use && heap->size >= size) { + r = calloc(1, sizeof(struct nouveau_resource)); + if (!r) + return 1; + + r->start = (heap->start + heap->size) - size; + r->size = size; + r->in_use = 1; + r->priv = priv; + + heap->size -= size; + + r->next = heap->next; + if (heap->next) + heap->next->prev = r; + r->prev = heap; + heap->next = r; + + *res = r; + return 0; + } + + heap = heap->next; + } + + return 1; +} + +void +nouveau_resource_free(struct nouveau_resource **res) +{ + struct nouveau_resource *r; + + if (!res || !*res) + return; + r = *res; + *res = NULL; + + r->in_use = 0; + + if (r->next && !r->next->in_use) { + struct nouveau_resource *new = r->next; + + new->prev = r->prev; + if (r->prev) + r->prev->next = new; + new->size += r->size; + new->start = r->start; + + free(r); + r = new; + } + + if (r->prev && !r->prev->in_use) { + r->prev->next = r->next; + if (r->next) + r->next->prev = r->prev; + r->prev->size += r->size; + free(r); + } + +} diff --git a/src/gallium/winsys/drm/nouveau/nouveau_screen.c b/src/gallium/winsys/drm/nouveau/nouveau_screen.c new file mode 100644 index 00000000000..df1fe7e69b4 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_screen.c @@ -0,0 +1,310 @@ +#include "utils.h" +#include "vblank.h" +#include "xmlpool.h" + +#include "pipe/p_context.h" +#include "state_tracker/st_public.h" +#include "state_tracker/st_cb_fbo.h" + +#include "nouveau_context.h" +#include "nouveau_drm.h" +#include "nouveau_dri.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_swapbuffers.h" + +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 11 +#error nouveau_drm.h version does not match expected version +#endif + +/* Extension stuff, enabling of extensions handled by Gallium's GL state + * tracker. But, we still need to define the entry points we want. + */ +#define need_GL_ARB_fragment_program +#define need_GL_ARB_multisample +#define need_GL_ARB_occlusion_query +#define need_GL_ARB_point_parameters +#define need_GL_ARB_shader_objects +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_program +#define need_GL_ARB_vertex_shader +#define need_GL_ARB_vertex_buffer_object +#define need_GL_EXT_compiled_vertex_array +#define need_GL_EXT_fog_coord +#define need_GL_EXT_secondary_color +#define need_GL_EXT_framebuffer_object +#define need_GL_VERSION_2_0 +#define need_GL_VERSION_2_1 +#include "extension_helper.h" + +const struct dri_extension card_extensions[] = +{ + { "GL_ARB_multisample", GL_ARB_multisample_functions }, + { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, + { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, + { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, + { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, + { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, + { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, + { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, + { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, + { "GL_EXT_compiled_vertex_array", GL_EXT_compiled_vertex_array_functions }, + { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, + { NULL, 0 } +}; + +PUBLIC const char __driConfigOptions[] = +DRI_CONF_BEGIN +DRI_CONF_END; +static const GLuint __driNConfigOptions = 0; + +extern const struct dri_extension common_extensions[]; +extern const struct dri_extension nv40_extensions[]; + +static GLboolean +nouveau_screen_create(__DRIscreenPrivate *driScrnPriv) +{ + struct nouveau_dri *nv_dri = driScrnPriv->pDevPriv; + struct nouveau_screen *nv_screen; + int ret; + + if (driScrnPriv->devPrivSize != sizeof(struct nouveau_dri)) { + NOUVEAU_ERR("DRI struct mismatch between DDX/DRI\n"); + return GL_FALSE; + } + + nv_screen = CALLOC_STRUCT(nouveau_screen); + if (!nv_screen) + return GL_FALSE; + nv_screen->driScrnPriv = driScrnPriv; + driScrnPriv->private = (void *)nv_screen; + + driParseOptionInfo(&nv_screen->option_cache, + __driConfigOptions, __driNConfigOptions); + + if ((ret = nouveau_device_open_existing(&nv_screen->device, 0, + driScrnPriv->fd, 0))) { + NOUVEAU_ERR("Failed opening nouveau device: %d\n", ret); + return GL_FALSE; + } + + nv_screen->front_offset = nv_dri->front_offset; + nv_screen->front_pitch = nv_dri->front_pitch * (nv_dri->bpp / 8); + nv_screen->front_cpp = nv_dri->bpp / 8; + nv_screen->front_height = nv_dri->height; + + return GL_TRUE; +} + +static void +nouveau_screen_destroy(__DRIscreenPrivate *driScrnPriv) +{ + struct nouveau_screen *nv_screen = driScrnPriv->private; + + driScrnPriv->private = NULL; + FREE(nv_screen); +} + +static GLboolean +nouveau_create_buffer(__DRIscreenPrivate * driScrnPriv, + __DRIdrawablePrivate * driDrawPriv, + const __GLcontextModes *glVis, GLboolean pixmapBuffer) +{ + struct nouveau_framebuffer *nvfb; + enum pipe_format colour, depth, stencil; + + if (pixmapBuffer) + return GL_FALSE; + + nvfb = CALLOC_STRUCT(nouveau_framebuffer); + if (!nvfb) + return GL_FALSE; + + if (glVis->redBits == 5) + colour = PIPE_FORMAT_R5G6B5_UNORM; + else + colour = PIPE_FORMAT_A8R8G8B8_UNORM; + + if (glVis->depthBits == 16) + depth = PIPE_FORMAT_Z16_UNORM; + else if (glVis->depthBits == 24) + depth = PIPE_FORMAT_Z24S8_UNORM; + else + depth = PIPE_FORMAT_NONE; + + if (glVis->stencilBits == 8) + stencil = PIPE_FORMAT_Z24S8_UNORM; + else + stencil = PIPE_FORMAT_NONE; + + nvfb->stfb = st_create_framebuffer(glVis, colour, depth, stencil, + driDrawPriv->w, driDrawPriv->h, + (void*)nvfb); + if (!nvfb->stfb) { + free(nvfb); + return GL_FALSE; + } + + driDrawPriv->driverPrivate = (void *)nvfb; + return GL_TRUE; +} + +static void +nouveau_destroy_buffer(__DRIdrawablePrivate * driDrawPriv) +{ + struct nouveau_framebuffer *nvfb; + + nvfb = (struct nouveau_framebuffer *)driDrawPriv->driverPrivate; + st_unreference_framebuffer(&nvfb->stfb); + free(nvfb); +} + +static struct __DriverAPIRec +nouveau_api = { + .InitDriver = nouveau_screen_create, + .DestroyScreen = nouveau_screen_destroy, + .CreateContext = nouveau_context_create, + .DestroyContext = nouveau_context_destroy, + .CreateBuffer = nouveau_create_buffer, + .DestroyBuffer = nouveau_destroy_buffer, + .SwapBuffers = nouveau_swap_buffers, + .MakeCurrent = nouveau_context_bind, + .UnbindContext = nouveau_context_unbind, + .GetSwapInfo = NULL, + .GetMSC = NULL, + .WaitForMSC = NULL, + .WaitForSBC = NULL, + .SwapBuffersMSC = NULL, + .CopySubBuffer = nouveau_copy_sub_buffer, + .setTexOffset = NULL +}; + +static __GLcontextModes * +nouveau_fill_in_modes(unsigned pixel_bits, unsigned depth_bits, + unsigned stencil_bits, GLboolean have_back_buffer) +{ + __GLcontextModes * modes; + __GLcontextModes * m; + unsigned num_modes; + unsigned depth_buffer_factor; + unsigned back_buffer_factor; + int i; + + static const struct { + GLenum format; + GLenum type; + } fb_format_array[] = { + { GL_RGB , GL_UNSIGNED_SHORT_5_6_5 }, + { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV }, + { GL_BGR , GL_UNSIGNED_INT_8_8_8_8_REV }, + }; + + /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't + * support pageflipping at all. + */ + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML + }; + + uint8_t depth_bits_array[4] = { 0, 16, 24, 24 }; + uint8_t stencil_bits_array[4] = { 0, 0, 0, 8 }; + uint8_t msaa_samples_array[1] = { 0 }; + + depth_buffer_factor = 4; + back_buffer_factor = (have_back_buffer) ? 3 : 1; + + num_modes = ((pixel_bits==16) ? 1 : 2) * + depth_buffer_factor * back_buffer_factor * 4; + modes = (*dri_interface->createContextModes)(num_modes, + sizeof(__GLcontextModes)); + m = modes; + + for (i=((pixel_bits==16)?0:1);i<((pixel_bits==16)?1:3);i++) { + if (!driFillInModes(&m, fb_format_array[i].format, + fb_format_array[i].type, + depth_bits_array, + stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, + back_buffer_factor, + msaa_samples_array, 1, + GLX_TRUE_COLOR)) { + fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__ ); + return NULL; + } + + if (!driFillInModes(&m, fb_format_array[i].format, + fb_format_array[i].type, + depth_bits_array, + stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, + back_buffer_factor, + msaa_samples_array, 1, + GLX_DIRECT_COLOR)) { + fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__ ); + return NULL; + } + } + + return modes; +} +PUBLIC void * +__driCreateNewScreen_20050727(__DRInativeDisplay *dpy, int scrn, + __DRIscreen *psc, const __GLcontextModes * modes, + const __DRIversion * ddx_version, + const __DRIversion * dri_version, + const __DRIversion * drm_version, + const __DRIframebuffer * frame_buffer, + void * pSAREA, int fd, int internal_api_version, + const __DRIinterfaceMethods * interface, + __GLcontextModes ** driver_modes) +{ + __DRIscreenPrivate *psp; + static const __DRIversion ddx_expected = + { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = + { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; + struct nouveau_dri *nv_dri = NULL; + + dri_interface = interface; + + if (!driCheckDriDdxDrmVersions2("nouveau", + dri_version, &dri_expected, + ddx_version, &ddx_expected, + drm_version, &drm_expected)) { + return NULL; + } + + if (drm_expected.patch != drm_version->patch) { + fprintf(stderr, "Incompatible DRM patch level.\n" + "Expected: %d\n" "Current : %d\n", + drm_expected.patch, drm_version->patch); + return NULL; + } + + psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, + ddx_version, dri_version, drm_version, + frame_buffer, pSAREA, fd, + internal_api_version, + &nouveau_api); + if (psp == NULL) + return NULL; + nv_dri = psp->pDevPriv; + + *driver_modes = nouveau_fill_in_modes(nv_dri->bpp, + (nv_dri->bpp == 16) ? 16 : 24, + (nv_dri->bpp == 16) ? 0 : 8, + 1); + + driInitExtensions(NULL, card_extensions, GL_FALSE); + + return (void *)psp; +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_screen.h b/src/gallium/winsys/drm/nouveau/nouveau_screen.h new file mode 100644 index 00000000000..388d6be9bbc --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_screen.h @@ -0,0 +1,20 @@ +#ifndef __NOUVEAU_SCREEN_H__ +#define __NOUVEAU_SCREEN_H__ + +#include "xmlconfig.h" + +struct nouveau_screen { + __DRIscreenPrivate *driScrnPriv; + driOptionCache option_cache; + + struct nouveau_device *device; + + uint32_t front_offset; + uint32_t front_pitch; + uint32_t front_cpp; + uint32_t front_height; + + void *nvc; +}; + +#endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.c b/src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.c new file mode 100644 index 00000000000..70e0104e83b --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.c @@ -0,0 +1,86 @@ +#include "main/glheader.h" +#include "glapi/glthread.h" +#include <GL/internal/glcore.h> + +#include "pipe/p_context.h" +#include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" +#include "state_tracker/st_cb_fbo.h" + +#include "nouveau_context.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_swapbuffers.h" + +void +nouveau_copy_buffer(__DRIdrawablePrivate *dPriv, struct pipe_surface *surf, + const drm_clip_rect_t *rect) +{ + struct nouveau_context *nv = dPriv->driContextPriv->driverPrivate; + drm_clip_rect_t *pbox; + int nbox, i; + + LOCK_HARDWARE(nv); + if (!dPriv->numClipRects) { + UNLOCK_HARDWARE(nv); + return; + } + pbox = dPriv->pClipRects; + nbox = dPriv->numClipRects; + + nv->surface_copy_prep(nv, nv->frontbuffer, surf); + for (i = 0; i < nbox; i++, pbox++) { + int sx, sy, dx, dy, w, h; + + sx = pbox->x1 - dPriv->x; + sy = pbox->y1 - dPriv->y; + dx = pbox->x1; + dy = pbox->y1; + w = pbox->x2 - pbox->x1; + h = pbox->y2 - pbox->y1; + + nv->surface_copy(nv, dx, dy, sx, sy, w, h); + } + + FIRE_RING(nv->nvc->channel); + UNLOCK_HARDWARE(nv); + + if (nv->last_stamp != dPriv->lastStamp) { + struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; + st_resize_framebuffer(nvfb->stfb, dPriv->w, dPriv->h); + nv->last_stamp = dPriv->lastStamp; + } +} + +void +nouveau_copy_sub_buffer(__DRIdrawablePrivate *dPriv, int x, int y, int w, int h) +{ + struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; + struct pipe_surface *surf; + + surf = st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT); + if (surf) { + drm_clip_rect_t rect; + rect.x1 = x; + rect.y1 = y; + rect.x2 = x + w; + rect.y2 = y + h; + + st_notify_swapbuffers(nvfb->stfb); + nouveau_copy_buffer(dPriv, surf, &rect); + } +} + +void +nouveau_swap_buffers(__DRIdrawablePrivate *dPriv) +{ + struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; + struct pipe_surface *surf; + + surf = st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT); + if (surf) { + st_notify_swapbuffers(nvfb->stfb); + nouveau_copy_buffer(dPriv, surf, NULL); + } +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.h b/src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.h new file mode 100644 index 00000000000..825d3da6da5 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_swapbuffers.h @@ -0,0 +1,10 @@ +#ifndef __NOUVEAU_SWAPBUFFERS_H__ +#define __NOUVEAU_SWAPBUFFERS_H__ + +extern void nouveau_copy_buffer(__DRIdrawablePrivate *, struct pipe_surface *, + const drm_clip_rect_t *); +extern void nouveau_copy_sub_buffer(__DRIdrawablePrivate *, + int x, int y, int w, int h); +extern void nouveau_swap_buffers(__DRIdrawablePrivate *); + +#endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_winsys.c b/src/gallium/winsys/drm/nouveau/nouveau_winsys.c new file mode 100644 index 00000000000..0878840dcc0 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_winsys.c @@ -0,0 +1,158 @@ +#include "util/u_memory.h" + +#include "nouveau_context.h" +#include "nouveau_screen.h" +#include "nouveau_winsys_pipe.h" + +#include "nouveau/nouveau_winsys.h" + +static int +nouveau_pipe_notifier_alloc(struct nouveau_winsys *nvws, int count, + struct nouveau_notifier **notify) +{ + struct nouveau_context *nv = nvws->nv; + + return nouveau_notifier_alloc(nv->nvc->channel, nv->nvc->next_handle++, + count, notify); +} + +static int +nouveau_pipe_grobj_alloc(struct nouveau_winsys *nvws, int grclass, + struct nouveau_grobj **grobj) +{ + struct nouveau_context *nv = nvws->nv; + struct nouveau_channel *chan = nv->nvc->channel; + int ret; + + ret = nouveau_grobj_alloc(chan, nv->nvc->next_handle++, + grclass, grobj); + if (ret) + return ret; + + assert(nv->nvc->next_subchannel < 7); + BIND_RING(chan, *grobj, nv->nvc->next_subchannel++); + return 0; +} + +static int +nouveau_pipe_surface_copy(struct nouveau_winsys *nvws, struct pipe_surface *dst, + unsigned dx, unsigned dy, struct pipe_surface *src, + unsigned sx, unsigned sy, unsigned w, unsigned h) +{ + struct nouveau_context *nv = nvws->nv; + + if (nv->surface_copy_prep(nv, dst, src)) + return 1; + nv->surface_copy(nv, dx, dy, sx, sy, w, h); + nv->surface_copy_done(nv); + + return 0; +} + +static int +nouveau_pipe_surface_fill(struct nouveau_winsys *nvws, struct pipe_surface *dst, + unsigned dx, unsigned dy, unsigned w, unsigned h, + unsigned value) +{ + if (nvws->nv->surface_fill(nvws->nv, dst, dx, dy, w, h, value)) + return 1; + return 0; +} + +static int +nouveau_pipe_push_reloc(struct nouveau_winsys *nvws, void *ptr, + struct pipe_buffer *buf, uint32_t data, + uint32_t flags, uint32_t vor, uint32_t tor) +{ + return nouveau_pushbuf_emit_reloc(nvws->channel, ptr, + nouveau_buffer(buf)->bo, + data, flags, vor, tor); +} + +static int +nouveau_pipe_push_flush(struct nouveau_winsys *nvws, unsigned size, + struct pipe_fence_handle **fence) +{ + if (fence) { + struct nouveau_pushbuf *pb = nvws->channel->pushbuf; + struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(pb); + struct nouveau_fence *ref = NULL; + + nouveau_fence_ref(nvpb->fence, &ref); + *fence = (struct pipe_fence_handle *)ref; + } + + return nouveau_pushbuf_flush(nvws->channel, size); +} + +struct pipe_context * +nouveau_pipe_create(struct nouveau_context *nv) +{ + struct nouveau_channel_context *nvc = nv->nvc; + struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys); + struct pipe_screen *(*hws_create)(struct pipe_winsys *, + struct nouveau_winsys *); + struct pipe_context *(*hw_create)(struct pipe_screen *, unsigned); + struct pipe_winsys *ws; + unsigned chipset = nv->nv_screen->device->chipset; + + if (!nvws) + return NULL; + + switch (chipset & 0xf0) { + case 0x10: + case 0x20: + hws_create = nv10_screen_create; + hw_create = nv10_create; + break; + case 0x30: + hws_create = nv30_screen_create; + hw_create = nv30_create; + break; + case 0x40: + case 0x60: + hws_create = nv40_screen_create; + hw_create = nv40_create; + break; + case 0x50: + case 0x80: + case 0x90: + hws_create = nv50_screen_create; + hw_create = nv50_create; + break; + default: + NOUVEAU_ERR("Unknown chipset NV%02x\n", chipset); + return NULL; + } + + nvws->nv = nv; + nvws->channel = nv->nvc->channel; + + nvws->res_init = nouveau_resource_init; + nvws->res_alloc = nouveau_resource_alloc; + nvws->res_free = nouveau_resource_free; + + nvws->push_reloc = nouveau_pipe_push_reloc; + nvws->push_flush = nouveau_pipe_push_flush; + + nvws->grobj_alloc = nouveau_pipe_grobj_alloc; + nvws->grobj_free = nouveau_grobj_free; + + nvws->notifier_alloc = nouveau_pipe_notifier_alloc; + nvws->notifier_free = nouveau_notifier_free; + nvws->notifier_reset = nouveau_notifier_reset; + nvws->notifier_status = nouveau_notifier_status; + nvws->notifier_retval = nouveau_notifier_return_val; + nvws->notifier_wait = nouveau_notifier_wait_status; + + nvws->surface_copy = nouveau_pipe_surface_copy; + nvws->surface_fill = nouveau_pipe_surface_fill; + + ws = nouveau_create_pipe_winsys(nv); + + if (!nvc->pscreen) + nvc->pscreen = hws_create(ws, nvws); + nvc->pctx[nv->pctx_id] = hw_create(nvc->pscreen, nv->pctx_id); + return nvc->pctx[nv->pctx_id]; +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.c new file mode 100644 index 00000000000..5276806de6b --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.c @@ -0,0 +1,206 @@ +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "util/u_memory.h" + +#include "nouveau_context.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_swapbuffers.h" +#include "nouveau_winsys_pipe.h" + +static void +nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, + void *context_private) +{ + struct nouveau_context *nv = context_private; + __DRIdrawablePrivate *dPriv = nv->dri_drawable; + + nouveau_copy_buffer(dPriv, surf, NULL); +} + +static const char * +nouveau_get_name(struct pipe_winsys *pws) +{ + return "Nouveau/DRI"; +} + +static struct pipe_buffer * +nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, + unsigned usage, unsigned size) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_context *nv = nvpws->nv; + struct nouveau_device *dev = nv->nv_screen->device; + struct nouveau_pipe_buffer *nvbuf; + uint32_t flags; + + nvbuf = calloc(1, sizeof(*nvbuf)); + if (!nvbuf) + return NULL; + nvbuf->base.refcount = 1; + nvbuf->base.alignment = alignment; + nvbuf->base.usage = usage; + nvbuf->base.size = size; + + flags = NOUVEAU_BO_LOCAL; + + if (usage & PIPE_BUFFER_USAGE_PIXEL) { + if (usage & NOUVEAU_BUFFER_USAGE_TEXTURE) + flags |= NOUVEAU_BO_GART; + flags |= NOUVEAU_BO_VRAM; + + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + flags |= NOUVEAU_BO_TILED; + if (usage & NOUVEAU_BUFFER_USAGE_ZETA) + flags |= NOUVEAU_BO_ZTILE; + break; + default: + break; + } + } + + if (usage & PIPE_BUFFER_USAGE_VERTEX) { + if (nv->cap.hw_vertex_buffer) + flags |= NOUVEAU_BO_GART; + } + + if (usage & PIPE_BUFFER_USAGE_INDEX) { + if (nv->cap.hw_index_buffer) + flags |= NOUVEAU_BO_GART; + } + + if (nouveau_bo_new(dev, flags, alignment, size, &nvbuf->bo)) { + free(nvbuf); + return NULL; + } + + return &nvbuf->base; +} + +static struct pipe_buffer * +nouveau_pipe_bo_user_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_device *dev = nvpws->nv->nv_screen->device; + struct nouveau_pipe_buffer *nvbuf; + + nvbuf = calloc(1, sizeof(*nvbuf)); + if (!nvbuf) + return NULL; + nvbuf->base.refcount = 1; + nvbuf->base.size = bytes; + + if (nouveau_bo_user(dev, ptr, bytes, &nvbuf->bo)) { + free(nvbuf); + return NULL; + } + + return &nvbuf->base; +} + +static void +nouveau_pipe_bo_del(struct pipe_winsys *ws, struct pipe_buffer *buf) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); + + nouveau_bo_del(&nvbuf->bo); + free(nvbuf); +} + +static void * +nouveau_pipe_bo_map(struct pipe_winsys *pws, struct pipe_buffer *buf, + unsigned flags) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); + uint32_t map_flags = 0; + + if (flags & PIPE_BUFFER_USAGE_CPU_READ) + map_flags |= NOUVEAU_BO_RD; + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) + map_flags |= NOUVEAU_BO_WR; + + if (nouveau_bo_map(nvbuf->bo, map_flags)) + return NULL; + return nvbuf->bo->map; +} + +static void +nouveau_pipe_bo_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); + + nouveau_bo_unmap(nvbuf->bo); +} + +static INLINE struct nouveau_fence * +nouveau_pipe_fence(struct pipe_fence_handle *pfence) +{ + return (struct nouveau_fence *)pfence; +} + +static void +nouveau_pipe_fence_reference(struct pipe_winsys *ws, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *pfence) +{ + nouveau_fence_ref((void *)pfence, (void *)ptr); +} + +static int +nouveau_pipe_fence_signalled(struct pipe_winsys *ws, + struct pipe_fence_handle *pfence, unsigned flag) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)ws; + struct nouveau_fence *fence = nouveau_pipe_fence(pfence); + + if (nouveau_fence(fence)->signalled == 0) + nouveau_fence_flush(nvpws->nv->nvc->channel); + + return !nouveau_fence(fence)->signalled; +} + +static int +nouveau_pipe_fence_finish(struct pipe_winsys *ws, + struct pipe_fence_handle *pfence, unsigned flag) +{ + struct nouveau_fence *fence = nouveau_pipe_fence(pfence); + struct nouveau_fence *ref = NULL; + + nouveau_fence_ref(fence, &ref); + return nouveau_fence_wait(&ref); +} + +struct pipe_winsys * +nouveau_create_pipe_winsys(struct nouveau_context *nv) +{ + struct nouveau_pipe_winsys *nvpws; + struct pipe_winsys *pws; + + nvpws = CALLOC_STRUCT(nouveau_pipe_winsys); + if (!nvpws) + return NULL; + nvpws->nv = nv; + pws = &nvpws->pws; + + pws->flush_frontbuffer = nouveau_flush_frontbuffer; + + pws->buffer_create = nouveau_pipe_bo_create; + pws->buffer_destroy = nouveau_pipe_bo_del; + pws->user_buffer_create = nouveau_pipe_bo_user_create; + pws->buffer_map = nouveau_pipe_bo_map; + pws->buffer_unmap = nouveau_pipe_bo_unmap; + + pws->fence_reference = nouveau_pipe_fence_reference; + pws->fence_signalled = nouveau_pipe_fence_signalled; + pws->fence_finish = nouveau_pipe_fence_finish; + + pws->get_name = nouveau_get_name; + + return &nvpws->pws; +} + diff --git a/src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.h b/src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.h new file mode 100644 index 00000000000..6a03ac0d773 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_winsys_pipe.h @@ -0,0 +1,34 @@ +#ifndef NOUVEAU_PIPE_WINSYS_H +#define NOUVEAU_PIPE_WINSYS_H + +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" +#include "nouveau_context.h" + +struct nouveau_pipe_buffer { + struct pipe_buffer base; + struct nouveau_bo *bo; +}; + +static inline struct nouveau_pipe_buffer * +nouveau_buffer(struct pipe_buffer *buf) +{ + return (struct nouveau_pipe_buffer *)buf; +} + +struct nouveau_pipe_winsys { + struct pipe_winsys pws; + + struct nouveau_context *nv; +}; + +extern struct pipe_winsys * +nouveau_create_pipe_winsys(struct nouveau_context *nv); + +struct pipe_context * +nouveau_create_softpipe(struct nouveau_context *nv); + +struct pipe_context * +nouveau_pipe_create(struct nouveau_context *nv); + +#endif diff --git a/src/gallium/winsys/drm/nouveau/nouveau_winsys_softpipe.c b/src/gallium/winsys/drm/nouveau/nouveau_winsys_softpipe.c new file mode 100644 index 00000000000..704f6c77506 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nouveau_winsys_softpipe.c @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> + */ + +#include "imports.h" + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "softpipe/sp_winsys.h" + +#include "nouveau_context.h" +#include "nouveau_winsys_pipe.h" + +struct nouveau_softpipe_winsys { + struct softpipe_winsys sws; + struct nouveau_context *nv; +}; + +/** + * Return list of surface formats supported by this driver. + */ +static boolean +nouveau_is_format_supported(struct softpipe_winsys *sws, uint format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + }; + + return FALSE; +} + +struct pipe_context * +nouveau_create_softpipe(struct nouveau_context *nv) +{ + struct nouveau_softpipe_winsys *nvsws; + struct pipe_screen *pscreen; + struct pipe_winsys *ws; + + ws = nouveau_create_pipe_winsys(nv); + if (!ws) + return NULL; + pscreen = softpipe_create_screen(ws); + + nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys); + if (!nvsws) + return NULL; + + nvsws->sws.is_format_supported = nouveau_is_format_supported; + nvsws->nv = nv; + + return softpipe_create(pscreen, ws, &nvsws->sws); +} + diff --git a/src/gallium/winsys/drm/nouveau/nv04_surface.c b/src/gallium/winsys/drm/nouveau/nv04_surface.c new file mode 100644 index 00000000000..8fa3d106c8c --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nv04_surface.c @@ -0,0 +1,314 @@ +#include "pipe/p_context.h" +#include "pipe/p_format.h" + +#include "nouveau_context.h" + +static INLINE int +nv04_surface_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; + case PIPE_FORMAT_R5G6B5_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; + default: + return -1; + } +} + +static INLINE int +nv04_rect_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + case PIPE_FORMAT_R5G6B5_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + default: + return -1; + } +} + +static void +nv04_surface_copy_m2mf(struct nouveau_context *nv, unsigned dx, unsigned dy, + unsigned sx, unsigned sy, unsigned w, unsigned h) +{ + struct nouveau_channel *chan = nv->nvc->channel; + struct pipe_surface *dst = nv->surf_dst; + struct pipe_surface *src = nv->surf_src; + unsigned dst_offset, src_offset; + + dst_offset = dst->offset + (dy * dst->stride) + (dx * dst->block.size); + src_offset = src->offset + (sy * src->stride) + (sx * src->block.size); + + while (h) { + int count = (h > 2047) ? 2047 : h; + + BEGIN_RING(chan, nv->nvc->NvM2MF, + NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); + OUT_RELOCl(chan, nouveau_buffer(src->buffer)->bo, src_offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst_offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (chan, src->stride); + OUT_RING (chan, dst->stride); + OUT_RING (chan, w * src->block.size); + OUT_RING (chan, count); + OUT_RING (chan, 0x0101); + OUT_RING (chan, 0); + + h -= count; + src_offset += src->stride * count; + dst_offset += dst->stride * count; + } +} + +static void +nv04_surface_copy_blit(struct nouveau_context *nv, unsigned dx, unsigned dy, + unsigned sx, unsigned sy, unsigned w, unsigned h) +{ + struct nouveau_channel *chan = nv->nvc->channel; + + BEGIN_RING(chan, nv->nvc->NvImageBlit, 0x0300, 3); + OUT_RING (chan, (sy << 16) | sx); + OUT_RING (chan, (dy << 16) | dx); + OUT_RING (chan, ( h << 16) | w); +} + +static int +nv04_surface_copy_prep(struct nouveau_context *nv, struct pipe_surface *dst, + struct pipe_surface *src) +{ + struct nouveau_channel *chan = nv->nvc->channel; + int format; + + if (src->format != dst->format) + return 1; + + /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback + * to NV_MEMORY_TO_MEMORY_FORMAT in this case. + */ + if ((src->offset & 63) || (dst->offset & 63)) { + BEGIN_RING(nv->nvc->channel, nv->nvc->NvM2MF, + NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); + OUT_RELOCo(chan, nouveau_buffer(src->buffer)->bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + nv->surface_copy = nv04_surface_copy_m2mf; + nv->surf_dst = dst; + nv->surf_src = src; + return 0; + + } + + if ((format = nv04_surface_format(dst->format)) < 0) { + NOUVEAU_ERR("Bad surface format 0x%x\n", dst->format); + return 1; + } + nv->surface_copy = nv04_surface_copy_blit; + + BEGIN_RING(chan, nv->nvc->NvCtxSurf2D, + NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(chan, nouveau_buffer(src->buffer)->bo, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, nv->nvc->NvCtxSurf2D, + NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (chan, format); + OUT_RING (chan, (dst->stride << 16) | src->stride); + OUT_RELOCl(chan, nouveau_buffer(src->buffer)->bo, src->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + return 0; +} + +static void +nv04_surface_copy_done(struct nouveau_context *nv) +{ + FIRE_RING(nv->nvc->channel); +} + +static int +nv04_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, + unsigned dx, unsigned dy, unsigned w, unsigned h, + unsigned value) +{ + struct nouveau_channel *chan = nv->nvc->channel; + struct nouveau_grobj *surf2d = nv->nvc->NvCtxSurf2D; + struct nouveau_grobj *rect = nv->nvc->NvGdiRect; + int cs2d_format, gdirect_format; + + if ((cs2d_format = nv04_surface_format(dst->format)) < 0) { + NOUVEAU_ERR("Bad format = %d\n", dst->format); + return 1; + } + + if ((gdirect_format = nv04_rect_format(dst->format)) < 0) { + NOUVEAU_ERR("Bad format = %d\n", dst->format); + return 1; + } + + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCo(chan, nouveau_buffer(dst->buffer)->bo, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (chan, cs2d_format); + OUT_RING (chan, (dst->stride << 16) | dst->stride); + OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, nouveau_buffer(dst->buffer)->bo, dst->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); + OUT_RING (chan, gdirect_format); + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); + OUT_RING (chan, value); + BEGIN_RING(chan, rect, + NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); + OUT_RING (chan, (dx << 16) | dy); + OUT_RING (chan, ( w << 16) | h); + + FIRE_RING(chan); + return 0; +} + +int +nouveau_surface_channel_create_nv04(struct nouveau_channel_context *nvc) +{ + struct nouveau_channel *chan = nvc->channel; + unsigned chipset = nvc->channel->device->chipset, class; + int ret; + + if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, 0x39, + &nvc->NvM2MF))) { + NOUVEAU_ERR("Error creating m2mf object: %d\n", ret); + return 1; + } + BIND_RING (chan, nvc->NvM2MF, nvc->next_subchannel++); + BEGIN_RING(chan, nvc->NvM2MF, + NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); + OUT_RING (chan, nvc->sync_notifier->handle); + + class = chipset < 0x10 ? NV04_CONTEXT_SURFACES_2D : + NV10_CONTEXT_SURFACES_2D; + if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, + &nvc->NvCtxSurf2D))) { + NOUVEAU_ERR("Error creating 2D surface object: %d\n", ret); + return 1; + } + BIND_RING (chan, nvc->NvCtxSurf2D, nvc->next_subchannel++); + BEGIN_RING(chan, nvc->NvCtxSurf2D, + NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RING (chan, nvc->channel->vram->handle); + OUT_RING (chan, nvc->channel->vram->handle); + + class = chipset < 0x10 ? NV04_IMAGE_BLIT : NV12_IMAGE_BLIT; + if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, + &nvc->NvImageBlit))) { + NOUVEAU_ERR("Error creating blit object: %d\n", ret); + return 1; + } + BIND_RING (chan, nvc->NvImageBlit, nvc->next_subchannel++); + BEGIN_RING(chan, nvc->NvImageBlit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1); + OUT_RING (chan, nvc->sync_notifier->handle); + BEGIN_RING(chan, nvc->NvImageBlit, NV04_IMAGE_BLIT_SURFACE, 1); + OUT_RING (chan, nvc->NvCtxSurf2D->handle); + BEGIN_RING(chan, nvc->NvImageBlit, NV04_IMAGE_BLIT_OPERATION, 1); + OUT_RING (chan, NV04_IMAGE_BLIT_OPERATION_SRCCOPY); + + class = NV04_GDI_RECTANGLE_TEXT; + if ((ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, + &nvc->NvGdiRect))) { + NOUVEAU_ERR("Error creating rect object: %d\n", ret); + return 1; + } + BIND_RING (chan, nvc->NvGdiRect, nvc->next_subchannel++); + BEGIN_RING(chan, nvc->NvGdiRect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); + OUT_RING (chan, nvc->sync_notifier->handle); + BEGIN_RING(chan, nvc->NvGdiRect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); + OUT_RING (chan, nvc->NvCtxSurf2D->handle); + BEGIN_RING(chan, nvc->NvGdiRect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); + OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); + BEGIN_RING(chan, nvc->NvGdiRect, + NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); + OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); + + switch (chipset & 0xf0) { + case 0x00: + case 0x10: + class = NV04_SWIZZLED_SURFACE; + break; + case 0x20: + class = NV20_SWIZZLED_SURFACE; + break; + case 0x30: + class = NV30_SWIZZLED_SURFACE; + break; + case 0x40: + case 0x60: + class = NV40_SWIZZLED_SURFACE; + break; + default: + /* Famous last words: this really can't happen.. */ + assert(0); + break; + } + + ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, + &nvc->NvSwzSurf); + if (ret) { + NOUVEAU_ERR("Error creating swizzled surface: %d\n", ret); + return 1; + } + + BIND_RING (chan, nvc->NvSwzSurf, nvc->next_subchannel++); + BEGIN_RING(chan, nvc->NvSwzSurf, NV04_SWIZZLED_SURFACE_DMA_NOTIFY, 1); + OUT_RING (chan, nvc->sync_notifier->handle); + BEGIN_RING(chan, nvc->NvSwzSurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); + OUT_RING (chan, nvc->channel->vram->handle); + + if (chipset < 0x10) { + class = NV04_SCALED_IMAGE_FROM_MEMORY; + } else + if (chipset < 0x40) { + class = NV10_SCALED_IMAGE_FROM_MEMORY; + } else { + class = NV40_SCALED_IMAGE_FROM_MEMORY; + } + + ret = nouveau_grobj_alloc(chan, nvc->next_handle++, class, + &nvc->NvSIFM); + if (ret) { + NOUVEAU_ERR("Error creating scaled image object: %d\n", ret); + return 1; + } + + BIND_RING (chan, nvc->NvSIFM, nvc->next_subchannel++); + + return 0; +} + +int +nouveau_surface_init_nv04(struct nouveau_context *nv) +{ + nv->surface_copy_prep = nv04_surface_copy_prep; + nv->surface_copy = nv04_surface_copy_blit; + nv->surface_copy_done = nv04_surface_copy_done; + nv->surface_fill = nv04_surface_fill; + return 0; +} + diff --git a/src/gallium/winsys/drm/nouveau/nv50_surface.c b/src/gallium/winsys/drm/nouveau/nv50_surface.c new file mode 100644 index 00000000000..c8ab7f690f3 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/nv50_surface.c @@ -0,0 +1,194 @@ +#include "pipe/p_context.h" +#include "pipe/p_format.h" + +#include "nouveau_context.h" + +static INLINE int +nv50_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return NV50_2D_DST_FORMAT_32BPP; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return NV50_2D_DST_FORMAT_24BPP; + case PIPE_FORMAT_R5G6B5_UNORM: + return NV50_2D_DST_FORMAT_16BPP; + case PIPE_FORMAT_A8_UNORM: + return NV50_2D_DST_FORMAT_8BPP; + default: + return -1; + } +} + +static int +nv50_surface_set(struct nouveau_context *nv, struct pipe_surface *surf, int dst) +{ + struct nouveau_channel *chan = nv->nvc->channel; + struct nouveau_grobj *eng2d = nv->nvc->Nv2D; + struct nouveau_bo *bo = nouveau_buffer(surf->buffer)->bo; + int surf_format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; + int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); + + surf_format = nv50_format(surf->format); + if (surf_format < 0) + return 1; + + if (!nouveau_bo(bo)->tiled) { + BEGIN_RING(chan, eng2d, mthd, 2); + OUT_RING (chan, surf_format); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, mthd + 0x14, 5); + OUT_RING (chan, surf->stride); + OUT_RING (chan, surf->width); + OUT_RING (chan, surf->height); + OUT_RELOCh(chan, bo, surf->offset, flags); + OUT_RELOCl(chan, bo, surf->offset, flags); + } else { + BEGIN_RING(chan, eng2d, mthd, 5); + OUT_RING (chan, surf_format); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, eng2d, mthd + 0x18, 4); + OUT_RING (chan, surf->width); + OUT_RING (chan, surf->height); + OUT_RELOCh(chan, bo, surf->offset, flags); + OUT_RELOCl(chan, bo, surf->offset, flags); + } + +#if 0 + if (dst) { + BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, surf->width); + OUT_RING (chan, surf->height); + } +#endif + + return 0; +} + +static int +nv50_surface_copy_prep(struct nouveau_context *nv, + struct pipe_surface *dst, struct pipe_surface *src) +{ + int ret; + + assert(src->format == dst->format); + + ret = nv50_surface_set(nv, dst, 1); + if (ret) + return ret; + + ret = nv50_surface_set(nv, src, 0); + if (ret) + return ret; + + return 0; +} + +static void +nv50_surface_copy(struct nouveau_context *nv, unsigned dx, unsigned dy, + unsigned sx, unsigned sy, unsigned w, unsigned h) +{ + struct nouveau_channel *chan = nv->nvc->channel; + struct nouveau_grobj *eng2d = nv->nvc->Nv2D; + + BEGIN_RING(chan, eng2d, 0x088c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 4); + OUT_RING (chan, dx); + OUT_RING (chan, dy); + OUT_RING (chan, w); + OUT_RING (chan, h); + BEGIN_RING(chan, eng2d, 0x08c0, 4); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, 0x08d0, 4); + OUT_RING (chan, 0); + OUT_RING (chan, sx); + OUT_RING (chan, 0); + OUT_RING (chan, sy); +} + +static void +nv50_surface_copy_done(struct nouveau_context *nv) +{ + FIRE_RING(nv->nvc->channel); +} + +static int +nv50_surface_fill(struct nouveau_context *nv, struct pipe_surface *dst, + unsigned dx, unsigned dy, unsigned w, unsigned h, + unsigned value) +{ + struct nouveau_channel *chan = nv->nvc->channel; + struct nouveau_grobj *eng2d = nv->nvc->Nv2D; + int rect_format, ret; + + rect_format = nv50_format(dst->format); + if (rect_format < 0) + return 1; + + ret = nv50_surface_set(nv, dst, 1); + if (ret) + return ret; + + BEGIN_RING(chan, eng2d, 0x0580, 3); + OUT_RING (chan, 4); + OUT_RING (chan, rect_format); + OUT_RING (chan, value); + + BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); + OUT_RING (chan, dx); + OUT_RING (chan, dy); + OUT_RING (chan, dx + w); + OUT_RING (chan, dy + h); + + FIRE_RING(chan); + return 0; +} + +int +nouveau_surface_channel_create_nv50(struct nouveau_channel_context *nvc) +{ + struct nouveau_channel *chan = nvc->channel; + struct nouveau_grobj *eng2d = NULL; + int ret; + + ret = nouveau_grobj_alloc(chan, nvc->next_handle++, NV50_2D, &eng2d); + if (ret) + return ret; + nvc->Nv2D = eng2d; + + BIND_RING (chan, eng2d, nvc->next_subchannel++); + BEGIN_RING(chan, eng2d, NV50_2D_DMA_NOTIFY, 4); + OUT_RING (chan, nvc->sync_notifier->handle); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); + BEGIN_RING(chan, eng2d, NV50_2D_OPERATION, 1); + OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY); + BEGIN_RING(chan, eng2d, 0x0290, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, eng2d, 0x0888, 1); + OUT_RING (chan, 1); + + return 0; +} + +int +nouveau_surface_init_nv50(struct nouveau_context *nv) +{ + nv->surface_copy_prep = nv50_surface_copy_prep; + nv->surface_copy = nv50_surface_copy; + nv->surface_copy_done = nv50_surface_copy_done; + nv->surface_fill = nv50_surface_fill; + return 0; +} + -- cgit v1.2.3 From 0e79e474de164a765b9759398c83b6bfa16a0012 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 5 Sep 2008 13:55:02 -0600 Subject: cell: comments, etc. --- .../drivers/cell/ppu/cell_state_per_fragment.c | 28 ++++++++++---- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 5 +-- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 44 +++++++++++++++------- 3 files changed, 52 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 53ae3aa50e7..705867107bf 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -132,9 +132,9 @@ emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, /** + * Generate code to perform Z testing. Four Z values are tested at once. * \param dsa Current depth-test state * \param f Function to which code should be appended - * \param m Mask of allocated / free SPE registers * \param mask Index of register to contain depth-pass mask * \param stored Index of register containing values from depth buffer * \param calculated Index of register containing per-fragment depth values @@ -198,6 +198,7 @@ emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, /** + * Generate code to apply the stencil operation (after testing). * \note Emits a maximum of 5 instructions. * * \warning @@ -222,9 +223,13 @@ emit_stencil_op(struct spe_function *f, spe_il(f, result, ref); break; case PIPE_STENCIL_OP_INCR: + /* clamp = [0xff, 0xff, 0xff, 0xff] */ spe_il(f, clamp, 0x0ff); + /* result[i] = in[i] + 1 */ spe_ai(f, result, in, 1); + /* clamp_mask[i] = (result[i] > 0xff) */ spe_clgti(f, clamp_mask, result, 0x0ff); + /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ spe_selb(f, result, result, clamp, clamp_mask); break; case PIPE_STENCIL_OP_DECR: @@ -259,10 +264,10 @@ emit_stencil_op(struct spe_function *f, /** + * Generate code to do stencil test. Four pixels are tested at once. * \param dsa Depth / stencil test state * \param face 0 for front face, 1 for back face * \param f Function to append instructions to - * \param reg_mask Mask of allocated registers * \param mask Register containing mask of fragments passing the * alpha test * \param depth_mask Register containing mask of fragments passing the @@ -310,13 +315,14 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, switch (dsa->stencil[face].func) { case PIPE_FUNC_NEVER: - spe_il(f, stencil_mask, 0); + spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */ break; case PIPE_FUNC_NOTEQUAL: complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_EQUAL: + /* stencil_mask[i] = (stored[i] == ref) */ spe_ceqi(f, stencil_mask, stored, ref); break; @@ -324,6 +330,8 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GREATER: + complement = TRUE; + /* stencil_mask[i] = (stored[i] > ref) */ spe_clgti(f, stencil_mask, stored, ref); break; @@ -331,8 +339,11 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GEQUAL: + /* stencil_mask[i] = (stored[i] > ref) */ spe_clgti(f, stencil_mask, stored, ref); + /* tmp[i] = (stored[i] == ref) */ spe_ceqi(f, tmp, stored, ref); + /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ spe_or(f, stencil_mask, stencil_mask, tmp); break; @@ -461,7 +472,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round * up to 64 to make it a happy power-of-two. */ - spe_init_func(f, 4 * 64); + spe_init_func(f, SPE_INST_SIZE * 64); /* Allocate registers for the function's input parameters. Cleverly (and @@ -540,7 +551,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) spe_selb(f, depth, depth, zvals, mask); } - spe_bi(f, 0, 0, 0); + spe_bi(f, 0, 0, 0); /* return from function call */ #if 0 @@ -956,7 +967,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to * make it a happy power-of-two. */ - spe_init_func(f, 4 * 64); + spe_init_func(f, SPE_INST_SIZE * 64); const int frag[4] = { @@ -1144,7 +1155,8 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) } -int PC_OFFSET(const struct spe_function *f, const void *d) +static int +PC_OFFSET(const struct spe_function *f, const void *d) { const intptr_t pc = (intptr_t) f->csr; const intptr_t ea = ~0x0f & (intptr_t) d; @@ -1178,7 +1190,7 @@ cell_generate_logic_op(struct spe_function *f, * bytes (equiv. to 8 instructions) are needed for data storage. Round up * to 64 to make it a happy power-of-two. */ - spe_init_func(f, 4 * 64); + spe_init_func(f, SPE_INST_SIZE * 64); /* Pixel colors in framebuffer format in AoS layout. diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 2ece0250f6f..566df7f59e3 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -297,10 +297,9 @@ void cell_update_vertex_fetch(struct draw_context *draw) /* Each fetch function can be a maximum of 34 instructions (note: this is - * actually a slight over-estimate). That means (34 * 4) = 136 bytes - * each maximum. + * actually a slight over-estimate). */ - spe_init_func(p, 136 * unique_attr_formats); + spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats); /* Allocate registers for the function's input parameters. diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index c0a729b3d2c..db887352265 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -35,8 +35,17 @@ #define ZERO 0x80 + +/** + * Get a "quad" of four fragment Z/stencil values from the given tile. + * \param tile the tile of Z/stencil values + * \param x, y location of the quad in the tile, in pixels + * \param depth_format format of the tile's data + * \param detph returns four depth values + * \param stencil returns four stencil values + */ static void -read_ds_quad(tile_t *buffer, unsigned x, unsigned y, +read_ds_quad(tile_t *tile, unsigned x, unsigned y, enum pipe_format depth_format, qword *depth, qword *stencil) { @@ -45,14 +54,13 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y, switch (depth_format) { case PIPE_FORMAT_Z16_UNORM: { - qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; + qword *ptr = (qword *) &tile->us8[iy][ix / 2]; const qword shuf_vec = (qword) { ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7 }; - /* At even X values we want the first 4 shorts, and at odd X values we * want the second 4 shorts. */ @@ -65,18 +73,16 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y, break; } - case PIPE_FORMAT_Z32_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword *ptr = (qword *) &tile->ui4[iy][ix]; *depth = *ptr; *stencil = si_il(0); break; } - case PIPE_FORMAT_Z24S8_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword *ptr = (qword *) &tile->ui4[iy][ix]; qword mask = si_fsmbi(0xEEEE); *depth = si_rotmai(si_and(*ptr, mask), -8); @@ -84,16 +90,14 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y, break; } - case PIPE_FORMAT_S8Z24_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword *ptr = (qword *) &tile->ui4[iy][ix]; *depth = si_and(*ptr, si_fsmbi(0x7777)); *stencil = si_andi(si_roti(*ptr, 8), 0x0ff); break; } - default: ASSERT(0); break; @@ -101,6 +105,14 @@ read_ds_quad(tile_t *buffer, unsigned x, unsigned y, } +/** + * Put a quad of Z/stencil values into a tile. + * \param tile the tile of Z/stencil values to write into + * \param x, y location of the quad in the tile, in pixels + * \param depth_format format of the tile's data + * \param detph depth values to store + * \param stencil stencil values to store + */ static void write_ds_quad(tile_t *buffer, unsigned x, unsigned y, enum pipe_format depth_format, @@ -124,14 +136,12 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y, break; } - case PIPE_FORMAT_Z32_UNORM: { qword *ptr = (qword *) &buffer->ui4[iy][ix]; *ptr = depth; break; } - case PIPE_FORMAT_Z24S8_UNORM: { qword *ptr = (qword *) &buffer->ui4[iy][ix]; qword mask = si_fsmbi(0xEEEE); @@ -141,7 +151,6 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y, break; } - case PIPE_FORMAT_S8Z24_UNORM: { qword *ptr = (qword *) &buffer->ui4[iy][ix]; qword mask = si_fsmbi(0x7777); @@ -151,7 +160,6 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y, break; } - default: ASSERT(0); break; @@ -159,6 +167,14 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y, } +/** + * Do depth/stencil/alpha test for a "quad" of 4 fragments. + * \param x,y location of quad within tile + * \param frag_mask indicates which fragments are "alive" + * \param frag_depth four fragment depth values + * \param frag_alpha four fragment alpha values + * \param facing front/back facing for four fragments (1=front, 0=back) + */ qword spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth, qword frag_alpha, -- cgit v1.2.3 From cd9722dcddcb41af3196860280d23542dc673700 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 8 Sep 2008 11:50:13 -0600 Subject: cell: comments --- src/gallium/drivers/cell/spu/spu_tri.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 2a4e0b423ca..a3ea0a3e69b 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -209,7 +209,7 @@ clip_emit_quad(struct setup_stage *setup) /** * Evaluate attribute coefficients (plane equations) to compute * attribute values for the four fragments in a quad. - * Eg: four colors will be compute. + * Eg: four colors will be computed (in AoS format). */ static INLINE void eval_coeff(uint slot, float x, float y, vector float result[4]) @@ -356,6 +356,7 @@ emit_quad( int x, int y, mask_t mask ) /* Convert fragment data from AoS to SoA format. + * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) */ qword soa_frag[4]; _transpose_matrix4x4((vec_float4 *) soa_frag, colors); @@ -373,6 +374,7 @@ emit_quad( int x, int y, mask_t mask ) if (spu.read_fb) { /* Convert pixel data from AoS to SoA format. + * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) */ vec_float4 aos_pix[4] = { spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]), @@ -393,6 +395,7 @@ emit_quad( int x, int y, mask_t mask ) /* Convert final pixel data from SoA to AoS format. + * I.e. (RRRR,GGGG,BBBB,AAAA) -> (RGBA,RGBA,RGBA,RGBA) */ result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3], result.r, result.g, result.b, result.a, -- cgit v1.2.3 From 04ae4fba3c0a656cf2747fc994b99f99576d0e2b Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 8 Sep 2008 11:53:14 -0600 Subject: cell: minor change to Z float/int conversion code (avoid switch) --- src/gallium/drivers/cell/spu/spu_main.c | 5 ++++ src/gallium/drivers/cell/spu/spu_main.h | 5 ++++ src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 34 +++++++++------------- 3 files changed, 23 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index d223f32d941..c4236817a9a 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -252,12 +252,17 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) switch (spu.fb.depth_format) { case PIPE_FORMAT_Z32_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0xffffffffu; + break; case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_S8Z24_UNORM: spu.fb.zsize = 4; + spu.fb.zscale = (float) 0x00ffffffu; break; case PIPE_FORMAT_Z16_UNORM: spu.fb.zsize = 2; + spu.fb.zscale = (float) 0xffffu; break; default: spu.fb.zsize = 0; diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 4879f8c9c8d..c2a53c9dcf6 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -41,6 +41,10 @@ #define MAX_HEIGHT 1024 +/** + * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. + * The data may be addressed through several different types. + */ typedef union { ushort us[TILE_SIZE][TILE_SIZE]; uint ui[TILE_SIZE][TILE_SIZE]; @@ -99,6 +103,7 @@ struct spu_framebuffer { uint depth_clear_value; uint zsize; /**< 0, 2 or 4 bytes per Z */ + float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index db887352265..29dc07a2e8d 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -144,18 +144,22 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y, case PIPE_FORMAT_Z24S8_UNORM: { qword *ptr = (qword *) &buffer->ui4[iy][ix]; + /* form select mask = 1110,1110,1110,1110 */ qword mask = si_fsmbi(0xEEEE); - + /* depth[i] = depth[i] << 8 */ depth = si_shli(depth, 8); + /* *ptr[i] = depth[i][31:8] | stencil[i][7:0] */ *ptr = si_selb(stencil, depth, mask); break; } case PIPE_FORMAT_S8Z24_UNORM: { qword *ptr = (qword *) &buffer->ui4[iy][ix]; + /* form select mask = 0111,0111,0111,0111 */ qword mask = si_fsmbi(0x7777); - + /* stencil[i] = stencil[i] << 24 */ stencil = si_shli(stencil, 24); + /* *ptr[i] = stencil[i][31:24] | depth[i][23:0] */ *ptr = si_selb(stencil, depth, mask); break; } @@ -191,25 +195,13 @@ spu_do_depth_stencil(int x, int y, read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, &pixel_depth, &pixel_stencil); } - - switch (spu.fb.depth_format) { - case PIPE_FORMAT_Z16_UNORM: - frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu))); - frag_depth = si_cfltu(frag_depth, 0); - break; - case PIPE_FORMAT_Z32_UNORM: - frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu))); - frag_depth = si_cfltu(frag_depth, 0); - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu))); - frag_depth = si_cfltu(frag_depth, 0); - break; - default: - ASSERT(0); - break; - } + + /* convert floating point Z values to 32-bit uint */ + + /* frag_depth *= spu.fb.zscale */ + frag_depth = si_fm(frag_depth, (qword)spu_splats(spu.fb.zscale)); + /* frag_depth = uint(frag_depth) */ + frag_depth = si_cfltu(frag_depth, 0); result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil, frag_depth, frag_alpha, facing); -- cgit v1.2.3 From 284ab5a6127f8b452acaa0e10ac1d9ebc87fac3e Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 10 Sep 2008 18:22:00 -0600 Subject: cell: checkpoint commit of new per-fragment processing Do code generation for alpha test, z test, stencil, blend, colormask and framebuffer/tile read/write as a single code block. Ian's previous blend/z/stencil test code is still there but mostly disabled and will be removed soon. --- src/gallium/drivers/cell/common.h | 20 +- src/gallium/drivers/cell/ppu/Makefile | 1 + src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 530 +++++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_gen_fragment.h | 38 ++ src/gallium/drivers/cell/ppu/cell_state_emit.c | 31 +- .../drivers/cell/ppu/cell_state_per_fragment.c | 2 +- src/gallium/drivers/cell/spu/Makefile | 2 +- src/gallium/drivers/cell/spu/spu_main.c | 53 ++- src/gallium/drivers/cell/spu/spu_main.h | 23 + src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 231 ++++++++- src/gallium/drivers/cell/spu/spu_per_fragment_op.h | 11 + src/gallium/drivers/cell/spu/spu_tri.c | 30 ++ src/gallium/winsys/xlib/xm_api.c | 7 +- src/gallium/winsys/xlib/xm_winsys.c | 35 ++ 14 files changed, 998 insertions(+), 16 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_gen_fragment.c create mode 100644 src/gallium/drivers/cell/ppu/cell_gen_fragment.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index c0ca201e1d1..a62530c64da 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -97,6 +97,7 @@ #define CELL_CMD_STATE_LOGICOP 21 #define CELL_CMD_VS_EXECUTE 22 #define CELL_CMD_FLUSH_BUFFER_RANGE 23 +#define CELL_CMD_STATE_FRAGMENT_OPS 24 #define CELL_NUM_BUFFERS 4 @@ -112,30 +113,43 @@ /** */ -struct cell_command_depth_stencil_alpha_test { +struct cell_command_depth_stencil_alpha_test +{ uint64_t base; /**< Effective address of code start. */ unsigned size; /**< Size in bytes of SPE code. */ unsigned read_depth; /**< Flag: should depth be read? */ unsigned read_stencil; /**< Flag: should stencil be read? */ + struct pipe_depth_stencil_alpha_state state; }; /** * Upload code to perform framebuffer blend operation */ -struct cell_command_blend { +struct cell_command_blend +{ uint64_t base; /**< Effective address of code start. */ unsigned size; /**< Size in bytes of SPE code. */ unsigned read_fb; /**< Flag: should framebuffer be read? */ }; -struct cell_command_logicop { +struct cell_command_logicop +{ uint64_t base; /**< Effective address of code start. */ unsigned size; /**< Size in bytes of SPE code. */ }; +#define SPU_MAX_FRAGMENT_OPS_INSTS 64 + +struct cell_command_fragment_ops +{ + uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ + unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS]; +}; + + /** * Tell SPUs about the framebuffer size, location */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 25473e200cf..b5a6fcb8deb 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -25,6 +25,7 @@ SOURCES = \ cell_context.c \ cell_draw_arrays.c \ cell_flush.c \ + cell_gen_fragment.c \ cell_state_derived.c \ cell_state_emit.c \ cell_state_per_fragment.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c new file mode 100644 index 00000000000..df29476be66 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -0,0 +1,530 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +/** + * Generate SPU per-fragment code (actually per-quad code). + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "cell_context.h" +#include "cell_gen_fragment.h" + + + +/** Do extra optimizations? */ +#define OPTIMIZATIONS 1 + + +/** + * Generate SPE code to perform Z/depth testing. + * + * \param dsa Gallium depth/stencil/alpha state to gen code for + * \param f SPE function to append instruction onto. + * \param mask_reg register containing quad/pixel "alive" mask (in/out) + * \param ifragZ_reg register containing integer fragment Z values (in) + * \param ifbZ_reg register containing integer frame buffer Z values (in/out) + * \param zmask_reg register containing result of Z test/comparison (out) + */ +static void +gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, + int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) +{ + ASSERT(dsa->depth.enabled); + + switch (dsa->depth.func) { + case PIPE_FUNC_EQUAL: + /* zmask = (ifragZ == ref) */ + spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* zmask = (ifragZ == ref) */ + spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_GREATER: + /* zmask = (ifragZ > ref) */ + spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_LESS: + /* zmask = (ref > ifragZ) */ + spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_LEQUAL: + /* zmask = (ifragZ > ref) */ + spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_GEQUAL: + /* zmask = (ref > ifragZ) */ + spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_NEVER: + spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ + spe_move(f, zmask_reg, mask_reg); /* zmask = mask */ + break; + + case PIPE_FUNC_ALWAYS: + /* mask unchanged */ + spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ + break; + + default: + ASSERT(0); + break; + } + + if (dsa->depth.writemask) { + /* + * If (ztest passed) { + * framebufferZ = fragmentZ; + * } + * OR, + * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; + */ + spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); + } +} + + +/** + * Generate SPE code to perform alpha testing. + * + * \param dsa Gallium depth/stencil/alpha state to gen code for + * \param f SPE function to append instruction onto. + * \param mask_reg register containing quad/pixel "alive" mask (in/out) + * \param fragA_reg register containing four fragment alpha values (in) + */ +static void +gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask_reg, int fragA_reg) +{ + int ref_reg = spe_allocate_available_register(f); + int amask_reg = spe_allocate_available_register(f); + + ASSERT(dsa->alpha.enabled); + + if ((dsa->alpha.func != PIPE_FUNC_NEVER) && + (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { + /* load/splat the alpha reference float value */ + spe_load_float(f, ref_reg, dsa->alpha.ref); + } + + /* emit code to do the alpha comparison, updating 'mask' */ + switch (dsa->alpha.func) { + case PIPE_FUNC_EQUAL: + /* amask = (fragA == ref) */ + spe_fceq(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* amask = (fragA == ref) */ + spe_fceq(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_GREATER: + /* amask = (fragA > ref) */ + spe_fcgt(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_LESS: + /* amask = (ref > fragA) */ + spe_fcgt(f, amask_reg, ref_reg, fragA_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_LEQUAL: + /* amask = (fragA > ref) */ + spe_fcgt(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_GEQUAL: + /* amask = (ref > fragA) */ + spe_fcgt(f, amask_reg, ref_reg, fragA_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_NEVER: + spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ + break; + + case PIPE_FUNC_ALWAYS: + /* no-op, mask unchanged */ + break; + + default: + ASSERT(0); + break; + } + +#if OPTIMIZATIONS + /* if mask == {0,0,0,0} we're all done, return */ + { + /* re-use amask reg here */ + int tmp_reg = amask_reg; + /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ + spe_orx(f, tmp_reg, mask_reg); + /* if tmp[0] == 0 then return from function call */ + spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); + } +#endif + + spe_release_register(f, ref_reg); + spe_release_register(f, amask_reg); +} + + + +/** + * Generate SPE code to implement the fragment operations (alpha test, + * depth test, stencil test, blending, colormask, and final + * framebuffer write) as specified by the current context state. + * + * Logically, this code will be called after running the fragment + * shader. But under some circumstances we could run some of this + * code before the fragment shader to cull fragments/quads that are + * totally occluded/discarded. + * + * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now. + * + * See the spu_default_fragment_ops() function to see how the per-fragment + * operations would be done with ordinary C code. + * The code we generate here though has no branches, is SIMD, etc and + * should be much faster. + * + * \param cell the rendering context (in) + * \param f the generated function (out) + */ +void +gen_fragment_function(struct cell_context *cell, struct spe_function *f) +{ + const struct pipe_depth_stencil_alpha_state *dsa = + &cell->depth_stencil->base; + const struct pipe_blend_state *blend = &cell->blend->base; + + /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ + const int x_reg = 3; /* uint */ + const int y_reg = 4; /* uint */ + const int color_tile_reg = 5; /* tile_t * */ + const int depth_tile_reg = 6; /* tile_t * */ + const int fragZ_reg = 7; /* vector float */ + const int fragR_reg = 8; /* vector float */ + const int fragG_reg = 9; /* vector float */ + const int fragB_reg = 10; /* vector float */ + const int fragA_reg = 11; /* vector float */ + const int mask_reg = 12; /* vector uint */ + + /* offset of quad from start of tile + * XXX assuming 4-byte pixels for color AND Z/stencil!!!! + */ + int quad_offset_reg; + + int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ + int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ + + spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + spe_allocate_register(f, x_reg); + spe_allocate_register(f, y_reg); + spe_allocate_register(f, color_tile_reg); + spe_allocate_register(f, depth_tile_reg); + spe_allocate_register(f, fragZ_reg); + spe_allocate_register(f, fragR_reg); + spe_allocate_register(f, fragG_reg); + spe_allocate_register(f, fragB_reg); + spe_allocate_register(f, fragA_reg); + spe_allocate_register(f, mask_reg); + + quad_offset_reg = spe_allocate_available_register(f); + fbRGBA_reg = spe_allocate_available_register(f); + fbZS_reg = spe_allocate_available_register(f); + + /* compute offset of quad from start of tile, in bytes */ + { + int x2_reg = spe_allocate_available_register(f); + int y2_reg = spe_allocate_available_register(f); + + ASSERT(TILE_SIZE == 32); + + spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ + spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ + spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ + spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ + spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ + + spe_release_register(f, x2_reg); + spe_release_register(f, y2_reg); + } + + + if (dsa->alpha.enabled) { + gen_alpha_test(dsa, f, mask_reg, fragA_reg); + } + + if (dsa->depth.enabled || dsa->stencil[0].enabled) { + const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; + boolean write_depth_stencil; + + int fbZ_reg = spe_allocate_available_register(f); /* Z values */ + int fbS_reg = spe_allocate_available_register(f); /* Stencil values */ + + /* fetch quad of depth/stencil values from tile at (x,y) */ + /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + + if (dsa->depth.enabled) { + /* Extract Z bits from fbZS_reg into fbZ_reg */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + int mask_reg = spe_allocate_available_register(f); + spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */ + spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */ + spe_release_register(f, mask_reg); + /* OK, fbZ_reg has four 24-bit Z values now */ + } + else { + /* XXX handle other z/stencil formats */ + ASSERT(0); + } + + /* Convert fragZ values from float[4] to uint[4] */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM || + zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + /* 24-bit Z values */ + int scale_reg = spe_allocate_available_register(f); + + /* scale_reg[0,1,2,3] = float(2^24-1) */ + spe_load_float(f, scale_reg, (float) 0xffffff); + + /* XXX these two instructions might be combined */ + spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */ + + spe_release_register(f, scale_reg); + } + else { + /* XXX handle 16-bit Z format */ + ASSERT(0); + } + } + + if (dsa->stencil[0].enabled) { + /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + /* XXX extract with a shift */ + ASSERT(0); + } + else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + /* XXX extract with a mask */ + ASSERT(0); + } + } + + + if (dsa->stencil[0].enabled) { + /* XXX this may involve depth testing too */ + // gen_stencil_test(dsa, f, ... ); + ASSERT(0); + } + else if (dsa->depth.enabled) { + int zmask_reg = spe_allocate_available_register(f); + gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + spe_release_register(f, zmask_reg); + } + + /* do we need to write Z and/or Stencil back into framebuffer? */ + write_depth_stencil = (dsa->depth.writemask | + dsa->stencil[0].write_mask | + dsa->stencil[1].write_mask); + + if (write_depth_stencil) { + /* Merge latest Z and Stencil values into fbZS_reg. + * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. + * fbS_reg has four 8-bit Z values in bits [7..0]. + */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + /* XXX to do */ + ASSERT(0); + } + else if (zs_format == PIPE_FORMAT_Z16_UNORM) { + /* XXX to do */ + ASSERT(0); + } + else if (zs_format == PIPE_FORMAT_S8_UNORM) { + /* XXX to do */ + ASSERT(0); + } + else { + /* bad zs_format */ + ASSERT(0); + } + + /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ + spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + } + + spe_release_register(f, fbZ_reg); + spe_release_register(f, fbS_reg); + } + + + /* Get framebuffer quad/colors. We'll need these for blending, + * color masking, and to obey the quad/pixel mask. + * Load: fbRGBA_reg = memory[color_tile + quad_offset] + * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking + * we could skip this load. + */ + spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); + + + if (blend->blend_enable) { + /* convert packed tile colors in fbRGBA_reg to float[4] vectors */ + + // gen_blend_code(blend, f, mask_reg, ... ); + + } + + + + /* + * Write fragment colors to framebuffer/tile. + * This involves converting the fragment colors from float[4] to the + * tile's specific format and obeying the quad/pixel mask. + */ + { + const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; + int rgba_reg = spe_allocate_available_register(f); + + /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ + spe_cfltu(f, fragR_reg, fragR_reg, 32); + spe_cfltu(f, fragG_reg, fragG_reg, 32); + spe_cfltu(f, fragB_reg, fragB_reg, 32); + spe_cfltu(f, fragA_reg, fragA_reg, 32); + + /* Shift most the significant bytes to least the significant positions. + * I.e.: reg = reg >> 24 + */ + spe_rotmi(f, fragR_reg, fragR_reg, -24); + spe_rotmi(f, fragG_reg, fragG_reg, -24); + spe_rotmi(f, fragB_reg, fragB_reg, -24); + spe_rotmi(f, fragA_reg, fragA_reg, -24); + + /* Shift the color bytes according to the surface format */ + if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { + spe_roti(f, fragG_reg, fragG_reg, 8); /* green <<= 8 */ + spe_roti(f, fragR_reg, fragR_reg, 16); /* red <<= 16 */ + spe_roti(f, fragA_reg, fragA_reg, 24); /* alpha <<= 24 */ + } + else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { + spe_roti(f, fragR_reg, fragR_reg, 8); /* red <<= 8 */ + spe_roti(f, fragG_reg, fragG_reg, 16); /* green <<= 16 */ + spe_roti(f, fragB_reg, fragB_reg, 24); /* blue <<= 24 */ + } + else { + ASSERT(0); + } + + /* Merge red, green, blue, alpha registers to make packed RGBA colors. + * Eg: after shifting according to color_format we might have: + * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} + * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} + * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} + * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} + * OR-ing all those together gives us four packed colors: + * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} + */ + spe_or(f, rgba_reg, fragR_reg, fragG_reg); + spe_or(f, rgba_reg, rgba_reg, fragB_reg); + spe_or(f, rgba_reg, rgba_reg, fragA_reg); + + /* Mix fragment colors with framebuffer colors using the quad/pixel mask: + * if (mask[i]) + * rgba[i] = rgba[i]; + * else + * rgba[i] = framebuffer[i]; + */ + spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); + + /* Store updated quad in tile: + * memory[color_tile + quad_offset] = rgba_reg; + */ + spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); + + spe_release_register(f, rgba_reg); + } + + printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); + + spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ + + + spe_release_register(f, fbRGBA_reg); + spe_release_register(f, fbZS_reg); + spe_release_register(f, quad_offset_reg); +} + diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h new file mode 100644 index 00000000000..0ea0fc690c8 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_GEN_FRAGMENT_H +#define CELL_GEN_FRAGMENT_H + + +extern void +gen_fragment_function(struct cell_context *cell, struct spe_function *f); + + +#endif /* CELL_GEN_FRAGMENT_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index f2feaa329ab..06777aac14c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" #include "cell_context.h" +#include "cell_gen_fragment.h" #include "cell_state.h" #include "cell_state_emit.h" #include "cell_state_per_fragment.h" @@ -83,6 +84,29 @@ cell_emit_state(struct cell_context *cell) fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE; fb->width = cell->framebuffer.width; fb->height = cell->framebuffer.height; +#if 0 + printf("EMIT color format %s\n", pf_name(fb->color_format)); + printf("EMIT depth format %s\n", pf_name(fb->depth_format)); +#endif + } + + + if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_DEPTH_STENCIL)) { + /* XXX we don't want to always do codegen here. We should have + * a hash/lookup table to cache previous results... + */ + struct cell_command_fragment_ops *fops + = cell_batch_alloc(cell, sizeof(*fops)); + struct spe_function spe_code; + + /* generate new code */ + gen_fragment_function(cell, &spe_code); + /* put the new code into the batch buffer */ + fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; + memcpy(&fops->code, spe_code.store, + SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + /* free codegen buffer */ + spe_release_func(&spe_code); } if (cell->dirty & CELL_NEW_BLEND) { @@ -90,8 +114,7 @@ cell_emit_state(struct cell_context *cell) if (cell->blend != NULL) { blend.base = (intptr_t) cell->blend->code.store; - blend.size = (char *) cell->blend->code.csr - - (char *) cell->blend->code.store; + blend.size = cell->blend->code.num_inst * SPE_INST_SIZE; blend.read_fb = TRUE; } else { @@ -108,10 +131,10 @@ cell_emit_state(struct cell_context *cell) if (cell->depth_stencil != NULL) { dsat.base = (intptr_t) cell->depth_stencil->code.store; - dsat.size = (char *) cell->depth_stencil->code.csr - - (char *) cell->depth_stencil->code.store; + dsat.size = cell->depth_stencil->code.num_inst * SPE_INST_SIZE; dsat.read_depth = TRUE; dsat.read_stencil = FALSE; + dsat.state = cell->depth_stencil->base; } else { dsat.base = 0; diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 705867107bf..78cb446c14a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -1158,7 +1158,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) static int PC_OFFSET(const struct spe_function *f, const void *d) { - const intptr_t pc = (intptr_t) f->csr; + const intptr_t pc = (intptr_t) &f->store[f->num_inst]; const intptr_t ea = ~0x0f & (intptr_t) d; return (ea - pc) >> 2; diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index d49abb2e825..e285ae9fdba 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -43,7 +43,7 @@ INCLUDE_DIRS = \ $(SPU_CC) $(SPU_CFLAGS) -c $< .c.s: - $(SPU_CC) $(SPU_CFLAGS) -S $< + $(SPU_CC) $(SPU_CFLAGS) -O3 -S $< # The .a file will be linked into the main/PPU executable diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index c4236817a9a..4e0ec159250 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -34,6 +34,7 @@ #include "spu_main.h" #include "spu_render.h" +#include "spu_per_fragment_op.h" #include "spu_texture.h" #include "spu_tile.h" //#include "spu_test.h" @@ -46,7 +47,7 @@ /* helpful headers: /usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h -/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h +/opt/cell/sdk/usr/include/libmisc.h */ boolean Debug = FALSE; @@ -226,6 +227,24 @@ cmd_release_verts(const struct cell_command_release_verts *release) } +/** + * Process a CELL_CMD_STATE_FRAGMENT_OPS command. + * This involves installing new fragment ops SPU code. + * If this function is never called, we'll use a regular C fallback function + * for fragment processing. + */ +static void +cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) +{ + if (Debug) + printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_ops.code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); + /* Point function pointer at new code */ + spu.fragment_ops.func = (spu_fragment_ops_func) spu.fragment_ops.code; +} + + static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { @@ -257,6 +276,8 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) break; case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: spu.fb.zsize = 4; spu.fb.zscale = (float) 0x00ffffffu; break; @@ -282,6 +303,8 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) } +#define NEW_FRAGMENT_FUNCTION 01 + static void cmd_state_blend(const struct cell_command_blend *state) { @@ -302,7 +325,9 @@ cmd_state_blend(const struct cell_command_blend *state) wait_on_mask(1 << TAG_BATCH_BUFFER); spu.blend = (blend_func) fb_blend_code_buffer; spu.read_fb = state->read_fb; - } else { + } + else + { spu.read_fb = FALSE; } } @@ -326,7 +351,9 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat 0, /* tid */ 0 /* rid */); wait_on_mask(1 << TAG_BATCH_BUFFER); - } else { + } + else + { /* If there is no code, emit a return instruction. */ depth_stencil_code_buffer[0] = 0x35; @@ -338,12 +365,14 @@ cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *stat spu.frag_test = (frag_test_func) depth_stencil_code_buffer; spu.read_depth = state->read_depth; spu.read_stencil = state->read_stencil; + spu.depth_stencil_alpha = state->state; } static void cmd_state_logicop(const struct cell_command_logicop * code) { +#if !NEW_FRAGMENT_FUNCTION mfc_get(logicop_code_buffer, (unsigned int) code->base, /* src */ code->size, @@ -353,6 +382,7 @@ cmd_state_logicop(const struct cell_command_logicop * code) wait_on_mask(1 << TAG_BATCH_BUFFER); spu.logicop = (logicop_func) logicop_code_buffer; +#endif } @@ -455,7 +485,9 @@ cmd_finish(void) /** - * Execute a batch of commands + * Execute a batch of commands which was sent to us by the PPU. + * See the cell_emit_state.c code to see where the commands come from. + * * The opcode param encodes the location of the buffer and its size. */ static void @@ -519,6 +551,14 @@ cmd_batch(uint opcode) pos += pos_incr; } break; + case CELL_CMD_STATE_FRAGMENT_OPS: + { + struct cell_command_fragment_ops *fops + = (struct cell_command_fragment_ops *) &buffer[pos]; + cmd_state_fragment_ops(fops); + pos += sizeof(*fops) / 8; + } + break; case CELL_CMD_RELEASE_VERTS: { struct cell_command_release_verts *release @@ -680,6 +720,11 @@ one_time_init(void) memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); invalidate_tex_cache(); + + /* Install default/fallback fragment processing function. + * This will normally be overriden by a code-gen'd function. + */ + spu.fragment_ops.func = spu_fallback_fragment_ops; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index c2a53c9dcf6..7ab34f52229 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -91,6 +91,24 @@ typedef struct spu_blend_results (*logicop_func)( typedef vector float (*sample_texture_func)(uint unit, vector float texcoord); + +typedef void (*spu_fragment_ops_func)(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask); + +struct spu_fragment_ops +{ + uint code[SPU_MAX_FRAGMENT_OPS_INSTS]; + spu_fragment_ops_func func; /**< Current fragment ops function */ +} ALIGN16_ATTRIB; + + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ @@ -127,6 +145,9 @@ struct spu_global struct cell_init_info init; struct spu_framebuffer fb; + + struct pipe_depth_stencil_alpha_state depth_stencil_alpha; + boolean read_depth; boolean read_stencil; frag_test_func frag_test; /**< Current depth/stencil test code */ @@ -142,6 +163,8 @@ struct spu_global struct vertex_info vertex_info; + struct spu_fragment_ops fragment_ops; + /* XXX more state to come */ diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index 29dc07a2e8d..ffc596aa622 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -29,8 +29,11 @@ * \author Ian Romanick <idr@us.ibm.com> */ + +#include <transpose_matrix4x4.h> #include "pipe/p_format.h" #include "spu_main.h" +#include "spu_colorpack.h" #include "spu_per_fragment_op.h" #define ZERO 0x80 @@ -90,7 +93,8 @@ read_ds_quad(tile_t *tile, unsigned x, unsigned y, break; } - case PIPE_FORMAT_S8Z24_UNORM: { + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: { qword *ptr = (qword *) &tile->ui4[iy][ix]; *depth = si_and(*ptr, si_fsmbi(0x7777)); @@ -153,7 +157,8 @@ write_ds_quad(tile_t *buffer, unsigned x, unsigned y, break; } - case PIPE_FORMAT_S8Z24_UNORM: { + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: { qword *ptr = (qword *) &buffer->ui4[iy][ix]; /* form select mask = 0111,0111,0111,0111 */ qword mask = si_fsmbi(0x7777); @@ -217,3 +222,225 @@ spu_do_depth_stencil(int x, int y, return result.mask; } + + + + +/** + * Called by rasterizer for each quad after the shader has run. This + * is a fallback/debug function. In reality we'll use a generated + * function produced by the PPU. But this function is useful for + * debug/validation. + */ +void +spu_fallback_fragment_ops(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask) +{ + vector float frag_soa[4], frag_aos[4]; + unsigned int c0, c1, c2, c3; + + /* do alpha test */ + if (spu.depth_stencil_alpha.alpha.enabled) { + vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref); + vector unsigned int amask; + + switch (spu.depth_stencil_alpha.alpha.func) { + case PIPE_FUNC_LESS: + amask = spu_cmpgt(ref, fragAlpha); /* mask = (fragAlpha < ref) */ + break; + case PIPE_FUNC_GREATER: + amask = spu_cmpgt(fragAlpha, ref); /* mask = (fragAlpha > ref) */ + break; + case PIPE_FUNC_GEQUAL: + amask = spu_cmpgt(ref, fragAlpha); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_LEQUAL: + amask = spu_cmpgt(fragAlpha, ref); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_EQUAL: + amask = spu_cmpeq(ref, fragAlpha); + break; + case PIPE_FUNC_NOTEQUAL: + amask = spu_cmpeq(ref, fragAlpha); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_ALWAYS: + amask = spu_splats(0xffffffffU); + break; + case PIPE_FUNC_NEVER: + amask = spu_splats( 0x0U); + break; + default: + ; + } + + mask = spu_and(mask, amask); + } + + /* Z and/or stencil testing... */ + if (spu.depth_stencil_alpha.depth.enabled || + spu.depth_stencil_alpha.stencil[0].enabled) { + + /* get four Z/Stencil values from tile */ + vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU); + vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2]; + vector unsigned int ifbZ = spu_and(ifbZS, mask24); + vector unsigned int ifbS = spu_andc(ifbZS, mask24); + + if (spu.depth_stencil_alpha.stencil[0].enabled) { + /* do stencil test */ + ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM); + + } + else if (spu.depth_stencil_alpha.depth.enabled) { + /* do depth test */ + + ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM || + spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM); + + vector unsigned int ifragZ; + vector unsigned int zmask; + + /* convert four fragZ from float to uint */ + fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff)); + ifragZ = spu_convtu(fragZ, 0); + + /* do depth comparison, setting zmask with results */ + switch (spu.depth_stencil_alpha.depth.func) { + case PIPE_FUNC_LESS: + zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */ + break; + case PIPE_FUNC_GREATER: + zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */ + break; + case PIPE_FUNC_GEQUAL: + zmask = spu_cmpgt(ifbZ, ifragZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_LEQUAL: + zmask = spu_cmpgt(ifragZ, ifbZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_EQUAL: + zmask = spu_cmpeq(ifbZ, ifragZ); + break; + case PIPE_FUNC_NOTEQUAL: + zmask = spu_cmpeq(ifbZ, ifragZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_ALWAYS: + zmask = spu_splats(0xffffffffU); + break; + case PIPE_FUNC_NEVER: + zmask = spu_splats( 0x0U); + break; + default: + ; + } + + mask = spu_and(mask, zmask); + + /* merge framebuffer Z and fragment Z according to the mask */ + ifbZ = spu_or(spu_and(ifragZ, mask), + spu_andc(ifbZ, mask)); + } + + if (spu_extract(spu_orx(mask), 0)) { + /* put new fragment Z/Stencil values back into Z/Stencil tile */ + depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS); + + spu.cur_ztile_status = TILE_STATUS_DIRTY; + } + } + + /* XXX do blending here */ + + /* XXX do colormask test here */ + + + if (spu_extract(spu_orx(mask), 0)) { + spu.cur_ctile_status = TILE_STATUS_DIRTY; + } + else { + return; + } + + /* convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA */ +#if 0 + { + vector float frag_soa[4]; + frag_soa[0] = fragRed; + frag_soa[1] = fragGreen; + frag_soa[2] = fragBlue; + frag_soa[3] = fragAlpha; + _transpose_matrix4x4(frag_aos, frag_soa); + } +#else + /* short-cut relying on function parameter layout: */ + _transpose_matrix4x4(frag_aos, &fragRed); + (void) fragGreen; + (void) fragBlue; +#endif + + switch (spu.fb.color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + c0 = spu_pack_A8R8G8B8(frag_aos[0]); + c1 = spu_pack_A8R8G8B8(frag_aos[1]); + c2 = spu_pack_A8R8G8B8(frag_aos[2]); + c3 = spu_pack_A8R8G8B8(frag_aos[3]); + break; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + c0 = spu_pack_B8G8R8A8(frag_aos[0]); + c1 = spu_pack_B8G8R8A8(frag_aos[1]); + c2 = spu_pack_B8G8R8A8(frag_aos[2]); + c3 = spu_pack_B8G8R8A8(frag_aos[3]); + break; + default: + fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); + ASSERT(0); + } + +#if 0 + /* + * Quad layout: + * +--+--+ + * |p0|p1| + * +--+--+ + * |p2|p3| + * +--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y+0][x+0] = c0; + if (spu_extract(mask, 1)) + colorTile->ui[y+0][x+1] = c1; + if (spu_extract(mask, 2)) + colorTile->ui[y+1][x+0] = c2; + if (spu_extract(mask, 3)) + colorTile->ui[y+1][x+1] = c3; +#else + /* + * Quad layout: + * +--+--+--+--+ + * |p0|p1|p2|p3| + * +--+--+--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y][x*2] = c0; + if (spu_extract(mask, 1)) + colorTile->ui[y][x*2+1] = c1; + if (spu_extract(mask, 2)) + colorTile->ui[y][x*2+2] = c2; + if (spu_extract(mask, 3)) + colorTile->ui[y][x*2+3] = c3; +#endif +} diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h index 65712586992..ffadf0661ca 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -29,4 +29,15 @@ extern qword spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth, qword frag_alpha, qword facing); +extern void +spu_fallback_fragment_ops(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask); + #endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index a3ea0a3e69b..71ef6ca24fb 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -297,9 +297,12 @@ emit_quad( int x, int y, mask_t mask ) sp->quad.first->run(sp->quad.first, &setup.quad); #else +#define NEW_FRAGMENT_FUNCTION 01 +#if !NEW_FRAGMENT_FUNCTION if (spu.read_depth) { mask = do_depth_test(x, y, mask); } +#endif /* If any bits in mask are set... */ if (spu_extract(spu_orx(mask), 0)) { @@ -308,6 +311,7 @@ emit_quad( int x, int y, mask_t mask ) vector float colors[4]; spu.cur_ctile_status = TILE_STATUS_DIRTY; + spu.cur_ztile_status = TILE_STATUS_DIRTY; if (spu.texture[0].start) { /* texture mapping */ @@ -355,6 +359,29 @@ emit_quad( int x, int y, mask_t mask ) } +#if NEW_FRAGMENT_FUNCTION + { + /* Convert fragment data from AoS to SoA format. + * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) + * This is temporary! + */ + vector float soa_frag[4]; + _transpose_matrix4x4(soa_frag, colors); + + float4 fragZ; + + fragZ.v = eval_z((float) x, (float) y); + + /* Do all per-fragment/quad operations here, including: + * alpha test, z test, stencil test, blend and framebuffer writing. + */ + spu.fragment_ops.func(ix, iy, &spu.ctile, &spu.ztile, + fragZ.v, + soa_frag[0], soa_frag[1], + soa_frag[2], soa_frag[3], + mask); + } +#else /* Convert fragment data from AoS to SoA format. * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) */ @@ -405,6 +432,9 @@ emit_quad( int x, int y, mask_t mask ) spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0); spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0); spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0); + +#endif /* NEW_FRAGMENT_FUNCTION */ + } #endif } diff --git a/src/gallium/winsys/xlib/xm_api.c b/src/gallium/winsys/xlib/xm_api.c index b0105131071..28bd6ceab42 100644 --- a/src/gallium/winsys/xlib/xm_api.c +++ b/src/gallium/winsys/xlib/xm_api.c @@ -349,12 +349,17 @@ create_xmesa_buffer(XMesaDrawable d, BufferType type, if (vis->mesa_visual.depthBits == 0) depthFormat = PIPE_FORMAT_NONE; +#ifdef GALLIUM_CELL /* XXX temporary for Cell! */ + else + depthFormat = PIPE_FORMAT_S8Z24_UNORM; +#else else if (vis->mesa_visual.depthBits <= 16) - depthFormat = PIPE_FORMAT_Z16_UNORM; + depthFormat = PIPE_FORMAT_Z16UNORM; else if (vis->mesa_visual.depthBits <= 24) depthFormat = PIPE_FORMAT_S8Z24_UNORM; else depthFormat = PIPE_FORMAT_Z32_UNORM; +#endif if (vis->mesa_visual.stencilBits == 8) { if (depthFormat == PIPE_FORMAT_S8Z24_UNORM) diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 5e9a1f92f11..c4a30d37023 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -275,6 +275,39 @@ xm_buffer_destroy(struct pipe_winsys *pws, } +/** + * For Cell. Basically, rearrange the pixels/quads from this layout: + * +--+--+--+--+ + * |p0|p1|p2|p3|.... + * +--+--+--+--+ + * + * to this layout: + * +--+--+ + * |p0|p1|.... + * +--+--+ + * |p2|p3| + * +--+--+ + */ +static void +twiddle_tile(uint *tile) +{ + uint tile2[TILE_SIZE * TILE_SIZE]; + int y, x; + + for (y = 0; y < TILE_SIZE; y+=2) { + for (x = 0; x < TILE_SIZE; x+=2) { + int k = 4 * (y/2 * TILE_SIZE/2 + x/2); + tile2[y * TILE_SIZE + (x + 0)] = tile[k]; + tile2[y * TILE_SIZE + (x + 1)] = tile[k+1]; + tile2[(y + 1) * TILE_SIZE + (x + 0)] = tile[k+2]; + tile2[(y + 1) * TILE_SIZE + (x + 1)] = tile[k+3]; + } + } + memcpy(tile, tile2, sizeof(tile2)); +} + + + /** * Display a surface that's in a tiled configuration. That is, all the * pixels for a TILE_SIZExTILE_SIZE block are contiguous in memory. @@ -321,6 +354,8 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf) ximage->data = (char *) xm_buf->data + offset; + twiddle_tile((uint *) ximage->data); + if (XSHM_ENABLED(xm_buf)) { #if defined(USE_XSHM) && !defined(XFree86Server) XShmPutImage(b->xm_visual->display, b->drawable, b->gc, -- cgit v1.2.3 From 701fcee65db6b72f98e926d838956bbcc54f1cc6 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 10 Sep 2008 18:51:43 -0600 Subject: cell: remove old per-fragment code, replace with all new code --- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 236 +++------------------ src/gallium/drivers/cell/spu/spu_per_fragment_op.h | 47 ++-- src/gallium/drivers/cell/spu/spu_tri.c | 96 --------- 3 files changed, 48 insertions(+), 331 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index ffc596aa622..9ed5fc50cd3 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -1,32 +1,32 @@ -/* - * (C) Copyright IBM Corporation 2008 +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ /** - * \file spu_per_fragment_op.c - * SPU implementation various per-fragment operations. - * - * \author Ian Romanick <idr@us.ibm.com> + * \author Brian Paul */ @@ -36,194 +36,6 @@ #include "spu_colorpack.h" #include "spu_per_fragment_op.h" -#define ZERO 0x80 - - -/** - * Get a "quad" of four fragment Z/stencil values from the given tile. - * \param tile the tile of Z/stencil values - * \param x, y location of the quad in the tile, in pixels - * \param depth_format format of the tile's data - * \param detph returns four depth values - * \param stencil returns four stencil values - */ -static void -read_ds_quad(tile_t *tile, unsigned x, unsigned y, - enum pipe_format depth_format, qword *depth, - qword *stencil) -{ - const int ix = x / 2; - const int iy = y / 2; - - switch (depth_format) { - case PIPE_FORMAT_Z16_UNORM: { - qword *ptr = (qword *) &tile->us8[iy][ix / 2]; - - const qword shuf_vec = (qword) { - ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, - ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7 - }; - - /* At even X values we want the first 4 shorts, and at odd X values we - * want the second 4 shorts. - */ - qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3)); - qword bias_mask = si_fsmbi(0x3333); - qword sv = si_a(shuf_vec, si_and(bias_mask, bias)); - - *depth = si_shufb(*ptr, *ptr, sv); - *stencil = si_il(0); - break; - } - - case PIPE_FORMAT_Z32_UNORM: { - qword *ptr = (qword *) &tile->ui4[iy][ix]; - - *depth = *ptr; - *stencil = si_il(0); - break; - } - - case PIPE_FORMAT_Z24S8_UNORM: { - qword *ptr = (qword *) &tile->ui4[iy][ix]; - qword mask = si_fsmbi(0xEEEE); - - *depth = si_rotmai(si_and(*ptr, mask), -8); - *stencil = si_andc(*ptr, mask); - break; - } - - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: { - qword *ptr = (qword *) &tile->ui4[iy][ix]; - - *depth = si_and(*ptr, si_fsmbi(0x7777)); - *stencil = si_andi(si_roti(*ptr, 8), 0x0ff); - break; - } - - default: - ASSERT(0); - break; - } -} - - -/** - * Put a quad of Z/stencil values into a tile. - * \param tile the tile of Z/stencil values to write into - * \param x, y location of the quad in the tile, in pixels - * \param depth_format format of the tile's data - * \param detph depth values to store - * \param stencil stencil values to store - */ -static void -write_ds_quad(tile_t *buffer, unsigned x, unsigned y, - enum pipe_format depth_format, - qword depth, qword stencil) -{ - const int ix = x / 2; - const int iy = y / 2; - - (void) stencil; - - switch (depth_format) { - case PIPE_FORMAT_Z16_UNORM: { - qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; - - qword sv = ((ix & 0x01) == 0) - ? (qword) { 2, 3, 6, 7, 10, 11, 14, 15, - 24, 25, 26, 27, 28, 29, 30, 31 } - : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23, - 2, 3, 6, 7, 10, 11, 14, 15 }; - *ptr = si_shufb(depth, *ptr, sv); - break; - } - - case PIPE_FORMAT_Z32_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - *ptr = depth; - break; - } - - case PIPE_FORMAT_Z24S8_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - /* form select mask = 1110,1110,1110,1110 */ - qword mask = si_fsmbi(0xEEEE); - /* depth[i] = depth[i] << 8 */ - depth = si_shli(depth, 8); - /* *ptr[i] = depth[i][31:8] | stencil[i][7:0] */ - *ptr = si_selb(stencil, depth, mask); - break; - } - - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - /* form select mask = 0111,0111,0111,0111 */ - qword mask = si_fsmbi(0x7777); - /* stencil[i] = stencil[i] << 24 */ - stencil = si_shli(stencil, 24); - /* *ptr[i] = stencil[i][31:24] | depth[i][23:0] */ - *ptr = si_selb(stencil, depth, mask); - break; - } - - default: - ASSERT(0); - break; - } -} - - -/** - * Do depth/stencil/alpha test for a "quad" of 4 fragments. - * \param x,y location of quad within tile - * \param frag_mask indicates which fragments are "alive" - * \param frag_depth four fragment depth values - * \param frag_alpha four fragment alpha values - * \param facing front/back facing for four fragments (1=front, 0=back) - */ -qword -spu_do_depth_stencil(int x, int y, - qword frag_mask, qword frag_depth, qword frag_alpha, - qword facing) -{ - struct spu_frag_test_results result; - qword pixel_depth; - qword pixel_stencil; - - /* All of this preable code (everthing before the call to frag_test) should - * be generated on the PPU and upload to the SPU. - */ - if (spu.read_depth || spu.read_stencil) { - read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, - &pixel_depth, &pixel_stencil); - } - - /* convert floating point Z values to 32-bit uint */ - - /* frag_depth *= spu.fb.zscale */ - frag_depth = si_fm(frag_depth, (qword)spu_splats(spu.fb.zscale)); - /* frag_depth = uint(frag_depth) */ - frag_depth = si_cfltu(frag_depth, 0); - - result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil, - frag_depth, frag_alpha, facing); - - - /* This code (everthing after the call to frag_test) should - * be generated on the PPU and upload to the SPU. - */ - if (spu.read_depth || spu.read_stencil) { - write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, - result.depth, result.stencil); - } - - return result.mask; -} - - /** diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h index ffadf0661ca..f817abf0463 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -1,33 +1,33 @@ -/* - * (C) Copyright IBM Corporation 2008 +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ #ifndef SPU_PER_FRAGMENT_OP #define SPU_PER_FRAGMENT_OP -extern qword -spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth, - qword frag_alpha, qword facing); extern void spu_fallback_fragment_ops(uint x, uint y, @@ -40,4 +40,5 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fragAlpha, vector unsigned int mask); + #endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 71ef6ca24fb..a5bf3270c7f 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -38,7 +38,6 @@ #include "spu_texture.h" #include "spu_tile.h" #include "spu_tri.h" -#include "spu_per_fragment_op.h" /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ @@ -255,31 +254,6 @@ eval_z(float x, float y) } -static INLINE mask_t -do_depth_test(int x, int y, mask_t quadmask) -{ - float4 zvals; - mask_t mask; - - if (spu.fb.depth_format == PIPE_FORMAT_NONE) - return quadmask; - - zvals.v = eval_z((float) x, (float) y); - - mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx, - y - setup.cliprect_miny, - (qword) quadmask, - (qword) zvals.v, - (qword) spu_splats((unsigned char) 0x0ffu), - (qword) spu_splats((unsigned int) 0x01u)); - - if (spu_extract(spu_orx(mask), 0)) - spu.cur_ztile_status = TILE_STATUS_DIRTY; - - return mask; -} - - /** * Emit a quad (pass to next stage). No clipping is done. * Note: about 1/5 to 1/7 of the time, mask is zero and this function @@ -289,21 +263,6 @@ do_depth_test(int x, int y, mask_t quadmask) static INLINE void emit_quad( int x, int y, mask_t mask ) { -#if 0 - struct softpipe_context *sp = setup.softpipe; - setup.quad.x0 = x; - setup.quad.y0 = y; - setup.quad.mask = mask; - sp->quad.first->run(sp->quad.first, &setup.quad); -#else - -#define NEW_FRAGMENT_FUNCTION 01 -#if !NEW_FRAGMENT_FUNCTION - if (spu.read_depth) { - mask = do_depth_test(x, y, mask); - } -#endif - /* If any bits in mask are set... */ if (spu_extract(spu_orx(mask), 0)) { const int ix = x - setup.cliprect_minx; @@ -359,7 +318,6 @@ emit_quad( int x, int y, mask_t mask ) } -#if NEW_FRAGMENT_FUNCTION { /* Convert fragment data from AoS to SoA format. * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) @@ -381,62 +339,8 @@ emit_quad( int x, int y, mask_t mask ) soa_frag[2], soa_frag[3], mask); } -#else - /* Convert fragment data from AoS to SoA format. - * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) - */ - qword soa_frag[4]; - _transpose_matrix4x4((vec_float4 *) soa_frag, colors); - - /* Read the current framebuffer values. - */ - const qword pix[4] = { - (qword) spu_splats(spu.ctile.ui[iy+0][ix+0]), - (qword) spu_splats(spu.ctile.ui[iy+0][ix+1]), - (qword) spu_splats(spu.ctile.ui[iy+1][ix+0]), - (qword) spu_splats(spu.ctile.ui[iy+1][ix+1]), - }; - - qword soa_pix[4]; - - if (spu.read_fb) { - /* Convert pixel data from AoS to SoA format. - * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) - */ - vec_float4 aos_pix[4] = { - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]), - }; - - _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix); - } - - - struct spu_blend_results result = - (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3], - soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3], - spu.const_blend_color[0], spu.const_blend_color[1], - spu.const_blend_color[2], spu.const_blend_color[3]); - - - /* Convert final pixel data from SoA to AoS format. - * I.e. (RRRR,GGGG,BBBB,AAAA) -> (RGBA,RGBA,RGBA,RGBA) - */ - result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3], - result.r, result.g, result.b, result.a, - (qword) mask); - - spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0); - spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0); - spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0); - spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0); - -#endif /* NEW_FRAGMENT_FUNCTION */ } -#endif } -- cgit v1.2.3 From 5336e758a483d15d579ffe7cad536be95637d904 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 08:44:54 -0600 Subject: cell: added cast in spu_splats() call --- src/gallium/drivers/cell/spu/spu_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 5051774f00c..117b8a36f80 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -97,7 +97,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) const qword offset_y = si_andi((qword) y, 0x1f); const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); - const qword tile_size = (qword) spu_splats(sizeof(tile_t)); + const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); tile_offset = si_mpy((qword) tile_offset, tile_size); -- cgit v1.2.3 From 6092a057042c9f7a4cae0f0eb9e95307f5f850a1 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 09:55:39 -0600 Subject: cell: fix shuffle in spu_unpack_B8G8R8A8() --- src/gallium/drivers/cell/spu/spu_colorpack.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h index e9fee8a3a61..fd8dc6ded3e 100644 --- a/src/gallium/drivers/cell/spu/spu_colorpack.h +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -79,14 +79,14 @@ spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) static INLINE vector float -spu_unpack_color(uint color) +spu_unpack_B8G8R8A8(uint color) { vector unsigned int color_u4 = spu_splats(color); color_u4 = spu_shuffle(color_u4, color_u4, ((vector unsigned char) { - 0, 0, 0, 0, - 5, 5, 5, 5, 10, 10, 10, 10, + 5, 5, 5, 5, + 0, 0, 0, 0, 15, 15, 15, 15}) ); return spu_convtf(color_u4, 32); } -- cgit v1.2.3 From add86031db757b0e3abe48bd8fdea40d4e380e05 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 10:08:06 -0600 Subject: cell: begin new blending code (both codegen and fallback paths) --- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 420 ++++++++++++++++++--- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 232 ++++++++++-- 2 files changed, 584 insertions(+), 68 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index df29476be66..7966c0916c7 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -231,6 +231,370 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, +/** + * Generate SPE code to implement the given blend mode for a quad of pixels. + * \param f SPE function to append instruction onto. + * \param fragR_reg register with fragment red values (float) (in/out) + * \param fragG_reg register with fragment green values (float) (in/out) + * \param fragB_reg register with fragment blue values (float) (in/out) + * \param fragA_reg register with fragment alpha values (float) (in/out) + * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) + */ +static void +gen_blend(const struct pipe_blend_state *blend, + struct spe_function *f, + enum pipe_format color_format, + int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, + int fbRGBA_reg) +{ + int term1R_reg = spe_allocate_available_register(f); + int term1G_reg = spe_allocate_available_register(f); + int term1B_reg = spe_allocate_available_register(f); + int term1A_reg = spe_allocate_available_register(f); + + int term2R_reg = spe_allocate_available_register(f); + int term2G_reg = spe_allocate_available_register(f); + int term2B_reg = spe_allocate_available_register(f); + int term2A_reg = spe_allocate_available_register(f); + + int fbR_reg = spe_allocate_available_register(f); + int fbG_reg = spe_allocate_available_register(f); + int fbB_reg = spe_allocate_available_register(f); + int fbA_reg = spe_allocate_available_register(f); + + int one_reg = spe_allocate_available_register(f); + int tmp_reg = spe_allocate_available_register(f); + + ASSERT(blend->blend_enable); + + /* Unpack/convert framebuffer colors from four 32-bit packed colors + * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). + * Each 8-bit color component is expanded into a float in [0.0, 1.0]. + */ + { + int mask_reg = spe_allocate_available_register(f); + + /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */ + spe_fsmbi(f, mask_reg, 0x1111); + + /* XXX there may be more clever ways to implement the following code */ + switch (color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* fbB = fbB & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* fbG = fbG >> 8 */ + spe_roti(f, fbB_reg, fbB_reg, -8); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); + /* fbR = fbR >> 16 */ + spe_roti(f, fbB_reg, fbB_reg, -16); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbA = fbRGBA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); + /* fbA = fbA >> 24 */ + spe_roti(f, fbA_reg, fbA_reg, -24); + break; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* fbA = fbA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); + /* fbR = fbR >> 8 */ + spe_roti(f, fbR_reg, fbR_reg, -8); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); + /* fbG = fbG >> 16 */ + spe_roti(f, fbG_reg, fbG_reg, -16); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbB = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* fbB = fbB >> 24 */ + spe_roti(f, fbB_reg, fbB_reg, -24); + break; + + default: + ASSERT(0); + } + + /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ + spe_cuflt(f, fbR_reg, fbR_reg, 8); + spe_cuflt(f, fbG_reg, fbG_reg, 8); + spe_cuflt(f, fbB_reg, fbB_reg, 8); + spe_cuflt(f, fbA_reg, fbA_reg, 8); + + spe_release_register(f, mask_reg); + } + + + /* + * Compute Src RGB terms + */ + switch (blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term1R_reg, fragR_reg); + spe_move(f, term1G_reg, fragG_reg); + spe_move(f, term1B_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term1R_reg); + spe_zero(f, term1G_reg); + spe_zero(f, term1B_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term1R_reg, fragR_reg, fragR_reg); + spe_fm(f, term1G_reg, fragG_reg, fragG_reg); + spe_fm(f, term1B_reg, fragB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term1R_reg, fragR_reg, fragA_reg); + spe_fm(f, term1G_reg, fragG_reg, fragA_reg); + spe_fm(f, term1B_reg, fragB_reg, fragA_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term + */ + switch (blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term1A_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest RGB terms + */ + switch (blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term2R_reg, fbR_reg); + spe_move(f, term2G_reg, fbG_reg); + spe_move(f, term2B_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term2R_reg); + spe_zero(f, term2G_reg); + spe_zero(f, term2B_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term2R_reg, fbR_reg, fragR_reg); + spe_fm(f, term2G_reg, fbG_reg, fragG_reg); + spe_fm(f, term2B_reg, fbB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term2R_reg, fbR_reg, fragA_reg); + spe_fm(f, term2G_reg, fbG_reg, fragA_reg); + spe_fm(f, term2B_reg, fbB_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(f, one_reg, 1.0f); + /* tmp = one - fragA */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* term = fb * tmp */ + spe_fm(f, term2R_reg, fbR_reg, tmp_reg); + spe_fm(f, term2G_reg, fbG_reg, tmp_reg); + spe_fm(f, term2B_reg, fbB_reg, tmp_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term + */ + switch (blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term2A_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term2A_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term2A_reg, fbA_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(f, one_reg, 1.0f); + /* tmp = one - fragA */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* termA = fbA * tmp */ + spe_fm(f, term2A_reg, fbA_reg, tmp_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms + */ + switch (blend->rgb_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragR_reg, term1R_reg, term2R_reg); + spe_fa(f, fragG_reg, term1G_reg, term2G_reg); + spe_fa(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragR_reg, term1R_reg, term2R_reg); + spe_fs(f, fragG_reg, term1G_reg, term2G_reg); + spe_fs(f, fragB_reg, term1B_reg, term2B_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (blend->alpha_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragA_reg, term1A_reg, term2A_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragA_reg, term1A_reg, term2A_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + spe_release_register(f, term1R_reg); + spe_release_register(f, term1G_reg); + spe_release_register(f, term1B_reg); + spe_release_register(f, term1A_reg); + + spe_release_register(f, term2R_reg); + spe_release_register(f, term2G_reg); + spe_release_register(f, term2B_reg); + spe_release_register(f, term2A_reg); + + spe_release_register(f, fbR_reg); + spe_release_register(f, fbG_reg); + spe_release_register(f, fbB_reg); + spe_release_register(f, fbA_reg); + + spe_release_register(f, one_reg); + spe_release_register(f, tmp_reg); +} + + +static void +gen_logicop(const struct pipe_blend_state *blend, + struct spe_function *f, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* XXX to-do */ + /* operate on 32-bit packed pixels, not float colors */ +} + + +static void +gen_colormask(uint colormask, + struct spe_function *f, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* XXX to-do */ + /* operate on 32-bit packed pixels, not float colors */ +} + + + +/** + * Generate code to pack a quad of float colors into a four 32-bit integers. + * + * \param f SPE function to append instruction onto. + * \param color_format the dest color packing format + * \param r_reg register containing four red values (in/clobbered) + * \param g_reg register containing four green values (in/clobbered) + * \param b_reg register containing four blue values (in/clobbered) + * \param a_reg register containing four alpha values (in/clobbered) + * \param rgba_reg register to store the packed RGBA colors (out) + */ +static void +gen_pack_colors(struct spe_function *f, + enum pipe_format color_format, + int r_reg, int g_reg, int b_reg, int a_reg, + int rgba_reg) +{ + /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ + spe_cfltu(f, r_reg, r_reg, 32); + spe_cfltu(f, g_reg, g_reg, 32); + spe_cfltu(f, b_reg, b_reg, 32); + spe_cfltu(f, a_reg, a_reg, 32); + + /* Shift the most significant bytes to least the significant positions. + * I.e.: reg = reg >> 24 + */ + spe_rotmi(f, r_reg, r_reg, -24); + spe_rotmi(f, g_reg, g_reg, -24); + spe_rotmi(f, b_reg, b_reg, -24); + spe_rotmi(f, a_reg, a_reg, -24); + + /* Shift the color bytes according to the surface format */ + if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { + spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ + spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ + spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ + } + else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { + spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ + spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ + spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ + } + else { + ASSERT(0); + } + + /* Merge red, green, blue, alpha registers to make packed RGBA colors. + * Eg: after shifting according to color_format we might have: + * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} + * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} + * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} + * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} + * OR-ing all those together gives us four packed colors: + * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} + */ + spe_or(f, rgba_reg, r_reg, g_reg); + spe_or(f, rgba_reg, rgba_reg, b_reg); + spe_or(f, rgba_reg, rgba_reg, a_reg); +} + + + + /** * Generate SPE code to implement the fragment operations (alpha test, * depth test, stencil test, blending, colormask, and final @@ -257,6 +621,7 @@ gen_fragment_function(struct cell_context *cell, struct spe_function *f) const struct pipe_depth_stencil_alpha_state *dsa = &cell->depth_stencil->base; const struct pipe_blend_state *blend = &cell->blend->base; + const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ const int x_reg = 3; /* uint */ @@ -443,64 +808,31 @@ gen_fragment_function(struct cell_context *cell, struct spe_function *f) if (blend->blend_enable) { - /* convert packed tile colors in fbRGBA_reg to float[4] vectors */ - - // gen_blend_code(blend, f, mask_reg, ... ); - + gen_blend(blend, f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); } - - /* * Write fragment colors to framebuffer/tile. * This involves converting the fragment colors from float[4] to the * tile's specific format and obeying the quad/pixel mask. */ { - const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; int rgba_reg = spe_allocate_available_register(f); - /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ - spe_cfltu(f, fragR_reg, fragR_reg, 32); - spe_cfltu(f, fragG_reg, fragG_reg, 32); - spe_cfltu(f, fragB_reg, fragB_reg, 32); - spe_cfltu(f, fragA_reg, fragA_reg, 32); + /* Pack four float colors as four 32-bit int colors */ + gen_pack_colors(f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, + rgba_reg); - /* Shift most the significant bytes to least the significant positions. - * I.e.: reg = reg >> 24 - */ - spe_rotmi(f, fragR_reg, fragR_reg, -24); - spe_rotmi(f, fragG_reg, fragG_reg, -24); - spe_rotmi(f, fragB_reg, fragB_reg, -24); - spe_rotmi(f, fragA_reg, fragA_reg, -24); - - /* Shift the color bytes according to the surface format */ - if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { - spe_roti(f, fragG_reg, fragG_reg, 8); /* green <<= 8 */ - spe_roti(f, fragR_reg, fragR_reg, 16); /* red <<= 16 */ - spe_roti(f, fragA_reg, fragA_reg, 24); /* alpha <<= 24 */ - } - else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { - spe_roti(f, fragR_reg, fragR_reg, 8); /* red <<= 8 */ - spe_roti(f, fragG_reg, fragG_reg, 16); /* green <<= 16 */ - spe_roti(f, fragB_reg, fragB_reg, 24); /* blue <<= 24 */ + if (blend->logicop_enable) { + gen_logicop(blend, f, rgba_reg, fbRGBA_reg); } - else { - ASSERT(0); + + if (blend->colormask != 0xf) { + gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg); } - /* Merge red, green, blue, alpha registers to make packed RGBA colors. - * Eg: after shifting according to color_format we might have: - * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} - * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} - * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} - * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} - * OR-ing all those together gives us four packed colors: - * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} - */ - spe_or(f, rgba_reg, fragR_reg, fragG_reg); - spe_or(f, rgba_reg, rgba_reg, fragB_reg); - spe_or(f, rgba_reg, rgba_reg, fragA_reg); /* Mix fragment colors with framebuffer colors using the quad/pixel mask: * if (mask[i]) diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index 9ed5fc50cd3..3f0eabaa050 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -39,9 +39,11 @@ /** - * Called by rasterizer for each quad after the shader has run. This - * is a fallback/debug function. In reality we'll use a generated - * function produced by the PPU. But this function is useful for + * Called by rasterizer for each quad after the shader has run. Do + * all the per-fragment operations including alpha test, z test, + * stencil test, blend, colormask and logicops. This is a + * fallback/debug function. In reality we'll use a generated function + * produced by the PPU. But this function is useful for * debug/validation. */ void @@ -49,13 +51,13 @@ spu_fallback_fragment_ops(uint x, uint y, tile_t *colorTile, tile_t *depthStencilTile, vector float fragZ, - vector float fragRed, - vector float fragGreen, - vector float fragBlue, - vector float fragAlpha, + vector float fragR, + vector float fragG, + vector float fragB, + vector float fragA, vector unsigned int mask) { - vector float frag_soa[4], frag_aos[4]; + vector float frag_aos[4]; unsigned int c0, c1, c2, c3; /* do alpha test */ @@ -65,24 +67,24 @@ spu_fallback_fragment_ops(uint x, uint y, switch (spu.depth_stencil_alpha.alpha.func) { case PIPE_FUNC_LESS: - amask = spu_cmpgt(ref, fragAlpha); /* mask = (fragAlpha < ref) */ + amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */ break; case PIPE_FUNC_GREATER: - amask = spu_cmpgt(fragAlpha, ref); /* mask = (fragAlpha > ref) */ + amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */ break; case PIPE_FUNC_GEQUAL: - amask = spu_cmpgt(ref, fragAlpha); + amask = spu_cmpgt(ref, fragA); amask = spu_nor(amask, amask); break; case PIPE_FUNC_LEQUAL: - amask = spu_cmpgt(fragAlpha, ref); + amask = spu_cmpgt(fragA, ref); amask = spu_nor(amask, amask); break; case PIPE_FUNC_EQUAL: - amask = spu_cmpeq(ref, fragAlpha); + amask = spu_cmpeq(ref, fragA); break; case PIPE_FUNC_NOTEQUAL: - amask = spu_cmpeq(ref, fragAlpha); + amask = spu_cmpeq(ref, fragA); amask = spu_nor(amask, amask); break; case PIPE_FUNC_ALWAYS: @@ -174,7 +176,189 @@ spu_fallback_fragment_ops(uint x, uint y, } } - /* XXX do blending here */ + if (spu.blend.blend_enable) { + vector float term1r, term1g, term1b, term1a; + vector float term2r, term2g, term2b, term2a; + + vector float fbRGBA[4]; + + vector float one, tmp; + + /* get colors from framebuffer */ + { + vector float fc[4]; + uint c0, c1, c2, c3; +#if 0 + c0 = colorTile->ui[y+0][x+0]; + c1 = colorTile->ui[y+0][x+1]; + c2 = colorTile->ui[y+1][x+0]; + c3 = colorTile->ui[y+1][x+1]; +#else + c0 = colorTile->ui[y][x*2+0]; + c1 = colorTile->ui[y][x*2+1]; + c2 = colorTile->ui[y][x*2+2]; + c3 = colorTile->ui[y][x*2+3]; +#endif + switch (spu.fb.color_format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + fc[0] = spu_unpack_B8G8R8A8(c0); + fc[1] = spu_unpack_B8G8R8A8(c1); + fc[2] = spu_unpack_B8G8R8A8(c2); + fc[3] = spu_unpack_B8G8R8A8(c3); + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + fc[0] = spu_unpack_A8R8G8B8(c0); + fc[1] = spu_unpack_A8R8G8B8(c1); + fc[2] = spu_unpack_A8R8G8B8(c2); + fc[3] = spu_unpack_A8R8G8B8(c3); + break; + default: + ASSERT(0); + } + _transpose_matrix4x4(fbRGBA, fc); + } + + /* + * Compute Src RGB terms + */ + switch (spu.blend.rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1r = fragR; + term1g = fragG; + term1b = fragB; + break; + case PIPE_BLENDFACTOR_ZERO: + term1r = + term1g = + term1b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1r = spu_mul(fragR, fragR); + term1g = spu_mul(fragG, fragG); + term1b = spu_mul(fragB, fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1r = spu_mul(fragR, fragA); + term1g = spu_mul(fragG, fragA); + term1b = spu_mul(fragB, fragA); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term + */ + switch (spu.blend.alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1a = fragA; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1a = spu_mul(fragA, fragA); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest RGB terms + */ + switch (spu.blend.rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2r = fragR; + term2g = fragG; + term2b = fragB; + break; + case PIPE_BLENDFACTOR_ZERO: + term2r = + term2g = + term2b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2r = spu_mul(fbRGBA[0], fragR); + term2g = spu_mul(fbRGBA[1], fragG); + term2b = spu_mul(fbRGBA[2], fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2r = spu_mul(fbRGBA[0], fragA); + term2g = spu_mul(fbRGBA[1], fragA); + term2b = spu_mul(fbRGBA[2], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2r = spu_mul(fbRGBA[0], tmp); + term2g = spu_mul(fbRGBA[1], tmp); + term2b = spu_mul(fbRGBA[2], tmp); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term + */ + switch (spu.blend.alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2a = fragA; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2a = spu_mul(fbRGBA[3], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2a = spu_mul(fbRGBA[3], tmp); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms + */ + switch (spu.blend.rgb_func) { + case PIPE_BLEND_ADD: + fragR = spu_add(term1r, term2r); + fragG = spu_add(term1g, term2g); + fragB = spu_add(term1b, term2b); + break; + case PIPE_BLEND_SUBTRACT: + fragR = spu_sub(term1r, term2r); + fragG = spu_sub(term1g, term2g); + fragB = spu_sub(term1b, term2b); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (spu.blend.alpha_func) { + case PIPE_BLEND_ADD: + fragA = spu_add(term1a, term2a); + break; + case PIPE_BLEND_SUBTRACT: + fragA = spu_sub(term1a, term2a); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + } + /* XXX do colormask test here */ @@ -190,17 +374,17 @@ spu_fallback_fragment_ops(uint x, uint y, #if 0 { vector float frag_soa[4]; - frag_soa[0] = fragRed; - frag_soa[1] = fragGreen; - frag_soa[2] = fragBlue; - frag_soa[3] = fragAlpha; + frag_soa[0] = fragR; + frag_soa[1] = fragG; + frag_soa[2] = fragB; + frag_soa[3] = fragA; _transpose_matrix4x4(frag_aos, frag_soa); } #else /* short-cut relying on function parameter layout: */ - _transpose_matrix4x4(frag_aos, &fragRed); - (void) fragGreen; - (void) fragBlue; + _transpose_matrix4x4(frag_aos, &fragR); + (void) fragG; + (void) fragB; #endif switch (spu.fb.color_format) { @@ -238,7 +422,7 @@ spu_fallback_fragment_ops(uint x, uint y, if (spu_extract(mask, 2)) colorTile->ui[y+1][x+0] = c2; if (spu_extract(mask, 3)) - colorTile->ui[y+1][x+1] = c3; + colorTile->ui[y+1][x+1] = c3; #else /* * Quad layout: @@ -253,6 +437,6 @@ spu_fallback_fragment_ops(uint x, uint y, if (spu_extract(mask, 2)) colorTile->ui[y][x*2+2] = c2; if (spu_extract(mask, 3)) - colorTile->ui[y][x*2+3] = c3; + colorTile->ui[y][x*2+3] = c3; #endif } -- cgit v1.2.3 From 283ffdf99605c536d00e03ad6ec91a6f8e006fc2 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 10:13:20 -0600 Subject: cell: checkpoint: remove more of the old per-fragment code --- src/gallium/drivers/cell/common.h | 2 + src/gallium/drivers/cell/ppu/Makefile | 1 - src/gallium/drivers/cell/ppu/cell_state_emit.c | 60 ++----------- src/gallium/drivers/cell/spu/spu_main.c | 115 +++---------------------- src/gallium/drivers/cell/spu/spu_main.h | 37 +------- 5 files changed, 19 insertions(+), 196 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index a62530c64da..61d2b7d1aed 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -146,6 +146,8 @@ struct cell_command_logicop struct cell_command_fragment_ops { uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS]; }; diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index b5a6fcb8deb..8699f3f8ec2 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -28,7 +28,6 @@ SOURCES = \ cell_gen_fragment.c \ cell_state_derived.c \ cell_state_emit.c \ - cell_state_per_fragment.c \ cell_state_shader.c \ cell_pipe_state.c \ cell_screen.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 06777aac14c..2bfb976c597 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -55,23 +55,6 @@ emit_state_cmd(struct cell_context *cell, uint cmd, void cell_emit_state(struct cell_context *cell) { - if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_BLEND)) { - struct cell_command_logicop logicop; - - if (cell->logic_op.store != NULL) { - spe_release_func(& cell->logic_op); - } - - cell_generate_logic_op(& cell->logic_op, - & cell->blend->base, - cell->framebuffer.cbufs[0]); - - logicop.base = (intptr_t) cell->logic_op.store; - logicop.size = 64 * 4; - emit_state_cmd(cell, CELL_CMD_STATE_LOGICOP, &logicop, - sizeof(logicop)); - } - if (cell->dirty & CELL_NEW_FRAMEBUFFER) { struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; struct pipe_surface *zbuf = cell->framebuffer.zsbuf; @@ -91,7 +74,9 @@ cell_emit_state(struct cell_context *cell) } - if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_DEPTH_STENCIL)) { + if (cell->dirty & (CELL_NEW_FRAMEBUFFER | + CELL_NEW_DEPTH_STENCIL | + CELL_NEW_BLEND)) { /* XXX we don't want to always do codegen here. We should have * a hash/lookup table to cache previous results... */ @@ -105,47 +90,12 @@ cell_emit_state(struct cell_context *cell) fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; memcpy(&fops->code, spe_code.store, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + fops->dsa = cell->depth_stencil->base; + fops->blend = cell->blend->base; /* free codegen buffer */ spe_release_func(&spe_code); } - if (cell->dirty & CELL_NEW_BLEND) { - struct cell_command_blend blend; - - if (cell->blend != NULL) { - blend.base = (intptr_t) cell->blend->code.store; - blend.size = cell->blend->code.num_inst * SPE_INST_SIZE; - blend.read_fb = TRUE; - } - else { - blend.base = 0; - blend.size = 0; - blend.read_fb = FALSE; - } - - emit_state_cmd(cell, CELL_CMD_STATE_BLEND, &blend, sizeof(blend)); - } - - if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { - struct cell_command_depth_stencil_alpha_test dsat; - - if (cell->depth_stencil != NULL) { - dsat.base = (intptr_t) cell->depth_stencil->code.store; - dsat.size = cell->depth_stencil->code.num_inst * SPE_INST_SIZE; - dsat.read_depth = TRUE; - dsat.read_stencil = FALSE; - dsat.state = cell->depth_stencil->base; - } - else { - dsat.base = 0; - dsat.size = 0; - dsat.read_depth = FALSE; - dsat.read_stencil = FALSE; - } - - emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, sizeof(dsat)); - } - if (cell->dirty & CELL_NEW_SAMPLER) { uint i; for (i = 0; i < CELL_MAX_SAMPLERS; i++) { diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 4e0ec159250..6afca19dfd7 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -63,14 +63,6 @@ struct spu_vs_context draw; static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] ALIGN16_ATTRIB; -static unsigned char depth_stencil_code_buffer[4 * 64] - ALIGN16_ATTRIB; - -static unsigned char fb_blend_code_buffer[4 * 64] - ALIGN16_ATTRIB; - -static unsigned char logicop_code_buffer[4 * 64] - ALIGN16_ATTRIB; /** @@ -240,8 +232,15 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_ops.code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); + /* Copy state info */ + memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); + memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); + /* Point function pointer at new code */ spu.fragment_ops.func = (spu_fragment_ops_func) spu.fragment_ops.code; + + spu.read_depth = spu.depth_stencil_alpha.depth.enabled; + spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; } @@ -303,89 +302,6 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) } -#define NEW_FRAGMENT_FUNCTION 01 - -static void -cmd_state_blend(const struct cell_command_blend *state) -{ - if (Debug) - printf("SPU %u: BLEND: enabled %d\n", - spu.init.id, - (state->size != 0)); - - ASSERT_ALIGN16(state->base); - - if (state->size != 0) { - mfc_get(fb_blend_code_buffer, - (unsigned int) state->base, /* src */ - ROUNDUP16(state->size), - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - spu.blend = (blend_func) fb_blend_code_buffer; - spu.read_fb = state->read_fb; - } - else - { - spu.read_fb = FALSE; - } -} - - -static void -cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state) -{ - if (Debug) - printf("SPU %u: DEPTH_STENCIL: ztest %d\n", - spu.init.id, - state->read_depth); - - ASSERT_ALIGN16(state->base); - - if (state->size != 0) { - mfc_get(depth_stencil_code_buffer, - (unsigned int) state->base, /* src */ - ROUNDUP16(state->size), - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - } - else - { - /* If there is no code, emit a return instruction. - */ - depth_stencil_code_buffer[0] = 0x35; - depth_stencil_code_buffer[1] = 0x00; - depth_stencil_code_buffer[2] = 0x00; - depth_stencil_code_buffer[3] = 0x00; - } - - spu.frag_test = (frag_test_func) depth_stencil_code_buffer; - spu.read_depth = state->read_depth; - spu.read_stencil = state->read_stencil; - spu.depth_stencil_alpha = state->state; -} - - -static void -cmd_state_logicop(const struct cell_command_logicop * code) -{ -#if !NEW_FRAGMENT_FUNCTION - mfc_get(logicop_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - spu.logicop = (logicop_func) logicop_code_buffer; -#endif -} - - static void cmd_state_sampler(const struct cell_command_sampler *sampler) { @@ -571,15 +487,6 @@ cmd_batch(uint opcode) cmd_finish(); pos += 1; break; - case CELL_CMD_STATE_BLEND: - cmd_state_blend((struct cell_command_blend *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_command_blend)) / 8); - break; - case CELL_CMD_STATE_DEPTH_STENCIL: - cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *) - &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8); - break; case CELL_CMD_STATE_SAMPLER: { struct cell_command_sampler *sampler @@ -614,19 +521,17 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; case CELL_CMD_STATE_BIND_VS: +#if 01 spu_bind_vertex_shader(&draw, (struct cell_shader_info *) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); +#endif break; case CELL_CMD_STATE_ATTRIB_FETCH: cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); break; - case CELL_CMD_STATE_LOGICOP: - cmd_state_logicop((struct cell_command_logicop *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8); - break; case CELL_CMD_FLUSH_BUFFER_RANGE: { struct cell_buffer_range *br = (struct cell_buffer_range *) &buffer[pos+1]; @@ -695,7 +600,9 @@ main_loop(void) exitFlag = 1; break; case CELL_CMD_VS_EXECUTE: +#if 01 spu_execute_vertex_shader(&draw, &cmd.vs); +#endif break; case CELL_CMD_BATCH: cmd_batch(opcode); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 7ab34f52229..f0f8be47db6 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -60,35 +60,6 @@ typedef union { #define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ -struct spu_frag_test_results { - qword mask; - qword depth; - qword stencil; -}; - -typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask, - qword pixel_depth, qword pixel_stencil, qword frag_depth, - qword frag_alpha, qword facing); - - -struct spu_blend_results { - qword r; - qword g; - qword b; - qword a; -}; - -typedef struct spu_blend_results (*blend_func)( - qword frag_r, qword frag_g, qword frag_b, qword frag_a, - qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, - qword const_r, qword const_g, qword const_b, qword const_a); - -typedef struct spu_blend_results (*logicop_func)( - qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, - qword frag_r, qword frag_g, qword frag_b, qword frag_a, - qword frag_mask); - - typedef vector float (*sample_texture_func)(uint unit, vector float texcoord); @@ -147,16 +118,10 @@ struct spu_global struct spu_framebuffer fb; struct pipe_depth_stencil_alpha_state depth_stencil_alpha; + struct pipe_blend_state blend; boolean read_depth; boolean read_stencil; - frag_test_func frag_test; /**< Current depth/stencil test code */ - - boolean read_fb; /**< Does current blend mode require framebuffer read? */ - blend_func blend; /**< Current blend code */ - qword const_blend_color[4] ALIGN16_ATTRIB; - - logicop_func logicop; /**< Current logicop code **/ struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct spu_texture texture[PIPE_MAX_SAMPLERS]; -- cgit v1.2.3 From aa4a08d429712fa516342ec02253c2591794ea5f Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 10:25:38 -0600 Subject: cell: asst. clean-up --- src/gallium/drivers/cell/spu/spu_main.c | 23 +++++----------- src/gallium/drivers/cell/spu/spu_main.h | 47 +++++++++++++++------------------ src/gallium/drivers/cell/spu/spu_tri.c | 10 +++---- 3 files changed, 32 insertions(+), 48 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 6afca19dfd7..29686964d2f 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -231,13 +231,13 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) if (Debug) printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_ops.code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); + memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); /* Copy state info */ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); /* Point function pointer at new code */ - spu.fragment_ops.func = (spu_fragment_ops_func) spu.fragment_ops.code; + spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; spu.read_depth = spu.depth_stencil_alpha.depth.enabled; spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; @@ -288,17 +288,6 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) spu.fb.zsize = 0; break; } - - if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) - spu.color_shuffle = ((vector unsigned char) { - 12, 0, 4, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}); - else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM) - spu.color_shuffle = ((vector unsigned char) { - 8, 4, 0, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}); - else - ASSERT(0); } @@ -521,11 +510,11 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; case CELL_CMD_STATE_BIND_VS: -#if 01 +#if 0 spu_bind_vertex_shader(&draw, (struct cell_shader_info *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); #endif + pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); break; case CELL_CMD_STATE_ATTRIB_FETCH: cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) @@ -600,7 +589,7 @@ main_loop(void) exitFlag = 1; break; case CELL_CMD_VS_EXECUTE: -#if 01 +#if 0 spu_execute_vertex_shader(&draw, &cmd.vs); #endif break; @@ -631,7 +620,7 @@ one_time_init(void) /* Install default/fallback fragment processing function. * This will normally be overriden by a code-gen'd function. */ - spu.fragment_ops.func = spu_fallback_fragment_ops; + spu.fragment_ops = spu_fallback_fragment_ops; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index f0f8be47db6..d40539da83b 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -60,9 +60,11 @@ typedef union { #define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ -typedef vector float (*sample_texture_func)(uint unit, vector float texcoord); - +/** Function for sampling textures */ +typedef vector float (*spu_sample_texture_func)(uint unit, + vector float texcoord); +/** Function for performing per-fragment ops */ typedef void (*spu_fragment_ops_func)(uint x, uint y, tile_t *colorTile, tile_t *depthStencilTile, @@ -73,14 +75,8 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y, vector float fragAlpha, vector unsigned int mask); -struct spu_fragment_ops +struct spu_framebuffer { - uint code[SPU_MAX_FRAGMENT_OPS_INSTS]; - spu_fragment_ops_func func; /**< Current fragment ops function */ -} ALIGN16_ATTRIB; - - -struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ enum pipe_format color_format; @@ -109,34 +105,31 @@ struct spu_texture /** - * All SPU global/context state will be in singleton object of this type: + * All SPU global/context state will be in a singleton object of this type: */ struct spu_global { + /** One-time init/constant info */ struct cell_init_info init; + /* + * Current state + */ struct spu_framebuffer fb; - struct pipe_depth_stencil_alpha_state depth_stencil_alpha; struct pipe_blend_state blend; - - boolean read_depth; - boolean read_stencil; - struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct spu_texture texture[PIPE_MAX_SAMPLERS]; - struct vertex_info vertex_info; - struct spu_fragment_ops fragment_ops; - - /* XXX more state to come */ - - - /** current color and Z tiles */ + /** Current color and Z tiles */ tile_t ctile ALIGN16_ATTRIB; tile_t ztile ALIGN16_ATTRIB; + /** Read depth/stencil tiles? */ + boolean read_depth; + boolean read_stencil; + /** Current tiles' status */ ubyte cur_ctile_status, cur_ztile_status; @@ -144,11 +137,13 @@ struct spu_global ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + /** Current fragment ops machine code */ + uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS]; + /** Current fragment ops function */ + spu_fragment_ops_func fragment_ops; - /** for converting RGBA to PIPE_FORMAT_x colors */ - vector unsigned char color_shuffle; - - sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; + /** Current texture sampler function */ + spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index a5bf3270c7f..f02cdd1f763 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -333,11 +333,11 @@ emit_quad( int x, int y, mask_t mask ) /* Do all per-fragment/quad operations here, including: * alpha test, z test, stencil test, blend and framebuffer writing. */ - spu.fragment_ops.func(ix, iy, &spu.ctile, &spu.ztile, - fragZ.v, - soa_frag[0], soa_frag[1], - soa_frag[2], soa_frag[3], - mask); + spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, + fragZ.v, + soa_frag[0], soa_frag[1], + soa_frag[2], soa_frag[3], + mask); } } -- cgit v1.2.3 From f19903aa83e9b6e18930cbda14cfec3cca2e1bf2 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 10:26:00 -0600 Subject: cell: remove old blend/depth/stencil/logicop structs --- src/gallium/drivers/cell/common.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 61d2b7d1aed..8aa2b23ec09 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -111,35 +111,6 @@ #define CELL_DEBUG_SYNC (1 << 1) -/** - */ -struct cell_command_depth_stencil_alpha_test -{ - uint64_t base; /**< Effective address of code start. */ - unsigned size; /**< Size in bytes of SPE code. */ - unsigned read_depth; /**< Flag: should depth be read? */ - unsigned read_stencil; /**< Flag: should stencil be read? */ - struct pipe_depth_stencil_alpha_state state; -}; - - -/** - * Upload code to perform framebuffer blend operation - */ -struct cell_command_blend -{ - uint64_t base; /**< Effective address of code start. */ - unsigned size; /**< Size in bytes of SPE code. */ - unsigned read_fb; /**< Flag: should framebuffer be read? */ -}; - - -struct cell_command_logicop -{ - uint64_t base; /**< Effective address of code start. */ - unsigned size; /**< Size in bytes of SPE code. */ -}; - #define SPU_MAX_FRAGMENT_OPS_INSTS 64 -- cgit v1.2.3 From 924653e37db4501d0f03721e9d74abffe46a3c72 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 10:27:17 -0600 Subject: cell: don't build unused sources --- src/gallium/drivers/cell/spu/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index e285ae9fdba..1ae0dfb8c10 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -22,12 +22,15 @@ SOURCES = \ spu_render.c \ spu_texture.c \ spu_tile.c \ - spu_tri.c \ + spu_tri.c + +OLD_SOURCES = \ spu_exec.c \ spu_util.c \ spu_vertex_fetch.c \ spu_vertex_shader.c + SPU_OBJECTS = $(SOURCES:.c=.o) \ SPU_ASM_OUT = $(SOURCES:.c=.s) \ -- cgit v1.2.3 From a558369ec66e3d9e2b88f4df9a3b5a3704b19ef3 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 10:33:13 -0600 Subject: cell: disable NEW_VS emit --- src/gallium/drivers/cell/ppu/cell_state_emit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 2bfb976c597..180b89c1f66 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -133,7 +133,8 @@ cell_emit_state(struct cell_context *cell) emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, &cell->vertex_info, sizeof(struct vertex_info)); } - + +#if 0 if (cell->dirty & CELL_NEW_VS) { const struct draw_context *const draw = cell->draw; struct cell_shader_info info; @@ -148,4 +149,5 @@ cell_emit_state(struct cell_context *cell) emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); } +#endif } -- cgit v1.2.3 From f6bf8d9d410d94372b72f4f6ede6196ae5a4a67f Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 10:33:24 -0600 Subject: cell: clean-up, comments --- src/gallium/drivers/cell/spu/spu_main.c | 52 ++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 29686964d2f..2a7cb75f592 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -429,16 +429,14 @@ cmd_batch(uint opcode) printf("SPU %u: release batch buf %u\n", spu.init.id, buf); release_buffer(buf); + /* + * Loop over commands in the batch buffer + */ for (pos = 0; pos < usize; /* no incr */) { switch (buffer[pos]) { - case CELL_CMD_STATE_FRAMEBUFFER: - { - struct cell_command_framebuffer *fb - = (struct cell_command_framebuffer *) &buffer[pos]; - cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 8; - } - break; + /* + * rendering commands + */ case CELL_CMD_CLEAR_SURFACE: { struct cell_command_clear_surface *clr @@ -456,6 +454,17 @@ cmd_batch(uint opcode) pos += pos_incr; } break; + /* + * state-update commands + */ + case CELL_CMD_STATE_FRAMEBUFFER: + { + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 8; + } + break; case CELL_CMD_STATE_FRAGMENT_OPS: { struct cell_command_fragment_ops *fops @@ -464,18 +473,6 @@ cmd_batch(uint opcode) pos += sizeof(*fops) / 8; } break; - case CELL_CMD_RELEASE_VERTS: - { - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) &buffer[pos]; - cmd_release_verts(release); - pos += sizeof(*release) / 8; - } - break; - case CELL_CMD_FINISH: - cmd_finish(); - pos += 1; - break; case CELL_CMD_STATE_SAMPLER: { struct cell_command_sampler *sampler @@ -521,6 +518,21 @@ cmd_batch(uint opcode) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); break; + /* + * misc commands + */ + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; + } + break; case CELL_CMD_FLUSH_BUFFER_RANGE: { struct cell_buffer_range *br = (struct cell_buffer_range *) &buffer[pos+1]; -- cgit v1.2.3 From 73c6ae98c1c60635883a733f36d59d246e74aa2a Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 10:38:37 -0600 Subject: cell: remove old state CMDs, added comments --- src/gallium/drivers/cell/common.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 8aa2b23ec09..e989d8c2e5c 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -84,7 +84,7 @@ #define CELL_CMD_BATCH 5 #define CELL_CMD_RELEASE_VERTS 6 #define CELL_CMD_STATE_FRAMEBUFFER 10 -#define CELL_CMD_STATE_DEPTH_STENCIL 11 +#define CELL_CMD_STATE_FRAGMENT_OPS 11 #define CELL_CMD_STATE_SAMPLER 12 #define CELL_CMD_STATE_TEXTURE 13 #define CELL_CMD_STATE_VERTEX_INFO 14 @@ -92,12 +92,9 @@ #define CELL_CMD_STATE_UNIFORMS 16 #define CELL_CMD_STATE_VS_ARRAY_INFO 17 #define CELL_CMD_STATE_BIND_VS 18 -#define CELL_CMD_STATE_BLEND 19 #define CELL_CMD_STATE_ATTRIB_FETCH 20 -#define CELL_CMD_STATE_LOGICOP 21 #define CELL_CMD_VS_EXECUTE 22 #define CELL_CMD_FLUSH_BUFFER_RANGE 23 -#define CELL_CMD_STATE_FRAGMENT_OPS 24 #define CELL_NUM_BUFFERS 4 @@ -112,8 +109,13 @@ +/** Max instructions for doing per-fragment operations */ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 + +/** + * Command to specify per-fragment operations state and generated code. + */ struct cell_command_fragment_ops { uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ @@ -159,13 +161,15 @@ struct cell_array_info }; -struct cell_attribute_fetch_code { +struct cell_attribute_fetch_code +{ uint64_t base; uint size; }; -struct cell_buffer_range { +struct cell_buffer_range +{ uint64_t base; unsigned size; }; -- cgit v1.2.3 From 1b5331d7ebcf7b1a1693972cf13407184cab1e48 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 10:38:55 -0600 Subject: cell: fix typos in blend code-gen --- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 7966c0916c7..79a82ef72b5 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -286,16 +286,16 @@ gen_blend(const struct pipe_blend_state *blend, spe_roti(f, mask_reg, mask_reg, 8); /* fbG = fbRGBA & mask */ - spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); /* fbG = fbG >> 8 */ - spe_roti(f, fbB_reg, fbB_reg, -8); + spe_roti(f, fbG_reg, fbG_reg, -8); /* mask = mask << 8 */ spe_roti(f, mask_reg, mask_reg, 8); /* fbR = fbRGBA & mask */ spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); /* fbR = fbR >> 16 */ - spe_roti(f, fbB_reg, fbB_reg, -16); + spe_roti(f, fbR_reg, fbR_reg, -16); /* mask = mask << 8 */ spe_roti(f, mask_reg, mask_reg, 8); -- cgit v1.2.3 From 7ce1d0fb6700fd4998a095de2c9edf5ed920464c Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 10:52:03 -0600 Subject: cell: more comments, stub code for colormask/logicop/etc --- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 107 ++++++++++++++------- 1 file changed, 70 insertions(+), 37 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index 3f0eabaa050..03dd547845b 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -37,6 +37,8 @@ #include "spu_per_fragment_op.h" +#define LINEAR_QUAD_LAYOUT 1 + /** * Called by rasterizer for each quad after the shader has run. Do @@ -177,27 +179,28 @@ spu_fallback_fragment_ops(uint x, uint y, } if (spu.blend.blend_enable) { + /* blending terms, misc regs */ vector float term1r, term1g, term1b, term1a; vector float term2r, term2g, term2b, term2a; - - vector float fbRGBA[4]; - vector float one, tmp; - /* get colors from framebuffer */ + vector float fbRGBA[4]; /* current framebuffer colors */ + + /* get colors from framebuffer/tile */ { vector float fc[4]; uint c0, c1, c2, c3; -#if 0 - c0 = colorTile->ui[y+0][x+0]; - c1 = colorTile->ui[y+0][x+1]; - c2 = colorTile->ui[y+1][x+0]; - c3 = colorTile->ui[y+1][x+1]; -#else + +#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ c0 = colorTile->ui[y][x*2+0]; c1 = colorTile->ui[y][x*2+1]; c2 = colorTile->ui[y][x*2+2]; c3 = colorTile->ui[y][x*2+3]; +#else + c0 = colorTile->ui[y+0][x+0]; + c1 = colorTile->ui[y+0][x+1]; + c2 = colorTile->ui[y+1][x+0]; + c3 = colorTile->ui[y+1][x+1]; #endif switch (spu.fb.color_format) { case PIPE_FORMAT_B8G8R8A8_UNORM: @@ -360,18 +363,11 @@ spu_fallback_fragment_ops(uint x, uint y, } - /* XXX do colormask test here */ - - - if (spu_extract(spu_orx(mask), 0)) { - spu.cur_ctile_status = TILE_STATUS_DIRTY; - } - else { - return; - } - - /* convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA */ + /* + * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA. + */ #if 0 + /* original code */ { vector float frag_soa[4]; frag_soa[0] = fragR; @@ -387,6 +383,9 @@ spu_fallback_fragment_ops(uint x, uint y, (void) fragB; #endif + /* + * Pack float colors into 32-bit RGBA words. + */ switch (spu.fb.color_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: c0 = spu_pack_A8R8G8B8(frag_aos[0]); @@ -406,24 +405,41 @@ spu_fallback_fragment_ops(uint x, uint y, ASSERT(0); } -#if 0 + /* - * Quad layout: - * +--+--+ - * |p0|p1| - * +--+--+ - * |p2|p3| - * +--+--+ + * Color masking */ - if (spu_extract(mask, 0)) - colorTile->ui[y+0][x+0] = c0; - if (spu_extract(mask, 1)) - colorTile->ui[y+0][x+1] = c1; - if (spu_extract(mask, 2)) - colorTile->ui[y+1][x+0] = c2; - if (spu_extract(mask, 3)) - colorTile->ui[y+1][x+1] = c3; -#else + if (spu.blend.colormask != 0xf) { + /* XXX to do */ + /* apply color mask to 32-bit packed colors */ + } + + + /* + * Logic Ops + */ + if (spu.blend.logicop_enable) { + /* XXX to do */ + /* apply logicop to 32-bit packed colors */ + } + + + /* + * If mask is non-zero, mark tile as dirty. + */ + if (spu_extract(spu_orx(mask), 0)) { + spu.cur_ctile_status = TILE_STATUS_DIRTY; + } + else { + return; + } + + + /* + * Write new quad colors to the framebuffer/tile. + * Only write pixels where the corresponding mask word is set. + */ +#if LINEAR_QUAD_LAYOUT /* * Quad layout: * +--+--+--+--+ @@ -438,5 +454,22 @@ spu_fallback_fragment_ops(uint x, uint y, colorTile->ui[y][x*2+2] = c2; if (spu_extract(mask, 3)) colorTile->ui[y][x*2+3] = c3; +#else + /* + * Quad layout: + * +--+--+ + * |p0|p1| + * +--+--+ + * |p2|p3| + * +--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y+0][x+0] = c0; + if (spu_extract(mask, 1)) + colorTile->ui[y+0][x+1] = c1; + if (spu_extract(mask, 2)) + colorTile->ui[y+1][x+0] = c2; + if (spu_extract(mask, 3)) + colorTile->ui[y+1][x+1] = c3; #endif } -- cgit v1.2.3 From be925ab6e8ecf6758adb2c6f2c423af31c5f86ca Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 15:48:13 -0600 Subject: cell: put cell_ prefix on gen_fragment_function() --- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 4 ++-- src/gallium/drivers/cell/ppu/cell_gen_fragment.h | 2 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 79a82ef72b5..5622701ddac 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -616,7 +616,7 @@ gen_pack_colors(struct spe_function *f, * \param f the generated function (out) */ void -gen_fragment_function(struct cell_context *cell, struct spe_function *f) +cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) { const struct pipe_depth_stencil_alpha_state *dsa = &cell->depth_stencil->base; @@ -850,7 +850,7 @@ gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_release_register(f, rgba_reg); } - printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); + //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h index 0ea0fc690c8..b59de198dce 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h @@ -31,7 +31,7 @@ extern void -gen_fragment_function(struct cell_context *cell, struct spe_function *f); +cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f); #endif /* CELL_GEN_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 180b89c1f66..3ebf0749ad6 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -85,7 +85,7 @@ cell_emit_state(struct cell_context *cell) struct spe_function spe_code; /* generate new code */ - gen_fragment_function(cell, &spe_code); + cell_gen_fragment_function(cell, &spe_code); /* put the new code into the batch buffer */ fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; memcpy(&fops->code, spe_code.store, -- cgit v1.2.3 From bc304bbd49d15ce1130f3ba07adaa85ef03ed931 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 17:08:52 -0600 Subject: cell: minor improvements to fragment code-gen --- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 5622701ddac..06219d4e980 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -265,6 +265,8 @@ gen_blend(const struct pipe_blend_state *blend, int one_reg = spe_allocate_available_register(f); int tmp_reg = spe_allocate_available_register(f); + boolean one_reg_set = false; /* avoid setting one_reg more than once */ + ASSERT(blend->blend_enable); /* Unpack/convert framebuffer colors from four 32-bit packed colors @@ -275,7 +277,7 @@ gen_blend(const struct pipe_blend_state *blend, int mask_reg = spe_allocate_available_register(f); /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */ - spe_fsmbi(f, mask_reg, 0x1111); + spe_load_int(f, mask_reg, 0xff); /* XXX there may be more clever ways to implement the following code */ switch (color_format) { @@ -418,7 +420,10 @@ gen_blend(const struct pipe_blend_state *blend, break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* one = {1.0, 1.0, 1.0, 1.0} */ - spe_load_float(f, one_reg, 1.0f); + if (!one_reg_set) { + spe_load_float(f, one_reg, 1.0f); + one_reg_set = true; + } /* tmp = one - fragA */ spe_fs(f, tmp_reg, one_reg, fragA_reg); /* term = fb * tmp */ @@ -446,7 +451,10 @@ gen_blend(const struct pipe_blend_state *blend, break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* one = {1.0, 1.0, 1.0, 1.0} */ - spe_load_float(f, one_reg, 1.0f); + if (!one_reg_set) { + spe_load_float(f, one_reg, 1.0f); + one_reg_set = true; + } /* tmp = one - fragA */ spe_fs(f, tmp_reg, one_reg, fragA_reg); /* termA = fbA * tmp */ -- cgit v1.2.3 From aa66f08a21b791f338b519f0c2162cd8f7b3aeb0 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 11 Sep 2008 17:59:52 -0600 Subject: cell: initial support for fragment shader code generation. TGSI shaders are translated into SPE instructions which are then sent to the SPEs for execution. Only a few opcodes work, no swizzling yet, no support for constants/immediates, etc. --- src/gallium/drivers/cell/common.h | 15 + src/gallium/drivers/cell/ppu/Makefile | 1 + src/gallium/drivers/cell/ppu/cell_context.h | 1 + src/gallium/drivers/cell/ppu/cell_gen_fp.c | 523 +++++++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_gen_fp.h | 42 ++ src/gallium/drivers/cell/ppu/cell_state_emit.c | 16 + src/gallium/drivers/cell/ppu/cell_state_shader.c | 8 +- src/gallium/drivers/cell/spu/spu_main.c | 25 +- src/gallium/drivers/cell/spu/spu_main.h | 15 + src/gallium/drivers/cell/spu/spu_tri.c | 35 ++ 10 files changed, 678 insertions(+), 3 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_gen_fp.c create mode 100644 src/gallium/drivers/cell/ppu/cell_gen_fp.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index e989d8c2e5c..cb0631baf52 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -92,6 +92,7 @@ #define CELL_CMD_STATE_UNIFORMS 16 #define CELL_CMD_STATE_VS_ARRAY_INFO 17 #define CELL_CMD_STATE_BIND_VS 18 +#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 #define CELL_CMD_STATE_ATTRIB_FETCH 20 #define CELL_CMD_VS_EXECUTE 22 #define CELL_CMD_FLUSH_BUFFER_RANGE 23 @@ -125,6 +126,20 @@ struct cell_command_fragment_ops }; +/** Max instructions for fragment programs */ +#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128 + +/** + * Command to send a fragment progra to SPUs. + */ +struct cell_command_fragment_program +{ + uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ + uint num_inst; /**< Number of instructions */ + unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; +}; + + /** * Tell SPUs about the framebuffer size, location */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 8699f3f8ec2..b28f4c5c31f 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -26,6 +26,7 @@ SOURCES = \ cell_draw_arrays.c \ cell_flush.c \ cell_gen_fragment.c \ + cell_gen_fp.c \ cell_state_derived.c \ cell_state_emit.c \ cell_state_shader.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 8cec9f45b2e..14914b9c6f8 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -61,6 +61,7 @@ struct cell_fragment_shader_state { struct pipe_shader_state shader; struct tgsi_shader_info info; + struct spe_function code; void *data; }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c new file mode 100644 index 00000000000..6ffe94eb14a --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -0,0 +1,523 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +/** + * Generate SPU fragment program/shader code. + * + * Note that we generate SOA-style code here. So each TGSI instruction + * operates on four pixels (and is translated into four SPU instructions, + * generally speaking). + * + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "util/u_memory.h" +#include "cell_context.h" +#include "cell_gen_fp.h" + + +/** Set to 1 to enable debug/disassembly printfs */ +#define DISASSEM 01 + + +/** + * Context needed during code generation. + */ +struct codegen +{ + int inputs_reg; /**< 1st function parameter */ + int outputs_reg; /**< 2nd function parameter */ + int constants_reg; /**< 3rd function parameter */ + int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */ + + int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */ + + /** Per-instruction temps / intermediate temps */ + int num_itemps; + int itemps[3]; + + struct spe_function *f; + boolean error; +}; + + +/** + * Allocate an intermediate temporary register. + */ +static int +get_itemp(struct codegen *gen) +{ + int t = spe_allocate_available_register(gen->f); + assert(gen->num_itemps < Elements(gen->itemps)); + gen->itemps[gen->num_itemps++] = t; + return t; +} + +/** + * Free all intermediate temporary registers. To be called after each + * instruction has been emitted. + */ +static void +free_itemps(struct codegen *gen) +{ + int i; + for (i = 0; i < gen->num_itemps; i++) { + spe_release_register(gen->f, gen->itemps[i]); + } + gen->num_itemps = 0; +} + + +/** + * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. + * The register is allocated and initialized upon the first call. + */ +static int +get_const_one_reg(struct codegen *gen) +{ + if (gen->one_reg <= 0) { + gen->one_reg = spe_allocate_available_register(gen->f); + } + + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(gen->f, gen->one_reg, 1.0f); +#if DISASSEM + printf("il\tr%d, 1.0f\n", gen->one_reg); +#endif + + return gen->one_reg; +} + + +/** + * Return the index of the SPU temporary containing the named TGSI + * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we + * just return the corresponding SPE register. If the TGIS register + * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register + * and emit an SPE load instruction. + */ +static int +get_src_reg(struct codegen *gen, + int channel, + const struct tgsi_full_src_register *src) +{ + int reg; + + /* XXX need to examine src swizzle info here. + * That will involve changing the channel var... + */ + + + switch (src->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + reg = gen->temp_regs[src->SrcRegister.Index][channel]; + break; + case TGSI_FILE_INPUT: + { + /* offset is measured in quadwords, not bytes */ + int offset = src->SrcRegister.Index * 4 + channel; + reg = get_itemp(gen); + /* Load: reg = memory[(machine_reg) + offset] */ + spe_lqd(gen->f, reg, gen->inputs_reg, offset); +#if DISASSEM + printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset); +#endif + } + break; + case TGSI_FILE_IMMEDIATE: + /* xxx fall-through for now / fix */ + case TGSI_FILE_CONSTANT: + /* xxx fall-through for now / fix */ + default: + assert(0); + } + + return reg; +} + + +/** + * Return the index of an SPE register to use for the given TGSI register. + * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the + * corresponding SPE register is returned. If the TGSI register is + * TGSI_FILE_OUTPUT we allocate an intermediate temporary register. + * See store_dest_reg() below... + */ +static int +get_dst_reg(struct codegen *gen, + int channel, + const struct tgsi_full_dst_register *dest) +{ + int reg; + + switch (dest->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + reg = gen->temp_regs[dest->DstRegister.Index][channel]; + break; + case TGSI_FILE_OUTPUT: + reg = get_itemp(gen); + break; + default: + assert(0); + } + + return reg; +} + + +/** + * When a TGSI instruction is writing to an output register, this + * function emits the SPE store instruction to store the value_reg. + * \param value_reg the SPE register containing the value to store. + * This would have been returned by get_dst_reg(). + */ +static void +store_dest_reg(struct codegen *gen, + int value_reg, int channel, + const struct tgsi_full_dst_register *dest) +{ + switch (dest->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + /* no-op */ + break; + case TGSI_FILE_OUTPUT: + { + /* offset is measured in quadwords, not bytes */ + int offset = dest->DstRegister.Index * 4 + channel; + /* Store: memory[(machine_reg) + offset] = reg */ + spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); +#if DISASSEM + printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset); +#endif + } + break; + default: + assert(0); + } +} + + +static boolean +emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* XXX we don't always need to actually emit a mov instruction here */ + spe_move(gen->f, dst_reg, src_reg); +#if DISASSEM + printf("mov\tr%d, r%d\n", dst_reg, src_reg); +#endif + store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + + +/** + * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD + * becomes (up to) four SPU "fa" instructions because we're doing SOA + * processing. + */ +static boolean +emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + /* Loop over Red/Green/Blue/Alpha channels */ + for (ch = 0; ch < 4; ch++) { + /* If the dest R, G, B or A writemask is enabled... */ + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + /* get indexes of the two src, one dest SPE registers */ + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* Emit actual SPE instruction: d = s1 + s2 */ + spe_fa(gen->f, d_reg, s1_reg, s2_reg); +#if DISASSEM + printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); +#endif + + /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + /* Free any intermediate temps we allocated */ + free_itemps(gen); + } + } + return true; +} + + +/** + * Emit multiply. See emit_ADD for comments. + */ +static boolean +emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* d = s1 * s2 */ + spe_fm(gen->f, d_reg, s1_reg, s2_reg); +#if DISASSEM + printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); +#endif + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + + +/** + * Emit set-if-greater-than. + * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as + * the result but OpenGL/TGSI needs 0.0 and 1.0 results. + * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. + */ +static boolean +emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 > s2) */ + spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); +#if DISASSEM + printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); +#endif + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & one_reg */ + spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); +#if DISASSEM + printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen)); +#endif + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + + +/** + * Emit END instruction. + * We just return from the shader function at this point. + * + * Note that there may be more code after this that would be + * called by TGSI_OPCODE_CALL. + */ +static boolean +emit_END(struct codegen *gen) +{ + /* return from function call */ + spe_bi(gen->f, SPE_REG_RA, 0, 0); +#if DISASSEM + printf("bi\trRA\n"); +#endif + return true; +} + + +/** + * Emit code for the given instruction. Just a big switch stmt. + */ +static boolean +emit_instruction(struct codegen *gen, + const struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MOV: + return emit_MOV(gen, inst); + case TGSI_OPCODE_MUL: + return emit_MUL(gen, inst); + case TGSI_OPCODE_ADD: + return emit_ADD(gen, inst); + case TGSI_OPCODE_SGT: + return emit_SGT(gen, inst); + case TGSI_OPCODE_END: + return emit_END(gen); + + /* XXX lots more cases to do... */ + + default: + return false; + } + + return true; +} + + + +/** + * Emit "code" for a TGSI declaration. + * We only care about TGSI TEMPORARY register declarations at this time. + * For each TGSI TEMPORARY we allocate four SPE registers. + */ +static void +emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl) +{ + int i, ch; + + switch (decl->Declaration.File) { + case TGSI_FILE_TEMPORARY: +#if DISASSEM + printf("Declare temp reg %d .. %d\n", + decl->DeclarationRange.First, + decl->DeclarationRange.Last); +#endif + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; + i++) { + for (ch = 0; ch < 4; ch++) { + gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); + } + + /* XXX if we run out of SPE registers, we need to spill + * to SPU memory. someday... + */ + +#if DISASSEM + printf(" SPE regs: %d %d %d %d\n", + gen->temp_regs[i][0], + gen->temp_regs[i][1], + gen->temp_regs[i][2], + gen->temp_regs[i][3]); +#endif + } + break; + default: + ; /* ignore */ + } +} + + +/** + * Translate TGSI shader code to SPE instructions. This is done when + * the state tracker gives us a new shader (via pipe->create_fs_state()). + * + * \param cell the rendering context (in) + * \param tokens the TGSI shader (in) + * \param f the generated function (out) + */ +boolean +cell_gen_fragment_program(struct cell_context *cell, + const struct tgsi_token *tokens, + struct spe_function *f) +{ + struct tgsi_parse_context parse; + struct codegen gen; + + memset(&gen, 0, sizeof(gen)); + gen.f = f; + + /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ + gen.inputs_reg = 3; /* pointer to inputs array */ + gen.outputs_reg = 4; /* pointer to outputs array */ + gen.constants_reg = 5; /* pointer to constants array */ + + spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); + spe_allocate_register(f, gen.inputs_reg); + spe_allocate_register(f, gen.outputs_reg); + spe_allocate_register(f, gen.constants_reg); + +#if DISASSEM + printf("Begin %s\n", __FUNCTION__); + tgsi_dump(tokens, 0); +#endif + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: +#if 0 + if (!note_immediate(&gen, &parse.FullToken.FullImmediate )) + goto fail; +#endif + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + emit_declaration(&gen, &parse.FullToken.FullDeclaration); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) { + gen.error = true; + } + break; + + default: + assert(0); + + } + } + + + if (gen.error) { + /* terminate the SPE code */ + return emit_END(&gen); + } + +#if DISASSEM + printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); + printf("End %s\n", __FUNCTION__); +#endif + + tgsi_parse_free( &parse ); + + return !gen.error; +} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h new file mode 100644 index 00000000000..99faea70462 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#ifndef CELL_GEN_FP_H +#define CELL_GEN_FP_H + + + +extern boolean +cell_gen_fragment_program(struct cell_context *cell, + const struct tgsi_token *tokens, + struct spe_function *f); + + +#endif /* CELL_GEN_FP_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 3ebf0749ad6..2da3097983c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -73,6 +73,22 @@ cell_emit_state(struct cell_context *cell) #endif } + if (cell->dirty & (CELL_NEW_FS)) { + /* Send new fragment program to SPUs */ + struct cell_command_fragment_program *fp + = cell_batch_alloc(cell, sizeof(*fp)); + fp->opcode = CELL_CMD_STATE_FRAGMENT_PROGRAM; + fp->num_inst = cell->fs->code.num_inst; + memcpy(&fp->code, cell->fs->code.store, + SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); + if (0) { + int i; + printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n"); + for (i = 0; i < fp->num_inst; i++) { + printf(" %3d: 0x%08x\n", i, fp->code[i]); + } + } + } if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_DEPTH_STENCIL | diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 97e44eeb1a4..3a0d066da2a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -34,7 +34,7 @@ #include "cell_context.h" #include "cell_state.h" - +#include "cell_gen_fp.h" /** cast wrapper */ @@ -61,7 +61,7 @@ static void * cell_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { - /*struct cell_context *cell = cell_context(pipe);*/ + struct cell_context *cell = cell_context(pipe); struct cell_fragment_shader_state *cfs; cfs = CALLOC_STRUCT(cell_fragment_shader_state); @@ -76,6 +76,8 @@ cell_create_fs_state(struct pipe_context *pipe, tgsi_scan_shader(templ->tokens, &cfs->info); + cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code); + return cfs; } @@ -102,6 +104,8 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs) { struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); + spe_release_func(&cfs->code); + FREE((void *) cfs->shader.tokens); FREE(cfs); } diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 2a7cb75f592..78260c4259c 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -232,7 +232,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); - /* Copy state info */ + /* Copy state info (for fallback case only) */ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); @@ -244,6 +244,21 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) } +static void +cmd_state_fragment_program(const struct cell_command_fragment_program *fp) +{ + if (Debug) + printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_program_code, fp->code, + SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); +#if 01 + /* Point function pointer at new code */ + spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; +#endif +} + + static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { @@ -473,6 +488,14 @@ cmd_batch(uint opcode) pos += sizeof(*fops) / 8; } break; + case CELL_CMD_STATE_FRAGMENT_PROGRAM: + { + struct cell_command_fragment_program *fp + = (struct cell_command_fragment_program *) &buffer[pos]; + cmd_state_fragment_program(fp); + pos += sizeof(*fp) / 8; + } + break; case CELL_CMD_STATE_SAMPLER: { struct cell_command_sampler *sampler diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index d40539da83b..2c7b6258402 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -75,6 +75,12 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y, vector float fragAlpha, vector unsigned int mask); +/** Function for running fragment program */ +typedef void (*spu_fragment_program_func)(vector float *inputs, + vector float *outputs, + vector float *constants); + + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ @@ -142,9 +148,18 @@ struct spu_global /** Current fragment ops function */ spu_fragment_ops_func fragment_ops; + /** Current fragment program machine code */ + uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; + /** Current fragment ops function */ + spu_fragment_program_func fragment_program; + /** Current texture sampler function */ spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; + /** Fragment program constants (XXX preliminary/used) */ +#define MAX_CONSTANTS 32 + vector float constants[MAX_CONSTANTS]; + } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index f02cdd1f763..8b938781920 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -314,7 +314,42 @@ emit_quad( int x, int y, mask_t mask ) } else { /* simple shading */ +#if 0 eval_coeff(1, (float) x, (float) y, colors); + +#else + /* XXX new fragment program code */ + + if (spu.fragment_program) { + vector float inputs[4*4], outputs[2*4]; + + /* setup inputs */ + eval_coeff(1, (float) x, (float) y, inputs); + + /* Execute the current fragment program */ + spu.fragment_program(inputs, outputs, spu.constants); + + /* Copy outputs */ + colors[0] = outputs[0*4+0]; + colors[1] = outputs[0*4+1]; + colors[2] = outputs[0*4+2]; + colors[3] = outputs[0*4+3]; + + if (0 && spu.init.id==0 && y == 48) { + printf("colors[0] = %f %f %f %f\n", + spu_extract(colors[0], 0), + spu_extract(colors[0], 1), + spu_extract(colors[0], 2), + spu_extract(colors[0], 3)); + printf("colors[1] = %f %f %f %f\n", + spu_extract(colors[1], 0), + spu_extract(colors[1], 1), + spu_extract(colors[1], 2), + spu_extract(colors[1], 3)); + } + + } +#endif } -- cgit v1.2.3 From fa0aa1443b3aede0fce960b7a15606ce2ff493eb Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 07:57:08 -0600 Subject: cell: disable calls to old gen code --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index e04cf5f274a..475c6ef0ce6 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -48,8 +48,9 @@ cell_create_blend_state(struct pipe_context *pipe, struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state)); (void) memcpy(cb, blend, sizeof(*blend)); +#if 0 cell_generate_alpha_blend(cb); - +#endif return cb; } @@ -100,8 +101,9 @@ cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, MALLOC(sizeof(struct cell_depth_stencil_alpha_state)); (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil)); +#if 0 cell_generate_depth_stencil_test(cdsa); - +#endif return cdsa; } -- cgit v1.2.3 From 33aa5b69642405fbc300430bed1f2ccf521f3086 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 07:57:08 -0600 Subject: cell: disable calls to old gen code --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index e04cf5f274a..475c6ef0ce6 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -48,8 +48,9 @@ cell_create_blend_state(struct pipe_context *pipe, struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state)); (void) memcpy(cb, blend, sizeof(*blend)); +#if 0 cell_generate_alpha_blend(cb); - +#endif return cb; } @@ -100,8 +101,9 @@ cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, MALLOC(sizeof(struct cell_depth_stencil_alpha_state)); (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil)); +#if 0 cell_generate_depth_stencil_test(cdsa); - +#endif return cdsa; } -- cgit v1.2.3 From 6c0fa798578ad247027dff861406a524821ddcdd Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 08:47:45 -0600 Subject: cell: setup fragment program inputs in SOA format Also remove old code, etc. --- src/gallium/drivers/cell/spu/spu_tri.c | 112 ++++++++++++++++----------------- 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 8b938781920..b7faae6d60c 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -241,6 +241,19 @@ eval_coeff(uint slot, float x, float y, vector float result[4]) } +/** + * As above, but return 4 vectors in SOA format. + * XXX this will all be re-written someday. + */ +static INLINE void +eval_coeff_soa(uint slot, float x, float y, vector float result[4]) +{ + eval_coeff(slot, x, y, result); + _transpose_matrix4x4(result, result); +} + + + static INLINE vector float eval_z(float x, float y) { @@ -267,14 +280,17 @@ emit_quad( int x, int y, mask_t mask ) if (spu_extract(spu_orx(mask), 0)) { const int ix = x - setup.cliprect_minx; const int iy = y - setup.cliprect_miny; - vector float colors[4]; spu.cur_ctile_status = TILE_STATUS_DIRTY; spu.cur_ztile_status = TILE_STATUS_DIRTY; if (spu.texture[0].start) { - /* texture mapping */ + /* + * Temporary texture mapping path + * This will go away when fragment programs support TEX inst. + */ const uint unit = 0; + vector float colors[4]; vector float texcoords[4]; eval_coeff(2, (float) x, (float) y, texcoords); @@ -311,70 +327,54 @@ emit_quad( int x, int y, mask_t mask ) colors[3] = spu_mul(colors[3], colors1[3]); } - } - else { - /* simple shading */ -#if 0 - eval_coeff(1, (float) x, (float) y, colors); - -#else - /* XXX new fragment program code */ - - if (spu.fragment_program) { - vector float inputs[4*4], outputs[2*4]; - - /* setup inputs */ - eval_coeff(1, (float) x, (float) y, inputs); - - /* Execute the current fragment program */ - spu.fragment_program(inputs, outputs, spu.constants); - - /* Copy outputs */ - colors[0] = outputs[0*4+0]; - colors[1] = outputs[0*4+1]; - colors[2] = outputs[0*4+2]; - colors[3] = outputs[0*4+3]; - - if (0 && spu.init.id==0 && y == 48) { - printf("colors[0] = %f %f %f %f\n", - spu_extract(colors[0], 0), - spu_extract(colors[0], 1), - spu_extract(colors[0], 2), - spu_extract(colors[0], 3)); - printf("colors[1] = %f %f %f %f\n", - spu_extract(colors[1], 0), - spu_extract(colors[1], 1), - spu_extract(colors[1], 2), - spu_extract(colors[1], 3)); - } - + { + /* Convert fragment data from AoS to SoA format. + * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) + * This is temporary! + */ + vector float soa_frag[4]; + _transpose_matrix4x4(soa_frag, colors); + + vector float fragZ = eval_z((float) x, (float) y); + + /* Do all per-fragment/quad operations here, including: + * alpha test, z test, stencil test, blend and framebuffer writing. + */ + spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, + fragZ, + soa_frag[0], soa_frag[1], + soa_frag[2], soa_frag[3], + mask); } -#endif - } - - { - /* Convert fragment data from AoS to SoA format. - * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) - * This is temporary! + } + else { + /* + * Run fragment shader, execute per-fragment ops, update fb/tile. */ - vector float soa_frag[4]; - _transpose_matrix4x4(soa_frag, colors); + vector float inputs[4*4], outputs[2*4]; + vector float fragZ = eval_z((float) x, (float) y); - float4 fragZ; + /* setup inputs */ + eval_coeff_soa(1, (float) x, (float) y, inputs); - fragZ.v = eval_z((float) x, (float) y); + ASSERT(spu.fragment_program); + ASSERT(spu.fragment_ops); - /* Do all per-fragment/quad operations here, including: - * alpha test, z test, stencil test, blend and framebuffer writing. + /* Execute the current fragment program */ + spu.fragment_program(inputs, outputs, spu.constants); + + /* Execute per-fragment/quad operations, including: + * alpha test, z test, stencil test, blend and framebuffer writing. */ spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, - fragZ.v, - soa_frag[0], soa_frag[1], - soa_frag[2], soa_frag[3], + fragZ, + outputs[0*4+0], + outputs[0*4+1], + outputs[0*4+2], + outputs[0*4+3], mask); } - } } -- cgit v1.2.3 From e8b199c6e3386f8858adf43e5b15bf8ca0b8ce84 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 08:48:08 -0600 Subject: cell: implement swizzling for src regs --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 6ffe94eb14a..d7a8846ab3a 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -137,11 +137,12 @@ get_src_reg(struct codegen *gen, const struct tgsi_full_src_register *src) { int reg; + int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); - /* XXX need to examine src swizzle info here. - * That will involve changing the channel var... - */ + assert(swizzle >= 0); + assert(swizzle <= 3); + channel = swizzle; switch (src->SrcRegister.File) { case TGSI_FILE_TEMPORARY: -- cgit v1.2.3 From a449465556d47d83c2314a7ac711ca523378102b Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 09:43:11 -0600 Subject: cell: fix non-debug build error --- src/gallium/drivers/cell/ppu/cell_context.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 71f1a3049d1..0a5c0baa471 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -85,13 +85,11 @@ cell_draw_create(struct cell_context *cell) } -#ifdef DEBUG static const struct debug_named_value cell_debug_flags[] = { {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ {NULL, 0} }; -#endif struct pipe_context * -- cgit v1.2.3 From 38bacb6f32d8a2cddc1116f7fbe2b21ea5a91a95 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 11:43:37 -0600 Subject: cell: implement colormask on fallback path Also, some var renaming and additional comments --- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 164 ++++++++++++++------- 1 file changed, 110 insertions(+), 54 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index 03dd547845b..f107764fb28 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -60,9 +60,12 @@ spu_fallback_fragment_ops(uint x, uint y, vector unsigned int mask) { vector float frag_aos[4]; - unsigned int c0, c1, c2, c3; + unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ + unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */ - /* do alpha test */ + /* + * Do alpha test + */ if (spu.depth_stencil_alpha.alpha.enabled) { vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref); vector unsigned int amask; @@ -102,7 +105,10 @@ spu_fallback_fragment_ops(uint x, uint y, mask = spu_and(mask, amask); } - /* Z and/or stencil testing... */ + + /* + * Z and/or stencil testing... + */ if (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled) { @@ -178,6 +184,32 @@ spu_fallback_fragment_ops(uint x, uint y, } } + + /* + * If we'll need the current framebuffer/tile colors for blending + * or logicop or colormask, fetch them now. + */ + if (spu.blend.blend_enable || + spu.blend.logicop_enable || + spu.blend.colormask != 0xf) { + +#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ + fbc0 = colorTile->ui[y][x*2+0]; + fbc1 = colorTile->ui[y][x*2+1]; + fbc2 = colorTile->ui[y][x*2+2]; + fbc3 = colorTile->ui[y][x*2+3]; +#else + fbc0 = colorTile->ui[y+0][x+0]; + fbc1 = colorTile->ui[y+0][x+1]; + fbc2 = colorTile->ui[y+1][x+0]; + fbc3 = colorTile->ui[y+1][x+1]; +#endif + } + + + /* + * Do blending + */ if (spu.blend.blend_enable) { /* blending terms, misc regs */ vector float term1r, term1g, term1b, term1a; @@ -186,39 +218,26 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fbRGBA[4]; /* current framebuffer colors */ - /* get colors from framebuffer/tile */ + /* convert framebuffer colors from packed int to vector float */ { - vector float fc[4]; - uint c0, c1, c2, c3; - -#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ - c0 = colorTile->ui[y][x*2+0]; - c1 = colorTile->ui[y][x*2+1]; - c2 = colorTile->ui[y][x*2+2]; - c3 = colorTile->ui[y][x*2+3]; -#else - c0 = colorTile->ui[y+0][x+0]; - c1 = colorTile->ui[y+0][x+1]; - c2 = colorTile->ui[y+1][x+0]; - c3 = colorTile->ui[y+1][x+1]; -#endif + vector float temp[4]; /* float colors in AOS form */ switch (spu.fb.color_format) { case PIPE_FORMAT_B8G8R8A8_UNORM: - fc[0] = spu_unpack_B8G8R8A8(c0); - fc[1] = spu_unpack_B8G8R8A8(c1); - fc[2] = spu_unpack_B8G8R8A8(c2); - fc[3] = spu_unpack_B8G8R8A8(c3); + temp[0] = spu_unpack_B8G8R8A8(fbc0); + temp[1] = spu_unpack_B8G8R8A8(fbc1); + temp[2] = spu_unpack_B8G8R8A8(fbc2); + temp[3] = spu_unpack_B8G8R8A8(fbc3); break; case PIPE_FORMAT_A8R8G8B8_UNORM: - fc[0] = spu_unpack_A8R8G8B8(c0); - fc[1] = spu_unpack_A8R8G8B8(c1); - fc[2] = spu_unpack_A8R8G8B8(c2); - fc[3] = spu_unpack_A8R8G8B8(c3); + temp[0] = spu_unpack_A8R8G8B8(fbc0); + temp[1] = spu_unpack_A8R8G8B8(fbc1); + temp[2] = spu_unpack_A8R8G8B8(fbc2); + temp[3] = spu_unpack_A8R8G8B8(fbc3); break; default: ASSERT(0); } - _transpose_matrix4x4(fbRGBA, fc); + _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */ } /* @@ -384,21 +403,20 @@ spu_fallback_fragment_ops(uint x, uint y, #endif /* - * Pack float colors into 32-bit RGBA words. + * Pack fragment float colors into 32-bit RGBA words. */ switch (spu.fb.color_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - c0 = spu_pack_A8R8G8B8(frag_aos[0]); - c1 = spu_pack_A8R8G8B8(frag_aos[1]); - c2 = spu_pack_A8R8G8B8(frag_aos[2]); - c3 = spu_pack_A8R8G8B8(frag_aos[3]); + fragc0 = spu_pack_A8R8G8B8(frag_aos[0]); + fragc1 = spu_pack_A8R8G8B8(frag_aos[1]); + fragc2 = spu_pack_A8R8G8B8(frag_aos[2]); + fragc3 = spu_pack_A8R8G8B8(frag_aos[3]); break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - c0 = spu_pack_B8G8R8A8(frag_aos[0]); - c1 = spu_pack_B8G8R8A8(frag_aos[1]); - c2 = spu_pack_B8G8R8A8(frag_aos[2]); - c3 = spu_pack_B8G8R8A8(frag_aos[3]); + fragc0 = spu_pack_B8G8R8A8(frag_aos[0]); + fragc1 = spu_pack_B8G8R8A8(frag_aos[1]); + fragc2 = spu_pack_B8G8R8A8(frag_aos[2]); + fragc3 = spu_pack_B8G8R8A8(frag_aos[3]); break; default: fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); @@ -407,20 +425,57 @@ spu_fallback_fragment_ops(uint x, uint y, /* - * Color masking + * Do color masking */ if (spu.blend.colormask != 0xf) { - /* XXX to do */ - /* apply color mask to 32-bit packed colors */ + uint cmask = 0x0; /* each byte corresponds to a color channel */ + + /* Form bitmask depending on color buffer format and colormask bits */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + if (spu.blend.colormask & (1<<0)) + cmask |= 0x00ff0000; /* red */ + if (spu.blend.colormask & (1<<1)) + cmask |= 0x0000ff00; /* green */ + if (spu.blend.colormask & (1<<2)) + cmask |= 0x000000ff; /* blue */ + if (spu.blend.colormask & (1<<3)) + cmask |= 0xff000000; /* alpha */ + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + if (spu.blend.colormask & (1<<0)) + cmask |= 0x0000ff00; /* red */ + if (spu.blend.colormask & (1<<1)) + cmask |= 0x00ff0000; /* green */ + if (spu.blend.colormask & (1<<2)) + cmask |= 0xff000000; /* blue */ + if (spu.blend.colormask & (1<<3)) + cmask |= 0x000000ff; /* alpha */ + break; + default: + ASSERT(0); + } + + /* + * Apply color mask to the 32-bit packed colors. + * if (cmask[i]) + * frag color[i] = frag color[i]; + * else + * frag color[i] = framebuffer color[i]; + */ + fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask); + fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask); + fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask); + fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask); } /* - * Logic Ops + * Do logic ops */ if (spu.blend.logicop_enable) { /* XXX to do */ - /* apply logicop to 32-bit packed colors */ + /* apply logicop to 32-bit packed colors (fragcx and fbcx) */ } @@ -431,45 +486,46 @@ spu_fallback_fragment_ops(uint x, uint y, spu.cur_ctile_status = TILE_STATUS_DIRTY; } else { + /* write no fragments */ return; } /* - * Write new quad colors to the framebuffer/tile. + * Write new fragment/quad colors to the framebuffer/tile. * Only write pixels where the corresponding mask word is set. */ #if LINEAR_QUAD_LAYOUT /* * Quad layout: * +--+--+--+--+ - * |p0|p1|p2|p3| + * |p0|p1|p2|p3|... * +--+--+--+--+ */ if (spu_extract(mask, 0)) - colorTile->ui[y][x*2] = c0; + colorTile->ui[y][x*2] = fragc0; if (spu_extract(mask, 1)) - colorTile->ui[y][x*2+1] = c1; + colorTile->ui[y][x*2+1] = fragc1; if (spu_extract(mask, 2)) - colorTile->ui[y][x*2+2] = c2; + colorTile->ui[y][x*2+2] = fragc2; if (spu_extract(mask, 3)) - colorTile->ui[y][x*2+3] = c3; + colorTile->ui[y][x*2+3] = fragc3; #else /* * Quad layout: * +--+--+ - * |p0|p1| + * |p0|p1|... * +--+--+ - * |p2|p3| + * |p2|p3|... * +--+--+ */ if (spu_extract(mask, 0)) - colorTile->ui[y+0][x+0] = c0; + colorTile->ui[y+0][x+0] = fragc0; if (spu_extract(mask, 1)) - colorTile->ui[y+0][x+1] = c1; + colorTile->ui[y+0][x+1] = fragc1; if (spu_extract(mask, 2)) - colorTile->ui[y+1][x+0] = c2; + colorTile->ui[y+1][x+0] = fragc2; if (spu_extract(mask, 3)) - colorTile->ui[y+1][x+1] = c3; + colorTile->ui[y+1][x+1] = fragc3; #endif } -- cgit v1.2.3 From d598a5d2301faea810a2449db7a32ff48e80b979 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 15:07:04 -0600 Subject: cell: disable invalid spe_release_func() calls, fixes crash on exit --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 475c6ef0ce6..ea820aca744 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -72,7 +72,9 @@ cell_delete_blend_state(struct pipe_context *pipe, void *blend) { struct cell_blend_state *cb = (struct cell_blend_state *) blend; +#if 0 spe_release_func(& cb->code); +#endif FREE(cb); } @@ -128,7 +130,9 @@ cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) struct cell_depth_stencil_alpha_state *cdsa = (struct cell_depth_stencil_alpha_state *) depth; +#if 0 spe_release_func(& cdsa->code); +#endif FREE(cdsa); } -- cgit v1.2.3 From 5ab221549d5cdbf72817ff612464d83256765389 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 16:11:52 -0600 Subject: cell: evaluate multiple fragment inputs --- src/gallium/drivers/cell/spu/spu_tri.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index b7faae6d60c..0a8fb56a62c 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -356,8 +356,14 @@ emit_quad( int x, int y, mask_t mask ) vector float fragZ = eval_z((float) x, (float) y); /* setup inputs */ +#if 0 eval_coeff_soa(1, (float) x, (float) y, inputs); - +#else + uint i; + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + eval_coeff_soa(i+1, (float) x, (float) y, inputs + i * 4); + } +#endif ASSERT(spu.fragment_program); ASSERT(spu.fragment_ops); -- cgit v1.2.3 From af2ca5dc3823269636bfa8377ed971a761096b2e Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 16:31:53 -0600 Subject: cell: initial support for IF/ELSE/ENDIF in fragment shader codegen Only one level of if/else/endif nesting is currently working. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 193 ++++++++++++++++++++++++++--- 1 file changed, 175 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index d7a8846ab3a..8d8dfea0392 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -71,6 +71,11 @@ struct codegen int num_itemps; int itemps[3]; + /** Current IF/ELSE/ENDIF nesting level */ + int if_nesting; + /** Index of execution mask register */ + int exec_mask_reg; + struct spe_function *f; boolean error; }; @@ -112,18 +117,43 @@ get_const_one_reg(struct codegen *gen) { if (gen->one_reg <= 0) { gen->one_reg = spe_allocate_available_register(gen->f); - } - /* one = {1.0, 1.0, 1.0, 1.0} */ - spe_load_float(gen->f, gen->one_reg, 1.0f); + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(gen->f, gen->one_reg, 1.0f); #if DISASSEM - printf("il\tr%d, 1.0f\n", gen->one_reg); + printf("\til\tr%d, 1.0f\n", gen->one_reg); #endif + } return gen->one_reg; } +/** + * Return index of the pixel execution mask. + * The register is allocated an initialized upon the first call. + * + * The pixel execution mask controls which pixels in a quad are + * modified, according to surrounding conditionals, loops, etc. + */ +static int +get_exec_mask_reg(struct codegen *gen) +{ + if (gen->exec_mask_reg <= 0) { + gen->exec_mask_reg = spe_allocate_available_register(gen->f); + + /* exec_mask = {~0, ~0, ~0, ~0} */ + spe_load_int(gen->f, gen->exec_mask_reg, ~0); +#if DISASSEM + printf("INIT EXEC MASK:\n"); + printf("\tload\tr%d, 0x%x\n", gen->exec_mask_reg, ~0); +#endif + } + + return gen->exec_mask_reg; +} + + /** * Return the index of the SPU temporary containing the named TGSI * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we @@ -136,7 +166,7 @@ get_src_reg(struct codegen *gen, int channel, const struct tgsi_full_src_register *src) { - int reg; + int reg = -1; int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); assert(swizzle >= 0); @@ -156,7 +186,7 @@ get_src_reg(struct codegen *gen, /* Load: reg = memory[(machine_reg) + offset] */ spe_lqd(gen->f, reg, gen->inputs_reg, offset); #if DISASSEM - printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset); + printf("\tlqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset); #endif } break; @@ -184,11 +214,14 @@ get_dst_reg(struct codegen *gen, int channel, const struct tgsi_full_dst_register *dest) { - int reg; + int reg = -1; switch (dest->DstRegister.File) { case TGSI_FILE_TEMPORARY: - reg = gen->temp_regs[dest->DstRegister.Index][channel]; + if (gen->if_nesting > 0) + reg = get_itemp(gen); + else + reg = gen->temp_regs[dest->DstRegister.Index][channel]; break; case TGSI_FILE_OUTPUT: reg = get_itemp(gen); @@ -214,17 +247,56 @@ store_dest_reg(struct codegen *gen, { switch (dest->DstRegister.File) { case TGSI_FILE_TEMPORARY: - /* no-op */ + if (gen->if_nesting > 0) { + int d_reg = gen->temp_regs[dest->DstRegister.Index][channel]; + int exec_reg = get_exec_mask_reg(gen); + /* Mix d with new value according to exec mask: + * d[i] = mask_reg[i] ? value_reg : d_reg + */ + spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); +#if DISASSEM + printf("\tselb\tr%d, r%d, r%d, r%d # EXEC MASK'ed\n", + d_reg, d_reg, value_reg, exec_reg); +#endif + } + else { + /* we're not inside a condition or loop: do nothing special */ + } break; case TGSI_FILE_OUTPUT: { /* offset is measured in quadwords, not bytes */ int offset = dest->DstRegister.Index * 4 + channel; - /* Store: memory[(machine_reg) + offset] = reg */ - spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); + if (gen->if_nesting > 0) { + int exec_reg = get_exec_mask_reg(gen); + int curval_reg = get_itemp(gen); + /* First read the current value from memory: + * Load: curval = memory[(machine_reg) + offset] + */ + spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset); + /* Mix curval with newvalue according to exec mask: + * d[i] = mask_reg[i] ? value_reg : d_reg + */ + spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); + /* Store: memory[(machine_reg) + offset] = curval */ + spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset); #if DISASSEM - printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset); + printf("\tlqd\tr%d, r%d + %d\n", + curval_reg, gen->outputs_reg, offset); + printf("\tselb\tr%d, r%d, r%d, r%d # EXEC MASK'ed\n", + curval_reg, curval_reg, value_reg, exec_reg); + printf("\tstqd\tr%d, r%d + %d\n", + curval_reg, gen->outputs_reg, offset); #endif + } + else { + /* Store: memory[(machine_reg) + offset] = reg */ + spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); +#if DISASSEM + printf("\tstqd\tr%d, r%d + %d\n", + value_reg, gen->outputs_reg, offset); +#endif + } } break; default: @@ -237,6 +309,9 @@ static boolean emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; +#if DISASSEM + printf("MOV:\n"); +#endif for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -244,7 +319,7 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) /* XXX we don't always need to actually emit a mov instruction here */ spe_move(gen->f, dst_reg, src_reg); #if DISASSEM - printf("mov\tr%d, r%d\n", dst_reg, src_reg); + printf("\tmov\tr%d, r%d\n", dst_reg, src_reg); #endif store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -254,6 +329,7 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) } + /** * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD * becomes (up to) four SPU "fa" instructions because we're doing SOA @@ -263,6 +339,9 @@ static boolean emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; +#if DISASSEM + printf("ADD:\n"); +#endif /* Loop over Red/Green/Blue/Alpha channels */ for (ch = 0; ch < 4; ch++) { /* If the dest R, G, B or A writemask is enabled... */ @@ -275,7 +354,7 @@ emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) /* Emit actual SPE instruction: d = s1 + s2 */ spe_fa(gen->f, d_reg, s1_reg, s2_reg); #if DISASSEM - printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); + printf("\tfa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); #endif /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ @@ -295,6 +374,9 @@ static boolean emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; +#if DISASSEM + printf("MUL:\n"); +#endif for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -303,7 +385,7 @@ emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) /* d = s1 * s2 */ spe_fm(gen->f, d_reg, s1_reg, s2_reg); #if DISASSEM - printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); + printf("\tfm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); #endif store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -324,6 +406,9 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; +#if DISASSEM + printf("SGT:\n"); +#endif for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -333,14 +418,14 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) /* d = (s1 > s2) */ spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); #if DISASSEM - printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); + printf("\tfcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); #endif /* convert d from 0x0/0xffffffff to 0.0/1.0 */ /* d = d & one_reg */ spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); #if DISASSEM - printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen)); + printf("\tand\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen)); #endif store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); @@ -352,6 +437,71 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) } +static boolean +emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int channel = 0; + const int exec_reg = get_exec_mask_reg(gen); + + /* update execution mask with the predicate register */ + int tmp_reg = spe_allocate_available_register(gen->f); + int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); + + /* tmp = (s1_reg == 0) */ + spe_ceqi(gen->f, tmp_reg, s1_reg, 0); + /* tmp = !tmp */ + spe_complement(gen->f, tmp_reg); + /* exec_mask = exec_mask & tmp */ + spe_and(gen->f, exec_reg, exec_reg, tmp_reg); +#if DISASSEM + printf("IF:\n"); + printf("\tseqi\tr%d, r%d, 0;\n", tmp_reg, s1_reg); + printf("\tcomp\tr%d\n", tmp_reg); + printf("\tand\tr%d, r%d, r%d\n", exec_reg, exec_reg, tmp_reg); +#endif + + gen->if_nesting++; + + spe_release_register(gen->f, tmp_reg); + + return true; +} + + +static boolean +emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int exec_reg = get_exec_mask_reg(gen); + + /* exec_mask = !exec_mask */ + spe_complement(gen->f, exec_reg); +#if DISASSEM + printf("ELSE:\n"); + printf("\tcomp\tr%d;\n", exec_reg); +#endif + return true; +} + + +static boolean +emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int exec_reg = get_exec_mask_reg(gen); + + /* XXX todo: pop execution mask */ + + spe_load_int(gen->f, exec_reg, ~0x0); +#if DISASSEM + printf("ENDIF:\n"); + printf("\tli\tr%d, ~0x0\n", exec_reg); +#endif + + gen->if_nesting--; + return true; +} + + + /** * Emit END instruction. * We just return from the shader function at this point. @@ -365,7 +515,7 @@ emit_END(struct codegen *gen) /* return from function call */ spe_bi(gen->f, SPE_REG_RA, 0, 0); #if DISASSEM - printf("bi\trRA\n"); + printf("\tbi\trRA\n"); #endif return true; } @@ -390,6 +540,13 @@ emit_instruction(struct codegen *gen, case TGSI_OPCODE_END: return emit_END(gen); + case TGSI_OPCODE_IF: + return emit_IF(gen, inst); + case TGSI_OPCODE_ELSE: + return emit_ELSE(gen, inst); + case TGSI_OPCODE_ENDIF: + return emit_ENDIF(gen, inst); + /* XXX lots more cases to do... */ default: -- cgit v1.2.3 From 5f3ec823385b34b8db6013fdf701c5522dc86524 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 17:10:20 -0600 Subject: cell: implement TGSI immediates in SPE code generator --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 75 +++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 8d8dfea0392..33579fc7033 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -52,7 +52,11 @@ /** Set to 1 to enable debug/disassembly printfs */ -#define DISASSEM 01 +#define DISASSEM 0 + + +#define MAX_TEMPS 16 +#define MAX_IMMED 8 /** @@ -63,7 +67,10 @@ struct codegen int inputs_reg; /**< 1st function parameter */ int outputs_reg; /**< 2nd function parameter */ int constants_reg; /**< 3rd function parameter */ - int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */ + int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */ + int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */ + + int num_imm; /**< number of immediates */ int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */ @@ -191,7 +198,8 @@ get_src_reg(struct codegen *gen, } break; case TGSI_FILE_IMMEDIATE: - /* xxx fall-through for now / fix */ + reg = gen->imm_regs[src->SrcRegister.Index][channel]; + break; case TGSI_FILE_CONSTANT: /* xxx fall-through for now / fix */ default: @@ -558,12 +566,53 @@ emit_instruction(struct codegen *gen, +/** + * Emit code for a TGSI immediate value (vector of four floats). + * This involves register allocation and initialization. + * XXX the initialization should be done by a "prepare" stage, not + * per quad execution! + */ +static boolean +emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) +{ + int ch; + + assert(gen->num_imm < MAX_TEMPS); + +#if DISASSEM + printf("IMMEDIATE %d:\n", gen->num_imm); +#endif + + for (ch = 0; ch < 4; ch++) { + float val = immed->u.ImmediateFloat32[ch].Float; + int reg = spe_allocate_available_register(gen->f); + + if (reg < 0) + return false; + + /* update immediate map */ + gen->imm_regs[gen->num_imm][ch] = reg; + + /* emit initializer instruction */ + spe_load_float(gen->f, reg, val); +#if DISASSEM + printf("\tload\tr%d, %f\n", reg, val); +#endif + } + + gen->num_imm++; + + return true; +} + + + /** * Emit "code" for a TGSI declaration. * We only care about TGSI TEMPORARY register declarations at this time. * For each TGSI TEMPORARY we allocate four SPE registers. */ -static void +static boolean emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl) { int i, ch; @@ -578,8 +627,11 @@ emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl) for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { + assert(i < MAX_TEMPS); for (ch = 0; ch < 4; ch++) { gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); + if (gen->temp_regs[i][ch] < 0) + return false; /* out of regs */ } /* XXX if we run out of SPE registers, we need to spill @@ -598,6 +650,8 @@ emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl) default: ; /* ignore */ } + + return true; } @@ -642,25 +696,22 @@ cell_gen_fragment_program(struct cell_context *cell, switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: -#if 0 - if (!note_immediate(&gen, &parse.FullToken.FullImmediate )) - goto fail; -#endif + if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) + gen.error = true; break; case TGSI_TOKEN_TYPE_DECLARATION: - emit_declaration(&gen, &parse.FullToken.FullDeclaration); + if (!emit_declaration(&gen, &parse.FullToken.FullDeclaration)) + gen.error = true; break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) { + if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) gen.error = true; - } break; default: assert(0); - } } -- cgit v1.2.3 From 6f3eee921327ce76c05620eec714f2ff4f500826 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 21:09:10 -0600 Subject: cell: implement DDX/DDY codegen (untested) --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 37 ++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 33579fc7033..a7b7dd03d37 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -509,6 +509,38 @@ emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) } +static boolean +emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, + boolean ddx) +{ + int ch; + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + int t1_reg = get_itemp(gen); + int t2_reg = get_itemp(gen); + + spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ + if (ddx) { + spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ + } + else { + spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ + } + spe_fs(gen->f, d_reg, t2_reg, t1_reg); + + free_itemps(gen); + } + } + + return true; +} + + + /** * Emit END instruction. @@ -555,6 +587,11 @@ emit_instruction(struct codegen *gen, case TGSI_OPCODE_ENDIF: return emit_ENDIF(gen, inst); + case TGSI_OPCODE_DDX: + return emit_DDX_DDY(gen, inst, true); + case TGSI_OPCODE_DDY: + return emit_DDX_DDY(gen, inst, false); + /* XXX lots more cases to do... */ default: -- cgit v1.2.3 From 8d768c51018841b66dbed87ae6b50358e53ad2c4 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 12 Sep 2008 21:54:25 -0600 Subject: cell: remove old disassembly/dump code; use dumper code in SPE emitter. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 104 ++++++++--------------------- 1 file changed, 28 insertions(+), 76 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index a7b7dd03d37..0712d05b40b 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -125,11 +125,13 @@ get_const_one_reg(struct codegen *gen) if (gen->one_reg <= 0) { gen->one_reg = spe_allocate_available_register(gen->f); + spe_indent(gen->f, 4); + spe_comment(gen->f, -4, "INIT CONSTANT 1.0:"); + /* one = {1.0, 1.0, 1.0, 1.0} */ spe_load_float(gen->f, gen->one_reg, 1.0f); -#if DISASSEM - printf("\til\tr%d, 1.0f\n", gen->one_reg); -#endif + + spe_indent(gen->f, -4); } return gen->one_reg; @@ -149,12 +151,13 @@ get_exec_mask_reg(struct codegen *gen) if (gen->exec_mask_reg <= 0) { gen->exec_mask_reg = spe_allocate_available_register(gen->f); + spe_indent(gen->f, 4); + spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:"); + /* exec_mask = {~0, ~0, ~0, ~0} */ spe_load_int(gen->f, gen->exec_mask_reg, ~0); -#if DISASSEM - printf("INIT EXEC MASK:\n"); - printf("\tload\tr%d, 0x%x\n", gen->exec_mask_reg, ~0); -#endif + + spe_indent(gen->f, -4); } return gen->exec_mask_reg; @@ -192,9 +195,6 @@ get_src_reg(struct codegen *gen, reg = get_itemp(gen); /* Load: reg = memory[(machine_reg) + offset] */ spe_lqd(gen->f, reg, gen->inputs_reg, offset); -#if DISASSEM - printf("\tlqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset); -#endif } break; case TGSI_FILE_IMMEDIATE: @@ -262,10 +262,6 @@ store_dest_reg(struct codegen *gen, * d[i] = mask_reg[i] ? value_reg : d_reg */ spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); -#if DISASSEM - printf("\tselb\tr%d, r%d, r%d, r%d # EXEC MASK'ed\n", - d_reg, d_reg, value_reg, exec_reg); -#endif } else { /* we're not inside a condition or loop: do nothing special */ @@ -288,22 +284,10 @@ store_dest_reg(struct codegen *gen, spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); /* Store: memory[(machine_reg) + offset] = curval */ spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset); -#if DISASSEM - printf("\tlqd\tr%d, r%d + %d\n", - curval_reg, gen->outputs_reg, offset); - printf("\tselb\tr%d, r%d, r%d, r%d # EXEC MASK'ed\n", - curval_reg, curval_reg, value_reg, exec_reg); - printf("\tstqd\tr%d, r%d + %d\n", - curval_reg, gen->outputs_reg, offset); -#endif } else { /* Store: memory[(machine_reg) + offset] = reg */ spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); -#if DISASSEM - printf("\tstqd\tr%d, r%d + %d\n", - value_reg, gen->outputs_reg, offset); -#endif } } break; @@ -317,18 +301,13 @@ static boolean emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; -#if DISASSEM - printf("MOV:\n"); -#endif + spe_comment(gen->f, -4, "MOV:"); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); /* XXX we don't always need to actually emit a mov instruction here */ spe_move(gen->f, dst_reg, src_reg); -#if DISASSEM - printf("\tmov\tr%d, r%d\n", dst_reg, src_reg); -#endif store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); } @@ -347,9 +326,7 @@ static boolean emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; -#if DISASSEM - printf("ADD:\n"); -#endif + spe_comment(gen->f, -4, "ADD:"); /* Loop over Red/Green/Blue/Alpha channels */ for (ch = 0; ch < 4; ch++) { /* If the dest R, G, B or A writemask is enabled... */ @@ -361,9 +338,6 @@ emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) /* Emit actual SPE instruction: d = s1 + s2 */ spe_fa(gen->f, d_reg, s1_reg, s2_reg); -#if DISASSEM - printf("\tfa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); -#endif /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); @@ -382,9 +356,7 @@ static boolean emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; -#if DISASSEM - printf("MUL:\n"); -#endif + spe_comment(gen->f, -4, "MUL:"); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -392,9 +364,6 @@ emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); /* d = s1 * s2 */ spe_fm(gen->f, d_reg, s1_reg, s2_reg); -#if DISASSEM - printf("\tfm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); -#endif store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); } @@ -414,9 +383,8 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; -#if DISASSEM - printf("SGT:\n"); -#endif + spe_comment(gen->f, -4, "SGT:"); + for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -425,16 +393,10 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) /* d = (s1 > s2) */ spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); -#if DISASSEM - printf("\tfcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); -#endif /* convert d from 0x0/0xffffffff to 0.0/1.0 */ /* d = d & one_reg */ spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); -#if DISASSEM - printf("\tand\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen)); -#endif store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -451,6 +413,8 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) const int channel = 0; const int exec_reg = get_exec_mask_reg(gen); + spe_comment(gen->f, -4, "IF:"); + /* update execution mask with the predicate register */ int tmp_reg = spe_allocate_available_register(gen->f); int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); @@ -461,12 +425,6 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_complement(gen->f, tmp_reg); /* exec_mask = exec_mask & tmp */ spe_and(gen->f, exec_reg, exec_reg, tmp_reg); -#if DISASSEM - printf("IF:\n"); - printf("\tseqi\tr%d, r%d, 0;\n", tmp_reg, s1_reg); - printf("\tcomp\tr%d\n", tmp_reg); - printf("\tand\tr%d, r%d, r%d\n", exec_reg, exec_reg, tmp_reg); -#endif gen->if_nesting++; @@ -481,12 +439,11 @@ emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) { const int exec_reg = get_exec_mask_reg(gen); + spe_comment(gen->f, -4, "ELSE:"); + /* exec_mask = !exec_mask */ spe_complement(gen->f, exec_reg); -#if DISASSEM - printf("ELSE:\n"); - printf("\tcomp\tr%d;\n", exec_reg); -#endif + return true; } @@ -496,13 +453,11 @@ emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) { const int exec_reg = get_exec_mask_reg(gen); + spe_comment(gen->f, -4, "ENDIF:"); + /* XXX todo: pop execution mask */ spe_load_int(gen->f, exec_reg, ~0x0); -#if DISASSEM - printf("ENDIF:\n"); - printf("\tli\tr%d, ~0x0\n", exec_reg); -#endif gen->if_nesting--; return true; @@ -515,6 +470,8 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, { int ch; + spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:"); + for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -552,11 +509,9 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, static boolean emit_END(struct codegen *gen) { + spe_comment(gen->f, -4, "END:"); /* return from function call */ spe_bi(gen->f, SPE_REG_RA, 0, 0); -#if DISASSEM - printf("\tbi\trRA\n"); -#endif return true; } @@ -616,9 +571,7 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) assert(gen->num_imm < MAX_TEMPS); -#if DISASSEM - printf("IMMEDIATE %d:\n", gen->num_imm); -#endif + spe_comment(gen->f, -4, "IMMEDIATE:"); for (ch = 0; ch < 4; ch++) { float val = immed->u.ImmediateFloat32[ch].Float; @@ -632,9 +585,6 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) /* emit initializer instruction */ spe_load_float(gen->f, reg, val); -#if DISASSEM - printf("\tload\tr%d, %f\n", reg, val); -#endif } gen->num_imm++; @@ -722,6 +672,8 @@ cell_gen_fragment_program(struct cell_context *cell, spe_allocate_register(f, gen.constants_reg); #if DISASSEM + spe_print_code(f, true); + spe_indent(f, 8); printf("Begin %s\n", __FUNCTION__); tgsi_dump(tokens, 0); #endif -- cgit v1.2.3 From 777aca8fc99986dacf043cc3c25911df4252cb42 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Sat, 13 Sep 2008 15:32:46 -0600 Subject: cell: implement negation, absolute value and set-sign for src regs in code gen --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 44 ++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 0712d05b40b..8d8c095a7e2 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -178,6 +178,8 @@ get_src_reg(struct codegen *gen, { int reg = -1; int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + boolean reg_is_itemp = FALSE; + uint sign_op; assert(swizzle >= 0); assert(swizzle <= 3); @@ -193,6 +195,7 @@ get_src_reg(struct codegen *gen, /* offset is measured in quadwords, not bytes */ int offset = src->SrcRegister.Index * 4 + channel; reg = get_itemp(gen); + reg_is_itemp = TRUE; /* Load: reg = memory[(machine_reg) + offset] */ spe_lqd(gen->f, reg, gen->inputs_reg, offset); } @@ -206,6 +209,43 @@ get_src_reg(struct codegen *gen, assert(0); } + /* + * Handle absolute value, negate or set-negative of src register. + */ + sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); + if (sign_op != TGSI_UTIL_SIGN_KEEP) { + /* + * All sign ops are done by manipulating bit 31, the IEEE float sign bit. + */ + const int bit31mask_reg = get_itemp(gen); + int result_reg; + + if (reg_is_itemp) { + /* re-use 'reg' for the result */ + result_reg = reg; + } + else { + /* alloc a new reg for the result */ + result_reg = get_itemp(gen); + } + + /* mask with bit 31 set, the rest cleared */ + spe_load_int(gen->f, bit31mask_reg, (1 << 31)); + + if (sign_op == TGSI_UTIL_SIGN_CLEAR) { + spe_andc(gen->f, result_reg, reg, bit31mask_reg); + } + else if (sign_op == TGSI_UTIL_SIGN_SET) { + spe_and(gen->f, result_reg, reg, bit31mask_reg); + } + else { + assert(sign_op == TGSI_UTIL_SIGN_TOGGLE); + spe_xor(gen->f, result_reg, reg, bit31mask_reg); + } + + reg = result_reg; + } + return reg; } @@ -416,7 +456,7 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_comment(gen->f, -4, "IF:"); /* update execution mask with the predicate register */ - int tmp_reg = spe_allocate_available_register(gen->f); + int tmp_reg = get_itemp(gen); int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); /* tmp = (s1_reg == 0) */ @@ -428,7 +468,7 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) gen->if_nesting++; - spe_release_register(gen->f, tmp_reg); + free_itemps(gen); return true; } -- cgit v1.2.3 From 5a4ab148a76f6c6d33b9784f99531a6bf2d9101b Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Mon, 15 Sep 2008 11:56:51 -0600 Subject: Added support for SUB and MAD instructions --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 61 ++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 8d8c095a7e2..9eae57bb76b 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -52,7 +52,7 @@ /** Set to 1 to enable debug/disassembly printfs */ -#define DISASSEM 0 +#define DISASSEM 1 #define MAX_TEMPS 16 @@ -76,7 +76,7 @@ struct codegen /** Per-instruction temps / intermediate temps */ int num_itemps; - int itemps[3]; + int itemps[4]; /** Current IF/ELSE/ENDIF nesting level */ int if_nesting; @@ -388,6 +388,58 @@ emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit subtract. See emit_ADD for comments. + */ +static boolean +emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "SUB:"); + /* Loop over Red/Green/Blue/Alpha channels */ + for (ch = 0; ch < 4; ch++) { + /* If the dest R, G, B or A writemask is enabled... */ + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + /* get indexes of the two src, one dest SPE registers */ + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* Emit actual SPE instruction: d = s1 - s2 */ + spe_fs(gen->f, d_reg, s1_reg, s2_reg); + + /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + /* Free any intermediate temps we allocated */ + free_itemps(gen); + } + } + return true; +} + +/** + * Emit multiply add. See emit_ADD for comments. + */ +static boolean +emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "MUL:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* d = s1 * s2 + s3 */ + spe_fma(gen->f, d_reg, s1_reg, s2_reg, s3_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + /** * Emit multiply. See emit_ADD for comments. @@ -411,7 +463,6 @@ emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } - /** * Emit set-if-greater-than. * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as @@ -570,6 +621,10 @@ emit_instruction(struct codegen *gen, return emit_MUL(gen, inst); case TGSI_OPCODE_ADD: return emit_ADD(gen, inst); + case TGSI_OPCODE_SUB: + return emit_SUB(gen, inst); + case TGSI_OPCODE_MAD: + return emit_MAD(gen, inst); case TGSI_OPCODE_SGT: return emit_SGT(gen, inst); case TGSI_OPCODE_END: -- cgit v1.2.3 From 0a75773fed3f2d74d697fae5aee9ae8f18298631 Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Mon, 15 Sep 2008 12:27:10 -0600 Subject: cell: Added support for ABS instruction --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 31 +++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 9eae57bb76b..33f3c74b569 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -424,7 +424,7 @@ static boolean emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; - spe_comment(gen->f, -4, "MUL:"); + spe_comment(gen->f, -4, "MAD:"); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -463,6 +463,33 @@ emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit absolute value. See emit_ADD for comments. + */ +static boolean +emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "ABS:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + const int bit31mask_reg = get_itemp(gen); + + /* mask with bit 31 set, the rest cleared */ + spe_load_int(gen->f, bit31mask_reg, (1 << 31)); + + /* d = sign bit cleared in s1 */ + spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + /** * Emit set-if-greater-than. * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as @@ -625,6 +652,8 @@ emit_instruction(struct codegen *gen, return emit_SUB(gen, inst); case TGSI_OPCODE_MAD: return emit_MAD(gen, inst); + case TGSI_OPCODE_ABS: + return emit_ABS(gen, inst); case TGSI_OPCODE_SGT: return emit_SGT(gen, inst); case TGSI_OPCODE_END: -- cgit v1.2.3 From 81aa90e8837128423e37a776cdfbf63b0604903f Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Mon, 15 Sep 2008 13:45:09 -0600 Subject: cell: Added support for SLT, SEQ and SNE instructions --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 100 +++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 33f3c74b569..c48200d5ccc 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -524,6 +524,100 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit set-if_less-then. See emit_SGT for comments. + */ +static boolean +emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SLT:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 < s2) */ + spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & one_reg */ + spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit set-if_equal. See emit_SGT for comments. + */ +static boolean +emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SEQ:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 == s2) */ + spe_fceq(gen->f, d_reg, s1_reg, s2_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & one_reg */ + spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit set-if_not_equal. See emit_SGT for comments. + */ +static boolean +emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SNE:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 != s2) */ + spe_fceq(gen->f, d_reg, s1_reg, s2_reg); + spe_nor(gen->f, d_reg, d_reg, d_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & one_reg */ + spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + static boolean emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) @@ -656,6 +750,12 @@ emit_instruction(struct codegen *gen, return emit_ABS(gen, inst); case TGSI_OPCODE_SGT: return emit_SGT(gen, inst); + case TGSI_OPCODE_SLT: + return emit_SLT(gen, inst); + case TGSI_OPCODE_SEQ: + return emit_SEQ(gen, inst); + case TGSI_OPCODE_SNE: + return emit_SNE(gen, inst); case TGSI_OPCODE_END: return emit_END(gen); -- cgit v1.2.3 From e67374b6b2f6fd846c368ec70e80f0f4cf508f97 Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Mon, 15 Sep 2008 15:45:51 -0600 Subject: cell: Added LERP instruction --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 92 +++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index c48200d5ccc..7a672478c5d 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -52,7 +52,7 @@ /** Set to 1 to enable debug/disassembly printfs */ -#define DISASSEM 1 +#define DISASSEM 0 #define MAX_TEMPS 16 @@ -441,6 +441,31 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) } +/** + * Emit linear interpolate. See emit_ADD for comments. + */ +static boolean +emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "LERP:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* d = s3 + s1(s2 - s3) */ + spe_fs(gen->f, d_reg, s2_reg, s3_reg); + spe_fm(gen->f, d_reg, d_reg, s1_reg); + spe_fa(gen->f, d_reg, d_reg, s3_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + /** * Emit multiply. See emit_ADD for comments. */ @@ -618,6 +643,65 @@ emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit max. See emit_SGT for comments. + */ +static boolean +emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "MAX:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 > s2) ? s1 : s2 */ + spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); + spe_and(gen->f, d_reg, d_reg, s1_reg); + spe_nor(gen->f, d_reg, d_reg, d_reg); + spe_and(gen->f, d_reg, d_reg, s2_reg); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit max. See emit_SGT for comments. + */ +static boolean +emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "MIN:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 < s2) ? s1 : s2 */ + spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); + spe_and(gen->f, d_reg, d_reg, s1_reg); + spe_nor(gen->f, d_reg, d_reg, d_reg); + spe_and(gen->f, d_reg, d_reg, s2_reg); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} static boolean emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) @@ -746,6 +830,8 @@ emit_instruction(struct codegen *gen, return emit_SUB(gen, inst); case TGSI_OPCODE_MAD: return emit_MAD(gen, inst); + case TGSI_OPCODE_LERP: + return emit_LERP(gen, inst); case TGSI_OPCODE_ABS: return emit_ABS(gen, inst); case TGSI_OPCODE_SGT: @@ -756,6 +842,10 @@ emit_instruction(struct codegen *gen, return emit_SEQ(gen, inst); case TGSI_OPCODE_SNE: return emit_SNE(gen, inst); + case TGSI_OPCODE_MAX: + return emit_MAX(gen, inst); + case TGSI_OPCODE_MIN: + return emit_MIN(gen, inst); case TGSI_OPCODE_END: return emit_END(gen); -- cgit v1.2.3 From 32250eb959b1355b2f6984ea892a86a6ecf9d3c3 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 15 Sep 2008 19:38:39 -0600 Subject: cell: export CELL_DEBUG=asm to dump SPU assembly code --- src/gallium/drivers/cell/common.h | 3 +- src/gallium/drivers/cell/ppu/cell_context.c | 1 + src/gallium/drivers/cell/ppu/cell_gen_fp.c | 56 ++++++++++++++--------------- 3 files changed, 31 insertions(+), 29 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index cb0631baf52..8f088541173 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -106,7 +106,8 @@ #define CELL_DEBUG_CHECKER (1 << 0) -#define CELL_DEBUG_SYNC (1 << 1) +#define CELL_DEBUG_ASM (1 << 1) +#define CELL_DEBUG_SYNC (1 << 2) diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 0a5c0baa471..b418271dca2 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -87,6 +87,7 @@ cell_draw_create(struct cell_context *cell) static const struct debug_named_value cell_debug_flags[] = { {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ + {"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */ {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ {NULL, 0} }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 7a672478c5d..98ee5af2790 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -51,10 +51,6 @@ #include "cell_gen_fp.h" -/** Set to 1 to enable debug/disassembly printfs */ -#define DISASSEM 0 - - #define MAX_TEMPS 16 #define MAX_IMMED 8 @@ -864,6 +860,8 @@ emit_instruction(struct codegen *gen, /* XXX lots more cases to do... */ default: + fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n", + inst->Instruction.Opcode); return false; } @@ -914,17 +912,19 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) * For each TGSI TEMPORARY we allocate four SPE registers. */ static boolean -emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl) +emit_declaration(struct cell_context *cell, + struct codegen *gen, const struct tgsi_full_declaration *decl) { int i, ch; switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: -#if DISASSEM - printf("Declare temp reg %d .. %d\n", - decl->DeclarationRange.First, - decl->DeclarationRange.Last); -#endif + if (cell->debug_flags & CELL_DEBUG_ASM) { + printf("Declare temp reg %d .. %d\n", + decl->DeclarationRange.First, + decl->DeclarationRange.Last); + } + for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { @@ -939,13 +939,13 @@ emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl) * to SPU memory. someday... */ -#if DISASSEM - printf(" SPE regs: %d %d %d %d\n", - gen->temp_regs[i][0], - gen->temp_regs[i][1], - gen->temp_regs[i][2], - gen->temp_regs[i][3]); -#endif + if (cell->debug_flags & CELL_DEBUG_ASM) { + printf(" SPE regs: %d %d %d %d\n", + gen->temp_regs[i][0], + gen->temp_regs[i][1], + gen->temp_regs[i][2], + gen->temp_regs[i][3]); + } } break; default: @@ -985,12 +985,12 @@ cell_gen_fragment_program(struct cell_context *cell, spe_allocate_register(f, gen.outputs_reg); spe_allocate_register(f, gen.constants_reg); -#if DISASSEM - spe_print_code(f, true); - spe_indent(f, 8); - printf("Begin %s\n", __FUNCTION__); - tgsi_dump(tokens, 0); -#endif + if (cell->debug_flags & CELL_DEBUG_ASM) { + spe_print_code(f, true); + spe_indent(f, 8); + printf("Begin %s\n", __FUNCTION__); + tgsi_dump(tokens, 0); + } tgsi_parse_init(&parse, tokens); @@ -1004,7 +1004,7 @@ cell_gen_fragment_program(struct cell_context *cell, break; case TGSI_TOKEN_TYPE_DECLARATION: - if (!emit_declaration(&gen, &parse.FullToken.FullDeclaration)) + if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration)) gen.error = true; break; @@ -1024,10 +1024,10 @@ cell_gen_fragment_program(struct cell_context *cell, return emit_END(&gen); } -#if DISASSEM - printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); - printf("End %s\n", __FUNCTION__); -#endif + if (cell->debug_flags & CELL_DEBUG_ASM) { + printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); + printf("End %s\n", __FUNCTION__); + } tgsi_parse_free( &parse ); -- cgit v1.2.3 From eef2edadf33cdb4ce033565ebc5b6aaf56e9288f Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 16 Sep 2008 20:24:43 +0900 Subject: trace: Fix typo in build instructions. --- src/gallium/drivers/trace/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index e7a2f12b022..f0e1cd596d3 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -10,7 +10,7 @@ This directory contains a Gallium3D pipe driver which traces all incoming calls. To build, invoke scons on the top dir as - scons statetrackers=mesa drivers=softpipe,i915simple,trace winsys=xlib + scons statetrackers=mesa drivers=softpipe,i965simple,trace winsys=xlib = Usage = -- cgit v1.2.3 From 2c54a6ee798ae22f92ef1fc4a1658ec5e701388a Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Tue, 16 Sep 2008 09:36:38 -0600 Subject: cell: Fixed MIN/MAX algorithm --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 98ee5af2790..612749507be 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -657,9 +657,7 @@ emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) /* d = (s1 > s2) ? s1 : s2 */ spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); - spe_and(gen->f, d_reg, d_reg, s1_reg); - spe_nor(gen->f, d_reg, d_reg, d_reg); - spe_and(gen->f, d_reg, d_reg, s2_reg); + spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -685,11 +683,9 @@ emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst) int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - /* d = (s1 < s2) ? s1 : s2 */ + /* d = (s2 > s1) ? s1 : s2 */ spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); - spe_and(gen->f, d_reg, d_reg, s1_reg); - spe_nor(gen->f, d_reg, d_reg, d_reg); - spe_and(gen->f, d_reg, d_reg, s2_reg); + spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); -- cgit v1.2.3 From dd75ca89ebce58a69da20c1efbf2a53575b2c96e Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Tue, 16 Sep 2008 09:42:28 -0600 Subject: cell: Optimized LERP with fma Please enter the commit message for your changes. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 612749507be..a80d8ff5d69 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -453,8 +453,7 @@ emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); /* d = s3 + s1(s2 - s3) */ spe_fs(gen->f, d_reg, s2_reg, s3_reg); - spe_fm(gen->f, d_reg, d_reg, s1_reg); - spe_fa(gen->f, d_reg, d_reg, s3_reg); + spe_fma(gen->f, d_reg, d_reg, s1_reg, s3_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); } @@ -657,7 +656,7 @@ emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) /* d = (s1 > s2) ? s1 : s2 */ spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); - spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg); + spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); -- cgit v1.2.3 From fbbaad14a6b6de07631d5a9fd6e4b847a9e9dd5a Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Tue, 16 Sep 2008 13:56:56 -0600 Subject: cell: Added DP3 and DP4 instructions --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 81 ++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index a80d8ff5d69..34d283b51ed 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -54,6 +54,10 @@ #define MAX_TEMPS 16 #define MAX_IMMED 8 +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 /** * Context needed during code generation. @@ -510,6 +514,79 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit 3 component dot product. See emit_ADD for comments. + */ +static boolean +emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "DP3:"); + + int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]); + /* d = x * x */ + spe_fm(gen->f, d_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + /* d = y * y + d */ + spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + + s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* d = z * z + d */ + spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + +/** + * Emit 4 component dot product. See emit_ADD for comments. + */ +static boolean +emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "DP3:"); + + int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]); + /* d = x * x */ + spe_fm(gen->f, d_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + /* d = y * y + d */ + spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + + s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* d = z * z + d */ + spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + + s1_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + /* d = w * w + d */ + spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + /** * Emit set-if-greater-than. * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as @@ -823,6 +900,10 @@ emit_instruction(struct codegen *gen, return emit_MAD(gen, inst); case TGSI_OPCODE_LERP: return emit_LERP(gen, inst); + case TGSI_OPCODE_DP3: + return emit_DP3(gen, inst); + case TGSI_OPCODE_DP4: + return emit_DP4(gen, inst); case TGSI_OPCODE_ABS: return emit_ABS(gen, inst); case TGSI_OPCODE_SGT: -- cgit v1.2.3 From a3a797ffa84975330d5632ce7a71c65c9c2ad0d8 Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Tue, 16 Sep 2008 16:00:42 -0600 Subject: cell: Added RCP and RSQ instruction support. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 48 ++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 34d283b51ed..77386b30250 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -487,6 +487,50 @@ emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit reciprocal. See emit_ADD for comments. + */ +static boolean +emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "RCP:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* d = 1/s1 */ + spe_frest(gen->f, d_reg, s1_reg); + spe_fi(gen->f, d_reg, s1_reg, d_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + +/** + * Emit reciprocal sqrt. See emit_ADD for comments. + */ +static boolean +emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "RSQ:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* d = 1/s1 */ + spe_frsqest(gen->f, d_reg, s1_reg); + spe_fi(gen->f, d_reg, s1_reg, d_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + /** * Emit absolute value. See emit_ADD for comments. */ @@ -904,6 +948,10 @@ emit_instruction(struct codegen *gen, return emit_DP3(gen, inst); case TGSI_OPCODE_DP4: return emit_DP4(gen, inst); + case TGSI_OPCODE_RCP: + return emit_RCP(gen, inst); + case TGSI_OPCODE_RSQ: + return emit_RSQ(gen, inst); case TGSI_OPCODE_ABS: return emit_ABS(gen, inst); case TGSI_OPCODE_SGT: -- cgit v1.2.3 From 858ced051551aa5d0ddd41936253d3a4ee5c142f Mon Sep 17 00:00:00 2001 From: Robert Ellison <papillo@tungstengraphics.com> Date: Wed, 17 Sep 2008 02:30:20 -0600 Subject: CELL: fleshing out the blending fragment ops - Added two new debug flags (to be used with the CELL_DEBUG environment variable). The first, "CELL_DEBUG=fragops", activates SPE fragment ops debug messages. The second, "CELL_DEBUG=fragopfallback", will eventually be used to disable the use of generated SPE code for fragment ops in favor of the default fallback reference routine. (During development, though, the parity of this flag is reversed: all users will get the reference code *unless* CELL_DEBUG=fragopfallback is set. This will prevent hiccups in code generation from affecting the other developers.) - Formalized debug message usage and macros in spu/spu_main.c. - Added lots of new code to ppu/cell_gen_fragment.c to extend the number of supported source RGB factors from 4 to 15, and to complete the list of supported blend equations. More coming, to complete the source and destination RGB and alpha factors, and to complete the rest of the fragment operations... --- src/gallium/drivers/cell/common.h | 11 +- src/gallium/drivers/cell/ppu/cell_context.c | 2 + src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 272 ++++++++++++++++++++++- src/gallium/drivers/cell/ppu/cell_state_emit.c | 5 + src/gallium/drivers/cell/spu/spu_main.c | 115 +++++----- 5 files changed, 337 insertions(+), 68 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 8f088541173..f0ff96eb478 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -104,12 +104,11 @@ #define CELL_BUFFER_STATUS_FREE 10 #define CELL_BUFFER_STATUS_USED 20 - -#define CELL_DEBUG_CHECKER (1 << 0) -#define CELL_DEBUG_ASM (1 << 1) -#define CELL_DEBUG_SYNC (1 << 2) - - +#define CELL_DEBUG_CHECKER (1 << 0) +#define CELL_DEBUG_ASM (1 << 1) +#define CELL_DEBUG_SYNC (1 << 2) +#define CELL_DEBUG_FRAGMENT_OPS (1 << 3) +#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) /** Max instructions for doing per-fragment operations */ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index b418271dca2..62e213ea354 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -89,6 +89,8 @@ static const struct debug_named_value cell_debug_flags[] = { {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ {"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */ {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ + {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ + {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ {NULL, 0} }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 06219d4e980..2c8c9e0d2c6 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -229,7 +229,36 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, spe_release_register(f, amask_reg); } +/* This is a convenient and oft-used sequence. It chooses + * the smaller of each element of reg1 and reg2, and combines them + * into the result register, as follows: + * + * The Float Compare Greater Than (fcgt) instruction will put + * 1s into compare_reg where reg1 > reg2, and 0s where reg1 <= reg2. + * + * Then the Select Bits (selb) instruction will take bits from + * reg1 where compare_reg is 0, and from reg2 where compare_reg is + * 1. Ergo, result_reg will have the bits from reg1 where reg1 <= reg2, + * and the bits from reg2 where reg1 > reg2, which is exactly the + * MIN operation. + */ +#define FLOAT_VECTOR_MIN(f, result_reg, reg1, reg2) {\ + int compare_reg = spe_allocate_available_register(f); \ + spe_fcgt(f, compare_reg, reg1, reg2); \ + spe_selb(f, result_reg, reg1, reg2, compare_reg); \ + spe_release_register(f, compare_reg); \ +} +/* The FLOAT_VECTOR_MAX sequence is similar to the FLOAT_VECTOR_MIN + * sequence above, except that the registers specified when selecting + * bits are reversed. + */ +#define FLOAT_VECTOR_MAX(f, result_reg, reg1, reg2) {\ + int compare_reg = spe_allocate_available_register(f); \ + spe_fcgt(f, compare_reg, reg1, reg2); \ + spe_selb(f, result_reg, reg2, reg1, compare_reg); \ + spe_release_register(f, compare_reg); \ +} /** * Generate SPE code to implement the given blend mode for a quad of pixels. @@ -242,6 +271,7 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, */ static void gen_blend(const struct pipe_blend_state *blend, + const struct pipe_blend_color *blend_color, struct spe_function *f, enum pipe_format color_format, int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, @@ -262,10 +292,53 @@ gen_blend(const struct pipe_blend_state *blend, int fbB_reg = spe_allocate_available_register(f); int fbA_reg = spe_allocate_available_register(f); - int one_reg = spe_allocate_available_register(f); int tmp_reg = spe_allocate_available_register(f); - boolean one_reg_set = false; /* avoid setting one_reg more than once */ + /* These values might or might not eventually get put into + * registers. We avoid allocating them and setting them until + * they're actually needed; then we avoid setting them more than + * once, and release them at the end of code generation. + */ + boolean one_reg_set = false; + int one_reg; +#define SET_ONE_REG_IF_UNSET(f) if (!one_reg_set) {\ + one_reg = spe_allocate_available_register(f); \ + spe_load_float(f, one_reg, 1.0f); \ + one_reg_set = true; \ +} +#define RELEASE_ONE_REG_IF_USED(f) if (one_reg_set) {\ + spe_release_register(f, one_reg); \ +} + + boolean const_color_set = false; + int constR_reg, constG_reg, constB_reg; +#define SET_CONST_COLOR_IF_UNSET(f, blend_color) if (!const_color_set) {\ + constR_reg = spe_allocate_available_register(f); \ + constG_reg = spe_allocate_available_register(f); \ + constG_reg = spe_allocate_available_register(f); \ + spe_load_float(f, constR_reg, blend_color->color[0]); \ + spe_load_float(f, constG_reg, blend_color->color[1]); \ + spe_load_float(f, constB_reg, blend_color->color[2]); \ + const_color_set = true;\ +} +#define RELEASE_CONST_COLOR_IF_USED(f) if (const_color_set) {\ + spe_release_register(f, constR_reg); \ + spe_release_register(f, constG_reg); \ + spe_release_register(f, constB_reg); \ +} + + boolean const_alpha_set = false; + int constA_reg; +#define SET_CONST_ALPHA_IF_UNSET(f, blend_color) if (!const_alpha_set) {\ + constA_reg = spe_allocate_available_register(f); \ + spe_load_float(f, constA_reg, blend_color->color[3]); \ + const_alpha_set = true; \ +} +#define RELEASE_CONST_ALPHA_IF_USED(f) if (const_alpha_set) {\ + spe_release_register(f, constA_reg); \ +} + + /* Real code starts here */ ASSERT(blend->blend_enable); @@ -348,30 +421,161 @@ gen_blend(const struct pipe_blend_state *blend, /* - * Compute Src RGB terms + * Compute Src RGB terms. We're actually looking for the value + * of (the appropriate RGB factors) * (the incoming source RGB color). */ switch (blend->rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (R,G,B) */ spe_move(f, term1R_reg, fragR_reg); spe_move(f, term1G_reg, fragG_reg); spe_move(f, term1B_reg, fragB_reg); break; case PIPE_BLENDFACTOR_ZERO: - spe_zero(f, term1R_reg); - spe_zero(f, term1G_reg); - spe_zero(f, term1B_reg); + /* factors = (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term1R_reg, 0.0f); + spe_load_float(f, term1G_reg, 0.0f); + spe_load_float(f, term1B_reg, 0.0f); break; case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ spe_fm(f, term1R_reg, fragR_reg, fragR_reg); spe_fm(f, term1G_reg, fragG_reg, fragG_reg); spe_fm(f, term1B_reg, fragB_reg, fragB_reg); break; case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ spe_fm(f, term1R_reg, fragR_reg, fragA_reg); spe_fm(f, term1G_reg, fragG_reg, fragA_reg); spe_fm(f, term1B_reg, fragB_reg, fragA_reg); break; - /* XXX more cases */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - R */ + spe_fs(f, tmp_reg, one_reg, fragR_reg); + /* term = R * tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + /* repeat for G and B */ + spe_fs(f, tmp_reg, one_reg, fragG_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, fragB_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ + spe_fm(f, term1R_reg, fragR_reg, fbR_reg); + spe_fm(f, term1G_reg, fragG_reg, fbG_reg); + spe_fm(f, term1B_reg, fragB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - Rfb */ + spe_fs(f, tmp_reg, one_reg, fbR_reg); + /* term = R * tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + /* repeat for G and B */ + spe_fs(f, tmp_reg, one_reg, fbG_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, fbB_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - A */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* term = R * tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + /* repeat for G and B with the same (1-A) factor */ + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ + spe_fm(f, term1R_reg, fragR_reg, fbA_reg); + spe_fm(f, term1G_reg, fragG_reg, fbA_reg); + spe_fm(f, term1B_reg, fragB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - A */ + spe_fs(f, tmp_reg, one_reg, fbA_reg); + /* term = R * tmp, G*tmp, and B*tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We'll need the optional blend color registers */ + SET_CONST_COLOR_IF_UNSET(f,blend_color) + /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ + spe_fm(f, term1R_reg, fragR_reg, constR_reg); + spe_fm(f, term1G_reg, fragG_reg, constG_reg); + spe_fm(f, term1B_reg, fragB_reg, constB_reg); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + SET_CONST_ALPHA_IF_UNSET(f, blend_color) + /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ + spe_fm(f, term1R_reg, fragR_reg, constA_reg); + spe_fm(f, term1G_reg, fragG_reg, constA_reg); + spe_fm(f, term1B_reg, fragB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need both the optional {1,1,1,1} register, and the optional + * constant color registers + */ + SET_ONE_REG_IF_UNSET(f) + SET_CONST_COLOR_IF_UNSET(f, blend_color) + /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) */ + spe_fs(f, tmp_reg, one_reg, constR_reg); + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, constG_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, constB_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional {1,1,1,1} register and the optional + * constant alpha register + */ + SET_ONE_REG_IF_UNSET(f) + SET_CONST_ALPHA_IF_UNSET(f, blend_color) + /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) */ + spe_fs(f, tmp_reg, one_reg, constA_reg); + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* We'll need the optional {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so + * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) + */ + /* tmp = 1 - Afb */ + spe_fs(f, tmp_reg, one_reg, fbA_reg); + /* tmp = min(A,tmp) */ + FLOAT_VECTOR_MIN(f, tmp_reg, fragA_reg, tmp_reg) + /* term = R*tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + + /* non-OpenGL cases? */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: ASSERT(0); } @@ -421,6 +625,7 @@ gen_blend(const struct pipe_blend_state *blend, case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* one = {1.0, 1.0, 1.0, 1.0} */ if (!one_reg_set) { + one_reg = spe_allocate_available_register(f); spe_load_float(f, one_reg, 1.0f); one_reg_set = true; } @@ -432,6 +637,14 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term2B_reg, fbB_reg, tmp_reg); break; /* XXX more cases */ + // GL_ONE_MINUS_SRC_COLOR + // GL_DST_COLOR + // GL_ONE_MINUS_DST_COLOR + // GL_DST_ALPHA + // GL_CONSTANT_COLOR + // GL_ONE_MINUS_CONSTANT_COLOR + // GL_CONSTANT_ALPHA + // GL_ONE_MINUS_CONSTANT_ALPHA default: ASSERT(0); } @@ -452,6 +665,7 @@ gen_blend(const struct pipe_blend_state *blend, case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* one = {1.0, 1.0, 1.0, 1.0} */ if (!one_reg_set) { + one_reg = spe_allocate_available_register(f); spe_load_float(f, one_reg, 1.0f); one_reg_set = true; } @@ -461,6 +675,14 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term2A_reg, fbA_reg, tmp_reg); break; /* XXX more cases */ + // GL_ONE_MINUS_SRC_COLOR + // GL_DST_COLOR + // GL_ONE_MINUS_DST_COLOR + // GL_DST_ALPHA + // GL_CONSTANT_COLOR + // GL_ONE_MINUS_CONSTANT_COLOR + // GL_CONSTANT_ALPHA + // GL_ONE_MINUS_CONSTANT_ALPHA default: ASSERT(0); } @@ -479,7 +701,21 @@ gen_blend(const struct pipe_blend_state *blend, spe_fs(f, fragG_reg, term1G_reg, term2G_reg); spe_fs(f, fragB_reg, term1B_reg, term2B_reg); break; - /* XXX more cases */ + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragR_reg, term2R_reg, term1R_reg); + spe_fs(f, fragG_reg, term2G_reg, term1G_reg); + spe_fs(f, fragB_reg, term2B_reg, term1B_reg); + break; + case PIPE_BLEND_MIN: + FLOAT_VECTOR_MIN(f, fragR_reg, term1R_reg, term2R_reg) + FLOAT_VECTOR_MIN(f, fragG_reg, term1G_reg, term2G_reg) + FLOAT_VECTOR_MIN(f, fragB_reg, term1B_reg, term2B_reg) + break; + case PIPE_BLEND_MAX: + FLOAT_VECTOR_MAX(f, fragR_reg, term1R_reg, term2R_reg) + FLOAT_VECTOR_MAX(f, fragG_reg, term1G_reg, term2G_reg) + FLOAT_VECTOR_MAX(f, fragB_reg, term1B_reg, term2B_reg) + break; default: ASSERT(0); } @@ -494,7 +730,15 @@ gen_blend(const struct pipe_blend_state *blend, case PIPE_BLEND_SUBTRACT: spe_fs(f, fragA_reg, term1A_reg, term2A_reg); break; - /* XXX more cases */ + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragA_reg, term2A_reg, term1A_reg); + break; + case PIPE_BLEND_MIN: + FLOAT_VECTOR_MIN(f, fragA_reg, term1A_reg, term2A_reg) + break; + case PIPE_BLEND_MAX: + FLOAT_VECTOR_MAX(f, fragA_reg, term1A_reg, term2A_reg) + break; default: ASSERT(0); } @@ -514,8 +758,12 @@ gen_blend(const struct pipe_blend_state *blend, spe_release_register(f, fbB_reg); spe_release_register(f, fbA_reg); - spe_release_register(f, one_reg); spe_release_register(f, tmp_reg); + + /* Free any optional registers that actually got used */ + RELEASE_ONE_REG_IF_USED(f) + RELEASE_CONST_COLOR_IF_USED(f) + RELEASE_CONST_ALPHA_IF_USED(f) } @@ -629,6 +877,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) const struct pipe_depth_stencil_alpha_state *dsa = &cell->depth_stencil->base; const struct pipe_blend_state *blend = &cell->blend->base; + const struct pipe_blend_color *blend_color = &cell->blend_color; const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ @@ -651,7 +900,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ - spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); spe_allocate_register(f, x_reg); spe_allocate_register(f, y_reg); spe_allocate_register(f, color_tile_reg); @@ -816,7 +1064,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) if (blend->blend_enable) { - gen_blend(blend, f, color_format, + gen_blend(blend, blend_color, f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 2da3097983c..8a389cd6aae 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -100,14 +100,19 @@ cell_emit_state(struct cell_context *cell) = cell_batch_alloc(cell, sizeof(*fops)); struct spe_function spe_code; + /* Prepare the buffer that will hold the generated code. */ + spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + /* generate new code */ cell_gen_fragment_function(cell, &spe_code); + /* put the new code into the batch buffer */ fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; memcpy(&fops->code, spe_code.store, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); fops->dsa = cell->depth_stencil->base; fops->blend = cell->blend->base; + /* free codegen buffer */ spe_release_func(&spe_code); } diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 78260c4259c..da2cb089722 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -50,7 +50,31 @@ helpful headers: /opt/cell/sdk/usr/include/libmisc.h */ +/* Set to 0 to disable all extraneous debugging code */ +#define DEBUG 1 + +#if DEBUG boolean Debug = FALSE; +boolean force_fragment_ops_fallback = TRUE; + +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define DEBUG_PRINTF(format,...) \ + if (Debug) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) +#define D_PRINTF(flag, format,...) \ + if (spu.init.debug_flags & (flag)) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) + +#else + +#define DEBUG_PRINTF(...) +#define D_PRINTF(...) + +#endif struct spu_global spu; @@ -133,9 +157,7 @@ really_clear_tiles(uint surfaceIndex) static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { - if (Debug) - printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, - clear->surface, clear->value); + DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); if (clear->surface == 0) { spu.fb.color_clear_value = clear->value; @@ -203,17 +225,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) #endif /* CLEAR_OPT */ - if (Debug) - printf("SPU %u: CLEAR SURF done\n", spu.init.id); + DEBUG_PRINTF("CLEAR SURF done\n"); } static void cmd_release_verts(const struct cell_command_release_verts *release) { - if (Debug) - printf("SPU %u: RELEASE VERTS %u\n", - spu.init.id, release->vertex_buf); + DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf); ASSERT(release->vertex_buf != ~0U); release_buffer(release->vertex_buf); } @@ -228,16 +247,30 @@ cmd_release_verts(const struct cell_command_release_verts *release) static void cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) { - if (Debug) - printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); + DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); /* Copy state info (for fallback case only) */ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); - /* Point function pointer at new code */ - spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + /* Parity twist! For now, always use the fallback code by default, + * only switching to codegen when specifically requested. This + * allows us to develop freely without risking taking down the + * branch. + * + * Later, the parity of this check will be reversed, so that + * codegen is *always* used, unless we specifically indicate that + * we don't want it. + * + * Eventually, the option will be removed completely, because in + * final code we'll always use codegen and won't even provide the + * raw state records that the fallback code requires. + */ + if (spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) { + spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + } + /* otherwise, the default fallback code remains in place */ spu.read_depth = spu.depth_stencil_alpha.depth.enabled; spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; @@ -247,8 +280,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) static void cmd_state_fragment_program(const struct cell_command_fragment_program *fp) { - if (Debug) - printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id); + DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_program_code, fp->code, SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); @@ -262,9 +294,7 @@ cmd_state_fragment_program(const struct cell_command_fragment_program *fp) static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { - if (Debug) - printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", - spu.init.id, + DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", cmd->width, cmd->height, cmd->color_start, @@ -309,9 +339,7 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) static void cmd_state_sampler(const struct cell_command_sampler *sampler) { - if (Debug) - printf("SPU %u: SAMPLER [%u]\n", - spu.init.id, sampler->unit); + DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); spu.sampler[sampler->unit] = sampler->state; if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) @@ -328,11 +356,9 @@ cmd_state_texture(const struct cell_command_texture *texture) const uint width = texture->width; const uint height = texture->height; - if (Debug) { - printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu.init.id, + DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n", texture->unit, texture->start, texture->width, texture->height); - } spu.texture[unit].start = texture->start; spu.texture[unit].width = width; @@ -351,10 +377,7 @@ cmd_state_texture(const struct cell_command_texture *texture) static void cmd_state_vertex_info(const struct vertex_info *vinfo) { - if (Debug) { - printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, - vinfo->num_attribs); - } + DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); ASSERT(vinfo->num_attribs >= 1); ASSERT(vinfo->num_attribs <= 8); memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); @@ -393,8 +416,7 @@ cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) static void cmd_finish(void) { - if (Debug) - printf("SPU %u: FINISH\n", spu.init.id); + DEBUG_PRINTF("FINISH\n"); really_clear_tiles(0); /* wait for all outstanding DMAs to finish */ mfc_write_tag_mask(~0); @@ -419,9 +441,8 @@ cmd_batch(uint opcode) const unsigned usize = size / sizeof(buffer[0]); uint pos; - if (Debug) - printf("SPU %u: BATCH buffer %u, len %u, from %p\n", - spu.init.id, buf, size, spu.init.buffers[buf]); + DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", + buf, size, spu.init.buffers[buf]); ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); @@ -440,8 +461,7 @@ cmd_batch(uint opcode) wait_on_mask(1 << TAG_BATCH_BUFFER); /* Tell PPU we're done copying the buffer to local store */ - if (Debug) - printf("SPU %u: release batch buf %u\n", spu.init.id, buf); + DEBUG_PRINTF("release batch buf %u\n", buf); release_buffer(buf); /* @@ -571,8 +591,7 @@ cmd_batch(uint opcode) } } - if (Debug) - printf("SPU %u: BATCH complete\n", spu.init.id); + DEBUG_PRINTF("BATCH complete\n"); } @@ -585,8 +604,7 @@ main_loop(void) struct cell_command cmd; int exitFlag = 0; - if (Debug) - printf("SPU %u: Enter main loop\n", spu.init.id); + DEBUG_PRINTF("Enter main loop\n"); ASSERT((sizeof(struct cell_command) & 0xf) == 0); ASSERT_ALIGN16(&cmd); @@ -595,14 +613,12 @@ main_loop(void) unsigned opcode; int tag = 0; - if (Debug) - printf("SPU %u: Wait for cmd...\n", spu.init.id); + DEBUG_PRINTF("Wait for cmd...\n"); /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); - if (Debug) - printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode); + DEBUG_PRINTF("got cmd 0x%x\n", opcode); /* command payload */ mfc_get(&cmd, /* dest */ @@ -619,8 +635,7 @@ main_loop(void) switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: - if (Debug) - printf("SPU %u: EXIT\n", spu.init.id); + DEBUG_PRINTF("EXIT\n"); exitFlag = 1; break; case CELL_CMD_VS_EXECUTE: @@ -632,13 +647,12 @@ main_loop(void) cmd_batch(opcode); break; default: - printf("Bad opcode!\n"); + printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); } } - if (Debug) - printf("SPU %u: Exit main loop\n", spu.init.id); + DEBUG_PRINTF("Exit main loop\n"); spu_dcache_report(); } @@ -653,7 +667,8 @@ one_time_init(void) invalidate_tex_cache(); /* Install default/fallback fragment processing function. - * This will normally be overriden by a code-gen'd function. + * This will normally be overriden by a code-gen'd function + * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. */ spu.fragment_ops = spu_fallback_fragment_ops; } @@ -685,8 +700,8 @@ main(main_param_t speid, main_param_t argp) one_time_init(); - if (Debug) - printf("SPU: main() speid=%lu\n", (unsigned long) speid); + DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid); + D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); mfc_get(&spu.init, /* dest */ (unsigned int) argp, /* src */ -- cgit v1.2.3 From 05aeb92a092c26e7773beb95692fc72e70a40e56 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 17 Sep 2008 08:11:42 -0600 Subject: cell: dump generated code if CELL_DEBUG=asm --- progs/demos/fslight.c | 2 +- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 13 ++++++++++- src/gallium/drivers/cell/ppu/cell_screen.c | 14 ++++++------ src/gallium/drivers/softpipe/sp_fs_exec.c | 29 ++++++++++++++++++++---- 4 files changed, 44 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/progs/demos/fslight.c b/progs/demos/fslight.c index e79b5cc1970..c7931f4697e 100644 --- a/progs/demos/fslight.c +++ b/progs/demos/fslight.c @@ -45,7 +45,7 @@ static GLint uTexture; static GLuint SphereList, RectList, CurList; static GLint win = 0; -static GLboolean anim = GL_TRUE; +static GLboolean anim = 0*GL_TRUE; static GLboolean wire = GL_FALSE; static GLboolean pixelLight = GL_TRUE; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 2c8c9e0d2c6..99407b8acee 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -900,6 +900,14 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ + spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + + if (cell->debug_flags & CELL_DEBUG_ASM) { + spe_print_code(f, true); + spe_indent(f, 8); + spe_comment(f, -4, "Begin per-fragment ops"); + } + spe_allocate_register(f, x_reg); spe_allocate_register(f, y_reg); spe_allocate_register(f, color_tile_reg); @@ -1114,5 +1122,8 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_release_register(f, fbRGBA_reg); spe_release_register(f, fbZS_reg); spe_release_register(f, quad_offset_reg); -} + if (cell->debug_flags & CELL_DEBUG_ASM) { + spe_comment(f, -4, "End per-fragment ops"); + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 139b3719b62..47ba6fa2909 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -58,9 +58,9 @@ cell_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return CELL_MAX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: - return 0; + return 1; case PIPE_CAP_TWO_SIDED_STENCIL: - return 0; + return 1; case PIPE_CAP_GLSL: return 1; case PIPE_CAP_S3TC: @@ -68,13 +68,13 @@ cell_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: - return 0; + return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 1; case PIPE_CAP_OCCLUSION_QUERY: - return 0; + return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 0; + return 10; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 12; /* max 2Kx2K */ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: @@ -82,7 +82,7 @@ cell_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 12; /* max 2Kx2K */ default: - return 0; + return 10; } } @@ -108,7 +108,7 @@ cell_get_paramf(struct pipe_screen *screen, int param) return 16.0; /* arbitrary */ default: - return 0; + return 10; } } diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 701ee4c72f2..ffc0c5e578e 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -39,11 +39,20 @@ #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_parse.h" -struct sp_exec_fragment_shader { +struct sp_exec_fragment_shader +{ struct sp_fragment_shader base; + struct tgsi_token *machine_tokens; }; +/** cast wrapper */ +static INLINE struct sp_exec_fragment_shader * +sp_exec_fragment_shader(struct sp_fragment_shader *base) +{ + return (struct sp_exec_fragment_shader *) base; +} + /** * Compute quad X,Y,Z,W for the four fragments in a quad. @@ -86,10 +95,20 @@ exec_prepare( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers ) { - tgsi_exec_machine_bind_shader( machine, - base->shader.tokens, - PIPE_MAX_SAMPLERS, - samplers ); + struct sp_exec_fragment_shader *spefs = + sp_exec_fragment_shader(base); + + /* + * Bind tokens/shader to the interpreter's machine state. + * Avoid redundant binding. + */ + if (spefs->machine_tokens != base->shader.tokens) { + tgsi_exec_machine_bind_shader( machine, + base->shader.tokens, + PIPE_MAX_SAMPLERS, + samplers ); + spefs->machine_tokens = base->shader.tokens; + } } -- cgit v1.2.3 From f631093ce76ad14dee63293761d7da7b7b42fc6d Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 17 Sep 2008 08:17:02 -0600 Subject: cell: example of doing fs/fm sequence with fnms in blending --- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 99407b8acee..2c80dd712e6 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -623,6 +623,7 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term2B_reg, fbB_reg, fragA_reg); break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +#if 0 /* one = {1.0, 1.0, 1.0, 1.0} */ if (!one_reg_set) { one_reg = spe_allocate_available_register(f); @@ -635,6 +636,15 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term2R_reg, fbR_reg, tmp_reg); spe_fm(f, term2G_reg, fbG_reg, tmp_reg); spe_fm(f, term2B_reg, fbB_reg, tmp_reg); +#else + /* Compute: term2x = fbx * (1.0 - fragA) + * Which is: term2x = fbx - fbx * fragA + * Use fnms t,a,b,c which computes t=c-a*b + */ + spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); +#endif break; /* XXX more cases */ // GL_ONE_MINUS_SRC_COLOR @@ -663,6 +673,7 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term2A_reg, fbA_reg, fragA_reg); break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +#if 0 /* one = {1.0, 1.0, 1.0, 1.0} */ if (!one_reg_set) { one_reg = spe_allocate_available_register(f); @@ -673,6 +684,13 @@ gen_blend(const struct pipe_blend_state *blend, spe_fs(f, tmp_reg, one_reg, fragA_reg); /* termA = fbA * tmp */ spe_fm(f, term2A_reg, fbA_reg, tmp_reg); +#else + /* Compute: term2A = fbA * (1.0 - fragA) + * Which is: term2A = fbA - fbA * fragA + * Use fnms t,a,b,c which computes t=c-a*b + */ + spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); +#endif break; /* XXX more cases */ // GL_ONE_MINUS_SRC_COLOR -- cgit v1.2.3 From 562b31195cc8140d3bd0ad7ed8532200e9df520a Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 18 Sep 2008 13:15:55 +1000 Subject: nv50: hack surface alloc a bit for now --- src/gallium/drivers/nv50/nv50_miptree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index b0e8fe2f0b7..28a8bdc0fab 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -48,6 +48,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) } pitch = ((pt->width[0] + 63) & ~63) * pt->block.size; + /*XXX*/ + pitch *= 2; mt->buffer = ws->buffer_create(ws, 256, usage, pitch * pt->height[0]); if (!mt->buffer) { -- cgit v1.2.3 From ed0c308f39f7d7a5cb05571e37440469eacbd624 Mon Sep 17 00:00:00 2001 From: Ben Skeggs <skeggsb@gmail.com> Date: Thu, 18 Sep 2008 13:39:52 +1000 Subject: nv50: use 3D engine clears, 2D engine doesn't understand zeta formats --- src/gallium/drivers/nv50/nv50_clear.c | 57 ++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c index fbc6cd09cee..a31a42d6b54 100644 --- a/src/gallium/drivers/nv50/nv50_clear.c +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -30,6 +30,61 @@ void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { - pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + struct nv50_context *nv50 = nv50_context(pipe); + struct pipe_framebuffer_state fb, s_fb = nv50->framebuffer; + struct pipe_scissor_state sc, s_sc = nv50->scissor; + unsigned dirty = nv50->dirty; + + nv50->dirty = 0; + + if (ps->format == PIPE_FORMAT_Z24S8_UNORM || + ps->format == PIPE_FORMAT_Z16_UNORM) { + fb.num_cbufs = 0; + fb.zsbuf = ps; + } else { + fb.num_cbufs = 1; + fb.cbufs[0] = ps; + fb.zsbuf = NULL; + } + fb.width = ps->width; + fb.height = ps->height; + pipe->set_framebuffer_state(pipe, &fb); + + sc.minx = sc.miny = 0; + sc.maxx = fb.width; + sc.maxy = fb.height; + pipe->set_scissor_state(pipe, &sc); + + nv50_state_validate(nv50); + + switch (ps->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + BEGIN_RING(tesla, 0x0d80, 4); + OUT_RINGf (ubyte_to_float((clearValue >> 16) & 0xff)); + OUT_RINGf (ubyte_to_float((clearValue >> 8) & 0xff)); + OUT_RINGf (ubyte_to_float((clearValue >> 0) & 0xff)); + OUT_RINGf (ubyte_to_float((clearValue >> 24) & 0xff)); + BEGIN_RING(tesla, 0x19d0, 1); + OUT_RING (0x3c); + break; + case PIPE_FORMAT_Z24S8_UNORM: + BEGIN_RING(tesla, 0x0d90, 1); + OUT_RINGf ((float)(clearValue >> 8) * (1.0 / 16777215.0)); + BEGIN_RING(tesla, 0x0da0, 1); + OUT_RING (clearValue & 0xff); + BEGIN_RING(tesla, 0x19d0, 1); + OUT_RING (0x03); + break; + default: + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, + clearValue); + break; + } + + pipe->set_framebuffer_state(pipe, &s_fb); + pipe->set_scissor_state(pipe, &s_sc); + nv50->dirty |= dirty; + ps->status = PIPE_SURFACE_STATUS_CLEAR; } + -- cgit v1.2.3 From f8bba34d4e12ef4c620cac881a4b697a1e668377 Mon Sep 17 00:00:00 2001 From: Robert Ellison <papillo@tungstengraphics.com> Date: Thu, 18 Sep 2008 01:29:41 -0600 Subject: CELL: finish fragment ops blending (except for unusual D3D modes) - Added new "macro" functions spe_float_min() and spe_float_max() to rtasm_ppc_spe.{ch}. These emit instructions that cause the minimum or maximum of each element in a vector of floats to be saved in the destination register. - Major changes to cell_gen_fragment.c to implement all the blending modes (except for the mysterious D3D-based PIPE_BLENDFACTOR_SRC1_COLOR, PIPE_BLENDFACTOR_SRC1_ALPHA, PIPE_BLENDFACTOR_INV_SRC1_COLOR, and PIPE_BLENDFACTOR_INV_SRC1_ALPHA). - Some revamping of code in cell_gen_fragment.c: use the new spe_float_min() and spe_float_max() functions (instead of expanding these calculations inline via macros); create and use an inline utility function for handling "optional" register allocation (for the {1,1,1,1} vector, and the blend color vectors) instead of expanding with macros; use the Float Multiply and Subtract (fnms) instruction to simplify and optimize many blending calculations. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 41 +- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 8 + src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 546 ++++++++++++++--------- 3 files changed, 377 insertions(+), 218 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 870ae802c52..12e0826fb9b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -384,7 +384,7 @@ void spe_release_func(struct spe_function *p) /** - * Alloate a SPE register. + * Allocate a SPE register. * \return register index or -1 if none left. */ int spe_allocate_available_register(struct spe_function *p) @@ -646,5 +646,44 @@ spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) } } +/* For each 32-bit float element of rA and rB, choose the smaller of the + * two, compositing them into the rT register. + * + * The Float Compare Greater Than (fcgt) instruction will put 1s into + * compare_reg where rA > rB, and 0s where rA <= rB. + * + * Then the Select Bits (selb) instruction will take bits from rA where + * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA + * where rA <= rB and from rB where rB > rA, which is exactly the + * "min" operation. + * + * The compare_reg could in many cases be the same as rT, unless + * rT == rA || rt == rB. But since this is common in constructions + * like "x = min(x, a)", we always allocate a new register to be safe. + */ +void +spe_float_min(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB) +{ + unsigned int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rA, rB, compare_reg); + spe_release_register(p, compare_reg); +} + +/* For each 32-bit float element of rA and rB, choose the greater of the + * two, compositing them into the rT register. + * + * The logic is similar to that of spe_float_min() above; the only + * difference is that the registers on spe_selb() have been reversed, + * so that the larger of the two is selected instead of the smaller. + */ +void +spe_float_max(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB) +{ + unsigned int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rB, rA, compare_reg); + spe_release_register(p, compare_reg); +} #endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 25790452325..4ef05ea27d1 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -322,6 +322,14 @@ spe_zero(struct spe_function *p, unsigned rT); extern void spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word); +/** rT = float min(rA, rB) */ +extern void +spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); + +/** rT = float max(rA, rB) */ +extern void +spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); + /* Floating-point instructions */ diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 2c80dd712e6..9d25e820ad9 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -229,35 +229,26 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, spe_release_register(f, amask_reg); } -/* This is a convenient and oft-used sequence. It chooses - * the smaller of each element of reg1 and reg2, and combines them - * into the result register, as follows: - * - * The Float Compare Greater Than (fcgt) instruction will put - * 1s into compare_reg where reg1 > reg2, and 0s where reg1 <= reg2. - * - * Then the Select Bits (selb) instruction will take bits from - * reg1 where compare_reg is 0, and from reg2 where compare_reg is - * 1. Ergo, result_reg will have the bits from reg1 where reg1 <= reg2, - * and the bits from reg2 where reg1 > reg2, which is exactly the - * MIN operation. +/* This pair of functions is used inline to allocate and deallocate + * optional constant registers. Once a constant is discovered to be + * needed, we will likely need it again, so we don't want to deallocate + * it and have to allocate and load it again unnecessarily. */ -#define FLOAT_VECTOR_MIN(f, result_reg, reg1, reg2) {\ - int compare_reg = spe_allocate_available_register(f); \ - spe_fcgt(f, compare_reg, reg1, reg2); \ - spe_selb(f, result_reg, reg1, reg2, compare_reg); \ - spe_release_register(f, compare_reg); \ +static inline void +setup_const_register(struct spe_function *f, boolean *is_already_set, unsigned int *r, float value) +{ + if (*is_already_set) return; + *r = spe_allocate_available_register(f); + spe_load_float(f, *r, value); + *is_already_set = true; } -/* The FLOAT_VECTOR_MAX sequence is similar to the FLOAT_VECTOR_MIN - * sequence above, except that the registers specified when selecting - * bits are reversed. - */ -#define FLOAT_VECTOR_MAX(f, result_reg, reg1, reg2) {\ - int compare_reg = spe_allocate_available_register(f); \ - spe_fcgt(f, compare_reg, reg1, reg2); \ - spe_selb(f, result_reg, reg2, reg1, compare_reg); \ - spe_release_register(f, compare_reg); \ +static inline void +release_const_register(struct spe_function *f, boolean *is_already_set, unsigned int r) +{ + if (!*is_already_set) return; + spe_release_register(f, r); + *is_already_set = false; } /** @@ -294,51 +285,15 @@ gen_blend(const struct pipe_blend_state *blend, int tmp_reg = spe_allocate_available_register(f); - /* These values might or might not eventually get put into - * registers. We avoid allocating them and setting them until - * they're actually needed; then we avoid setting them more than - * once, and release them at the end of code generation. + /* Optional constant registers we might or might not end up using; + * if we do use them, make sure we only allocate them once by + * keeping a flag on each one. */ - boolean one_reg_set = false; - int one_reg; -#define SET_ONE_REG_IF_UNSET(f) if (!one_reg_set) {\ - one_reg = spe_allocate_available_register(f); \ - spe_load_float(f, one_reg, 1.0f); \ - one_reg_set = true; \ -} -#define RELEASE_ONE_REG_IF_USED(f) if (one_reg_set) {\ - spe_release_register(f, one_reg); \ -} - - boolean const_color_set = false; - int constR_reg, constG_reg, constB_reg; -#define SET_CONST_COLOR_IF_UNSET(f, blend_color) if (!const_color_set) {\ - constR_reg = spe_allocate_available_register(f); \ - constG_reg = spe_allocate_available_register(f); \ - constG_reg = spe_allocate_available_register(f); \ - spe_load_float(f, constR_reg, blend_color->color[0]); \ - spe_load_float(f, constG_reg, blend_color->color[1]); \ - spe_load_float(f, constB_reg, blend_color->color[2]); \ - const_color_set = true;\ -} -#define RELEASE_CONST_COLOR_IF_USED(f) if (const_color_set) {\ - spe_release_register(f, constR_reg); \ - spe_release_register(f, constG_reg); \ - spe_release_register(f, constB_reg); \ -} - - boolean const_alpha_set = false; - int constA_reg; -#define SET_CONST_ALPHA_IF_UNSET(f, blend_color) if (!const_alpha_set) {\ - constA_reg = spe_allocate_available_register(f); \ - spe_load_float(f, constA_reg, blend_color->color[3]); \ - const_alpha_set = true; \ -} -#define RELEASE_CONST_ALPHA_IF_USED(f) if (const_alpha_set) {\ - spe_release_register(f, constA_reg); \ -} - - /* Real code starts here */ + boolean one_reg_set = false; + unsigned int one_reg; + boolean constR_reg_set = false, constG_reg_set = false, + constB_reg_set = false, constA_reg_set = false; + unsigned int constR_reg, constG_reg, constB_reg, constA_reg; ASSERT(blend->blend_enable); @@ -419,10 +374,11 @@ gen_blend(const struct pipe_blend_state *blend, spe_release_register(f, mask_reg); } - /* * Compute Src RGB terms. We're actually looking for the value - * of (the appropriate RGB factors) * (the incoming source RGB color). + * of (the appropriate RGB factors) * (the incoming source RGB color), + * because in some cases (like PIPE_BLENDFACTOR_ONE and + * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. */ switch (blend->rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: @@ -450,18 +406,13 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term1B_reg, fragB_reg, fragA_reg); break; case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) */ - /* we'll need the optional constant {1,1,1,1} register */ - SET_ONE_REG_IF_UNSET(f) - /* tmp = 1 - R */ - spe_fs(f, tmp_reg, one_reg, fragR_reg); - /* term = R * tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - /* repeat for G and B */ - spe_fs(f, tmp_reg, one_reg, fragG_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fs(f, tmp_reg, one_reg, fragB_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) + * or in other words term = (R-R*R, G-G*G, B-B*B) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg); break; case PIPE_BLENDFACTOR_DST_COLOR: /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ @@ -470,30 +421,22 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term1B_reg, fragB_reg, fbB_reg); break; case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) */ - /* we'll need the optional constant {1,1,1,1} register */ - SET_ONE_REG_IF_UNSET(f) - /* tmp = 1 - Rfb */ - spe_fs(f, tmp_reg, one_reg, fbR_reg); - /* term = R * tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - /* repeat for G and B */ - spe_fs(f, tmp_reg, one_reg, fbG_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fs(f, tmp_reg, one_reg, fbB_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) + * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg); break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) */ - /* we'll need the optional constant {1,1,1,1} register */ - SET_ONE_REG_IF_UNSET(f) - /* tmp = 1 - A */ - spe_fs(f, tmp_reg, one_reg, fragA_reg); - /* term = R * tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - /* repeat for G and B with the same (1-A) factor */ - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) + * or term = (R-R*A,G-G*A,B-B*A) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg); break; case PIPE_BLENDFACTOR_DST_ALPHA: /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ @@ -502,19 +445,19 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term1B_reg, fragB_reg, fbA_reg); break; case PIPE_BLENDFACTOR_INV_DST_ALPHA: - /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) */ - /* we'll need the optional constant {1,1,1,1} register */ - SET_ONE_REG_IF_UNSET(f) - /* tmp = 1 - A */ - spe_fs(f, tmp_reg, one_reg, fbA_reg); - /* term = R * tmp, G*tmp, and B*tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) + * or term = (R-R*Afb,G-G*Afb,b-B*Afb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg); break; case PIPE_BLENDFACTOR_CONST_COLOR: - /* We'll need the optional blend color registers */ - SET_CONST_COLOR_IF_UNSET(f,blend_color) + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ spe_fm(f, term1R_reg, fragR_reg, constR_reg); spe_fm(f, term1G_reg, fragG_reg, constG_reg); @@ -522,55 +465,61 @@ gen_blend(const struct pipe_blend_state *blend, break; case PIPE_BLENDFACTOR_CONST_ALPHA: /* we'll need the optional constant alpha register */ - SET_CONST_ALPHA_IF_UNSET(f, blend_color) + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ spe_fm(f, term1R_reg, fragR_reg, constA_reg); spe_fm(f, term1G_reg, fragG_reg, constA_reg); spe_fm(f, term1B_reg, fragB_reg, constA_reg); break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need both the optional {1,1,1,1} register, and the optional - * constant color registers + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) + * or term = (R-R*Rc, G-G*Gc, B-B*Bc) + * fnms(a,b,c,d) computes a = d - b*c */ - SET_ONE_REG_IF_UNSET(f) - SET_CONST_COLOR_IF_UNSET(f, blend_color) - /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) */ - spe_fs(f, tmp_reg, one_reg, constR_reg); - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - spe_fs(f, tmp_reg, one_reg, constG_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fs(f, tmp_reg, one_reg, constB_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg); break; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - /* We need the optional {1,1,1,1} register and the optional - * constant alpha register + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) + * or term = (R-R*Ac,G-G*Ac,B-B*Ac) + * fnms(a,b,c,d) computes a = d - b*c */ - SET_ONE_REG_IF_UNSET(f) - SET_CONST_ALPHA_IF_UNSET(f, blend_color) - /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) */ - spe_fs(f, tmp_reg, one_reg, constA_reg); - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg); break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* We'll need the optional {1,1,1,1} register */ - SET_ONE_REG_IF_UNSET(f) + setup_const_register(f, &one_reg_set, &one_reg, 1.0f); /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) + * We could expand the term (as a*min(b,c) == min(a*b,a*c) + * as long as a is positive), but then we'd have to do three + * spe_float_min() functions instead of one, so this is simpler. */ /* tmp = 1 - Afb */ spe_fs(f, tmp_reg, one_reg, fbA_reg); /* tmp = min(A,tmp) */ - FLOAT_VECTOR_MIN(f, tmp_reg, fragA_reg, tmp_reg) + spe_float_min(f, tmp_reg, fragA_reg, tmp_reg); /* term = R*tmp */ spe_fm(f, term1R_reg, fragR_reg, tmp_reg); spe_fm(f, term1G_reg, fragG_reg, tmp_reg); spe_fm(f, term1B_reg, fragB_reg, tmp_reg); break; - /* non-OpenGL cases? */ + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ case PIPE_BLENDFACTOR_SRC1_COLOR: case PIPE_BLENDFACTOR_SRC1_ALPHA: case PIPE_BLENDFACTOR_INV_SRC1_COLOR: @@ -581,132 +530,293 @@ gen_blend(const struct pipe_blend_state *blend, } /* - * Compute Src Alpha term + * Compute Src Alpha term. Like the above, we're looking for + * the full term A*factor, not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. */ switch (blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ZERO: + /* factor = 0, so term = 0 */ + spe_load_float(f, term1A_reg, 0.0f); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */ case PIPE_BLENDFACTOR_ONE: + /* factor = 1, so term = A */ spe_move(f, term1A_reg, fragA_reg); break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factor = A, so term = A*A */ spe_fm(f, term1A_reg, fragA_reg, fragA_reg); break; case PIPE_BLENDFACTOR_SRC_ALPHA: spe_fm(f, term1A_reg, fragA_reg, fragA_reg); break; - /* XXX more cases */ + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factor = 1-A, so term = A*(1-A) = A-A*A */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_DST_COLOR: + /* factor = Afb, so term = A*Afb */ + spe_fm(f, term1A_reg, fragA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = Ac, so term = A*Ac */ + spe_fm(f, term1A_reg, fragA_reg, constA_reg); + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: default: ASSERT(0); } /* - * Compute Dest RGB terms + * Compute Dest RGB term. Like the above, we're looking for + * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. */ switch (blend->rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ spe_move(f, term2R_reg, fbR_reg); spe_move(f, term2G_reg, fbG_reg); spe_move(f, term2B_reg, fbB_reg); break; case PIPE_BLENDFACTOR_ZERO: - spe_zero(f, term2R_reg); - spe_zero(f, term2G_reg); - spe_zero(f, term2B_reg); + /* factor s= (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term2R_reg, 0.0f); + spe_load_float(f, term2G_reg, 0.0f); + spe_load_float(f, term2B_reg, 0.0f); break; case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */ spe_fm(f, term2R_reg, fbR_reg, fragR_reg); spe_fm(f, term2G_reg, fbG_reg, fragG_reg); spe_fm(f, term2B_reg, fbB_reg, fragB_reg); break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B)) + * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg); + break; case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */ spe_fm(f, term2R_reg, fbR_reg, fragA_reg); spe_fm(f, term2G_reg, fbG_reg, fragA_reg); spe_fm(f, term2B_reg, fbB_reg, fragA_reg); break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: -#if 0 - /* one = {1.0, 1.0, 1.0, 1.0} */ - if (!one_reg_set) { - one_reg = spe_allocate_available_register(f); - spe_load_float(f, one_reg, 1.0f); - one_reg_set = true; - } - /* tmp = one - fragA */ - spe_fs(f, tmp_reg, one_reg, fragA_reg); - /* term = fb * tmp */ - spe_fm(f, term2R_reg, fbR_reg, tmp_reg); - spe_fm(f, term2G_reg, fbG_reg, tmp_reg); - spe_fm(f, term2B_reg, fbB_reg, tmp_reg); -#else - /* Compute: term2x = fbx * (1.0 - fragA) - * Which is: term2x = fbx - fbx * fragA - * Use fnms t,a,b,c which computes t=c-a*b - */ + /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */ + /* fnms(a,b,c,d) computes a = d - b*c */ spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); -#endif break; - /* XXX more cases */ - // GL_ONE_MINUS_SRC_COLOR - // GL_DST_COLOR - // GL_ONE_MINUS_DST_COLOR - // GL_DST_ALPHA - // GL_CONSTANT_COLOR - // GL_ONE_MINUS_CONSTANT_COLOR - // GL_CONSTANT_ALPHA - // GL_ONE_MINUS_CONSTANT_ALPHA + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */ + spe_fm(f, term2R_reg, fbR_reg, fbR_reg); + spe_fm(f, term2G_reg, fbG_reg, fbG_reg); + spe_fm(f, term2B_reg, fbB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb)) + * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */ + spe_fm(f, term2R_reg, fbR_reg, fbA_reg); + spe_fm(f, term2G_reg, fbG_reg, fbA_reg); + spe_fm(f, term2B_reg, fbB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb)) + * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */ + spe_fm(f, term2R_reg, fbR_reg, constR_reg); + spe_fm(f, term2G_reg, fbG_reg, constG_reg); + spe_fm(f, term2B_reg, fbB_reg, constB_reg); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */ + spe_fm(f, term2R_reg, fbR_reg, constA_reg); + spe_fm(f, term2G_reg, fbG_reg, constA_reg); + spe_fm(f, term2B_reg, fbB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc)) + * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac)) + * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */ + ASSERT(0); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: ASSERT(0); } /* - * Compute Dest Alpha term + * Compute Dest Alpha term. Like the above, we're looking for + * the full term Afb*factor, not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. */ switch (blend->alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: + /* factor = 1, so term = Afb */ spe_move(f, term2A_reg, fbA_reg); break; case PIPE_BLENDFACTOR_ZERO: - spe_zero(f, term2A_reg); + /* factor = 0, so term = 0 */ + spe_load_float(f, term2A_reg, 0.0f); break; - case PIPE_BLENDFACTOR_SRC_ALPHA: + + case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factor = A, so term = Afb*A */ spe_fm(f, term2A_reg, fbA_reg, fragA_reg); break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: -#if 0 - /* one = {1.0, 1.0, 1.0, 1.0} */ - if (!one_reg_set) { - one_reg = spe_allocate_available_register(f); - spe_load_float(f, one_reg, 1.0f); - one_reg_set = true; - } - /* tmp = one - fragA */ - spe_fs(f, tmp_reg, one_reg, fragA_reg); - /* termA = fbA * tmp */ - spe_fm(f, term2A_reg, fbA_reg, tmp_reg); -#else - /* Compute: term2A = fbA * (1.0 - fragA) - * Which is: term2A = fbA - fbA * fragA - * Use fnms t,a,b,c which computes t=c-a*b - */ + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */ + /* fnms(a,b,c,d) computes a = d - b*c */ spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); -#endif break; - /* XXX more cases */ - // GL_ONE_MINUS_SRC_COLOR - // GL_DST_COLOR - // GL_ONE_MINUS_DST_COLOR - // GL_DST_ALPHA - // GL_CONSTANT_COLOR - // GL_ONE_MINUS_CONSTANT_COLOR - // GL_CONSTANT_ALPHA - // GL_ONE_MINUS_CONSTANT_ALPHA + + case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_DST_COLOR: + /* factor = Afb, so term = Afb*Afb */ + spe_fm(f, term2A_reg, fbA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = Ac, so term = Afb*Ac */ + spe_fm(f, term2A_reg, fbA_reg, constA_reg); + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */ + ASSERT(0); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: default: ASSERT(0); } /* - * Combine Src/Dest RGB terms + * Combine Src/Dest RGB terms as per the blend equation. */ switch (blend->rgb_func) { case PIPE_BLEND_ADD: @@ -725,14 +835,14 @@ gen_blend(const struct pipe_blend_state *blend, spe_fs(f, fragB_reg, term2B_reg, term1B_reg); break; case PIPE_BLEND_MIN: - FLOAT_VECTOR_MIN(f, fragR_reg, term1R_reg, term2R_reg) - FLOAT_VECTOR_MIN(f, fragG_reg, term1G_reg, term2G_reg) - FLOAT_VECTOR_MIN(f, fragB_reg, term1B_reg, term2B_reg) + spe_float_min(f, fragR_reg, term1R_reg, term2R_reg); + spe_float_min(f, fragG_reg, term1G_reg, term2G_reg); + spe_float_min(f, fragB_reg, term1B_reg, term2B_reg); break; case PIPE_BLEND_MAX: - FLOAT_VECTOR_MAX(f, fragR_reg, term1R_reg, term2R_reg) - FLOAT_VECTOR_MAX(f, fragG_reg, term1G_reg, term2G_reg) - FLOAT_VECTOR_MAX(f, fragB_reg, term1B_reg, term2B_reg) + spe_float_max(f, fragR_reg, term1R_reg, term2R_reg); + spe_float_max(f, fragG_reg, term1G_reg, term2G_reg); + spe_float_max(f, fragB_reg, term1B_reg, term2B_reg); break; default: ASSERT(0); @@ -752,10 +862,10 @@ gen_blend(const struct pipe_blend_state *blend, spe_fs(f, fragA_reg, term2A_reg, term1A_reg); break; case PIPE_BLEND_MIN: - FLOAT_VECTOR_MIN(f, fragA_reg, term1A_reg, term2A_reg) + spe_float_min(f, fragA_reg, term1A_reg, term2A_reg); break; case PIPE_BLEND_MAX: - FLOAT_VECTOR_MAX(f, fragA_reg, term1A_reg, term2A_reg) + spe_float_max(f, fragA_reg, term1A_reg, term2A_reg); break; default: ASSERT(0); @@ -779,9 +889,11 @@ gen_blend(const struct pipe_blend_state *blend, spe_release_register(f, tmp_reg); /* Free any optional registers that actually got used */ - RELEASE_ONE_REG_IF_USED(f) - RELEASE_CONST_COLOR_IF_USED(f) - RELEASE_CONST_ALPHA_IF_USED(f) + release_const_register(f, &one_reg_set, one_reg); + release_const_register(f, &constR_reg_set, constR_reg); + release_const_register(f, &constG_reg_set, constG_reg); + release_const_register(f, &constB_reg_set, constB_reg); + release_const_register(f, &constA_reg_set, constA_reg); } -- cgit v1.2.3 From c868a1c32d70295f425333f9e8a35235b129704b Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Thu, 18 Sep 2008 10:36:09 -0600 Subject: cell: Added SGE and SLE instructions --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 62 ++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 77386b30250..92681408e9f 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -696,6 +696,68 @@ emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit set-if_greater-then-or-equal. See emit_SGT for comments. + */ +static boolean +emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SGE:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 >= s2) */ + spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & ~one_reg */ + spe_andc(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit set-if_less-then-or-equal. See emit_SGT for comments. + */ +static boolean +emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SLE:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 <= s2) */ + spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & ~one_reg */ + spe_andc(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + /** * Emit set-if_equal. See emit_SGT for comments. */ -- cgit v1.2.3 From 3d2449247afce18e6a0604b794778d1373c879be Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Thu, 18 Sep 2008 10:37:45 -0600 Subject: cell: Added SGE and SLE instructions to dispatch function --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 92681408e9f..2607b410aa3 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1020,6 +1020,10 @@ emit_instruction(struct codegen *gen, return emit_SGT(gen, inst); case TGSI_OPCODE_SLT: return emit_SLT(gen, inst); + case TGSI_OPCODE_SGE: + return emit_SGE(gen, inst); + case TGSI_OPCODE_SLE: + return emit_SLE(gen, inst); case TGSI_OPCODE_SEQ: return emit_SEQ(gen, inst); case TGSI_OPCODE_SNE: -- cgit v1.2.3 From 15fceac0404f450f026f10bd2f4bdd0c939b5d00 Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Thu, 18 Sep 2008 11:11:49 -0600 Subject: cell: Fix bug with complement logic for SGE and SLE --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 42 +++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 2607b410aa3..4f01897199c 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -716,8 +716,8 @@ emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - /* d = d & ~one_reg */ - spe_andc(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + /* d = ~d & one_reg */ + spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -747,8 +747,8 @@ emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - /* d = d & ~one_reg */ - spe_andc(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + /* d = ~d & one_reg */ + spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -821,6 +821,38 @@ emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit compare. See emit_SGT for comments. + */ +static boolean +emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "CMP:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 != s2) */ + spe_fceq(gen->f, d_reg, s1_reg, s2_reg); + spe_nor(gen->f, d_reg, d_reg, d_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & one_reg */ + spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + /** * Emit max. See emit_SGT for comments. */ @@ -1028,6 +1060,8 @@ emit_instruction(struct codegen *gen, return emit_SEQ(gen, inst); case TGSI_OPCODE_SNE: return emit_SNE(gen, inst); + case TGSI_OPCODE_CMP: + return emit_CMP(gen, inst); case TGSI_OPCODE_MAX: return emit_MAX(gen, inst); case TGSI_OPCODE_MIN: -- cgit v1.2.3 From 698bffb8844f6f45e09ed0c9fea39298ac6423d2 Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Thu, 18 Sep 2008 14:49:00 -0600 Subject: cell: Added CMP instruction --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 4f01897199c..6f2b89c695c 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -835,15 +835,15 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int zero_reg = get_itemp(gen); + + spe_xor(gen->f, zero_reg, zero_reg, zero_reg); - /* d = (s1 != s2) */ - spe_fceq(gen->f, d_reg, s1_reg, s2_reg); - spe_nor(gen->f, d_reg, d_reg, d_reg); - - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - /* d = d & one_reg */ - spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + /* d = (s1 < 0) ? s2 : s3 */ + spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); + spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); -- cgit v1.2.3 From 4485ac87c2cf69bef443ac36cccaa70054c6a7bb Mon Sep 17 00:00:00 2001 From: Robert Ellison <papillo@tungstengraphics.com> Date: Thu, 18 Sep 2008 16:36:37 -0600 Subject: CELL: mark several transient files as .gitignore progs/demos: added new demo "fbo_firecube" progs/glsl: added new demo "pointcoord" src/gallium/drivers/cell/spu: added the g3d_spu executable, a Cell SPU executable file, which seems to be occasionally built as part of the cell driver src/glu/sgi: added "exptmp", a byproduct of the "mklib" process that sometimes gets deleted and sometimes not. --- progs/demos/.gitignore | 1 + progs/glsl/.gitignore | 1 + src/gallium/drivers/cell/spu/.gitignore | 1 + src/glu/sgi/.gitignore | 1 + 4 files changed, 4 insertions(+) create mode 100644 src/gallium/drivers/cell/spu/.gitignore create mode 100644 src/glu/sgi/.gitignore (limited to 'src/gallium/drivers') diff --git a/progs/demos/.gitignore b/progs/demos/.gitignore index 3693fafd4ee..f033a0505d8 100644 --- a/progs/demos/.gitignore +++ b/progs/demos/.gitignore @@ -8,6 +8,7 @@ cubemap drawpix engine extfuncs.h +fbo_firecube fire fogcoord fplight diff --git a/progs/glsl/.gitignore b/progs/glsl/.gitignore index 09340ff2adb..978e31c6cc9 100644 --- a/progs/glsl/.gitignore +++ b/progs/glsl/.gitignore @@ -7,6 +7,7 @@ extfuncs.h mandelbrot multitex noise +pointcoord points readtex.c readtex.h diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore new file mode 100644 index 00000000000..2be9a2d3242 --- /dev/null +++ b/src/gallium/drivers/cell/spu/.gitignore @@ -0,0 +1 @@ +g3d_spu diff --git a/src/glu/sgi/.gitignore b/src/glu/sgi/.gitignore new file mode 100644 index 00000000000..279ea7d4345 --- /dev/null +++ b/src/glu/sgi/.gitignore @@ -0,0 +1 @@ +exptmp -- cgit v1.2.3 From 5f88d871ea051e0c89bbbfc832a565fc8de70c6a Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Fri, 19 Sep 2008 10:20:10 +0900 Subject: softpipe: Obey const qualifier. --- src/gallium/drivers/softpipe/sp_fs_exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index ffc0c5e578e..4fea71c3140 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -42,13 +42,13 @@ struct sp_exec_fragment_shader { struct sp_fragment_shader base; - struct tgsi_token *machine_tokens; + const struct tgsi_token *machine_tokens; }; /** cast wrapper */ static INLINE struct sp_exec_fragment_shader * -sp_exec_fragment_shader(struct sp_fragment_shader *base) +sp_exec_fragment_shader(const struct sp_fragment_shader *base) { return (struct sp_exec_fragment_shader *) base; } -- cgit v1.2.3 From a57fbe53dcb54694da9c9b4be1533c9d800079d2 Mon Sep 17 00:00:00 2001 From: Robert Ellison <papillo@tungstengraphics.com> Date: Fri, 19 Sep 2008 01:55:00 -0600 Subject: CELL: add codegen for logic op, color mask - rtasm_ppc_spe.c, rtasm_ppc_spe.h: added a new macro function "spe_load_uint" for loading and splatting unsigned integers in a register; it will use "ila" for values 18 bits or less, "ilh" for word values that are symmetric across halfwords, "ilhu" for values that have zeroes in their bottom halfwords, or "ilhu" followed by "iohl" for general 32-bit values. Of the 15 color masks of interest, 4 are 18 bits or less, 2 are symmetric across halfwords, 3 are zero in the bottom halfword, and 6 require two instructions to load. - cell_gen_fragment.c: added full codegen for logic op and color mask. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 23 +++- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 4 + src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 143 ++++++++++++++++++++++- 3 files changed, 163 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 12e0826fb9b..f60bfba3f51 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -592,11 +592,32 @@ spe_load_int(struct spe_function *p, unsigned rT, int i) } } +void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) +{ + /* If the whole value is in the lower 18 bits, use ila, which + * doesn't sign-extend. Otherwise, if the two halfwords of + * the constant are identical, use ilh. Otherwise, we have + * to use ilhu followed by iohl. + */ + if ((ui & 0xfffc0000) == ui) { + spe_ila(p, rT, ui); + } + else if ((ui >> 16) == (ui & 0xffff)) { + spe_ilh(p, rT, ui & 0xffff); + } + else { + spe_ilhu(p, rT, ui >> 16); + if (ui & 0xffff) + spe_iohl(p, rT, ui & 0xffff); + } +} + void spe_splat(struct spe_function *p, unsigned rT, unsigned rA) { - spe_ila(p, rT, 66051); + /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ + spe_ila(p, rT, 0x00010203); spe_shufb(p, rT, rA, rA, rT); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 4ef05ea27d1..09400b3fb2a 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -302,6 +302,10 @@ spe_load_float(struct spe_function *p, unsigned rT, float x); extern void spe_load_int(struct spe_function *p, unsigned rT, int i); +/** Load/splat immediate unsigned int into rT. */ +extern void +spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui); + /** Replicate word 0 of rA across rT. */ extern void spe_splat(struct spe_function *p, unsigned rT, unsigned rA); diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 9d25e820ad9..899d8423b24 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -902,8 +902,69 @@ gen_logicop(const struct pipe_blend_state *blend, struct spe_function *f, int fragRGBA_reg, int fbRGBA_reg) { - /* XXX to-do */ - /* operate on 32-bit packed pixels, not float colors */ + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. + * */ + ASSERT(blend->logicop_enable); + + switch(blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: /* 0 */ + spe_zero(f, fragRGBA_reg); + break; + case PIPE_LOGICOP_NOR: /* ~(s | d) */ + spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */ + /* andc R, A, B computes R = A & ~B */ + spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ + spe_complement(f, fragRGBA_reg); + break; + case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ + /* andc R, A, B computes R = A & ~B */ + spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_INVERT: /* ~d */ + /* Note that (A nor A) == ~(A|A) == ~A */ + spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_XOR: /* s ^ d */ + spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_NAND: /* ~(s & d) */ + spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_AND: /* s & d */ + spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ + spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + spe_complement(f, fragRGBA_reg); + break; + case PIPE_LOGICOP_NOOP: /* d */ + spe_move(f, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */ + /* orc R, A, B computes R = A | ~B */ + spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_COPY: /* s */ + break; + case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */ + /* orc R, A, B computes R = A | ~B */ + spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_OR: /* s | d */ + spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_SET: /* 1 */ + spe_load_int(f, fragRGBA_reg, 0xffffffff); + break; + default: + ASSERT(0); + } } @@ -912,11 +973,81 @@ gen_colormask(uint colormask, struct spe_function *f, int fragRGBA_reg, int fbRGBA_reg) { - /* XXX to-do */ - /* operate on 32-bit packed pixels, not float colors */ -} + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. + * */ + + /* The color mask operation can prevent any set of color + * components in the incoming fragment from being written to the frame + * buffer; we do this by replacing the masked components of the + * fragment with the frame buffer values. + * + * There are only 16 possibilities, with a unique mask for + * each of the possibilities. (Technically, there are only 15 + * possibilities, since we shouldn't be called for the one mask + * that does nothing, but the complete implementation is here + * anyway to avoid confusion.) + * + * We implement this via a constant static array which we'll index + * into to get the correct mask. + * + * We're dependent on the mask values being low-order bits, + * with particular values for each bit; so we start with a + * few assertions, which will fail if any of the values were + * to change. + */ + ASSERT(PIPE_MASK_R == 0x1); + ASSERT(PIPE_MASK_G == 0x2); + ASSERT(PIPE_MASK_B == 0x4); + ASSERT(PIPE_MASK_A == 0x8); + /* Here's the list of all possible colormasks, indexed by the + * value of the combined mask specifier. + */ + static const unsigned int colormasks[16] = { + 0x00000000, /* 0: all colors masked */ + 0xff000000, /* 1: PIPE_MASK_R */ + 0x00ff0000, /* 2: PIPE_MASK_G */ + 0xffff0000, /* 3: PIPE_MASK_R | PIPE_MASK_G */ + 0x0000ff00, /* 4: PIPE_MASK_B */ + 0xff00ff00, /* 5: PIPE_MASK_R | PIPE_MASK_B */ + 0x00ffff00, /* 6: PIPE_MASK_G | PIPE_MASK_B */ + 0xffffff00, /* 7: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B */ + 0x000000ff, /* 8: PIPE_MASK_A */ + 0xff0000ff, /* 9: PIPE_MASK_R | PIPE_MASK_A */ + 0x00ff00ff, /* 10: PIPE_MASK_G | PIPE_MASK_A */ + 0xffff00ff, /* 11: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_A */ + 0x0000ffff, /* 12: PIPE_MASK_B | PIPE_MASK_A */ + 0xff00ffff, /* 13: PIPE_MASK_R | PIPE_MASK_B | PIPE_MASK_A */ + 0x00ffffff, /* 14: PIPE_MASK_G | PIPE_MASK_B | PIPE_MASK_A */ + 0xffffffff /* 15: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B | PIPE_MASK_A */ + }; + + /* Get a temporary register to hold the mask */ + int colormask_reg = spe_allocate_available_register(f); + + /* Look up the desired mask directly and load it into the mask register. + * This will load the same mask into each of the four words in the + * mask register. + */ + spe_load_uint(f, colormask_reg, colormasks[colormask]); + + /* Use the mask register to select between the fragment color + * values and the frame buffer color values. Wherever the + * mask has a 0 bit, the current frame buffer color should override + * the fragment color. Wherever the mask has a 1 bit, the + * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) + * instruction will select bits from its first operand rA wherever the + * the mask bits rM are 0, and from its second operand rB wherever the + * mask bits rM are 1. That means that the frame buffer color is the + * first operand, and the fragment color the second. + */ + spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); + /* Release the temporary register and we're done */ + spe_release_register(f, colormask_reg); +} /** * Generate code to pack a quad of float colors into a four 32-bit integers. @@ -1223,7 +1354,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) gen_logicop(blend, f, rgba_reg, fbRGBA_reg); } - if (blend->colormask != 0xf) { + if (blend->colormask != PIPE_MASK_RGBA) { gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg); } -- cgit v1.2.3 From 0838b702750d85b0284a97be211fa379e9f8d8d8 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 19 Sep 2008 09:36:29 -0600 Subject: cell: change spe_complement() to take a src and dst reg, like other instructions --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 14 ++++++++------ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 4 ++-- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 4 ++-- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 4 ++-- 4 files changed, 14 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index f60bfba3f51..85280f680a4 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -623,9 +623,9 @@ spe_splat(struct spe_function *p, unsigned rT, unsigned rA) void -spe_complement(struct spe_function *p, unsigned rT) +spe_complement(struct spe_function *p, unsigned rT, unsigned rA) { - spe_nor(p, rT, rT, rT); + spe_nor(p, rT, rA, rA); } @@ -667,7 +667,8 @@ spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) } } -/* For each 32-bit float element of rA and rB, choose the smaller of the +/** + * For each 32-bit float element of rA and rB, choose the smaller of the * two, compositing them into the rT register. * * The Float Compare Greater Than (fcgt) instruction will put 1s into @@ -683,7 +684,7 @@ spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) * like "x = min(x, a)", we always allocate a new register to be safe. */ void -spe_float_min(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB) +spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) { unsigned int compare_reg = spe_allocate_available_register(p); spe_fcgt(p, compare_reg, rA, rB); @@ -691,7 +692,8 @@ spe_float_min(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned spe_release_register(p, compare_reg); } -/* For each 32-bit float element of rA and rB, choose the greater of the +/** + * For each 32-bit float element of rA and rB, choose the greater of the * two, compositing them into the rT register. * * The logic is similar to that of spe_float_min() above; the only @@ -699,7 +701,7 @@ spe_float_min(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned * so that the larger of the two is selected instead of the smaller. */ void -spe_float_max(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB) +spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) { unsigned int compare_reg = spe_allocate_available_register(p); spe_fcgt(p, compare_reg, rA, rB); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 09400b3fb2a..8a0d70fdac5 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -310,9 +310,9 @@ spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui); extern void spe_splat(struct spe_function *p, unsigned rT, unsigned rA); -/** Complement/invert all bits in rT. */ +/** rT = complement_all_bits(rA). */ extern void -spe_complement(struct spe_function *p, unsigned rT); +spe_complement(struct spe_function *p, unsigned rT, unsigned rA); /** rT = rA. */ extern void diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 6f2b89c695c..d835aae2552 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -924,7 +924,7 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) /* tmp = (s1_reg == 0) */ spe_ceqi(gen->f, tmp_reg, s1_reg, 0); /* tmp = !tmp */ - spe_complement(gen->f, tmp_reg); + spe_complement(gen->f, tmp_reg, tmp_reg); /* exec_mask = exec_mask & tmp */ spe_and(gen->f, exec_reg, exec_reg, tmp_reg); @@ -944,7 +944,7 @@ emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_comment(gen->f, -4, "ELSE:"); /* exec_mask = !exec_mask */ - spe_complement(gen->f, exec_reg); + spe_complement(gen->f, exec_reg, exec_reg); return true; } diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 899d8423b24..06a9fa102f9 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -920,7 +920,7 @@ gen_logicop(const struct pipe_blend_state *blend, spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); break; case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ - spe_complement(f, fragRGBA_reg); + spe_complement(f, fragRGBA_reg, fragRGBA_reg); break; case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ /* andc R, A, B computes R = A & ~B */ @@ -941,7 +941,7 @@ gen_logicop(const struct pipe_blend_state *blend, break; case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - spe_complement(f, fragRGBA_reg); + spe_complement(f, fragRGBA_reg, fragRGBA_reg); break; case PIPE_LOGICOP_NOOP: /* d */ spe_move(f, fragRGBA_reg, fbRGBA_reg); -- cgit v1.2.3 From de0a6dc04a5b508472cc0cce4481ac3bb95fda3b Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 19 Sep 2008 10:42:21 -0600 Subject: cell: the test for CELL_DEBUG_FRAGMENT_OP_FALLBACK in cmd_state_fragment_ops() was inverted --- src/gallium/drivers/cell/spu/spu_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index da2cb089722..d99dd12d2a0 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -267,7 +267,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) * final code we'll always use codegen and won't even provide the * raw state records that the fallback code requires. */ - if (spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) { + if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; } /* otherwise, the default fallback code remains in place */ -- cgit v1.2.3 From 3c6bb15b7ae1c08b1ddde9e0bfb4796fd68a8a0b Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 19 Sep 2008 10:43:04 -0600 Subject: cell: fix a comment --- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 06a9fa102f9..c09d727621c 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -1050,7 +1050,7 @@ gen_colormask(uint colormask, } /** - * Generate code to pack a quad of float colors into a four 32-bit integers. + * Generate code to pack a quad of float colors into four 32-bit integers. * * \param f SPE function to append instruction onto. * \param color_format the dest color packing format -- cgit v1.2.3 From 0500ae574f4192dd1972baa23e9c62f992042ab9 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 19 Sep 2008 10:50:46 -0600 Subject: cell: issue warning to stderr when using fallback fragment ops --- src/gallium/drivers/cell/spu/spu_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index d99dd12d2a0..6b624175584 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -247,6 +247,8 @@ cmd_release_verts(const struct cell_command_release_verts *release) static void cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) { + static int warned = 0; + DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); @@ -270,7 +272,13 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; } - /* otherwise, the default fallback code remains in place */ + else { + /* otherwise, the default fallback code remains in place */ + if (!warned) { + fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); + warned = 1; + } + } spu.read_depth = spu.depth_stencil_alpha.depth.enabled; spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; -- cgit v1.2.3 From 7abf2358d739b126336c4837156816ce03f2b9d6 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 19 Sep 2008 12:52:41 -0600 Subject: cell: flesh out support for other Z/stencil format Also: improve float/int Z conversion. Use clgt instead of cgt in depth test since we're comparing unsigned values. --- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 100 +++++++++++++++-------- 1 file changed, 64 insertions(+), 36 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index c09d727621c..1837b4c79bd 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -60,6 +60,9 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, struct spe_function *f, int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) { + /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ + * quantities. This only makes a difference for 32-bit Z values though. + */ ASSERT(dsa->depth.enabled); switch (dsa->depth.func) { @@ -79,28 +82,28 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, case PIPE_FUNC_GREATER: /* zmask = (ifragZ > ref) */ - spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); /* mask = (mask & zmask) */ spe_and(f, mask_reg, mask_reg, zmask_reg); break; case PIPE_FUNC_LESS: /* zmask = (ref > ifragZ) */ - spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); /* mask = (mask & zmask) */ spe_and(f, mask_reg, mask_reg, zmask_reg); break; case PIPE_FUNC_LEQUAL: /* zmask = (ifragZ > ref) */ - spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); /* mask = (mask & ~zmask) */ spe_andc(f, mask_reg, mask_reg, zmask_reg); break; case PIPE_FUNC_GEQUAL: /* zmask = (ref > ifragZ) */ - spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); /* mask = (mask & ~zmask) */ spe_andc(f, mask_reg, mask_reg, zmask_reg); break; @@ -1066,13 +1069,16 @@ gen_pack_colors(struct spe_function *f, int r_reg, int g_reg, int b_reg, int a_reg, int rgba_reg) { + int rg_reg = spe_allocate_available_register(f); + int ba_reg = spe_allocate_available_register(f); + /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ spe_cfltu(f, r_reg, r_reg, 32); spe_cfltu(f, g_reg, g_reg, 32); spe_cfltu(f, b_reg, b_reg, 32); spe_cfltu(f, a_reg, a_reg, 32); - /* Shift the most significant bytes to least the significant positions. + /* Shift the most significant bytes to the least significant positions. * I.e.: reg = reg >> 24 */ spe_rotmi(f, r_reg, r_reg, -24); @@ -1104,9 +1110,12 @@ gen_pack_colors(struct spe_function *f, * OR-ing all those together gives us four packed colors: * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} */ - spe_or(f, rgba_reg, r_reg, g_reg); - spe_or(f, rgba_reg, rgba_reg, b_reg); - spe_or(f, rgba_reg, rgba_reg, a_reg); + spe_or(f, rg_reg, r_reg, g_reg); + spe_or(f, ba_reg, a_reg, b_reg); + spe_or(f, rgba_reg, rg_reg, ba_reg); + + spe_release_register(f, rg_reg); + spe_release_register(f, ba_reg); } @@ -1227,33 +1236,49 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_release_register(f, mask_reg); /* OK, fbZ_reg has four 24-bit Z values now */ } + else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + spe_rotmi(f, fbZ_reg, fbZS_reg, -8); /* fbZ = fbZS >> 8 */ + /* OK, fbZ_reg has four 24-bit Z values now */ + } + else if (zs_format == PIPE_FORMAT_Z32_UNORM) { + spe_move(f, fbZ_reg, fbZS_reg); + /* OK, fbZ_reg has four 32-bit Z values now */ + } + else if (zs_format == PIPE_FORMAT_Z16_UNORM) { + spe_move(f, fbZ_reg, fbZS_reg); + /* OK, fbZ_reg has four 16-bit Z values now */ + } else { - /* XXX handle other z/stencil formats */ - ASSERT(0); + ASSERT(0); /* invalid format */ } - /* Convert fragZ values from float[4] to uint[4] */ + /* Convert fragZ values from float[4] to 16, 24 or 32-bit uint[4] */ if (zs_format == PIPE_FORMAT_S8Z24_UNORM || zs_format == PIPE_FORMAT_X8Z24_UNORM || zs_format == PIPE_FORMAT_Z24S8_UNORM || zs_format == PIPE_FORMAT_Z24X8_UNORM) { - /* 24-bit Z values */ - int scale_reg = spe_allocate_available_register(f); - - /* scale_reg[0,1,2,3] = float(2^24-1) */ - spe_load_float(f, scale_reg, (float) 0xffffff); - - /* XXX these two instructions might be combined */ - spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */ - - spe_release_register(f, scale_reg); + /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + /* fragZ = fragZ >> 8 */ + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); } - else { - /* XXX handle 16-bit Z format */ - ASSERT(0); + else if (zs_format == PIPE_FORMAT_Z32_UNORM) { + /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + } + else if (zs_format == PIPE_FORMAT_Z16_UNORM) { + /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + /* fragZ = fragZ >> 16 */ + spe_rotmi(f, fragZ_reg, fragZ_reg, -16); } } + else { + /* no Z test, but set Z to zero so we don't OR-in garbage below */ + spe_load_uint(f, fbZ_reg, 0); /* XXX set to zero for now */ + } + if (dsa->stencil[0].enabled) { /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */ @@ -1268,7 +1293,10 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) ASSERT(0); } } - + else { + /* no stencil test, but set to zero so we don't OR-in garbage below */ + spe_load_uint(f, fbS_reg, 0); /* XXX set to zero for now */ + } if (dsa->stencil[0].enabled) { /* XXX this may involve depth testing too */ @@ -1296,22 +1324,22 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ } - else if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - /* XXX to do */ - ASSERT(0); + else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else if (zs_format == PIPE_FORMAT_Z32_UNORM) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ } else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - /* XXX to do */ - ASSERT(0); + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ } else if (zs_format == PIPE_FORMAT_S8_UNORM) { - /* XXX to do */ - ASSERT(0); + ASSERT(0); /* XXX to do */ } else { - /* bad zs_format */ - ASSERT(0); + ASSERT(0); /* bad zs_format */ } /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ -- cgit v1.2.3 From e9c05c5b82fdae75a3dccad23203987c277572b0 Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Fri, 19 Sep 2008 12:59:36 -0600 Subject: cell: Fixed bugs with DP3 and DP4, they match softpipe results now. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 40 +++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index d835aae2552..a84b565e5c3 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -76,7 +76,7 @@ struct codegen /** Per-instruction temps / intermediate temps */ int num_itemps; - int itemps[4]; + int itemps[10]; /** Current IF/ELSE/ENDIF nesting level */ int if_nesting; @@ -586,9 +586,10 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); } } + + free_itemps(gen); return true; } @@ -625,9 +626,10 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); } } + + free_itemps(gen); return true; } @@ -853,6 +855,38 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit floor. See emit_SGT for comments. + */ +static boolean +emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "FLR:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int zero_reg = get_itemp(gen); + + spe_xor(gen->f, zero_reg, zero_reg, zero_reg); + + /* d = (s1 < 0) ? s2 : s3 */ + spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); + spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + /** * Emit max. See emit_SGT for comments. */ -- cgit v1.2.3 From 1031638c2df825acc06a6180411caa4d9ebd5b31 Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Fri, 19 Sep 2008 14:18:39 -0600 Subject: cell: Added FLR instruction. Verified the following instructions match softpipe: MOV, ADD, MUL, SGE, SUB, MAD, ABS, SLT, MIN, MAX, LRP, DP3, DP4, CMP, FLR --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index a84b565e5c3..d2376aa0c22 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -856,7 +856,10 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) } /** - * Emit floor. See emit_SGT for comments. + * Emit floor. + * If negative int subtract one + * Convert float to signed int + * Convert signed int to float */ static boolean emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) @@ -868,16 +871,22 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - int zero_reg = get_itemp(gen); - - spe_xor(gen->f, zero_reg, zero_reg, zero_reg); + int tmp_reg = get_itemp(gen); + + /* If negative, subtract 1.0 */ + spe_xor(gen->f, tmp_reg, tmp_reg, tmp_reg); + spe_fcgt(gen->f, d_reg, tmp_reg, s1_reg); + spe_selb(gen->f, tmp_reg, tmp_reg, get_const_one_reg(gen), d_reg); + spe_fs(gen->f, d_reg, s1_reg, tmp_reg); + + /* Convert float to int */ + spe_cflts(gen->f, d_reg, d_reg, 0); + + /* Convert int to float */ + spe_csflt(gen->f, d_reg, d_reg, 0); + - /* d = (s1 < 0) ? s2 : s3 */ - spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); - spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -1100,6 +1109,8 @@ emit_instruction(struct codegen *gen, return emit_MAX(gen, inst); case TGSI_OPCODE_MIN: return emit_MIN(gen, inst); + case TGSI_OPCODE_FLR: + return emit_FLR(gen, inst); case TGSI_OPCODE_END: return emit_END(gen); -- cgit v1.2.3 From 33bef5866c81a7f358c0aa2e37e20443dafb9eb2 Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Fri, 19 Sep 2008 15:10:25 -0600 Subject: cell: Added FRC instruction --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 41 ++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index d2376aa0c22..1bc803d5908 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -886,7 +886,45 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) /* Convert int to float */ spe_csflt(gen->f, d_reg, d_reg, 0); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit frac. + * Input - FLR(Input) + */ +static boolean +emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "FLR:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int tmp_reg = get_itemp(gen); + /* If negative, subtract 1.0 */ + spe_xor(gen->f, tmp_reg, tmp_reg, tmp_reg); + spe_fcgt(gen->f, d_reg, tmp_reg, s1_reg); + spe_selb(gen->f, tmp_reg, tmp_reg, get_const_one_reg(gen), d_reg); + spe_fs(gen->f, d_reg, s1_reg, tmp_reg); + + /* Convert float to int */ + spe_cflts(gen->f, d_reg, d_reg, 0); + + /* Convert int to float */ + spe_csflt(gen->f, d_reg, d_reg, 0); + + /* d = s1 - FLR(s1) */ + spe_fs(gen->f, d_reg, s1_reg, d_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -896,6 +934,7 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } + /** * Emit max. See emit_SGT for comments. */ @@ -1111,6 +1150,8 @@ emit_instruction(struct codegen *gen, return emit_MIN(gen, inst); case TGSI_OPCODE_FLR: return emit_FLR(gen, inst); + case TGSI_OPCODE_FRC: + return emit_FRC(gen, inst); case TGSI_OPCODE_END: return emit_END(gen); -- cgit v1.2.3 From aca74a4d92ba6f99d756ab703a78efc3918b3840 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 19 Sep 2008 17:55:10 -0600 Subject: cell: make sure the fragment ops and fragment shader code buffer is at a 32-byte boundary To make sure even/odd instructions hit the right pipes. --- src/gallium/drivers/cell/spu/spu_main.c | 4 +++- src/gallium/drivers/cell/spu/spu_main.h | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 6b624175584..b4d30228f7a 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -705,6 +705,8 @@ main(main_param_t speid, main_param_t argp) ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); ASSERT(sizeof(struct cell_command_render) % 8 == 0); + ASSERT(((unsigned long) &spu.fragment_ops_code) % 32 == 0); + ASSERT(((unsigned long) &spu.fragment_program_code) % 32 == 0); one_time_init(); @@ -721,7 +723,7 @@ main(main_param_t speid, main_param_t argp) #if 0 if (spu.init.id==0) - spu_test_misc(); + spu_test_misc(spu.init.id); #endif main_loop(); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 2c7b6258402..72e540fcff2 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -143,13 +143,13 @@ struct spu_global ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - /** Current fragment ops machine code */ - uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS]; + /** Current fragment ops machine code, at 32-byte boundary */ + uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN32_ATTRIB; /** Current fragment ops function */ spu_fragment_ops_func fragment_ops; - /** Current fragment program machine code */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; + /** Current fragment program machine code, at 32-byte boundary */ + uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN32_ATTRIB; /** Current fragment ops function */ spu_fragment_program_func fragment_program; -- cgit v1.2.3 From 70071484d95bed8c2c932d1c79e20230bcdbc1dc Mon Sep 17 00:00:00 2001 From: Patrice Mandin <pmandin@caramail.com> Date: Sun, 21 Sep 2008 14:02:30 +0200 Subject: nouveau: add flag for swizzled surface upload --- src/gallium/drivers/nouveau/nouveau_bo.h | 1 + src/gallium/drivers/nv30/nv30_fragtex.c | 34 ++++++++++++++++++-------------- 2 files changed, 20 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_bo.h b/src/gallium/drivers/nouveau/nouveau_bo.h index 65b138283c4..0ed3367815a 100644 --- a/src/gallium/drivers/nouveau/nouveau_bo.h +++ b/src/gallium/drivers/nouveau/nouveau_bo.h @@ -37,6 +37,7 @@ #define NOUVEAU_BO_LOCAL (1 << 9) #define NOUVEAU_BO_TILED (1 << 10) #define NOUVEAU_BO_ZTILE (1 << 11) +#define NOUVEAU_BO_SWIZZLED (1 << 12) #define NOUVEAU_BO_DUMMY (1 << 31) struct nouveau_bo { diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 91246f5ca0a..9e6f746a420 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -26,7 +26,7 @@ static INLINE int log2i(int i) return r; } -#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,swsurf) \ { \ TRUE, \ PIPE_FORMAT_##m, \ @@ -35,6 +35,7 @@ static INLINE int log2i(int i) NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \ + swsurf \ } struct nv30_texture_format { @@ -42,24 +43,25 @@ struct nv30_texture_format { uint pipe; int format; int swizzle; + int swizzled_surface; }; static struct nv30_texture_format nv30_texture_formats[] = { - _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), - _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), - _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), - _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), - _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), - _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), - _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), - _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), -// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), -// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), - _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), - _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), - _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), - _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W), + _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 1), + _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 1), + _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 1), + _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 1), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 1), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 1), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 1), + _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 1), +// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0), +// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0), {}, }; @@ -96,6 +98,8 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) if (!tf) assert(0); + tex_flags |= (tf->swizzled_surface ? NOUVEAU_BO_SWIZZLED : 0); + txf = tf->format; txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); txf |= log2i(pt->width[0]) << 20; -- cgit v1.2.3 From 56c476395ffdff2cfbc0adb9b87e5b308ee3066a Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Mon, 22 Sep 2008 10:54:50 -0600 Subject: cell: Added DPH instruction and verified against softpipe. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 41 ++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 1bc803d5908..f4e651c8ebf 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -633,6 +633,45 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit homogeneous dot product. See emit_ADD for comments. + */ +static boolean +emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "DPH:"); + + int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]); + /* d = x * x */ + spe_fm(gen->f, d_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + /* d = y * y + d */ + spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + + s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* d = z * z + d */ + spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + + s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + /* d = w + d */ + spe_fa(gen->f, d_reg, s2_reg, d_reg); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + } + } + + free_itemps(gen); + return true; +} + /** * Emit set-if-greater-than. * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as @@ -1124,6 +1163,8 @@ emit_instruction(struct codegen *gen, return emit_DP3(gen, inst); case TGSI_OPCODE_DP4: return emit_DP4(gen, inst); + case TGSI_OPCODE_DPH: + return emit_DPH(gen, inst); case TGSI_OPCODE_RCP: return emit_RCP(gen, inst); case TGSI_OPCODE_RSQ: -- cgit v1.2.3 From 6b3ec9ec2b96e33f975852ee9f4751c6fefe9869 Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Mon, 22 Sep 2008 13:13:50 -0600 Subject: cell: Added TRUNC, SWZ (extended) and XPD instructions, verified against softpipe. Optimized FLR and FRC. Fixed writeback logic for DP3, DP4 and DPH. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 201 ++++++++++++++++++++++------- 1 file changed, 156 insertions(+), 45 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index f4e651c8ebf..4b8189207d3 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -181,8 +181,8 @@ get_src_reg(struct codegen *gen, boolean reg_is_itemp = FALSE; uint sign_op; - assert(swizzle >= 0); - assert(swizzle <= 3); + assert(swizzle >= TGSI_SWIZZLE_X); + assert(swizzle <= TGSI_EXTSWIZZLE_ONE); channel = swizzle; @@ -192,12 +192,28 @@ get_src_reg(struct codegen *gen, break; case TGSI_FILE_INPUT: { - /* offset is measured in quadwords, not bytes */ - int offset = src->SrcRegister.Index * 4 + channel; - reg = get_itemp(gen); - reg_is_itemp = TRUE; - /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->inputs_reg, offset); + if(channel == TGSI_EXTSWIZZLE_ONE) + { + /* Load const one float and early out */ + reg = get_const_one_reg(gen); + return reg; + } + else if(channel == TGSI_EXTSWIZZLE_ZERO) + { + /* Load const zero float and early out */ + reg = get_itemp(gen); + spe_xor(gen->f, reg, reg, reg); + return reg; + } + else + { + /* offset is measured in quadwords, not bytes */ + int offset = src->SrcRegister.Index * 4 + channel; + reg = get_itemp(gen); + reg_is_itemp = TRUE; + /* Load: reg = memory[(machine_reg) + offset] */ + spe_lqd(gen->f, reg, gen->inputs_reg, offset); + } } break; case TGSI_FILE_IMMEDIATE: @@ -355,8 +371,6 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } - - /** * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD * becomes (up to) four SPU "fa" instructions because we're doing SOA @@ -569,23 +583,23 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]); - /* d = x * x */ - spe_fm(gen->f, d_reg, s1_reg, s2_reg); + int tmp_reg = get_itemp(gen); + /* t = x0 * x1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - /* d = y * y + d */ - spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + /* t = y0 * y1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); - /* d = z * z + d */ - spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + /* t = z0 * z1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); } } @@ -600,32 +614,32 @@ static boolean emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; - spe_comment(gen->f, -4, "DP3:"); + spe_comment(gen->f, -4, "DP4:"); int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]); - /* d = x * x */ - spe_fm(gen->f, d_reg, s1_reg, s2_reg); + int tmp_reg = get_itemp(gen); + /* t = x0 * x1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - /* d = y * y + d */ - spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + /* t = y0 * y1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); - /* d = z * z + d */ - spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + /* t = z0 * z1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); s1_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); - /* d = w * w + d */ - spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + /* t = w0 * w1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); } } @@ -644,27 +658,28 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]); - /* d = x * x */ - spe_fm(gen->f, d_reg, s1_reg, s2_reg); + int tmp_reg = get_itemp(gen); + + /* t = x0 * x1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - /* d = y * y + d */ - spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + /* t = y0 * y1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); - /* d = z * z + d */ - spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg); + /* t = z0 * z1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); - /* d = w + d */ - spe_fa(gen->f, d_reg, s2_reg, d_reg); + /* t = w1 + t */ + spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); } } @@ -672,6 +687,62 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit cross product. See emit_ADD for comments. + */ +static boolean +emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + spe_comment(gen->f, -4, "XPD:"); + + int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + int tmp_reg = get_itemp(gen); + + /* t = z0 * y1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = y0 * z1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) { + store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]); + } + + s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = x0 * z1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + /* t = z0 * x1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) { + store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]); + } + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + /* t = y0 * x1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + /* t = x0 * y1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) { + store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return true; +} + /** * Emit set-if-greater-than. * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as @@ -894,6 +965,37 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) return true; } +/** + * Emit trunc. + * Convert float to signed int + * Convert signed int to float + */ +static boolean +emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "TRUNC:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* Convert float to int */ + spe_cflts(gen->f, d_reg, s1_reg, 0); + + /* Convert int to float */ + spe_csflt(gen->f, d_reg, d_reg, 0); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + /** * Emit floor. * If negative int subtract one @@ -907,6 +1009,9 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_comment(gen->f, -4, "FLR:"); + int zero_reg = get_itemp(gen); + spe_xor(gen->f, zero_reg, zero_reg, zero_reg); + for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -914,9 +1019,8 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) int tmp_reg = get_itemp(gen); /* If negative, subtract 1.0 */ - spe_xor(gen->f, tmp_reg, tmp_reg, tmp_reg); - spe_fcgt(gen->f, d_reg, tmp_reg, s1_reg); - spe_selb(gen->f, tmp_reg, tmp_reg, get_const_one_reg(gen), d_reg); + spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); + spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), d_reg); spe_fs(gen->f, d_reg, s1_reg, tmp_reg); /* Convert float to int */ @@ -944,6 +1048,9 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_comment(gen->f, -4, "FLR:"); + int zero_reg = get_itemp(gen); + spe_xor(gen->f, zero_reg, zero_reg, zero_reg); + for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -951,9 +1058,8 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) int tmp_reg = get_itemp(gen); /* If negative, subtract 1.0 */ - spe_xor(gen->f, tmp_reg, tmp_reg, tmp_reg); - spe_fcgt(gen->f, d_reg, tmp_reg, s1_reg); - spe_selb(gen->f, tmp_reg, tmp_reg, get_const_one_reg(gen), d_reg); + spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); + spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), d_reg); spe_fs(gen->f, d_reg, s1_reg, tmp_reg); /* Convert float to int */ @@ -1148,6 +1254,7 @@ emit_instruction(struct codegen *gen, { switch (inst->Instruction.Opcode) { case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: return emit_MOV(gen, inst); case TGSI_OPCODE_MUL: return emit_MUL(gen, inst); @@ -1165,6 +1272,8 @@ emit_instruction(struct codegen *gen, return emit_DP4(gen, inst); case TGSI_OPCODE_DPH: return emit_DPH(gen, inst); + case TGSI_OPCODE_XPD: + return emit_XPD(gen, inst); case TGSI_OPCODE_RCP: return emit_RCP(gen, inst); case TGSI_OPCODE_RSQ: @@ -1189,6 +1298,8 @@ emit_instruction(struct codegen *gen, return emit_MAX(gen, inst); case TGSI_OPCODE_MIN: return emit_MIN(gen, inst); + case TGSI_OPCODE_TRUNC: + return emit_TRUNC(gen, inst); case TGSI_OPCODE_FLR: return emit_FLR(gen, inst); case TGSI_OPCODE_FRC: -- cgit v1.2.3 From 6642380841b8cc0d166bf1c6a76be786e1c50825 Mon Sep 17 00:00:00 2001 From: Jonathan White <jwhite@tungstengraphics.com> Date: Mon, 22 Sep 2008 14:33:53 -0600 Subject: cell: Fixed bug with absolute, negate, set-negative logic in source fetch for TGSI instructions. The logic should operate on the origin channel not the swizzled channel. Please enter the commit message for your changes. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 4b8189207d3..8972b5b1ea9 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -184,31 +184,27 @@ get_src_reg(struct codegen *gen, assert(swizzle >= TGSI_SWIZZLE_X); assert(swizzle <= TGSI_EXTSWIZZLE_ONE); - channel = swizzle; - switch (src->SrcRegister.File) { case TGSI_FILE_TEMPORARY: - reg = gen->temp_regs[src->SrcRegister.Index][channel]; + reg = gen->temp_regs[src->SrcRegister.Index][swizzle]; break; case TGSI_FILE_INPUT: { - if(channel == TGSI_EXTSWIZZLE_ONE) + if(swizzle == TGSI_EXTSWIZZLE_ONE) { /* Load const one float and early out */ reg = get_const_one_reg(gen); - return reg; } - else if(channel == TGSI_EXTSWIZZLE_ZERO) + else if(swizzle == TGSI_EXTSWIZZLE_ZERO) { /* Load const zero float and early out */ reg = get_itemp(gen); spe_xor(gen->f, reg, reg, reg); - return reg; } else { /* offset is measured in quadwords, not bytes */ - int offset = src->SrcRegister.Index * 4 + channel; + int offset = src->SrcRegister.Index * 4 + swizzle; reg = get_itemp(gen); reg_is_itemp = TRUE; /* Load: reg = memory[(machine_reg) + offset] */ @@ -217,7 +213,7 @@ get_src_reg(struct codegen *gen, } break; case TGSI_FILE_IMMEDIATE: - reg = gen->imm_regs[src->SrcRegister.Index][channel]; + reg = gen->imm_regs[src->SrcRegister.Index][swizzle]; break; case TGSI_FILE_CONSTANT: /* xxx fall-through for now / fix */ -- cgit v1.2.3 From 1c79cf15c48e51cb5cf790f44214ae6aaf78c69b Mon Sep 17 00:00:00 2001 From: Robert Ellison <papillo@tungstengraphics.com> Date: Tue, 23 Sep 2008 10:11:59 -0600 Subject: CELL: fix colormask code generation The colormask code generation had assumed that its input packed pixels were in RGBA format. In fact, the format they're in is dependent on the pipe color format. Now the color format is passed in to gen_colormask(), and proper color format-dependent SPU code is generated. --- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 161 +++++++++++------------ 1 file changed, 78 insertions(+), 83 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 1837b4c79bd..3b166e446d6 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -971,87 +971,6 @@ gen_logicop(const struct pipe_blend_state *blend, } -static void -gen_colormask(uint colormask, - struct spe_function *f, - int fragRGBA_reg, int fbRGBA_reg) -{ - /* We've got four 32-bit RGBA packed pixels in each of - * fragRGBA_reg and fbRGBA_reg, not sets of floating-point - * reds, greens, blues, and alphas. - * */ - - /* The color mask operation can prevent any set of color - * components in the incoming fragment from being written to the frame - * buffer; we do this by replacing the masked components of the - * fragment with the frame buffer values. - * - * There are only 16 possibilities, with a unique mask for - * each of the possibilities. (Technically, there are only 15 - * possibilities, since we shouldn't be called for the one mask - * that does nothing, but the complete implementation is here - * anyway to avoid confusion.) - * - * We implement this via a constant static array which we'll index - * into to get the correct mask. - * - * We're dependent on the mask values being low-order bits, - * with particular values for each bit; so we start with a - * few assertions, which will fail if any of the values were - * to change. - */ - ASSERT(PIPE_MASK_R == 0x1); - ASSERT(PIPE_MASK_G == 0x2); - ASSERT(PIPE_MASK_B == 0x4); - ASSERT(PIPE_MASK_A == 0x8); - - /* Here's the list of all possible colormasks, indexed by the - * value of the combined mask specifier. - */ - static const unsigned int colormasks[16] = { - 0x00000000, /* 0: all colors masked */ - 0xff000000, /* 1: PIPE_MASK_R */ - 0x00ff0000, /* 2: PIPE_MASK_G */ - 0xffff0000, /* 3: PIPE_MASK_R | PIPE_MASK_G */ - 0x0000ff00, /* 4: PIPE_MASK_B */ - 0xff00ff00, /* 5: PIPE_MASK_R | PIPE_MASK_B */ - 0x00ffff00, /* 6: PIPE_MASK_G | PIPE_MASK_B */ - 0xffffff00, /* 7: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B */ - 0x000000ff, /* 8: PIPE_MASK_A */ - 0xff0000ff, /* 9: PIPE_MASK_R | PIPE_MASK_A */ - 0x00ff00ff, /* 10: PIPE_MASK_G | PIPE_MASK_A */ - 0xffff00ff, /* 11: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_A */ - 0x0000ffff, /* 12: PIPE_MASK_B | PIPE_MASK_A */ - 0xff00ffff, /* 13: PIPE_MASK_R | PIPE_MASK_B | PIPE_MASK_A */ - 0x00ffffff, /* 14: PIPE_MASK_G | PIPE_MASK_B | PIPE_MASK_A */ - 0xffffffff /* 15: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B | PIPE_MASK_A */ - }; - - /* Get a temporary register to hold the mask */ - int colormask_reg = spe_allocate_available_register(f); - - /* Look up the desired mask directly and load it into the mask register. - * This will load the same mask into each of the four words in the - * mask register. - */ - spe_load_uint(f, colormask_reg, colormasks[colormask]); - - /* Use the mask register to select between the fragment color - * values and the frame buffer color values. Wherever the - * mask has a 0 bit, the current frame buffer color should override - * the fragment color. Wherever the mask has a 1 bit, the - * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) - * instruction will select bits from its first operand rA wherever the - * the mask bits rM are 0, and from its second operand rB wherever the - * mask bits rM are 1. That means that the frame buffer color is the - * first operand, and the fragment color the second. - */ - spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); - - /* Release the temporary register and we're done */ - spe_release_register(f, colormask_reg); -} - /** * Generate code to pack a quad of float colors into four 32-bit integers. * @@ -1118,8 +1037,85 @@ gen_pack_colors(struct spe_function *f, spe_release_register(f, ba_reg); } +static void +gen_colormask(struct spe_function *f, + uint colormask, + enum pipe_format color_format, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. Further, the pixels + * are packed according to the given color format, not + * necessarily RGBA... + */ + unsigned int r_mask; + unsigned int g_mask; + unsigned int b_mask; + unsigned int a_mask; + + /* Calculate exactly where the bits for any particular color + * end up, so we can mask them correctly. + */ + switch(color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* ARGB */ + a_mask = 0xff000000; + r_mask = 0x00ff0000; + g_mask = 0x0000ff00; + b_mask = 0x000000ff; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* BGRA */ + b_mask = 0xff000000; + g_mask = 0x00ff0000; + r_mask = 0x0000ff00; + a_mask = 0x000000ff; + break; + default: + ASSERT(0); + } + /* For each R, G, B, and A component we're supposed to mask out, + * clear its bits. Then our mask operation later will work + * as expected. + */ + if (!(colormask & PIPE_MASK_R)) { + r_mask = 0; + } + if (!(colormask & PIPE_MASK_G)) { + g_mask = 0; + } + if (!(colormask & PIPE_MASK_B)) { + b_mask = 0; + } + if (!(colormask & PIPE_MASK_A)) { + a_mask = 0; + } + + /* Get a temporary register to hold the mask that will be applied to the fragment */ + int colormask_reg = spe_allocate_available_register(f); + /* The actual mask we're going to use is an OR of the remaining R, G, B, and A + * masks. Load the result value into our temporary register. + */ + spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask); + + /* Use the mask register to select between the fragment color + * values and the frame buffer color values. Wherever the + * mask has a 0 bit, the current frame buffer color should override + * the fragment color. Wherever the mask has a 1 bit, the + * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) + * instruction will select bits from its first operand rA wherever the + * the mask bits rM are 0, and from its second operand rB wherever the + * mask bits rM are 1. That means that the frame buffer color is the + * first operand, and the fragment color the second. + */ + spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); + + /* Release the temporary register and we're done */ + spe_release_register(f, colormask_reg); +} /** * Generate SPE code to implement the fragment operations (alpha test, @@ -1383,7 +1379,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) } if (blend->colormask != PIPE_MASK_RGBA) { - gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg); + gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg); } @@ -1407,7 +1403,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ - spe_release_register(f, fbRGBA_reg); spe_release_register(f, fbZS_reg); spe_release_register(f, quad_offset_reg); -- cgit v1.2.3 From f5127909fb0386c2c11a2c26886eb02808ed514e Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 09:32:09 -0600 Subject: cell: inst reorder to save a cycle --- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 3b166e446d6..a353756c711 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -1196,8 +1196,8 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) ASSERT(TILE_SIZE == 32); - spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ + spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ -- cgit v1.2.3 From 164fb1299e1614ce05ae539d832567469eedb402 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 09:38:40 -0600 Subject: cell: checkpoint: support for function calls in SPU shaders Will be used for instructions like SIN/COS/POW/TEX/etc. The PPU needs to know the address of some functions in the SPU address space. Send that info to the PPU/main memory rather than patch up shaders on the SPU side. Not finished/tested yet... --- src/gallium/drivers/cell/common.h | 18 ++++- src/gallium/drivers/cell/ppu/cell_context.h | 1 + src/gallium/drivers/cell/ppu/cell_gen_fp.c | 81 ++++++++++++++++++++- src/gallium/drivers/cell/ppu/cell_spu.c | 8 +++ src/gallium/drivers/cell/spu/Makefile | 3 +- src/gallium/drivers/cell/spu/spu_funcs.c | 106 ++++++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_funcs.h | 35 +++++++++ src/gallium/drivers/cell/spu/spu_main.c | 5 ++ 8 files changed, 254 insertions(+), 3 deletions(-) create mode 100644 src/gallium/drivers/cell/spu/spu_funcs.c create mode 100644 src/gallium/drivers/cell/spu/spu_funcs.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index f0ff96eb478..99329fd8e22 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -130,7 +130,7 @@ struct cell_command_fragment_ops #define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128 /** - * Command to send a fragment progra to SPUs. + * Command to send a fragment program to SPUs. */ struct cell_command_fragment_program { @@ -267,6 +267,20 @@ struct cell_command } ALIGN16_ATTRIB; +#define MAX_SPU_FUNCTIONS 12 +/** + * Used to tell the PPU about the address of particular functions in the + * SPU's address space. + */ +struct cell_spu_function_info +{ + uint num; + char names[MAX_SPU_FUNCTIONS][16]; + uint addrs[MAX_SPU_FUNCTIONS]; + char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */ +}; + + /** This is the object passed to spe_create_thread() */ struct cell_init_info { @@ -278,6 +292,8 @@ struct cell_init_info /** Buffers for command batches, vertex/index data */ ubyte *buffers[CELL_NUM_BUFFERS]; uint *buffer_status; /**< points at cell_context->buffer_status */ + + struct cell_spu_function_info *spu_functions; } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 14914b9c6f8..a9ad84bb184 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -149,6 +149,7 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; + struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; uint num_spus; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 8972b5b1ea9..fd12af19cef 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -37,7 +37,7 @@ * \author Brian Paul */ - +#include <math.h> #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" @@ -64,6 +64,7 @@ */ struct codegen { + struct cell_context *cell; int inputs_reg; /**< 1st function parameter */ int outputs_reg; /**< 2nd function parameter */ int constants_reg; /**< 3rd function parameter */ @@ -1076,6 +1077,76 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) } +#if 0 +static void +print_functions(struct cell_context *cell) +{ + struct cell_spu_function_info *funcs = &cell->spu_functions; + uint i; + for (i = 0; i < funcs->num; i++) { + printf("SPU func %u: %s at %u\n", + i, funcs->names[i], funcs->addrs[i]); + } +} +#endif + + +/** + * Emit code to call a SPU function. + * Used to implement instructions like SIN/COS/POW/TEX/etc. + */ +static boolean +emit_function_call(struct codegen *gen, + const struct tgsi_full_instruction *inst, + char *funcname, uint num_args) +{ + const struct cell_spu_function_info *funcs = &gen->cell->spu_functions; + char comment[100]; + uint addr; + int ch; + + assert(num_args <= 2); + + /* lookup function address */ + { + uint i; + addr = 0; + for (i = 0; i < funcs->num; i++) { + if (strcmp(funcs->names[i], funcname) == 0) { + addr = funcs->addrs[i]; + } + } + assert(addr && "spu function not found"); + } + + sprintf(comment, "CALL %s:", funcname); + spe_comment(gen->f, -4, comment); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int s_regs[3]; + uint a; + for (a = 0; a < num_args; a++) { + s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); + } + + /* XXX not done */ + (void) s_regs; + (void) d_reg; + + spe_bisl(gen->f, SPE_REG_RA, addr, 0, 0); /* XXX untested! */ + + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + + /** * Emit max. See emit_SGT for comments. */ @@ -1303,6 +1374,13 @@ emit_instruction(struct codegen *gen, case TGSI_OPCODE_END: return emit_END(gen); + case TGSI_OPCODE_COS: + return emit_function_call(gen, inst, "spu_cos", 1); + case TGSI_OPCODE_SIN: + return emit_function_call(gen, inst, "spu_sin", 1); + case TGSI_OPCODE_POW: + return emit_function_call(gen, inst, "spu_pow", 2); + case TGSI_OPCODE_IF: return emit_IF(gen, inst); case TGSI_OPCODE_ELSE: @@ -1431,6 +1509,7 @@ cell_gen_fragment_program(struct cell_context *cell, struct codegen gen; memset(&gen, 0, sizeof(gen)); + gen.cell = cell; gen.f = f; /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 9508227e298..df020c4146d 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -36,6 +36,7 @@ #include "cell_spu.h" #include "pipe/p_format.h" #include "pipe/p_state.h" +#include "util/u_memory.h" #include "cell/common.h" @@ -131,6 +132,11 @@ cell_start_spus(struct cell_context *cell) ASSERT_ALIGN16(&cell_global.inits[0]); ASSERT_ALIGN16(&cell_global.inits[1]); + /* + * Initialize the global 'inits' structure for each SPU. + * A pointer to the init struct will be passed to each SPU. + * The SPUs will then each grab their init info with mfc_get(). + */ for (i = 0; i < cell->num_spus; i++) { cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; @@ -141,6 +147,8 @@ cell_start_spus(struct cell_context *cell) } cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; + cell_global.inits[i].spu_functions = &cell->spu_functions; + cell_global.spe_contexts[i] = spe_context_create(0, NULL); if (!cell_global.spe_contexts[i]) { fprintf(stderr, "spe_context_create() failed\n"); diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index 1ae0dfb8c10..c2db85247e0 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -16,8 +16,9 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o SOURCES = \ - spu_main.c \ + spu_funcs.c \ spu_dcache.c \ + spu_main.c \ spu_per_fragment_op.c \ spu_render.c \ spu_texture.c \ diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c new file mode 100644 index 00000000000..d1749565187 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -0,0 +1,106 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * SPU functions accessed by shaders. + * + * Authors: Brian Paul + */ + + +#include <string.h> +#include <libmisc.h> +#include <cos8_v.h> +#include <sin8_v.h> + +#include "cell/common.h" +#include "spu_main.h" +#include "spu_funcs.h" + + +#define M_PI 3.1415926 + + +static vector float +spu_cos(vector float x) +{ + static const float scale = 1.0 / (2.0 * M_PI); + x = x * spu_splats(scale); /* normalize */ + return _cos8_v(x); +} + +static vector float +spu_sin(vector float x) +{ + static const float scale = 1.0 / (2.0 * M_PI); + x = x * spu_splats(scale); /* normalize */ + return _sin8_v(x); /* 8-bit accuracy enough?? */ +} + + +static void +add_func(struct cell_spu_function_info *spu_functions, + const char *name, void *addr) +{ + uint n = spu_functions->num; + ASSERT(strlen(name) < 16); + strcpy(spu_functions->names[n], name); + spu_functions->addrs[n] = (uint) addr; + spu_functions->num++; +} + + +/** + * Return info about the SPU's function to the PPU / main memory. + * The PPU needs to know the address of some SPU-side functions so + * that we can generate shader code with function calls. + */ +void +return_function_info(void) +{ + struct cell_spu_function_info funcs ALIGN16_ATTRIB; + int tag = TAG_MISC; + + ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ + + funcs.num = 0; + add_func(&funcs, "spu_cos", &spu_cos); + add_func(&funcs, "spu_sin", &spu_sin); + + /* Send the function info back to the PPU / main memory */ + mfc_put((void *) &funcs, /* src in local store */ + (unsigned int) spu.init.spu_functions, /* dst in main memory */ + sizeof(funcs), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << tag); +} + + + diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h new file mode 100644 index 00000000000..3adb6ae99f9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_funcs.h @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_FUNCS_H +#define SPU_FUNCS_H + +extern void +return_function_info(void); + +#endif + diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index b4d30228f7a..6ef65d5645d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -32,6 +32,7 @@ #include <stdio.h> #include <libmisc.h> +#include "spu_funcs.h" #include "spu_main.h" #include "spu_render.h" #include "spu_per_fragment_op.h" @@ -721,6 +722,10 @@ main(main_param_t speid, main_param_t argp) 0 /* rid */); wait_on_mask( 1 << tag ); + if (spu.init.id == 0) { + return_function_info(); + } + #if 0 if (spu.init.id==0) spu_test_misc(spu.init.id); -- cgit v1.2.3 From 6741739d1e7a2c66576b671a81eaf0c4b9737ec2 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 09:48:17 -0600 Subject: cell: remove unneeded blend/depth_stencil subclasses --- src/gallium/drivers/cell/ppu/cell_context.h | 33 ++---------------- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 5 ++- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 44 +++++------------------- src/gallium/drivers/cell/ppu/cell_state_emit.c | 5 ++- 4 files changed, 15 insertions(+), 72 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index a9ad84bb184..3dc15c9233c 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -66,35 +66,6 @@ struct cell_fragment_shader_state }; -/** - * Cell blend state atom, subclass of pipe_blend_state. - */ -struct cell_blend_state -{ - struct pipe_blend_state base; - - /** - * Generated code to perform alpha blending - */ - struct spe_function code; -}; - - -/** - * Cell depth/stencil/alpha state atom, subclass of - * pipe_depth_stencil_alpha_state. - */ -struct cell_depth_stencil_alpha_state -{ - struct pipe_depth_stencil_alpha_state base; - - /** - * Generated code to perform alpha, stencil, and depth testing on the SPE - */ - struct spe_function code; -}; - - /** * Per-context state, subclass of pipe_context. */ @@ -104,10 +75,10 @@ struct cell_context struct cell_winsys *winsys; - const struct cell_blend_state *blend; + const struct pipe_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; uint num_samplers; - const struct cell_depth_stencil_alpha_state *depth_stencil; + const struct pipe_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; const struct cell_vertex_shader_state *vs; const struct cell_fragment_shader_state *fs; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index a353756c711..653afc235df 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -1140,9 +1140,8 @@ gen_colormask(struct spe_function *f, void cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) { - const struct pipe_depth_stencil_alpha_state *dsa = - &cell->depth_stencil->base; - const struct pipe_blend_state *blend = &cell->blend->base; + const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil; + const struct pipe_blend_state *blend = cell->blend; const struct pipe_blend_color *blend_color = &cell->blend_color; const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index ea820aca744..b545d2d6975 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -37,7 +37,6 @@ #include "cell_flush.h" #include "cell_state.h" #include "cell_texture.h" -#include "cell_state_per_fragment.h" @@ -45,13 +44,7 @@ static void * cell_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { - struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state)); - - (void) memcpy(cb, blend, sizeof(*blend)); -#if 0 - cell_generate_alpha_blend(cb); -#endif - return cb; + return mem_dup(blend, sizeof(*blend)); } @@ -62,7 +55,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *state) draw_flush(cell->draw); - cell->blend = (struct cell_blend_state *) state; + cell->blend = (struct pipe_blend_state *) state; cell->dirty |= CELL_NEW_BLEND; } @@ -70,12 +63,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *state) static void cell_delete_blend_state(struct pipe_context *pipe, void *blend) { - struct cell_blend_state *cb = (struct cell_blend_state *) blend; - -#if 0 - spe_release_func(& cb->code); -#endif - FREE(cb); + FREE(blend); } @@ -97,43 +85,29 @@ cell_set_blend_color(struct pipe_context *pipe, static void * cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) + const struct pipe_depth_stencil_alpha_state *dsa) { - struct cell_depth_stencil_alpha_state *cdsa = - MALLOC(sizeof(struct cell_depth_stencil_alpha_state)); - - (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil)); -#if 0 - cell_generate_depth_stencil_test(cdsa); -#endif - return cdsa; + return mem_dup(dsa, sizeof(*dsa)); } static void cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, - void *depth_stencil) + void *dsa) { struct cell_context *cell = cell_context(pipe); draw_flush(cell->draw); - cell->depth_stencil = - (struct cell_depth_stencil_alpha_state *) depth_stencil; + cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa; cell->dirty |= CELL_NEW_DEPTH_STENCIL; } static void -cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) +cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa) { - struct cell_depth_stencil_alpha_state *cdsa = - (struct cell_depth_stencil_alpha_state *) depth; - -#if 0 - spe_release_func(& cdsa->code); -#endif - FREE(cdsa); + FREE(dsa); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 8a389cd6aae..f35893537bf 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -30,7 +30,6 @@ #include "cell_gen_fragment.h" #include "cell_state.h" #include "cell_state_emit.h" -#include "cell_state_per_fragment.h" #include "cell_batch.h" #include "cell_texture.h" #include "draw/draw_context.h" @@ -110,8 +109,8 @@ cell_emit_state(struct cell_context *cell) fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; memcpy(&fops->code, spe_code.store, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - fops->dsa = cell->depth_stencil->base; - fops->blend = cell->blend->base; + fops->dsa = *cell->depth_stencil; + fops->blend = *cell->blend; /* free codegen buffer */ spe_release_func(&spe_code); -- cgit v1.2.3 From b5303446a8683afdb3247f2aaf01b6df2cb7d280 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 09:53:03 -0600 Subject: cell: asst clean-up, var renaming --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index b545d2d6975..8c55b8e0933 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -49,13 +49,13 @@ cell_create_blend_state(struct pipe_context *pipe, static void -cell_bind_blend_state(struct pipe_context *pipe, void *state) +cell_bind_blend_state(struct pipe_context *pipe, void *blend) { struct cell_context *cell = cell_context(pipe); draw_flush(cell->draw); - cell->blend = (struct pipe_blend_state *) state; + cell->blend = (struct pipe_blend_state *) blend; cell->dirty |= CELL_NEW_BLEND; } @@ -169,24 +169,23 @@ cell_set_polygon_stipple( struct pipe_context *pipe, static void * cell_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *setup) + const struct pipe_rasterizer_state *rasterizer) { - struct pipe_rasterizer_state *state - = MALLOC(sizeof(struct pipe_rasterizer_state)); - memcpy(state, setup, sizeof(struct pipe_rasterizer_state)); - return state; + return mem_dup(rasterizer, sizeof(*rasterizer)); } static void -cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup) +cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast) { + struct pipe_rasterizer_state *rasterizer = + (struct pipe_rasterizer_state *) rast; struct cell_context *cell = cell_context(pipe); /* pass-through to draw module */ - draw_set_rasterizer_state(cell->draw, setup); + draw_set_rasterizer_state(cell->draw, rasterizer); - cell->rasterizer = (struct pipe_rasterizer_state *)setup; + cell->rasterizer = rasterizer; cell->dirty |= CELL_NEW_RASTERIZER; } -- cgit v1.2.3 From bac5900a14b85a6513fae7eef19a5ed1d26b2011 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 09:58:17 -0600 Subject: cell: align instruction buffers to 8-byte, not 32-byte boundary --- src/gallium/drivers/cell/spu/spu_main.c | 4 ++-- src/gallium/drivers/cell/spu/spu_main.h | 8 ++++---- src/gallium/include/pipe/p_compiler.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 6ef65d5645d..8f3e3785c17 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -706,8 +706,8 @@ main(main_param_t speid, main_param_t argp) ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); ASSERT(sizeof(struct cell_command_render) % 8 == 0); - ASSERT(((unsigned long) &spu.fragment_ops_code) % 32 == 0); - ASSERT(((unsigned long) &spu.fragment_program_code) % 32 == 0); + ASSERT(((unsigned long) &spu.fragment_ops_code) % 8 == 0); + ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); one_time_init(); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 72e540fcff2..29a305232ec 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -143,13 +143,13 @@ struct spu_global ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - /** Current fragment ops machine code, at 32-byte boundary */ - uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN32_ATTRIB; + /** Current fragment ops machine code, at 8-byte boundary */ + uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; /** Current fragment ops function */ spu_fragment_ops_func fragment_ops; - /** Current fragment program machine code, at 32-byte boundary */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN32_ATTRIB; + /** Current fragment program machine code, at 8-byte boundary */ + uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; /** Current fragment ops function */ spu_fragment_program_func fragment_program; diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 1e702c7fa74..7bcebd3d6b6 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -144,12 +144,12 @@ typedef unsigned char boolean; #define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) #define ALIGN16_ASSIGN(NAME) NAME##___aligned #define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) -#define ALIGN32_ATTRIB __attribute__(( aligned( 32 ) )) +#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) )) #else #define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1] #define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned) #define ALIGN16_ATTRIB -#define ALIGN32_ATTRIB +#define ALIGN8_ATTRIB #endif -- cgit v1.2.3 From a1189ea882714282b884d37e530cd638dd4ca660 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 10:00:14 -0600 Subject: cell: move really_clear_tiles() --- src/gallium/drivers/cell/spu/spu_main.c | 38 --------------------------------- src/gallium/drivers/cell/spu/spu_tile.c | 37 ++++++++++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_tile.h | 6 ++++-- 3 files changed, 41 insertions(+), 40 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 8f3e3785c17..b45e79a30b1 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -117,44 +117,6 @@ release_buffer(uint buffer) } -/** - * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled - * tiles back to the main framebuffer. - */ -static void -really_clear_tiles(uint surfaceIndex) -{ - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - - if (surfaceIndex == 0) { - clear_c_tile(&spu.ctile); - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - } - } - } - else { - clear_z_tile(&spu.ztile); - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); - } - } - -#if 0 - wait_on_mask(1 << TAG_SURFACE_CLEAR); -#endif -} - - static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c index 216a33126b7..6905015a483 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.c +++ b/src/gallium/drivers/cell/spu/spu_tile.c @@ -87,3 +87,40 @@ put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) 0 /* rid */); } + +/** + * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled + * tiles back to the main framebuffer. + */ +void +really_clear_tiles(uint surfaceIndex) +{ + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + + if (surfaceIndex == 0) { + clear_c_tile(&spu.ctile); + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + } + } + } + else { + clear_z_tile(&spu.ztile); + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); + } + } + +#if 0 + wait_on_mask(1 << TAG_SURFACE_CLEAR); +#endif +} diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h index 1b5491112db..7bfb52be8f3 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -36,12 +36,14 @@ -void +extern void get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); -void +extern void put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); +extern void +really_clear_tiles(uint surfaceIndex); static INLINE void -- cgit v1.2.3 From f45d39fa34ca36839c684fdcadd1476360de3a63 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 10:02:58 -0600 Subject: cell: move debug macros into new spu_debug.h --- src/gallium/drivers/cell/spu/spu_debug.h | 60 ++++++++++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_main.c | 30 ++-------------- 2 files changed, 63 insertions(+), 27 deletions(-) create mode 100644 src/gallium/drivers/cell/spu/spu_debug.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_debug.h b/src/gallium/drivers/cell/spu/spu_debug.h new file mode 100644 index 00000000000..bbe5889c4b3 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_debug.h @@ -0,0 +1,60 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_DEBUG_H +#define SPU_DEBUG_H + + +/* Set to 0 to disable all extraneous debugging code */ +#define DEBUG 1 + +#if DEBUG +boolean Debug = FALSE; +boolean force_fragment_ops_fallback = TRUE; + +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define DEBUG_PRINTF(format,...) \ + if (Debug) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) +#define D_PRINTF(flag, format,...) \ + if (spu.init.debug_flags & (flag)) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) + +#else + +#define DEBUG_PRINTF(...) +#define D_PRINTF(...) + +#endif + + +#endif /* SPU_DEBUG_H */ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index b45e79a30b1..ea01728824f 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -32,6 +32,8 @@ #include <stdio.h> #include <libmisc.h> +#include "pipe/p_defines.h" + #include "spu_funcs.h" #include "spu_main.h" #include "spu_render.h" @@ -41,8 +43,8 @@ //#include "spu_test.h" #include "spu_vertex_shader.h" #include "spu_dcache.h" +#include "spu_debug.h" #include "cell/common.h" -#include "pipe/p_defines.h" /* @@ -51,32 +53,6 @@ helpful headers: /opt/cell/sdk/usr/include/libmisc.h */ -/* Set to 0 to disable all extraneous debugging code */ -#define DEBUG 1 - -#if DEBUG -boolean Debug = FALSE; -boolean force_fragment_ops_fallback = TRUE; - -/* These debug macros use the unusual construction ", ##__VA_ARGS__" - * which expands to the expected comma + args if variadic arguments - * are supplied, but swallows the comma if there are no variadic - * arguments (which avoids syntax errors that would otherwise occur). - */ -#define DEBUG_PRINTF(format,...) \ - if (Debug) \ - printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) -#define D_PRINTF(flag, format,...) \ - if (spu.init.debug_flags & (flag)) \ - printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) - -#else - -#define DEBUG_PRINTF(...) -#define D_PRINTF(...) - -#endif - struct spu_global spu; struct spu_vs_context draw; -- cgit v1.2.3 From bb01c1a78eefeea6bc756d837fdd063660ac0230 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 10:10:08 -0600 Subject: cell: move debug-related declarations --- src/gallium/drivers/cell/spu/spu_debug.h | 4 ++-- src/gallium/drivers/cell/spu/spu_main.c | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_debug.h b/src/gallium/drivers/cell/spu/spu_debug.h index bbe5889c4b3..eeec0526558 100644 --- a/src/gallium/drivers/cell/spu/spu_debug.h +++ b/src/gallium/drivers/cell/spu/spu_debug.h @@ -34,8 +34,8 @@ #define DEBUG 1 #if DEBUG -boolean Debug = FALSE; -boolean force_fragment_ops_fallback = TRUE; +extern boolean Debug; +extern boolean force_fragment_ops_fallback; /* These debug macros use the unusual construction ", ##__VA_ARGS__" * which expands to the expected comma + args if variadic arguments diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index ea01728824f..bc94674fe82 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -58,6 +58,12 @@ struct spu_global spu; struct spu_vs_context draw; +#if DEBUG +boolean Debug = FALSE; +boolean force_fragment_ops_fallback = TRUE; +#endif + + /** * Buffers containing dynamically generated SPU code: */ -- cgit v1.2.3 From 9d00cd3fc726a3fe01b98fd222dd4c71b3e95d44 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 10:15:11 -0600 Subject: cell: move command processing code into new spu_command.c file --- src/gallium/drivers/cell/spu/Makefile | 3 +- src/gallium/drivers/cell/spu/spu_command.c | 599 +++++++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_command.h | 7 + src/gallium/drivers/cell/spu/spu_main.c | 558 +-------------------------- 4 files changed, 611 insertions(+), 556 deletions(-) create mode 100644 src/gallium/drivers/cell/spu/spu_command.c create mode 100644 src/gallium/drivers/cell/spu/spu_command.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index c2db85247e0..116453b79c5 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -16,8 +16,9 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o SOURCES = \ - spu_funcs.c \ + spu_command.c \ spu_dcache.c \ + spu_funcs.c \ spu_main.c \ spu_per_fragment_op.c \ spu_render.c \ diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c new file mode 100644 index 00000000000..ec9da5d8870 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -0,0 +1,599 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * SPU command processing code + */ + + +#include <stdio.h> +#include <libmisc.h> + +#include "pipe/p_defines.h" + +#include "spu_command.h" +#include "spu_main.h" +#include "spu_render.h" +#include "spu_per_fragment_op.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_vertex_shader.h" +#include "spu_dcache.h" +#include "spu_debug.h" +#include "cell/common.h" + + +struct spu_vs_context draw; + + +/** + * Buffers containing dynamically generated SPU code: + */ +static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] + ALIGN16_ATTRIB; + + + +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) +{ + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const uint status[4] ALIGN16_ATTRIB + = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; + + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; + + ASSERT(buffer < CELL_NUM_BUFFERS); + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} + + +static void +cmd_clear_surface(const struct cell_command_clear_surface *clear) +{ + DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); + + if (clear->surface == 0) { + spu.fb.color_clear_value = clear->value; + if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { + uint x = (spu.init.id << 4) | (spu.init.id << 12) | + (spu.init.id << 20) | (spu.init.id << 28); + spu.fb.color_clear_value ^= x; + } + } + else { + spu.fb.depth_clear_value = clear->value; + } + +#define CLEAR_OPT 1 +#if CLEAR_OPT + + /* Simply set all tiles' status to CLEAR. + * When we actually begin rendering into a tile, we'll initialize it to + * the clear value. If any tiles go untouched during the frame, + * really_clear_tiles() will set them to the clear value. + */ + if (clear->surface == 0) { + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); + } + else { + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); + } + +#else + + /* + * This path clears the whole framebuffer to the clear color right now. + */ + + /* + printf("SPU: %s num=%d w=%d h=%d\n", + __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); + */ + + /* init a single tile to the clear value */ + if (clear->surface == 0) { + clear_c_tile(&spu.ctile); + } + else { + clear_z_tile(&spu.ztile); + } + + /* walk over my tiles, writing the 'clear' tile's data */ + { + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (clear->surface == 0) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + else + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + } + } + + if (spu.init.debug_flags & CELL_DEBUG_SYNC) { + wait_on_mask(1 << TAG_SURFACE_CLEAR); + } + +#endif /* CLEAR_OPT */ + + DEBUG_PRINTF("CLEAR SURF done\n"); +} + + +static void +cmd_release_verts(const struct cell_command_release_verts *release) +{ + DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf); + ASSERT(release->vertex_buf != ~0U); + release_buffer(release->vertex_buf); +} + + +/** + * Process a CELL_CMD_STATE_FRAGMENT_OPS command. + * This involves installing new fragment ops SPU code. + * If this function is never called, we'll use a regular C fallback function + * for fragment processing. + */ +static void +cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) +{ + static int warned = 0; + + DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); + /* Copy state info (for fallback case only) */ + memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); + memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); + + /* Parity twist! For now, always use the fallback code by default, + * only switching to codegen when specifically requested. This + * allows us to develop freely without risking taking down the + * branch. + * + * Later, the parity of this check will be reversed, so that + * codegen is *always* used, unless we specifically indicate that + * we don't want it. + * + * Eventually, the option will be removed completely, because in + * final code we'll always use codegen and won't even provide the + * raw state records that the fallback code requires. + */ + if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { + spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + } + else { + /* otherwise, the default fallback code remains in place */ + if (!warned) { + fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); + warned = 1; + } + } + + spu.read_depth = spu.depth_stencil_alpha.depth.enabled; + spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; +} + + +static void +cmd_state_fragment_program(const struct cell_command_fragment_program *fp) +{ + DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n"); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_program_code, fp->code, + SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); +#if 01 + /* Point function pointer at new code */ + spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; +#endif +} + + +static void +cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) +{ + DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + cmd->width, + cmd->height, + cmd->color_start, + cmd->color_format, + cmd->depth_format); + + ASSERT_ALIGN16(cmd->color_start); + ASSERT_ALIGN16(cmd->depth_start); + + spu.fb.color_start = cmd->color_start; + spu.fb.depth_start = cmd->depth_start; + spu.fb.color_format = cmd->color_format; + spu.fb.depth_format = cmd->depth_format; + spu.fb.width = cmd->width; + spu.fb.height = cmd->height; + spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; + spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; + + switch (spu.fb.depth_format) { + case PIPE_FORMAT_Z32_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0xffffffffu; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0x00ffffffu; + break; + case PIPE_FORMAT_Z16_UNORM: + spu.fb.zsize = 2; + spu.fb.zscale = (float) 0xffffu; + break; + default: + spu.fb.zsize = 0; + break; + } +} + + +static void +cmd_state_sampler(const struct cell_command_sampler *sampler) +{ + DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); + + spu.sampler[sampler->unit] = sampler->state; + if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) + spu.sample_texture[sampler->unit] = sample_texture_bilinear; + else + spu.sample_texture[sampler->unit] = sample_texture_nearest; +} + + +static void +cmd_state_texture(const struct cell_command_texture *texture) +{ + const uint unit = texture->unit; + const uint width = texture->width; + const uint height = texture->height; + + DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n", + texture->unit, texture->start, + texture->width, texture->height); + + spu.texture[unit].start = texture->start; + spu.texture[unit].width = width; + spu.texture[unit].height = height; + + spu.texture[unit].tiles_per_row = width / TILE_SIZE; + + spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; + spu.texture[unit].tex_size_mask = (vector unsigned int) + { width - 1, height - 1, 0, 0 }; + spu.texture[unit].tex_size_x_mask = spu_splats(width - 1); + spu.texture[unit].tex_size_y_mask = spu_splats(height - 1); +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ + DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); + ASSERT(vinfo->num_attribs >= 1); + ASSERT(vinfo->num_attribs <= 8); + memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); +} + + +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ + const unsigned attr = vs_info->attr; + + ASSERT(attr < PIPE_MAX_ATTRIBS); + draw.vertex_fetch.src_ptr[attr] = vs_info->base; + draw.vertex_fetch.pitch[attr] = vs_info->pitch; + draw.vertex_fetch.size[attr] = vs_info->size; + draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; + draw.vertex_fetch.dirty = 1; +} + + +static void +cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) +{ + mfc_get(attribute_fetch_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + draw.vertex_fetch.code = attribute_fetch_code_buffer; +} + + +static void +cmd_finish(void) +{ + DEBUG_PRINTF("FINISH\n"); + really_clear_tiles(0); + /* wait for all outstanding DMAs to finish */ + mfc_write_tag_mask(~0); + mfc_read_tag_status_all(); + /* send mbox message to PPU */ + spu_write_out_mbox(CELL_CMD_FINISH); +} + + +/** + * Execute a batch of commands which was sent to us by the PPU. + * See the cell_emit_state.c code to see where the commands come from. + * + * The opcode param encodes the location of the buffer and its size. + */ +static void +cmd_batch(uint opcode) +{ + const uint buf = (opcode >> 8) & 0xff; + uint size = (opcode >> 16); + uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; + const unsigned usize = size / sizeof(buffer[0]); + uint pos; + + DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", + buf, size, spu.init.buffers[buf]); + + ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + size = ROUNDUP16(size); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + mfc_get(buffer, /* dest */ + (unsigned int) spu.init.buffers[buf], /* src */ + size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + /* Tell PPU we're done copying the buffer to local store */ + DEBUG_PRINTF("release batch buf %u\n", buf); + release_buffer(buf); + + /* + * Loop over commands in the batch buffer + */ + for (pos = 0; pos < usize; /* no incr */) { + switch (buffer[pos]) { + /* + * rendering commands + */ + case CELL_CMD_CLEAR_SURFACE: + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) &buffer[pos]; + cmd_clear_surface(clr); + pos += sizeof(*clr) / 8; + } + break; + case CELL_CMD_RENDER: + { + struct cell_command_render *render + = (struct cell_command_render *) &buffer[pos]; + uint pos_incr; + cmd_render(render, &pos_incr); + pos += pos_incr; + } + break; + /* + * state-update commands + */ + case CELL_CMD_STATE_FRAMEBUFFER: + { + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 8; + } + break; + case CELL_CMD_STATE_FRAGMENT_OPS: + { + struct cell_command_fragment_ops *fops + = (struct cell_command_fragment_ops *) &buffer[pos]; + cmd_state_fragment_ops(fops); + pos += sizeof(*fops) / 8; + } + break; + case CELL_CMD_STATE_FRAGMENT_PROGRAM: + { + struct cell_command_fragment_program *fp + = (struct cell_command_fragment_program *) &buffer[pos]; + cmd_state_fragment_program(fp); + pos += sizeof(*fp) / 8; + } + break; + case CELL_CMD_STATE_SAMPLER: + { + struct cell_command_sampler *sampler + = (struct cell_command_sampler *) &buffer[pos]; + cmd_state_sampler(sampler); + pos += sizeof(*sampler) / 8; + } + break; + case CELL_CMD_STATE_TEXTURE: + { + struct cell_command_texture *texture + = (struct cell_command_texture *) &buffer[pos]; + cmd_state_texture(texture); + pos += sizeof(*texture) / 8; + } + break; + case CELL_CMD_STATE_VERTEX_INFO: + cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); + break; + case CELL_CMD_STATE_VIEWPORT: + (void) memcpy(& draw.viewport, &buffer[pos+1], + sizeof(struct pipe_viewport_state)); + pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); + break; + case CELL_CMD_STATE_UNIFORMS: + draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1]; + pos += 2; + break; + case CELL_CMD_STATE_VS_ARRAY_INFO: + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); + break; + case CELL_CMD_STATE_BIND_VS: +#if 0 + spu_bind_vertex_shader(&draw, + (struct cell_shader_info *) &buffer[pos+1]); +#endif + pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); + break; + case CELL_CMD_STATE_ATTRIB_FETCH: + cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); + break; + /* + * misc commands + */ + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; + } + break; + case CELL_CMD_FLUSH_BUFFER_RANGE: { + struct cell_buffer_range *br = (struct cell_buffer_range *) + &buffer[pos+1]; + + spu_dcache_mark_dirty((unsigned) br->base, br->size); + pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8); + break; + } + default: + printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); + ASSERT(0); + break; + } + } + + DEBUG_PRINTF("BATCH complete\n"); +} + + + +/** + * Main loop for SPEs: Get a command, execute it, repeat. + */ +void +command_loop(void) +{ + struct cell_command cmd; + int exitFlag = 0; + + DEBUG_PRINTF("Enter command loop\n"); + + ASSERT((sizeof(struct cell_command) & 0xf) == 0); + ASSERT_ALIGN16(&cmd); + + while (!exitFlag) { + unsigned opcode; + int tag = 0; + + DEBUG_PRINTF("Wait for cmd...\n"); + + /* read/wait from mailbox */ + opcode = (unsigned int) spu_read_in_mbox(); + + DEBUG_PRINTF("got cmd 0x%x\n", opcode); + + /* command payload */ + mfc_get(&cmd, /* dest */ + (unsigned int) spu.init.cmd, /* src */ + sizeof(struct cell_command), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); + + /* + * NOTE: most commands should be contained in a batch buffer + */ + + switch (opcode & CELL_CMD_OPCODE_MASK) { + case CELL_CMD_EXIT: + DEBUG_PRINTF("EXIT\n"); + exitFlag = 1; + break; + case CELL_CMD_VS_EXECUTE: +#if 0 + spu_execute_vertex_shader(&draw, &cmd.vs); +#endif + break; + case CELL_CMD_BATCH: + cmd_batch(opcode); + break; + default: + printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); + } + + } + + DEBUG_PRINTF("Exit command loop\n"); + + spu_dcache_report(); +} diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h new file mode 100644 index 00000000000..853e9aa5498 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.h @@ -0,0 +1,7 @@ + + + +extern void +command_loop(void); + + diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index bc94674fe82..4becd0f92a4 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -35,14 +35,11 @@ #include "pipe/p_defines.h" #include "spu_funcs.h" +#include "spu_command.h" #include "spu_main.h" -#include "spu_render.h" #include "spu_per_fragment_op.h" #include "spu_texture.h" -#include "spu_tile.h" //#include "spu_test.h" -#include "spu_vertex_shader.h" -#include "spu_dcache.h" #include "spu_debug.h" #include "cell/common.h" @@ -55,8 +52,6 @@ helpful headers: struct spu_global spu; -struct spu_vs_context draw; - #if DEBUG boolean Debug = FALSE; @@ -64,554 +59,6 @@ boolean force_fragment_ops_fallback = TRUE; #endif -/** - * Buffers containing dynamically generated SPU code: - */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; - - - -/** - * Tell the PPU that this SPU has finished copying a buffer to - * local store and that it may be reused by the PPU. - * This is done by writting a 16-byte batch-buffer-status block back into - * main memory (in cell_context->buffer_status[]). - */ -static void -release_buffer(uint buffer) -{ - /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const uint status[4] ALIGN16_ATTRIB - = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; - - const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); - uint *dst = spu.init.buffer_status + index; - - ASSERT(buffer < CELL_NUM_BUFFERS); - - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_MISC, /* tag is unimportant */ - 0, /* tid */ - 0 /* rid */); -} - - -static void -cmd_clear_surface(const struct cell_command_clear_surface *clear) -{ - DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); - - if (clear->surface == 0) { - spu.fb.color_clear_value = clear->value; - if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { - uint x = (spu.init.id << 4) | (spu.init.id << 12) | - (spu.init.id << 20) | (spu.init.id << 28); - spu.fb.color_clear_value ^= x; - } - } - else { - spu.fb.depth_clear_value = clear->value; - } - -#define CLEAR_OPT 1 -#if CLEAR_OPT - - /* Simply set all tiles' status to CLEAR. - * When we actually begin rendering into a tile, we'll initialize it to - * the clear value. If any tiles go untouched during the frame, - * really_clear_tiles() will set them to the clear value. - */ - if (clear->surface == 0) { - memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); - } - else { - memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); - } - -#else - - /* - * This path clears the whole framebuffer to the clear color right now. - */ - - /* - printf("SPU: %s num=%d w=%d h=%d\n", - __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); - */ - - /* init a single tile to the clear value */ - if (clear->surface == 0) { - clear_c_tile(&spu.ctile); - } - else { - clear_z_tile(&spu.ztile); - } - - /* walk over my tiles, writing the 'clear' tile's data */ - { - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (clear->surface == 0) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - else - put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); - } - } - - if (spu.init.debug_flags & CELL_DEBUG_SYNC) { - wait_on_mask(1 << TAG_SURFACE_CLEAR); - } - -#endif /* CLEAR_OPT */ - - DEBUG_PRINTF("CLEAR SURF done\n"); -} - - -static void -cmd_release_verts(const struct cell_command_release_verts *release) -{ - DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf); - ASSERT(release->vertex_buf != ~0U); - release_buffer(release->vertex_buf); -} - - -/** - * Process a CELL_CMD_STATE_FRAGMENT_OPS command. - * This involves installing new fragment ops SPU code. - * If this function is never called, we'll use a regular C fallback function - * for fragment processing. - */ -static void -cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) -{ - static int warned = 0; - - DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); - /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); - /* Copy state info (for fallback case only) */ - memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); - memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); - - /* Parity twist! For now, always use the fallback code by default, - * only switching to codegen when specifically requested. This - * allows us to develop freely without risking taking down the - * branch. - * - * Later, the parity of this check will be reversed, so that - * codegen is *always* used, unless we specifically indicate that - * we don't want it. - * - * Eventually, the option will be removed completely, because in - * final code we'll always use codegen and won't even provide the - * raw state records that the fallback code requires. - */ - if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { - spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; - } - else { - /* otherwise, the default fallback code remains in place */ - if (!warned) { - fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); - warned = 1; - } - } - - spu.read_depth = spu.depth_stencil_alpha.depth.enabled; - spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; -} - - -static void -cmd_state_fragment_program(const struct cell_command_fragment_program *fp) -{ - DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n"); - /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_program_code, fp->code, - SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); -#if 01 - /* Point function pointer at new code */ - spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; -#endif -} - - -static void -cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) -{ - DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", - cmd->width, - cmd->height, - cmd->color_start, - cmd->color_format, - cmd->depth_format); - - ASSERT_ALIGN16(cmd->color_start); - ASSERT_ALIGN16(cmd->depth_start); - - spu.fb.color_start = cmd->color_start; - spu.fb.depth_start = cmd->depth_start; - spu.fb.color_format = cmd->color_format; - spu.fb.depth_format = cmd->depth_format; - spu.fb.width = cmd->width; - spu.fb.height = cmd->height; - spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; - spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; - - switch (spu.fb.depth_format) { - case PIPE_FORMAT_Z32_UNORM: - spu.fb.zsize = 4; - spu.fb.zscale = (float) 0xffffffffu; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - spu.fb.zsize = 4; - spu.fb.zscale = (float) 0x00ffffffu; - break; - case PIPE_FORMAT_Z16_UNORM: - spu.fb.zsize = 2; - spu.fb.zscale = (float) 0xffffu; - break; - default: - spu.fb.zsize = 0; - break; - } -} - - -static void -cmd_state_sampler(const struct cell_command_sampler *sampler) -{ - DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); - - spu.sampler[sampler->unit] = sampler->state; - if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) - spu.sample_texture[sampler->unit] = sample_texture_bilinear; - else - spu.sample_texture[sampler->unit] = sample_texture_nearest; -} - - -static void -cmd_state_texture(const struct cell_command_texture *texture) -{ - const uint unit = texture->unit; - const uint width = texture->width; - const uint height = texture->height; - - DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n", - texture->unit, texture->start, - texture->width, texture->height); - - spu.texture[unit].start = texture->start; - spu.texture[unit].width = width; - spu.texture[unit].height = height; - - spu.texture[unit].tiles_per_row = width / TILE_SIZE; - - spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; - spu.texture[unit].tex_size_mask = (vector unsigned int) - { width - 1, height - 1, 0, 0 }; - spu.texture[unit].tex_size_x_mask = spu_splats(width - 1); - spu.texture[unit].tex_size_y_mask = spu_splats(height - 1); -} - - -static void -cmd_state_vertex_info(const struct vertex_info *vinfo) -{ - DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); - ASSERT(vinfo->num_attribs >= 1); - ASSERT(vinfo->num_attribs <= 8); - memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); -} - - -static void -cmd_state_vs_array_info(const struct cell_array_info *vs_info) -{ - const unsigned attr = vs_info->attr; - - ASSERT(attr < PIPE_MAX_ATTRIBS); - draw.vertex_fetch.src_ptr[attr] = vs_info->base; - draw.vertex_fetch.pitch[attr] = vs_info->pitch; - draw.vertex_fetch.size[attr] = vs_info->size; - draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; - draw.vertex_fetch.dirty = 1; -} - - -static void -cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) -{ - mfc_get(attribute_fetch_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - draw.vertex_fetch.code = attribute_fetch_code_buffer; -} - - -static void -cmd_finish(void) -{ - DEBUG_PRINTF("FINISH\n"); - really_clear_tiles(0); - /* wait for all outstanding DMAs to finish */ - mfc_write_tag_mask(~0); - mfc_read_tag_status_all(); - /* send mbox message to PPU */ - spu_write_out_mbox(CELL_CMD_FINISH); -} - - -/** - * Execute a batch of commands which was sent to us by the PPU. - * See the cell_emit_state.c code to see where the commands come from. - * - * The opcode param encodes the location of the buffer and its size. - */ -static void -cmd_batch(uint opcode) -{ - const uint buf = (opcode >> 8) & 0xff; - uint size = (opcode >> 16); - uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; - const unsigned usize = size / sizeof(buffer[0]); - uint pos; - - DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", - buf, size, spu.init.buffers[buf]); - - ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - size = ROUNDUP16(size); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - mfc_get(buffer, /* dest */ - (unsigned int) spu.init.buffers[buf], /* src */ - size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - /* Tell PPU we're done copying the buffer to local store */ - DEBUG_PRINTF("release batch buf %u\n", buf); - release_buffer(buf); - - /* - * Loop over commands in the batch buffer - */ - for (pos = 0; pos < usize; /* no incr */) { - switch (buffer[pos]) { - /* - * rendering commands - */ - case CELL_CMD_CLEAR_SURFACE: - { - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) &buffer[pos]; - cmd_clear_surface(clr); - pos += sizeof(*clr) / 8; - } - break; - case CELL_CMD_RENDER: - { - struct cell_command_render *render - = (struct cell_command_render *) &buffer[pos]; - uint pos_incr; - cmd_render(render, &pos_incr); - pos += pos_incr; - } - break; - /* - * state-update commands - */ - case CELL_CMD_STATE_FRAMEBUFFER: - { - struct cell_command_framebuffer *fb - = (struct cell_command_framebuffer *) &buffer[pos]; - cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 8; - } - break; - case CELL_CMD_STATE_FRAGMENT_OPS: - { - struct cell_command_fragment_ops *fops - = (struct cell_command_fragment_ops *) &buffer[pos]; - cmd_state_fragment_ops(fops); - pos += sizeof(*fops) / 8; - } - break; - case CELL_CMD_STATE_FRAGMENT_PROGRAM: - { - struct cell_command_fragment_program *fp - = (struct cell_command_fragment_program *) &buffer[pos]; - cmd_state_fragment_program(fp); - pos += sizeof(*fp) / 8; - } - break; - case CELL_CMD_STATE_SAMPLER: - { - struct cell_command_sampler *sampler - = (struct cell_command_sampler *) &buffer[pos]; - cmd_state_sampler(sampler); - pos += sizeof(*sampler) / 8; - } - break; - case CELL_CMD_STATE_TEXTURE: - { - struct cell_command_texture *texture - = (struct cell_command_texture *) &buffer[pos]; - cmd_state_texture(texture); - pos += sizeof(*texture) / 8; - } - break; - case CELL_CMD_STATE_VERTEX_INFO: - cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); - break; - case CELL_CMD_STATE_VIEWPORT: - (void) memcpy(& draw.viewport, &buffer[pos+1], - sizeof(struct pipe_viewport_state)); - pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); - break; - case CELL_CMD_STATE_UNIFORMS: - draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1]; - pos += 2; - break; - case CELL_CMD_STATE_VS_ARRAY_INFO: - cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); - break; - case CELL_CMD_STATE_BIND_VS: -#if 0 - spu_bind_vertex_shader(&draw, - (struct cell_shader_info *) &buffer[pos+1]); -#endif - pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); - break; - case CELL_CMD_STATE_ATTRIB_FETCH: - cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) - &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); - break; - /* - * misc commands - */ - case CELL_CMD_FINISH: - cmd_finish(); - pos += 1; - break; - case CELL_CMD_RELEASE_VERTS: - { - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) &buffer[pos]; - cmd_release_verts(release); - pos += sizeof(*release) / 8; - } - break; - case CELL_CMD_FLUSH_BUFFER_RANGE: { - struct cell_buffer_range *br = (struct cell_buffer_range *) - &buffer[pos+1]; - - spu_dcache_mark_dirty((unsigned) br->base, br->size); - pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8); - break; - } - default: - printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); - ASSERT(0); - break; - } - } - - DEBUG_PRINTF("BATCH complete\n"); -} - - -/** - * Temporary/simple main loop for SPEs: Get a command, execute it, repeat. - */ -static void -main_loop(void) -{ - struct cell_command cmd; - int exitFlag = 0; - - DEBUG_PRINTF("Enter main loop\n"); - - ASSERT((sizeof(struct cell_command) & 0xf) == 0); - ASSERT_ALIGN16(&cmd); - - while (!exitFlag) { - unsigned opcode; - int tag = 0; - - DEBUG_PRINTF("Wait for cmd...\n"); - - /* read/wait from mailbox */ - opcode = (unsigned int) spu_read_in_mbox(); - - DEBUG_PRINTF("got cmd 0x%x\n", opcode); - - /* command payload */ - mfc_get(&cmd, /* dest */ - (unsigned int) spu.init.cmd, /* src */ - sizeof(struct cell_command), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask( 1 << tag ); - - /* - * NOTE: most commands should be contained in a batch buffer - */ - - switch (opcode & CELL_CMD_OPCODE_MASK) { - case CELL_CMD_EXIT: - DEBUG_PRINTF("EXIT\n"); - exitFlag = 1; - break; - case CELL_CMD_VS_EXECUTE: -#if 0 - spu_execute_vertex_shader(&draw, &cmd.vs); -#endif - break; - case CELL_CMD_BATCH: - cmd_batch(opcode); - break; - default: - printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); - } - - } - - DEBUG_PRINTF("Exit main loop\n"); - - spu_dcache_report(); -} - - - static void one_time_init(void) { @@ -658,6 +105,7 @@ main(main_param_t speid, main_param_t argp) DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid); D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); + /* get initialization data */ mfc_get(&spu.init, /* dest */ (unsigned int) argp, /* src */ sizeof(struct cell_init_info), /* bytes */ @@ -675,7 +123,7 @@ main(main_param_t speid, main_param_t argp) spu_test_misc(spu.init.id); #endif - main_loop(); + command_loop(); return 0; } -- cgit v1.2.3 From 55b65d3b42b8ba1ea1c5b5549b4629f3b20e7a97 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 17:57:01 -0600 Subject: cell: stub-out sin/cos function bodies to avoid trashing caller's stack for now --- src/gallium/drivers/cell/spu/spu_funcs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index d1749565187..b57ad3f3b81 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -49,17 +49,27 @@ static vector float spu_cos(vector float x) { +#if 0 static const float scale = 1.0 / (2.0 * M_PI); x = x * spu_splats(scale); /* normalize */ return _cos8_v(x); +#else + /* just pass-through to avoid trashing caller's stack */ + return x; +#endif } static vector float spu_sin(vector float x) { +#if 0 static const float scale = 1.0 / (2.0 * M_PI); x = x * spu_splats(scale); /* normalize */ return _sin8_v(x); /* 8-bit accuracy enough?? */ +#else + /* just pass-through to avoid trashing caller's stack */ + return x; +#endif } -- cgit v1.2.3 From fe1c9872ae258b78f195c1885ddfc29d07d17cf6 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 26 Sep 2008 17:59:19 -0600 Subject: cell: checkpoint: more work in emit_function_call() Simple function call works now, but we don't save/restore the caller's registers yet. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 45 ++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index fd12af19cef..8d2d4f2a0f2 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1105,7 +1105,10 @@ emit_function_call(struct codegen *gen, uint addr; int ch; - assert(num_args <= 2); + /* XXX temporary value */ + const int frameSize = 64; /* stack frame (activation record) size */ + + assert(num_args <= 3); /* lookup function address */ { @@ -1119,7 +1122,9 @@ emit_function_call(struct codegen *gen, assert(addr && "spu function not found"); } - sprintf(comment, "CALL %s:", funcname); + addr /= 4; /* discard 2 least significant bits */ + + snprintf(comment, sizeof(comment), "CALL %s:", funcname); spe_comment(gen->f, -4, comment); for (ch = 0; ch < 4; ch++) { @@ -1131,12 +1136,40 @@ emit_function_call(struct codegen *gen, s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); } - /* XXX not done */ - (void) s_regs; - (void) d_reg; + /* Basically: + * save registers on stack + * move parameters to registers 3, 4, 5... + * call function + * save return value (reg 3) + * restore registers from stack + */ + + /* XXX hack: load first function param */ + spe_move(gen->f, 3, s_regs[0]); + + /* save $lr on stack # stqd $lr,16($sp) */ + spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + /* save stack pointer # stqd $sp,-frameSize($sp) */ + spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize); + + /* XXX save registers to stack here */ + + /* adjust stack pointer # ai $sp,$sp,-frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize); + + /* branch to function, save return addr */ + spe_brasl(gen->f, SPE_REG_RA, addr); + + /* restore stack pointer # ai $sp,$sp,frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, frameSize); + + /* XXX restore registers from stack here */ - spe_bisl(gen->f, SPE_REG_RA, addr, 0, 0); /* XXX untested! */ + /* restore $lr # lqd $lr,16($sp) */ + spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + /* XXX hack: save function's return value */ + spe_move(gen->f, d_reg, 3); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); -- cgit v1.2.3 From afaa53040bd01ca86762e7d7b1a5a65810767921 Mon Sep 17 00:00:00 2001 From: Robert Ellison <papillo@tungstengraphics.com> Date: Fri, 3 Oct 2008 18:00:43 -0600 Subject: CELL: changes to generate SPU code for stenciling This set of code changes are for stencil code generation support. Both one-sided and two-sided stenciling are supported. In addition to the raw code generation changes, these changes had to be made elsewhere in the system: - Added new "register set" feature to the SPE assembly generation. A "register set" is a way to allocate multiple registers and free them all at the same time, delegating register allocation management to the spe_function unit. It's quite useful in complex register allocation schemes (like stenciling). - Added and improved SPE macro calculations. These are operations between registers and unsigned integer immediates. In many cases, the calculation can be performed with a single instruction; the macros will generate the single instruction if possible, or generate a register load and register-to-register operation if not. These macro functions are: spe_load_uint() (which has new ways to load a value in a single instruction), spe_and_uint(), spe_xor_uint(), spe_compare_equal_uint(), and spe_compare_greater_uint(). - Added facing to fragment generation. While rendering, the rasterizer needs to be able to determine front- and back-facing fragments, in order to correctly apply two-sided stencil. That requires these changes: - Added front_winding field to the cell_command_render block, so that the state tracker could communicate to the rasterizer what it considered to be the front-facing direction. - Added fragment facing as an input to the fragment function. - Calculated facing is passed during emit_quad(). --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 246 +++++- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 41 +- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 881 ++++++++++++++++++--- src/gallium/drivers/cell/ppu/cell_render.c | 1 + src/gallium/drivers/cell/ppu/cell_vbuf.c | 1 + src/gallium/drivers/cell/spu/spu_main.h | 3 +- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 19 +- src/gallium/drivers/cell/spu/spu_per_fragment_op.h | 3 +- src/gallium/drivers/cell/spu/spu_render.c | 4 +- src/gallium/drivers/cell/spu/spu_tri.c | 35 +- src/gallium/drivers/cell/spu/spu_tri.h | 2 +- 12 files changed, 1091 insertions(+), 146 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 491141f1908..8a87e9abb1d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -359,14 +359,21 @@ void _name (struct spe_function *p, int imm) \ */ void spe_init_func(struct spe_function *p, unsigned code_size) { + register unsigned int i; + p->store = align_malloc(code_size, 16); p->num_inst = 0; p->max_inst = code_size / SPE_INST_SIZE; + p->set_count = 0; + memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0])); + /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. */ - p->regs[0] = ~7; - p->regs[1] = (1U << (80 - 64)) - 1; + p->regs[0] = p->regs[1] = p->regs[2] = 1; + for (i = 80; i <= 127; i++) { + p->regs[i] = 1; + } p->print = false; p->indent = 0; @@ -398,12 +405,8 @@ int spe_allocate_available_register(struct spe_function *p) { unsigned i; for (i = 0; i < SPE_NUM_REGS; i++) { - const uint64_t mask = (1ULL << (i % 64)); - const unsigned idx = i / 64; - - assert(idx < 2); - if ((p->regs[idx] & mask) != 0) { - p->regs[idx] &= ~mask; + if (p->regs[i] == 0) { + p->regs[i] = 1; return i; } } @@ -417,31 +420,68 @@ int spe_allocate_available_register(struct spe_function *p) */ int spe_allocate_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 64; - const unsigned bit = reg % 64; - assert(reg < SPE_NUM_REGS); - assert((p->regs[idx] & (1ULL << bit)) != 0); - - p->regs[idx] &= ~(1ULL << bit); + assert(p->regs[reg] == 0); + p->regs[reg] = 1; return reg; } /** - * Mark the given SPE register as "unallocated". + * Mark the given SPE register as "unallocated". Note that this should + * only be used on registers allocated in the current register set; an + * assertion will fail if an attempt is made to deallocate a register + * allocated in an earlier register set. */ void spe_release_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 64; - const unsigned bit = reg % 64; + assert(reg < SPE_NUM_REGS); + assert(p->regs[reg] == 1); - assert(idx < 2); + p->regs[reg] = 0; +} - assert(reg < SPE_NUM_REGS); - assert((p->regs[idx] & (1ULL << bit)) == 0); +/** + * Start a new set of registers. This can be called if + * it will be difficult later to determine exactly what + * registers were actually allocated during a code generation + * sequence, and you really just want to deallocate all of them. + */ +void spe_allocate_register_set(struct spe_function *p) +{ + register unsigned int i; + + /* Keep track of the set count. If it ever wraps around to 0, + * we're in trouble. + */ + p->set_count++; + assert(p->set_count > 0); + + /* Increment the allocation count of all registers currently + * allocated. Then any registers that are allocated in this set + * will be the only ones with a count of 1; they'll all be released + * when the register set is released. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) p->regs[i]++; + } +} + +void spe_release_register_set(struct spe_function *p) +{ + unsigned int i; + + /* If the set count drops below zero, we're in trouble. */ + assert(p->set_count > 0); + p->set_count--; - p->regs[idx] |= (1ULL << bit); + /* Drop the allocation level of all registers. Any allocated + * during this register set will drop to 0 and then become + * available. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) p->regs[i]--; + } } @@ -603,8 +643,10 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) { /* If the whole value is in the lower 18 bits, use ila, which * doesn't sign-extend. Otherwise, if the two halfwords of - * the constant are identical, use ilh. Otherwise, we have - * to use ilhu followed by iohl. + * the constant are identical, use ilh. Otherwise, if every byte of + * the desired value is 0x00 or 0xff, we can use Form Select Mask for + * Bytes Immediate (fsmbi) to load the value in a single instruction. + * Otherwise, in the general case, we have to use ilhu followed by iohl. */ if ((ui & 0xfffc0000) == ui) { spe_ila(p, rT, ui); @@ -612,13 +654,171 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) else if ((ui >> 16) == (ui & 0xffff)) { spe_ilh(p, rT, ui & 0xffff); } + else if ( + ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) && + ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) && + ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) && + ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000) + ) { + unsigned int mask = 0; + /* fsmbi duplicates each bit in the given mask eight times, + * using a 16-bit value to initialize a 16-byte quadword. + * Each 4-bit nybble of the mask corresponds to a full word + * of the result; look at the value and figure out the mask + * (replicated for each word in the quadword), and then + * form the "select mask" to get the value. + */ + if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111; + if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222; + if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444; + if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888; + spe_fsmbi(p, rT, mask); + } else { + /* The general case: this usually uses two instructions, but + * may use only one if the low-order 16 bits of each word are 0. + */ spe_ilhu(p, rT, ui >> 16); if (ui & 0xffff) spe_iohl(p, rT, ui & 0xffff); } } +/* This function is constructed identically to spe_sor_uint() below. + * Changes to one should be made in the other. + */ +void spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If we can, emit a single instruction, either And Byte Immediate + * (which uses the same constant across each byte), And Halfword Immediate + * (which sign-extends a 10-bit immediate to 16 bits and uses that + * across each halfword), or And Word Immediate (which sign-extends + * a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. + */ + register unsigned int tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use And Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_andi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use And Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_andhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the And Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_andbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_and(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + +/* This function is constructed identically to spe_and_uint() above. + * Changes to one should be made in the other. + */ +void spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If we can, emit a single instruction, either Exclusive Or Byte + * Immediate (which uses the same constant across each byte), Exclusive + * Or Halfword Immediate (which sign-extends a 10-bit immediate to + * 16 bits and uses that across each halfword), or Exclusive Or Word + * Immediate (which sign-extends a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. + */ + register unsigned int tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use Exclusive Or Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_xori(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use Exclusive Or Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_xorhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the Exclusive Or Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_xorbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_xor(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + +void +spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If the comparison value is 9 bits or less, it fits inside a + * Compare Equal Word Immediate instruction. + */ + if ((ui & 0x000001ff) == ui) { + spe_ceqi(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. */ + else { + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_ceq(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} + +void +spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If the comparison value is 10 bits or less, it fits inside a + * Compare Logical Greater Than Word Immediate instruction. + */ + if ((ui & 0x000003ff) == ui) { + spe_clgti(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. */ + else { + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_clgt(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} void spe_splat(struct spe_function *p, unsigned rT, unsigned rA) diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 61c7edeb604..cd2e2454097 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -53,17 +53,26 @@ struct spe_function uint num_inst; uint max_inst; - /** - * Mask of used / unused registers - * - * Each set bit corresponds to an available register. Each cleared bit - * corresponds to an allocated register. + /** + * The "set count" reflects the number of nested register sets + * are allowed. In the unlikely case that we exceed the set count, + * register allocation will start to be confused, which is critical + * enough that we check for it. + */ + unsigned char set_count; + + /** + * Flags for used and unused registers. Each byte corresponds to a + * register; a 0 in that byte means that the register is available. + * A value of 1 means that the register was allocated in the current + * register set. Any other value N means that the register was allocated + * N register sets ago. * * \sa * spe_allocate_register, spe_allocate_available_register, - * spe_release_register + * spe_allocate_register_set, spe_release_register_set, spe_release_register, */ - uint64_t regs[SPE_NUM_REGS / 64]; + unsigned char regs[SPE_NUM_REGS]; boolean print; /**< print/dump instructions as they're emitted? */ int indent; /**< number of spaces to indent */ @@ -77,6 +86,8 @@ extern unsigned spe_code_size(const struct spe_function *p); extern int spe_allocate_available_register(struct spe_function *p); extern int spe_allocate_register(struct spe_function *p, int reg); extern void spe_release_register(struct spe_function *p, int reg); +extern void spe_allocate_register_set(struct spe_function *p); +extern void spe_release_register_set(struct spe_function *p); extern void spe_print_code(struct spe_function *p, boolean enable); extern void spe_indent(struct spe_function *p, int spaces); @@ -307,6 +318,22 @@ spe_load_int(struct spe_function *p, unsigned rT, int i); extern void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui); +/** And immediate value into rT. */ +extern void +spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Xor immediate value into rT. */ +extern void +spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Compare equal with immediate value. */ +extern void +spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Compare greater with immediate value. */ +extern void +spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + /** Replicate word 0 of rA across rT. */ extern void spe_splat(struct spe_function *p, unsigned rT, unsigned rA); diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 99329fd8e22..c223bc17449 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -227,6 +227,7 @@ struct cell_command_render float xmin, ymin, xmax, ymax; /* XXX another dummy field */ uint min_index; boolean inline_verts; + uint front_winding; /* the rasterizer needs to be able to determine facing to apply front/back-facing stencil */ }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 653afc235df..f920ae13b42 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -54,10 +54,12 @@ * \param ifragZ_reg register containing integer fragment Z values (in) * \param ifbZ_reg register containing integer frame buffer Z values (in/out) * \param zmask_reg register containing result of Z test/comparison (out) + * + * Returns true if the Z-buffer needs to be updated. */ -static void -gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, +static boolean +gen_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) { /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ @@ -132,7 +134,10 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; */ spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); + return true; } + + return false; } @@ -238,22 +243,34 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, * it and have to allocate and load it again unnecessarily. */ static inline void -setup_const_register(struct spe_function *f, boolean *is_already_set, unsigned int *r, float value) +setup_optional_register(struct spe_function *f, boolean *is_already_set, unsigned int *r) { if (*is_already_set) return; *r = spe_allocate_available_register(f); - spe_load_float(f, *r, value); - *is_already_set = true; } static inline void -release_const_register(struct spe_function *f, boolean *is_already_set, unsigned int r) +release_optional_register(struct spe_function *f, boolean *is_already_set, unsigned int r) { if (!*is_already_set) return; spe_release_register(f, r); *is_already_set = false; } +static inline void +setup_const_register(struct spe_function *f, boolean *is_already_set, unsigned int *r, float value) +{ + if (*is_already_set) return; + setup_optional_register(f, is_already_set, r); + spe_load_float(f, *r, value); +} + +static inline void +release_const_register(struct spe_function *f, boolean *is_already_set, unsigned int r) +{ + release_optional_register(f, is_already_set, r); +} + /** * Generate SPE code to implement the given blend mode for a quad of pixels. * \param f SPE function to append instruction onto. @@ -1117,6 +1134,633 @@ gen_colormask(struct spe_function *f, spe_release_register(f, colormask_reg); } +/* This function is annoyingly similar to gen_depth_test(), above, except + * that instead of comparing two varying values (i.e. fragment and buffer), + * we're comparing a varying value with a static value. As such, we have + * access to the Compare Immediate instructions where we don't in + * gen_depth_test(), which is what makes us very different. + * + * The return value in the stencil_pass_reg is a bitmask of valid + * fragments that also passed the stencil test. The bitmask of valid + * fragments that failed would be found in (mask_reg & ~stencil_pass_reg). + */ +static void +gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state, + unsigned int mask_reg, unsigned int fbS_reg, + unsigned int stencil_pass_reg) +{ + /* Generate code that puts the set of passing fragments into the stencil_pass_reg + * register, taking into account whether each fragment was active to begin with. + */ + switch (state->func) { + case PIPE_FUNC_EQUAL: + /* stencil_pass = mask & (s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + /* stencil_fail = mask & ~stencil_pass */ + break; + + case PIPE_FUNC_NOTEQUAL: + /* stencil_pass = mask & ~(s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_GREATER: + /* stencil_pass = mask & (s > reference) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_LESS: { + /* stencil_pass = mask & (reference > s) */ + /* There's no convenient Compare Less Than Immediate instruction, so + * we'll have to do this one the harder way, by loading a register and + * comparing directly. Compare Logical Greater Than Word (clgt) + * treats its operands as unsigned - no sign extension. + */ + unsigned int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + break; + } + + case PIPE_FUNC_LEQUAL: + /* stencil_pass = mask & (s <= reference) = mask & ~(s > reference) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_GEQUAL: { + /* stencil_pass = mask & (s >= reference) = mask & ~(reference > s) */ + /* As above, we have to do this by loading a register */ + unsigned int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + break; + } + + case PIPE_FUNC_NEVER: + /* stencil_pass = mask & 0 = 0 */ + spe_load_uint(f, stencil_pass_reg, 0); + spe_move(f, stencil_pass_reg, mask_reg); /* zmask = mask */ + break; + + case PIPE_FUNC_ALWAYS: + /* stencil_pass = mask & 1 = mask */ + spe_move(f, stencil_pass_reg, mask_reg); + break; + } + + /* The fragments that passed the stencil test are now in stencil_pass_reg. + * The fragments that failed would be (mask_reg & ~stencil_pass_reg). + */ +} + +/* This function generates code that calculates a set of new stencil values + * given the earlier values and the operation to apply. It does not + * apply any tests. It is intended to be called up to 3 times + * (for the stencil fail operation, for the stencil pass-z fail operation, + * and for the stencil pass-z pass operation) to collect up to three + * possible sets of values, and for the caller to combine them based + * on the result of the tests. + * + * stencil_max_value should be (2^n - 1) where n is the number of bits + * in the stencil buffer - in other words, it should be usable as a mask. + */ +static void +gen_stencil_values(struct spe_function *f, unsigned int stencil_op, + unsigned int stencil_ref_value, unsigned int stencil_max_value, + unsigned int fbS_reg, unsigned int newS_reg) +{ + /* The code below assumes that newS_reg and fbS_reg are not the same + * register; if they can be, the calculations below will have to use + * an additional temporary register. For now, mark the assumption + * with an assertion that will fail if they are the same. + */ + ASSERT(fbS_reg != newS_reg); + + /* The code also assumes the the stencil_max_value is of the form + * 2^n-1 and can therefore be used as a mask for the valid bits in + * addition to a maximum. Make sure this is the case as well. + * The clever math below exploits the fact that incrementing a + * binary number serves to flip all the bits of a number starting at + * the LSB and continuing to (and including) the first zero bit + * found. That means that a number and its increment will always + * have at least one bit in common (the high order bit, if nothing + * else) *unless* the number is zero, *or* the number is of a form + * consisting of some number of 1s in the low-order bits followed + * by nothing but 0s in the high-order bits. The latter case + * implies it's of the form 2^n-1. + */ + ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); + + switch(stencil_op) { + case PIPE_STENCIL_OP_KEEP: + /* newS = S */ + spe_move(f, newS_reg, fbS_reg); + break; + + case PIPE_STENCIL_OP_ZERO: + /* newS = 0 */ + spe_zero(f, newS_reg); + break; + + case PIPE_STENCIL_OP_REPLACE: + /* newS = stencil reference value */ + spe_load_uint(f, newS_reg, stencil_ref_value); + break; + + case PIPE_STENCIL_OP_INCR: { + /* newS = (s == max ? max : s + 1) */ + unsigned int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); + /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ + spe_ai(f, newS_reg, fbS_reg, 1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_DECR: { + /* newS = (s == 0 ? 0 : s - 1) */ + unsigned int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); + /* Add Word Immediate with a (-1) value works */ + spe_ai(f, newS_reg, fbS_reg, -1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_INCR_WRAP: + /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can + * do a normal add and mask off the correct bits + */ + spe_ai(f, newS_reg, fbS_reg, 1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_DECR_WRAP: + /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */ + spe_ai(f, newS_reg, fbS_reg, -1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_INVERT: + /* newS = ~s. We take advantage of the mask/max value to invert only + * the valid bits for the field so we don't have to do an extra "and". + */ + spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); + break; + + default: + ASSERT(0); + } +} + + +/* This function generates code to get all the necessary possible + * stencil values. For each of the output registers (fail_reg, + * zfail_reg, and zpass_reg), it either allocates a new register + * and calculates a new set of values based on the stencil operation, + * or it reuses a register allocation and calculation done for an + * earlier (matching) operation, or it reuses the fbS_reg register + * (if the stencil operation is KEEP, which doesn't change the + * stencil buffer). + * + * Since this function allocates a variable number of registers, + * to avoid incurring complex logic to free them, they should + * be allocated after a spe_allocate_register_set() call + * and released by the corresponding spe_release_register_set() call. + */ +static void +gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa, + unsigned int fbS_reg, + unsigned int *fail_reg, unsigned int *zfail_reg, + unsigned int *zpass_reg, unsigned int *back_fail_reg, + unsigned int *back_zfail_reg, unsigned int *back_zpass_reg) +{ + unsigned zfail_op, back_zfail_op; + + /* Stenciling had better be enabled here */ + ASSERT(dsa->stencil[0].enabled); + + /* If the depth test is not enabled, it is treated as though it always + * passes. In particular, that means that the "zfail_op" (and the backfacing + * counterpart, if active) are not considered - a failing stencil test will + * trigger the "fail_op", and a passing stencil test will trigger the + * "zpass_op". + * + * By overriding the operations in this case to be PIPE_STENCIL_OP_KEEP, + * we keep them from being calculated. + */ + if (dsa->depth.enabled) { + zfail_op = dsa->stencil[0].zfail_op; + back_zfail_op = dsa->stencil[1].zfail_op; + } + else { + zfail_op = PIPE_STENCIL_OP_KEEP; + back_zfail_op = PIPE_STENCIL_OP_KEEP; + } + + /* One-sided or front-facing stencil */ + if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP) { + *fail_reg = fbS_reg; + } + else { + *fail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].fail_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *fail_reg); + } + + if (zfail_op == PIPE_STENCIL_OP_KEEP) { + *zfail_reg = fbS_reg; + } + else if (zfail_op == dsa->stencil[0].fail_op) { + *zfail_reg = *fail_reg; + } + else { + *zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].zfail_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *zfail_reg); + } + + if (dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP) { + *zpass_reg = fbS_reg; + } + else if (dsa->stencil[0].zpass_op == dsa->stencil[0].fail_op) { + *zpass_reg = *fail_reg; + } + else if (dsa->stencil[0].zpass_op == zfail_op) { + *zpass_reg = *zfail_reg; + } + else { + *zpass_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].zpass_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *zpass_reg); + } + + /* If two-sided stencil is enabled, we have more work to do. */ + if (!dsa->stencil[1].enabled) { + /* This just flags that the registers need not be deallocated later */ + *back_fail_reg = fbS_reg; + *back_zfail_reg = fbS_reg; + *back_zpass_reg = fbS_reg; + } + else { + /* Same calculations as above, but for the back stencil */ + if (dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP) { + *back_fail_reg = fbS_reg; + } + else if (dsa->stencil[1].fail_op == dsa->stencil[0].fail_op) { + *back_fail_reg = *fail_reg; + } + else if (dsa->stencil[1].fail_op == zfail_op) { + *back_fail_reg = *zfail_reg; + } + else if (dsa->stencil[1].fail_op == dsa->stencil[0].zpass_op) { + *back_fail_reg = *zpass_reg; + } + else { + *back_fail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].fail_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_fail_reg); + } + + if (back_zfail_op == PIPE_STENCIL_OP_KEEP) { + *back_zfail_reg = fbS_reg; + } + else if (back_zfail_op == dsa->stencil[0].fail_op) { + *back_zfail_reg = *fail_reg; + } + else if (back_zfail_op == zfail_op) { + *back_zfail_reg = *zfail_reg; + } + else if (back_zfail_op == dsa->stencil[0].zpass_op) { + *back_zfail_reg = *zpass_reg; + } + else if (back_zfail_op == dsa->stencil[1].fail_op) { + *back_zfail_reg = *back_fail_reg; + } + else { + *back_zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].zfail_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_zfail_reg); + } + + if (dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) { + *back_zpass_reg = fbS_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[0].fail_op) { + *back_zpass_reg = *fail_reg; + } + else if (dsa->stencil[1].zpass_op == zfail_op) { + *back_zpass_reg = *zfail_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[0].zpass_op) { + *back_zpass_reg = *zpass_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[1].fail_op) { + *back_zpass_reg = *back_fail_reg; + } + else if (dsa->stencil[1].zpass_op == back_zfail_op) { + *back_zpass_reg = *back_zfail_reg; + } + else { + *back_zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].zpass_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_zpass_reg); + } + } /* End of calculations for back-facing stencil */ +} + +static boolean +gen_stencil_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, + const int const facing_reg, + const int mask_reg, const int fragZ_reg, + const int fbZ_reg, const int fbS_reg) +{ + /* True if we've generated code that could require writeback to the + * depth and/or stencil buffers + */ + boolean modified_buffers = false; + + boolean need_to_calculate_stencil_values; + boolean need_to_writemask_stencil_values; + + /* Registers. We may or may not actually allocate these, depending + * on whether the state values indicate that we need them. + */ + unsigned int stencil_pass_reg, stencil_fail_reg; + unsigned int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; + unsigned int stencil_writemask_reg; + unsigned int zmask_reg; + unsigned int newS_reg; + + /* Stenciling is quite complex: up to six different configurable stencil + * operations/calculations can be required (three each for front-facing + * and back-facing fragments). Many of those operations will likely + * be identical, so there's good reason to try to avoid calculating + * the same values more than once (which unfortunately makes the code less + * straightforward). + * + * To make register management easier, we start a new + * register set; we can release all the registers in the set at + * once, and avoid having to keep track of exactly which registers + * we allocate. We can still allocate and free registers as + * desired (if we know we no longer need a register), but we don't + * have to spend the complexity to track the more difficult variant + * register usage scenarios. + */ + spe_allocate_register_set(f); + + /* Calculate the writemask. If the writemask is trivial (either + * all 0s, meaning that we don't need to calculate any stencil values + * because they're not going to change the stencil anyway, or all 1s, + * meaning that we have to calculate the stencil values but do not + * need to mask them), we can avoid generating code. Don't forget + * that we need to consider backfacing stencil, if enabled. + */ + if (dsa->stencil[0].write_mask == 0x0 && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) { + /* Trivial: don't need to calculate stencil values, and don't need to + * write them back to the framebuffer. + */ + need_to_calculate_stencil_values = false; + need_to_writemask_stencil_values = false; + } + else if (dsa->stencil[0].write_mask == 0xff && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) { + /* Still trivial, but a little less so. We need to write the stencil + * values, but we don't need to mask them. + */ + need_to_calculate_stencil_values = true; + need_to_writemask_stencil_values = false; + } + else { + /* The general case: calculate, mask, and write */ + need_to_calculate_stencil_values = true; + need_to_writemask_stencil_values = true; + + /* While we're here, generate code that calculates what the + * writemask should be. If backface stenciling is enabled, + * and the backface writemask is not the same as the frontface + * writemask, we'll have to generate code that merges the + * two masks into a single effective mask based on fragment facing. + */ + stencil_writemask_reg = spe_allocate_available_register(f); + spe_load_uint(f, stencil_writemask_reg, dsa->stencil[0].write_mask); + if (dsa->stencil[1].enabled && dsa->stencil[0].write_mask != dsa->stencil[1].write_mask) { + unsigned int back_write_mask_reg = spe_allocate_available_register(f); + spe_load_uint(f, back_write_mask_reg, dsa->stencil[1].write_mask); + spe_selb(f, stencil_writemask_reg, stencil_writemask_reg, back_write_mask_reg, facing_reg); + spe_release_register(f, back_write_mask_reg); + } + } + + /* At least one-sided stenciling must be on. Generate code that + * runs the stencil test on the basic/front-facing stencil, leaving + * the mask of passing stencil bits in stencil_pass_reg. This mask will + * be used both to mask the set of active pixels, and also to + * determine how the stencil buffer changes. + * + * This test will *not* change the value in mask_reg (because we don't + * yet know whether to apply the two-sided stencil or one-sided stencil). + */ + stencil_pass_reg = spe_allocate_available_register(f); + gen_stencil_test(f, &dsa->stencil[0], mask_reg, fbS_reg, stencil_pass_reg); + + /* If two-sided stenciling is on, generate code to run the stencil + * test on the backfacing stencil as well, and combine the two results + * into the one correct result based on facing. + */ + if (dsa->stencil[1].enabled) { + unsigned int temp_reg = spe_allocate_available_register(f); + gen_stencil_test(f, &dsa->stencil[1], mask_reg, fbS_reg, temp_reg); + spe_selb(f, stencil_pass_reg, stencil_pass_reg, temp_reg, facing_reg); + spe_release_register(f, temp_reg); + } + + /* Generate code that, given the mask of valid fragments and the + * mask of valid fragments that passed the stencil test, computes + * the mask of valid fragments that failed the stencil test. We + * have to do this before we run a depth test (because the + * depth test should not be performed on fragments that failed the + * stencil test, and because the depth test will update the + * mask of valid fragments based on the results of the depth test). + */ + stencil_fail_reg = spe_allocate_available_register(f); + spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); + /* Now remove the stenciled-out pixels from the valid fragment mask, + * so we can later use the valid fragment mask in the depth test. + */ + spe_and(f, mask_reg, mask_reg, stencil_pass_reg); + + /* We may not need to calculate stencil values, if the writemask is off */ + if (need_to_calculate_stencil_values) { + unsigned int back_stencil_fail_values, back_stencil_pass_depth_fail_values, back_stencil_pass_depth_pass_values; + unsigned int front_stencil_fail_values, front_stencil_pass_depth_fail_values, front_stencil_pass_depth_pass_values; + + /* Generate code that calculates exactly which stencil values we need, + * without calculating the same value twice (say, if two different + * stencil ops have the same value). This code will work for one-sided + * and two-sided stenciling (so that we take into account that operations + * may match between front and back stencils), and will also take into + * account whether the depth test is enabled (if the depth test is off, + * we don't need any of the zfail results, because the depth test always + * is considered to pass if it is disabled). Any register value that + * does not need to be calculated will come back with the same value + * that's in fbS_reg. + * + * This function will allocate a variant number of registers that + * will be released as part of the register set. + */ + gen_get_stencil_values(f, dsa, fbS_reg, + &front_stencil_fail_values, &front_stencil_pass_depth_fail_values, + &front_stencil_pass_depth_pass_values, &back_stencil_fail_values, + &back_stencil_pass_depth_fail_values, &back_stencil_pass_depth_pass_values); + + /* Tricky, tricky, tricky - the things we do to create optimal + * code... + * + * The various stencil values registers may overlap with each other + * and with fbS_reg arbitrarily (as any particular operation is + * only calculated once and stored in one register, no matter + * how many times it is used). So we can't change the values + * within those registers directly - if we change a value in a + * register that's being referenced by two different calculations, + * we've just unwittingly changed the second value as well... + * + * Avoid this by allocating new registers to hold the results + * (there may be 2, if the depth test is off, or 3, if it is on). + * These will be released as part of the register set. + */ + if (!dsa->stencil[1].enabled) { + /* The easy case: if two-sided stenciling is *not* enabled, we + * just use the front-sided values. + */ + stencil_fail_values = front_stencil_fail_values; + stencil_pass_depth_fail_values = front_stencil_pass_depth_fail_values; + stencil_pass_depth_pass_values = front_stencil_pass_depth_pass_values; + } + else { /* two-sided stencil enabled */ + /* Allocate new registers for the needed merged values */ + stencil_fail_values = spe_allocate_available_register(f); + spe_selb(f, stencil_fail_values, front_stencil_fail_values, back_stencil_fail_values, facing_reg); + if (dsa->depth.enabled) { + stencil_pass_depth_fail_values = spe_allocate_available_register(f); + spe_selb(f, stencil_pass_depth_fail_values, front_stencil_pass_depth_fail_values, back_stencil_pass_depth_fail_values, facing_reg); + } + else { + stencil_pass_depth_fail_values = fbS_reg; + } + stencil_pass_depth_pass_values = spe_allocate_available_register(f); + spe_selb(f, stencil_pass_depth_pass_values, front_stencil_pass_depth_pass_values, back_stencil_pass_depth_pass_values, facing_reg); + } + } + + /* We now have all the stencil values we need. We also need + * the results of the depth test to figure out which + * stencil values will become the new stencil values. (Even if + * we aren't actually calculating stencil values, we need to apply + * the depth test if it's enabled.) + * + * The code generated by gen_depth_test() returns the results of the + * test in the given register, but also alters the mask_reg based + * on the results of the test. + */ + if (dsa->depth.enabled) { + zmask_reg = spe_allocate_available_register(f); + modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + } + + if (need_to_calculate_stencil_values) { + /* If we need to writemask the stencil values before going into + * the stencil buffer, we'll have to use a new register to + * hold the new values. If not, we can just keep using the + * current register. + */ + if (need_to_writemask_stencil_values) { + newS_reg = spe_allocate_available_register(f); + spe_move(f, newS_reg, fbS_reg); + modified_buffers = true; + } + else { + newS_reg = fbS_reg; + } + + /* Merge in the selected stencil fail values */ + if (stencil_fail_values != fbS_reg) { + spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); + } + + /* Same for the stencil pass/depth fail values. If this calculation + * is not needed (say, if depth test is off), then the + * stencil_pass_depth_fail_values register will be equal to fbS_reg + * and we'll skip the calculation. + */ + if (stencil_pass_depth_fail_values != fbS_reg) { + /* We don't actually have a stencil pass/depth fail mask yet. + * Calculate it here from the stencil passing mask and the + * depth passing mask. Note that zmask_reg *must* have been + * set above if we're here. + */ + unsigned int stencil_pass_depth_fail_mask = spe_allocate_available_register(f); + spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); + + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, stencil_pass_depth_fail_mask); + + spe_release_register(f, stencil_pass_depth_fail_mask); + } + + /* Same for the stencil pass/depth pass mask */ + if (stencil_pass_depth_pass_values != fbS_reg) { + unsigned int stencil_pass_depth_pass_mask = spe_allocate_available_register(f); + spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); + + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); + spe_release_register(f, stencil_pass_depth_pass_mask); + } + + /* Almost done. If we need to writemask, do it now, leaving the + * results in the fbS_reg register passed in. If we don't need + * to writemask, then the results are *already* in the fbS_reg, + * so there's nothing more to do. + */ + + if (need_to_writemask_stencil_values) { + /* The Select Bytes command makes a fine writemask. Where + * the mask is 0, the first (original) values are retained, + * effectively masking out changes. Where the mask is 1, the + * second (new) values are retained, incorporating changes. + */ + spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); + } + } /* done calculating stencil values */ + + /* The stencil and/or depth values have been applied, and the + * mask_reg, fbS_reg, and fbZ_reg values have been updated. + * We're all done, except that we've allocated a fair number + * of registers that we didn't bother tracking. Release all + * those registers as part of the register set, and go home. + */ + spe_release_register_set(f); + + /* Return true if we could have modified the stencil and/or + * depth buffers. + */ + return modified_buffers; +} + + /** * Generate SPE code to implement the fragment operations (alpha test, * depth test, stencil test, blending, colormask, and final @@ -1156,6 +1800,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) const int fragB_reg = 10; /* vector float */ const int fragA_reg = 11; /* vector float */ const int mask_reg = 12; /* vector uint */ + const int facing_reg = 13; /* uint */ /* offset of quad from start of tile * XXX assuming 4-byte pixels for color AND Z/stencil!!!! @@ -1183,6 +1828,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_allocate_register(f, fragB_reg); spe_allocate_register(f, fragA_reg); spe_allocate_register(f, mask_reg); + spe_allocate_register(f, facing_reg); quad_offset_reg = spe_allocate_available_register(f); fbRGBA_reg = spe_allocate_available_register(f); @@ -1195,6 +1841,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) ASSERT(TILE_SIZE == 32); + spe_comment(f, 0, "Computing tile location in memory"); spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ @@ -1205,124 +1852,164 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_release_register(f, y2_reg); } - if (dsa->alpha.enabled) { gen_alpha_test(dsa, f, mask_reg, fragA_reg); } + /* If we need the stencil buffers (because one- or two-sided stencil is + * enabled) or the depth buffer (because the depth test is enabled), + * go grab them. Note that if either one- or two-sided stencil is + * enabled, dsa->stencil[0].enabled will be true. + */ if (dsa->depth.enabled || dsa->stencil[0].enabled) { const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; boolean write_depth_stencil; - int fbZ_reg = spe_allocate_available_register(f); /* Z values */ - int fbS_reg = spe_allocate_available_register(f); /* Stencil values */ + /* We may or may not need to allocate a register for Z or stencil values */ + boolean fbS_reg_set = false, fbZ_reg_set = false; + unsigned int fbS_reg, fbZ_reg = 0; + + spe_comment(f, 0, "Loading Z/stencil tile"); /* fetch quad of depth/stencil values from tile at (x,y) */ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + /* XXX Not sure this is allowed if we've only got a 16-bit Z buffer... */ spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - if (dsa->depth.enabled) { - /* Extract Z bits from fbZS_reg into fbZ_reg */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - int mask_reg = spe_allocate_available_register(f); - spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */ - spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */ - spe_release_register(f, mask_reg); - /* OK, fbZ_reg has four 24-bit Z values now */ - } - else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - spe_rotmi(f, fbZ_reg, fbZS_reg, -8); /* fbZ = fbZS >> 8 */ - /* OK, fbZ_reg has four 24-bit Z values now */ - } - else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - spe_move(f, fbZ_reg, fbZS_reg); - /* OK, fbZ_reg has four 32-bit Z values now */ - } - else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - spe_move(f, fbZ_reg, fbZS_reg); - /* OK, fbZ_reg has four 16-bit Z values now */ - } - else { - ASSERT(0); /* invalid format */ - } - - /* Convert fragZ values from float[4] to 16, 24 or 32-bit uint[4] */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM || - zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - /* fragZ = fragZ >> 8 */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -8); - } - else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - } - else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - /* fragZ = fragZ >> 16 */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -16); - } - } - else { - /* no Z test, but set Z to zero so we don't OR-in garbage below */ - spe_load_uint(f, fbZ_reg, 0); /* XXX set to zero for now */ + /* From the Z/stencil buffer format, pull out the bits we need for + * Z and/or stencil. We'll also convert the incoming fragment Z + * value in fragZ_reg from a floating point value in [0.0..1.0] to + * an unsigned integer value with the appropriate resolution. + */ + switch(zs_format) { + + case PIPE_FORMAT_S8Z24_UNORM: /* fall through */ + case PIPE_FORMAT_X8Z24_UNORM: + if (dsa->depth.enabled) { + /* We need the Z part at least */ + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* four 24-bit Z values in the low-order bits */ + spe_and_uint(f, fbZ_reg, fbZS_reg, 0x00ffffff); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 24-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + } + if (dsa->stencil[0].enabled) { + setup_optional_register(f, &fbS_reg_set, &fbS_reg); + /* four 8-bit Z values in the high-order bits */ + spe_rotmi(f, fbS_reg, fbZS_reg, -24); + } + break; + + case PIPE_FORMAT_Z24S8_UNORM: /* fall through */ + case PIPE_FORMAT_Z24X8_UNORM: + if (dsa->depth.enabled) { + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* shift by 8 to get the upper 24-bit values */ + spe_rotmi(f, fbS_reg, fbZS_reg, -8); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 24-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + } + if (dsa->stencil[0].enabled) { + setup_optional_register(f, &fbS_reg_set, &fbS_reg); + /* 8-bit stencil in the low-order bits - mask them out */ + spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); + } + break; + + case PIPE_FORMAT_Z32_UNORM: + if (dsa->depth.enabled) { + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* Copy over 4 32-bit values */ + spe_move(f, fbZ_reg, fbZS_reg); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 32-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + } + /* No stencil, so can't do anything there */ + break; + + case PIPE_FORMAT_Z16_UNORM: + if (dsa->depth.enabled) { + /* XXX Not sure this is correct, but it was here before, so we're + * going with it for now + */ + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* Copy over 4 32-bit values */ + spe_move(f, fbZ_reg, fbZS_reg); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 16-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -16); + } + /* No stencil */ + break; + + default: + ASSERT(0); /* invalid format */ } - + /* If stencil is enabled, use the stencil-specific code + * generator to generate both the stencil and depth (if needed) + * tests. Otherwise, if only depth is enabled, generate + * a quick depth test. The test generators themselves will + * report back whether the depth/stencil buffer has to be + * written back. + */ if (dsa->stencil[0].enabled) { - /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - /* XXX extract with a shift */ - ASSERT(0); - } - else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - /* XXX extract with a mask */ - ASSERT(0); - } - } - else { - /* no stencil test, but set to zero so we don't OR-in garbage below */ - spe_load_uint(f, fbS_reg, 0); /* XXX set to zero for now */ - } + /* This will perform the stencil and depth tests, and update + * the mask_reg, fbZ_reg, and fbS_reg as required by the + * tests. + */ + ASSERT(fbS_reg_set); + ASSERT(fbZ_reg_set); + spe_comment(f, 0, "Perform stencil test"); - if (dsa->stencil[0].enabled) { - /* XXX this may involve depth testing too */ - // gen_stencil_test(dsa, f, ... ); - ASSERT(0); + write_depth_stencil = gen_stencil_depth_test(f, dsa, facing_reg, mask_reg, fragZ_reg, fbZ_reg, fbS_reg); } else if (dsa->depth.enabled) { int zmask_reg = spe_allocate_available_register(f); - gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + spe_comment(f, 0, "Perform depth test"); + write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); spe_release_register(f, zmask_reg); } - - /* do we need to write Z and/or Stencil back into framebuffer? */ - write_depth_stencil = (dsa->depth.writemask | - dsa->stencil[0].write_mask | - dsa->stencil[1].write_mask); + else { + write_depth_stencil = false; + } if (write_depth_stencil) { /* Merge latest Z and Stencil values into fbZS_reg. * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. * fbS_reg has four 8-bit Z values in bits [7..0]. */ + spe_comment(f, 0, "Storing depth/stencil values"); if (zs_format == PIPE_FORMAT_S8Z24_UNORM || zs_format == PIPE_FORMAT_X8Z24_UNORM) { - spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + if (fbS_reg_set) { + spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else { + spe_move(f, fbZS_reg, fbZ_reg); + } } else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || zs_format == PIPE_FORMAT_Z24X8_UNORM) { spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + if (fbS_reg_set) { + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } } else if (zs_format == PIPE_FORMAT_Z32_UNORM) { spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ @@ -1341,11 +2028,10 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); } - spe_release_register(f, fbZ_reg); - spe_release_register(f, fbS_reg); + release_optional_register(f, &fbZ_reg_set, fbZ_reg); + release_optional_register(f, &fbS_reg_set, fbS_reg); } - /* Get framebuffer quad/colors. We'll need these for blending, * color masking, and to obey the quad/pixel mask. * Load: fbRGBA_reg = memory[color_tile + quad_offset] @@ -1354,8 +2040,8 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) */ spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); - if (blend->blend_enable) { + spe_comment(f, 0, "Perform blending"); gen_blend(blend, blend_color, f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); } @@ -1369,19 +2055,21 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) int rgba_reg = spe_allocate_available_register(f); /* Pack four float colors as four 32-bit int colors */ + spe_comment(f, 0, "Convert fragment colors to framebuffer colors"); gen_pack_colors(f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, rgba_reg); if (blend->logicop_enable) { + spe_comment(f, 0, "Compute logic op"); gen_logicop(blend, f, rgba_reg, fbRGBA_reg); } if (blend->colormask != PIPE_MASK_RGBA) { + spe_comment(f, 0, "Compute color mask"); gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg); } - /* Mix fragment colors with framebuffer colors using the quad/pixel mask: * if (mask[i]) * rgba[i] = rgba[i]; @@ -1393,6 +2081,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) /* Store updated quad in tile: * memory[color_tile + quad_offset] = rgba_reg; */ + spe_comment(f, 0, "Store framebuffer colors"); spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); spe_release_register(f, rgba_reg); diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index dd25ae880e5..79cb8df82fa 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -152,6 +152,7 @@ cell_flush_prim_buffer(struct cell_context *cell) struct cell_command_render *render = &cell_global.command[i].render; render->prim_type = PIPE_PRIM_TRIANGLES; render->num_verts = cell->prim_buffer.num_verts; + render->front_winding = cell->rasterizer->front_winding; render->vertex_size = cell->vertex_info->size * 4; render->xmin = cell->prim_buffer.xmin; render->ymin = cell->prim_buffer.ymin; diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index aa63435b934..578ddf62dcb 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -214,6 +214,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, render->opcode = CELL_CMD_RENDER; render->prim_type = cvbr->prim; + render->front_winding = cell->rasterizer->front_winding; render->num_indexes = nr_indices; render->min_index = min_index; diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 29a305232ec..1cd577c23ca 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -73,7 +73,8 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y, vector float fragGreen, vector float fragBlue, vector float fragAlpha, - vector unsigned int mask); + vector unsigned int mask, + uint facing); /** Function for running fragment program */ typedef void (*spu_fragment_program_func)(vector float *inputs, diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index f107764fb28..d252fa6dc18 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -57,7 +57,8 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fragG, vector float fragB, vector float fragA, - vector unsigned int mask) + vector unsigned int mask, + uint facing) { vector float frag_aos[4]; unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ @@ -433,23 +434,23 @@ spu_fallback_fragment_ops(uint x, uint y, /* Form bitmask depending on color buffer format and colormask bits */ switch (spu.fb.color_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - if (spu.blend.colormask & (1<<0)) + if (spu.blend.colormask & PIPE_MASK_R) cmask |= 0x00ff0000; /* red */ - if (spu.blend.colormask & (1<<1)) + if (spu.blend.colormask & PIPE_MASK_G) cmask |= 0x0000ff00; /* green */ - if (spu.blend.colormask & (1<<2)) + if (spu.blend.colormask & PIPE_MASK_B) cmask |= 0x000000ff; /* blue */ - if (spu.blend.colormask & (1<<3)) + if (spu.blend.colormask & PIPE_MASK_A) cmask |= 0xff000000; /* alpha */ break; case PIPE_FORMAT_B8G8R8A8_UNORM: - if (spu.blend.colormask & (1<<0)) + if (spu.blend.colormask & PIPE_MASK_R) cmask |= 0x0000ff00; /* red */ - if (spu.blend.colormask & (1<<1)) + if (spu.blend.colormask & PIPE_MASK_G) cmask |= 0x00ff0000; /* green */ - if (spu.blend.colormask & (1<<2)) + if (spu.blend.colormask & PIPE_MASK_B) cmask |= 0xff000000; /* blue */ - if (spu.blend.colormask & (1<<3)) + if (spu.blend.colormask & PIPE_MASK_A) cmask |= 0x000000ff; /* alpha */ break; default: diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h index f817abf0463..a61689c83ab 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -38,7 +38,8 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fragGreen, vector float fragBlue, vector float fragAlpha, - vector unsigned int mask); + vector unsigned int mask, + uint facing); #endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 305dc988810..82dbeb26b76 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -279,7 +279,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) v1 = (const float *) (vertices + indexes[j+1] * vertex_size); v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - drawn += tri_draw(v0, v1, v2, tx, ty); + drawn += tri_draw(v0, v1, v2, tx, ty, render->front_winding); } //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); @@ -297,5 +297,3 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) printf("SPU %u: RENDER done\n", spu.init.id); } - - diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 0a8fb56a62c..6039cd80b24 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -118,6 +118,8 @@ struct setup_stage { float oneoverarea; + uint facing; + uint tx, ty; int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; @@ -274,7 +276,7 @@ eval_z(float x, float y) * overall. */ static INLINE void -emit_quad( int x, int y, mask_t mask ) +emit_quad( int x, int y, mask_t mask) { /* If any bits in mask are set... */ if (spu_extract(spu_orx(mask), 0)) { @@ -344,7 +346,8 @@ emit_quad( int x, int y, mask_t mask ) fragZ, soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3], - mask); + mask, + setup.facing); } } @@ -379,7 +382,8 @@ emit_quad( int x, int y, mask_t mask ) outputs[0*4+1], outputs[0*4+2], outputs[0*4+3], - mask); + mask, + setup.facing); } } } @@ -483,7 +487,7 @@ static void flush_spans( void ) */ for (x = block(minleft); x <= block(maxright); x += 2) { #if 1 - emit_quad( x, setup.span.y, calculate_mask( x ) ); + emit_quad( x, setup.span.y, calculate_mask( x )); #endif } @@ -902,13 +906,28 @@ static void subtriangle( struct edge *eleft, eright->sy += lines; } +static float +determinant( const float *v0, + const float *v1, + const float *v2 ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0] - v2[0]; + const float ey = v0[1] - v2[1]; + const float fx = v1[0] - v2[0]; + const float fy = v1[1] - v2[1]; + + /* det = cross(e,f).z */ + return ex * fy - ey * fx; +} + /** * Draw triangle into tile at (tx, ty) (tile coords) * The tile data should have already been fetched. */ boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding) { setup.tx = tx; setup.ty = ty; @@ -919,6 +938,12 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) setup.cliprect_maxx = (tx + 1) * TILE_SIZE; setup.cliprect_maxy = (ty + 1) * TILE_SIZE; + /* Before we sort vertices, determine the facing of the triangle, + * which will be needed for front/back-face stencil application + */ + float det = determinant(v0, v1, v2); + setup.facing = (det > 0.0) ^ (front_winding == PIPE_WINDING_CW); + if (!setup_sort_vertices((struct vertex_header *) v0, (struct vertex_header *) v1, (struct vertex_header *) v2)) { diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h index aa694dd7c93..abc3d35160e 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.h +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -31,7 +31,7 @@ extern boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding); #endif /* SPU_TRI_H */ -- cgit v1.2.3 From 53d4706c6c0922160f310834daaec5718ff1c511 Mon Sep 17 00:00:00 2001 From: Keith Whitwell <keith@tungstengraphics.com> Date: Wed, 10 Sep 2008 11:39:43 +0100 Subject: make draw's vertex_info struct smaller/quicker to compare with memcmp() --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 4 +- src/gallium/auxiliary/draw/draw_pt_emit.c | 4 +- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 4 +- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 6 +-- src/gallium/auxiliary/draw/draw_vertex.c | 6 +-- src/gallium/auxiliary/draw/draw_vertex.h | 44 ++++++++++++++++++---- src/gallium/drivers/i915simple/i915_prim_emit.c | 4 +- .../drivers/i915simple/i915_state_derived.c | 4 +- src/gallium/drivers/softpipe/sp_setup.c | 12 +++--- 9 files changed, 59 insertions(+), 29 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index c0cf4269dbb..9825e116c32 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -231,9 +231,9 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) unsigned emit_sz = 0; unsigned src_buffer = 0; unsigned output_format; - unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) ); + unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) ); - switch (vbuf->vinfo->emit[i]) { + switch (vbuf->vinfo->attrib[i].emit) { case EMIT_4F: output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d4eca80588b..d520b05869b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -84,11 +84,11 @@ void draw_pt_emit_prepare( struct pt_emit *emit, unsigned emit_sz = 0; unsigned src_buffer = 0; unsigned output_format; - unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) ); + unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) ); - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_4F: output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 5a4db6cfe56..3966ad48ba7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -121,7 +121,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, memset(&key, 0, sizeof(key)); for (i = 0; i < vinfo->num_attribs; i++) { - const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]]; + const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->attrib[i].src_index]; unsigned emit_sz = 0; unsigned input_format = src->src_format; @@ -129,7 +129,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned input_offset = src->src_offset; unsigned output_format; - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_4F: output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index a0e08dd10aa..f7e6a1a8eeb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -133,7 +133,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, for (i = 0; i < vinfo->num_attribs; i++) { unsigned emit_sz = 0; - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_4F: emit_sz = 4 * sizeof(float); break; @@ -161,8 +161,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle, * numbers, not to positions in the hw vertex description -- * that's handled by the output_offset field. */ - fse->key.element[i].out.format = vinfo->emit[i]; - fse->key.element[i].out.vs_output = vinfo->src_index[i]; + fse->key.element[i].out.format = vinfo->attrib[i].emit; + fse->key.element[i].out.vs_output = vinfo->attrib[i].src_index; fse->key.element[i].out.offset = dst_offset; dst_offset += emit_sz; diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 1446f785c51..3214213e445 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -49,7 +49,7 @@ draw_compute_vertex_size(struct vertex_info *vinfo) vinfo->size = 0; for (i = 0; i < vinfo->num_attribs; i++) { - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_OMIT: break; case EMIT_4UB: @@ -81,8 +81,8 @@ draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) unsigned i, j; for (i = 0; i < vinfo->num_attribs; i++) { - j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { + j = vinfo->attrib[i].src_index; + switch (vinfo->attrib[i].emit) { case EMIT_OMIT: debug_printf("EMIT_OMIT:"); break; diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 16c65c43175..dca61581288 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -75,12 +75,41 @@ struct vertex_info { uint num_attribs; uint hwfmt[4]; /**< hardware format info for this format */ - enum interp_mode interp_mode[PIPE_MAX_SHADER_INPUTS]; - enum attrib_emit emit[PIPE_MAX_SHADER_INPUTS]; /**< EMIT_x */ - uint src_index[PIPE_MAX_SHADER_INPUTS]; /**< map to post-xform attribs */ uint size; /**< total vertex size in dwords */ + + /* Keep this small and at the end of the struct to allow quick + * memcmp() comparisons. + */ + struct { + ubyte interp_mode:4; /**< INTERP_x */ + ubyte emit:4; /**< EMIT_x */ + ubyte src_index; /**< map to post-xform attribs */ + } attrib[PIPE_MAX_SHADER_INPUTS]; }; +static inline int +draw_vinfo_size( const struct vertex_info *a ) +{ + return ((const char *)&a->attrib[a->num_attribs] - + (const char *)a); +} + +static inline int +draw_vinfo_compare( const struct vertex_info *a, + const struct vertex_info *b ) +{ + unsigned sizea = draw_vinfo_size( a ); + return memcmp( a, b, sizea ); +} + +static inline void +draw_vinfo_copy( struct vertex_info *dst, + const struct vertex_info *src ) +{ + unsigned size = draw_vinfo_size( src ); + memcpy( dst, src, size ); +} + /** @@ -91,14 +120,15 @@ struct vertex_info */ static INLINE uint draw_emit_vertex_attr(struct vertex_info *vinfo, - enum attrib_emit emit, enum interp_mode interp, + enum attrib_emit emit, + enum interp_mode interp, /* only used by softpipe??? */ uint src_index) { const uint n = vinfo->num_attribs; assert(n < PIPE_MAX_SHADER_INPUTS); - vinfo->emit[n] = emit; - vinfo->interp_mode[n] = interp; - vinfo->src_index[n] = src_index; + vinfo->attrib[n].emit = emit; + vinfo->attrib[n].interp_mode = interp; + vinfo->attrib[n].src_index = src_index; vinfo->num_attribs++; return n; } diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index d194c2fb158..8f1f58b2dd1 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -77,9 +77,9 @@ emit_hw_vertex( struct i915_context *i915, assert(!i915->dirty); for (i = 0; i < vinfo->num_attribs; i++) { - const uint j = vinfo->src_index[i]; + const uint j = vinfo->attrib[i].src_index; const float *attrib = vertex->data[j]; - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_1F: OUT_BATCH( fui(attrib[0]) ); count++; diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 488615067c5..178d4e8781d 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -88,12 +88,12 @@ static void calculate_vertex_layout( struct i915_context *i915 ) if (needW) { draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; - vinfo.emit[0] = EMIT_4F; + vinfo.attrib[0].emit = EMIT_4F; } else { draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZ; - vinfo.emit[0] = EMIT_3F; + vinfo.attrib[0].emit = EMIT_3F; } /* hardware point size */ diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index bc8263c33e3..13d80173937 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -773,10 +773,10 @@ static void setup_tri_coefficients( struct setup_context *setup ) /* setup interpolation for all the remaining attributes: */ for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; + const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; - switch (vinfo->interp_mode[fragSlot]) { + switch (vinfo->attrib[fragSlot].interp_mode) { case INTERP_CONSTANT: for (j = 0; j < NUM_CHANNELS; j++) const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); @@ -1084,10 +1084,10 @@ setup_line_coefficients(struct setup_context *setup, /* setup interpolation for all the remaining attributes: */ for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; + const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; - switch (vinfo->interp_mode[fragSlot]) { + switch (vinfo->attrib[fragSlot].interp_mode) { case INTERP_CONSTANT: for (j = 0; j < NUM_CHANNELS; j++) const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); @@ -1331,10 +1331,10 @@ setup_point( struct setup_context *setup, const_coeff(setup, &setup->posCoef, 0, 3); for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; + const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; - switch (vinfo->interp_mode[fragSlot]) { + switch (vinfo->attrib[fragSlot].interp_mode) { case INTERP_CONSTANT: /* fall-through */ case INTERP_LINEAR: -- cgit v1.2.3 From 4d7394f89292131323fc8e39efa511a2eeb8cc60 Mon Sep 17 00:00:00 2001 From: José Fonseca <jrfonseca@tungstengraphics.com> Date: Tue, 7 Oct 2008 14:25:09 +0900 Subject: gallium: Introduce PIPE_ARCH_SSE define for SSE support. Besides meaning x86 and x86-64 architecture, it also depends on SSE2 support enabled on gcc. This fixes the linux-debug build. --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 +- src/gallium/auxiliary/util/u_sse.h | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/gallium/include/pipe/p_config.h | 8 ++++++++ 5 files changed, 12 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0efabd9de8b..b11ae316627 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -37,7 +37,7 @@ #include "draw_vs.h" -#if defined(PIPE_ARCH_X86) +#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 79f424b692a..f79170b9d65 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -27,7 +27,7 @@ #include "pipe/p_config.h" -#ifdef PIPE_ARCH_X86 +#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) #include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 68e56f08165..e2a8491e62c 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -39,7 +39,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if defined(PIPE_ARCH_SSE) #include <xmmintrin.h> #include <emmintrin.h> diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 496ed43df26..0111469405f 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -40,7 +40,7 @@ #include "tgsi/tgsi_sse2.h" -#ifdef PIPE_ARCH_X86 +#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) #include "rtasm/rtasm_x86sse.h" diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index af3746c0265..ef05547819d 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -85,6 +85,14 @@ #define PIPE_ARCH_X86_64 #endif +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if defined(PIPE_CC_GCC) && !defined(__SSE2__) +/* #warning SSE2 support requires -msse -msse2 compiler options */ +#else +#define PIPE_ARCH_SSE +#endif +#endif + #if 0 /* FIXME */ #define PIPE_ARCH_PPC #endif -- cgit v1.2.3 From 44799c3b7e0e4260b93e68a5da5a03c9279ac26a Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 7 Oct 2008 14:34:08 -0600 Subject: cell: use new keymap to save/re-use fragment ops code --- src/gallium/drivers/cell/ppu/cell_context.c | 6 ++ src/gallium/drivers/cell/ppu/cell_context.h | 17 ++++++ src/gallium/drivers/cell/ppu/cell_state_emit.c | 80 ++++++++++++++++++++++++++ 3 files changed, 103 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 62e213ea354..30ce6f97624 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -62,6 +62,8 @@ cell_destroy_context( struct pipe_context *pipe ) { struct cell_context *cell = cell_context(pipe); + util_delete_keymap(cell->fragment_ops_cache, NULL); + cell_spu_exit(cell); align_free(cell); @@ -131,6 +133,10 @@ cell_create_context(struct pipe_screen *screen, cell->draw = cell_draw_create(cell); + /* Create cache of fragment ops generated code */ + cell->fragment_ops_cache = + util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL); + cell_init_vbuf(cell); draw_set_rasterize_stage(cell->draw, cell->vbuf); diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 3dc15c9233c..80a9b3d7e13 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -38,6 +38,7 @@ #include "cell/common.h" #include "rtasm/rtasm_ppc_spe.h" #include "tgsi/tgsi_scan.h" +#include "util/u_keymap.h" struct cell_vbuf_render; @@ -66,6 +67,19 @@ struct cell_fragment_shader_state }; +/** + * Key for mapping per-fragment state to cached SPU machine code. + * keymap(cell_fragment_ops_key) => cell_command_fragment_ops + */ +struct cell_fragment_ops_key +{ + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + enum pipe_format color_format; + enum pipe_format zs_format; +}; + + /** * Per-context state, subclass of pipe_context. */ @@ -107,6 +121,9 @@ struct cell_context uint dirty; + /** Cache of code generated for per-fragment ops */ + struct keymap *fragment_ops_cache; + /** The primitive drawing context */ struct draw_context *draw; struct draw_stage *render_stage; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index f35893537bf..b00c41f47d6 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -36,6 +36,78 @@ #include "draw/draw_private.h" +/** + * Find/create a cell_command_fragment_ops object corresponding to the + * current blend/stencil/z/colormask/etc. state. + */ +static struct cell_command_fragment_ops * +lookup_fragment_ops(struct cell_context *cell) +{ + struct cell_fragment_ops_key key; + struct cell_command_fragment_ops *ops; + + /* + * Build key + */ + key.blend = *cell->blend; + key.dsa = *cell->depth_stencil; + + if (cell->framebuffer.cbufs[0]) + key.color_format = cell->framebuffer.cbufs[0]->format; + else + key.color_format = PIPE_FORMAT_NONE; + + if (cell->framebuffer.zsbuf) + key.zs_format = cell->framebuffer.zsbuf->format; + else + key.zs_format = PIPE_FORMAT_NONE; + + /* + * Look up key in cache. + */ + ops = (struct cell_command_fragment_ops *) + util_keymap_lookup(cell->fragment_ops_cache, &key); + + /* + * If not found, create/save new fragment ops command. + */ + if (!ops) { + struct spe_function spe_code; + + if (0) + debug_printf("**** Create New Fragment Ops\n"); + + /* Prepare the buffer that will hold the generated code. */ + spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + + /* generate new code */ + cell_gen_fragment_function(cell, &spe_code); + + /* alloc new fragment ops command */ + ops = CALLOC_STRUCT(cell_command_fragment_ops); + + /* populate the new cell_command_fragment_ops object */ + ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; + memcpy(ops->code, spe_code.store, spe_code_size(&spe_code)); + ops->dsa = *cell->depth_stencil; + ops->blend = *cell->blend; + + /* insert cell_command_fragment_ops object into keymap/cache */ + util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); + + /* release rtasm buffer */ + spe_release_func(&spe_code); + } + else { + if (0) + debug_printf("**** Re-use Fragment Ops\n"); + } + + return ops; +} + + + static void emit_state_cmd(struct cell_context *cell, uint cmd, const void *state, uint state_size) @@ -92,6 +164,7 @@ cell_emit_state(struct cell_context *cell) if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_DEPTH_STENCIL | CELL_NEW_BLEND)) { +#if 0 /* XXX we don't want to always do codegen here. We should have * a hash/lookup table to cache previous results... */ @@ -114,6 +187,13 @@ cell_emit_state(struct cell_context *cell) /* free codegen buffer */ spe_release_func(&spe_code); +#else + struct cell_command_fragment_ops *fops, *fops_cmd; + fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd)); + fops = lookup_fragment_ops(cell); + memcpy(fops_cmd, fops, sizeof(*fops)); +#endif + } if (cell->dirty & CELL_NEW_SAMPLER) { -- cgit v1.2.3 From be3c070b6a86255feb752b7574daff8cb6091b96 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 7 Oct 2008 14:50:06 -0600 Subject: cell: memset() key to zero --- src/gallium/drivers/cell/ppu/cell_state_emit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index b00c41f47d6..69c1e4d342d 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -49,6 +49,7 @@ lookup_fragment_ops(struct cell_context *cell) /* * Build key */ + memset(&key, 0, sizeof(key)); key.blend = *cell->blend; key.dsa = *cell->depth_stencil; -- cgit v1.2.3 From e561058641ca39a676b219a056f889ad99240029 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 7 Oct 2008 14:58:05 -0600 Subject: cell: remove old code --- src/gallium/drivers/cell/ppu/cell_state_emit.c | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 69c1e4d342d..a36fd3a6015 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -165,36 +165,10 @@ cell_emit_state(struct cell_context *cell) if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_DEPTH_STENCIL | CELL_NEW_BLEND)) { -#if 0 - /* XXX we don't want to always do codegen here. We should have - * a hash/lookup table to cache previous results... - */ - struct cell_command_fragment_ops *fops - = cell_batch_alloc(cell, sizeof(*fops)); - struct spe_function spe_code; - - /* Prepare the buffer that will hold the generated code. */ - spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - - /* generate new code */ - cell_gen_fragment_function(cell, &spe_code); - - /* put the new code into the batch buffer */ - fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; - memcpy(&fops->code, spe_code.store, - SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - fops->dsa = *cell->depth_stencil; - fops->blend = *cell->blend; - - /* free codegen buffer */ - spe_release_func(&spe_code); -#else struct cell_command_fragment_ops *fops, *fops_cmd; fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd)); fops = lookup_fragment_ops(cell); memcpy(fops_cmd, fops, sizeof(*fops)); -#endif - } if (cell->dirty & CELL_NEW_SAMPLER) { -- cgit v1.2.3 From 3008657ceaec3f91386c767c51647729afe16b34 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 7 Oct 2008 15:13:48 -0600 Subject: cell: fix formatting --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 8d2d4f2a0f2..5d16fc13fe7 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -191,19 +191,16 @@ get_src_reg(struct codegen *gen, break; case TGSI_FILE_INPUT: { - if(swizzle == TGSI_EXTSWIZZLE_ONE) - { + if (swizzle == TGSI_EXTSWIZZLE_ONE) { /* Load const one float and early out */ reg = get_const_one_reg(gen); } - else if(swizzle == TGSI_EXTSWIZZLE_ZERO) - { + else if (swizzle == TGSI_EXTSWIZZLE_ZERO) { /* Load const zero float and early out */ reg = get_itemp(gen); spe_xor(gen->f, reg, reg, reg); } - else - { + else { /* offset is measured in quadwords, not bytes */ int offset = src->SrcRegister.Index * 4 + swizzle; reg = get_itemp(gen); -- cgit v1.2.3 From ce416566bc71d2463785a834ffbe14fb5e9eae03 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 7 Oct 2008 16:11:20 -0600 Subject: cell: fix incorrect extended swizzle term code in get_src_reg() --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 50 ++++++++++++++++-------------- 1 file changed, 26 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 5d16fc13fe7..131a2356fe5 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -185,22 +185,24 @@ get_src_reg(struct codegen *gen, assert(swizzle >= TGSI_SWIZZLE_X); assert(swizzle <= TGSI_EXTSWIZZLE_ONE); - switch (src->SrcRegister.File) { - case TGSI_FILE_TEMPORARY: - reg = gen->temp_regs[src->SrcRegister.Index][swizzle]; - break; - case TGSI_FILE_INPUT: - { - if (swizzle == TGSI_EXTSWIZZLE_ONE) { - /* Load const one float and early out */ - reg = get_const_one_reg(gen); - } - else if (swizzle == TGSI_EXTSWIZZLE_ZERO) { - /* Load const zero float and early out */ - reg = get_itemp(gen); - spe_xor(gen->f, reg, reg, reg); - } - else { + if (swizzle == TGSI_EXTSWIZZLE_ONE) { + /* Load const one float and early out */ + reg = get_const_one_reg(gen); + } + else if (swizzle == TGSI_EXTSWIZZLE_ZERO) { + /* Load const zero float and early out */ + reg = get_itemp(gen); + spe_xor(gen->f, reg, reg, reg); + } + else { + assert(swizzle < 4); + + switch (src->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + reg = gen->temp_regs[src->SrcRegister.Index][swizzle]; + break; + case TGSI_FILE_INPUT: + { /* offset is measured in quadwords, not bytes */ int offset = src->SrcRegister.Index * 4 + swizzle; reg = get_itemp(gen); @@ -208,15 +210,15 @@ get_src_reg(struct codegen *gen, /* Load: reg = memory[(machine_reg) + offset] */ spe_lqd(gen->f, reg, gen->inputs_reg, offset); } + break; + case TGSI_FILE_IMMEDIATE: + reg = gen->imm_regs[src->SrcRegister.Index][swizzle]; + break; + case TGSI_FILE_CONSTANT: + /* xxx fall-through for now / fix */ + default: + assert(0); } - break; - case TGSI_FILE_IMMEDIATE: - reg = gen->imm_regs[src->SrcRegister.Index][swizzle]; - break; - case TGSI_FILE_CONSTANT: - /* xxx fall-through for now / fix */ - default: - assert(0); } /* -- cgit v1.2.3 From 800c350d71132bbb5126bd89310df540332978f4 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 7 Oct 2008 16:14:27 -0600 Subject: cell: add support for fragment shader constant buffers --- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_gen_fp.c | 10 +++++++++- src/gallium/drivers/cell/ppu/cell_state.h | 5 +++-- src/gallium/drivers/cell/ppu/cell_state_emit.c | 19 +++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_state_shader.c | 5 ++++- src/gallium/drivers/cell/spu/spu_command.c | 22 ++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_main.h | 8 +++++--- 7 files changed, 63 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index c223bc17449..d261c1a6408 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -94,6 +94,7 @@ #define CELL_CMD_STATE_BIND_VS 18 #define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 #define CELL_CMD_STATE_ATTRIB_FETCH 20 +#define CELL_CMD_STATE_FS_CONSTANTS 21 #define CELL_CMD_VS_EXECUTE 22 #define CELL_CMD_FLUSH_BUFFER_RANGE 23 diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 131a2356fe5..3065869d043 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -215,7 +215,15 @@ get_src_reg(struct codegen *gen, reg = gen->imm_regs[src->SrcRegister.Index][swizzle]; break; case TGSI_FILE_CONSTANT: - /* xxx fall-through for now / fix */ + { + /* offset is measured in quadwords, not bytes */ + int offset = src->SrcRegister.Index * 4 + swizzle; + reg = get_itemp(gen); + reg_is_itemp = TRUE; + /* Load: reg = memory[(machine_reg) + offset] */ + spe_lqd(gen->f, reg, gen->constants_reg, offset); + } + break; default: assert(0); } diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index a7771a55a31..b193170f9ce 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -44,8 +44,9 @@ #define CELL_NEW_TEXTURE 0x800 #define CELL_NEW_VERTEX 0x1000 #define CELL_NEW_VS 0x2000 -#define CELL_NEW_CONSTANTS 0x4000 -#define CELL_NEW_VERTEX_INFO 0x8000 +#define CELL_NEW_VS_CONSTANTS 0x4000 +#define CELL_NEW_FS_CONSTANTS 0x8000 +#define CELL_NEW_VERTEX_INFO 0x10000 extern void diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index a36fd3a6015..cbfa393cfba 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include "pipe/p_inlines.h" #include "util/u_memory.h" #include "cell_context.h" #include "cell_gen_fragment.h" @@ -162,6 +163,24 @@ cell_emit_state(struct cell_context *cell) } } + if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { + const uint shader = PIPE_SHADER_FRAGMENT; + const uint num_const = cell->constants[shader].size / sizeof(float); + uint i, j; + float *buf = cell_batch_alloc(cell, 16 + num_const * sizeof(float)); + uint64_t *ibuf = (uint64_t *) buf; + const float *constants = pipe_buffer_map(cell->pipe.screen, + cell->constants[shader].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; + ibuf[1] = num_const; + j = 4; + for (i = 0; i < num_const; i++) { + buf[j++] = constants[i]; + } + pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer); + } + if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_DEPTH_STENCIL | CELL_NEW_BLEND)) { diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 3a0d066da2a..54a17eaf2b7 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -197,7 +197,10 @@ cell_set_constant_buffer(struct pipe_context *pipe, buf->buffer); cell->constants[shader].size = buf->size; - cell->dirty |= CELL_NEW_CONSTANTS; + if (shader == PIPE_SHADER_VERTEX) + cell->dirty |= CELL_NEW_VS_CONSTANTS; + else if (shader == PIPE_SHADER_FRAGMENT) + cell->dirty |= CELL_NEW_FS_CONSTANTS; } diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index ec9da5d8870..91a4c137e7c 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -231,6 +231,25 @@ cmd_state_fragment_program(const struct cell_command_fragment_program *fp) } +static uint +cmd_state_fs_constants(const uint64_t *buffer, uint pos) +{ + const uint num_const = buffer[pos + 1]; + const float *constants = (const float *) &buffer[pos + 2]; + uint i; + + DEBUG_PRINTF("CMD_STATE_FS_CONSTANTS (%u)\n", num_const); + + /* Expand each float to float[4] for SOA execution */ + for (i = 0; i < num_const; i++) { + spu.constants[i] = spu_splats(constants[i]); + } + + /* return new buffer pos (in 8-byte words) */ + return pos + 2 + num_const / 2; +} + + static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { @@ -456,6 +475,9 @@ cmd_batch(uint opcode) pos += sizeof(*fp) / 8; } break; + case CELL_CMD_STATE_FS_CONSTANTS: + pos = cmd_state_fs_constants(buffer, pos); + break; case CELL_CMD_STATE_SAMPLER: { struct cell_command_sampler *sampler diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 1cd577c23ca..82c9c69a3a8 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -41,6 +41,9 @@ #define MAX_HEIGHT 1024 +#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */ + + /** * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. * The data may be addressed through several different types. @@ -157,9 +160,8 @@ struct spu_global /** Current texture sampler function */ spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; - /** Fragment program constants (XXX preliminary/used) */ -#define MAX_CONSTANTS 32 - vector float constants[MAX_CONSTANTS]; + /** Fragment program constants */ + vector float constants[4 * CELL_MAX_CONSTANTS]; } ALIGN16_ATTRIB; -- cgit v1.2.3 From feb5a26bb1e39099abd1caf4a405776ea0124315 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 8 Oct 2008 20:33:24 -0600 Subject: cell: increase SPU_MAX_FRAGMENT_PROGRAM_INSTS --- src/gallium/drivers/cell/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d261c1a6408..5dc756023fb 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -128,7 +128,7 @@ struct cell_command_fragment_ops /** Max instructions for fragment programs */ -#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128 +#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512 /** * Command to send a fragment program to SPUs. -- cgit v1.2.3 From a4e477433f485a39b5de448d0a9cb6f4bf9bb90f Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 8 Oct 2008 20:34:35 -0600 Subject: cell: implement more built-in shader functions, link spu code with -lm --- configs/linux-cell | 2 +- src/gallium/drivers/cell/spu/spu_funcs.c | 65 +++++++++++++++++++++----------- 2 files changed, 45 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/configs/linux-cell b/configs/linux-cell index 86651b83d7b..8d74ee469de 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -53,7 +53,7 @@ SPU_CFLAGS = $(OPT_FLAGS) -W -Wall -Winline -Wmissing-prototypes -Wno-main \ -DSPU_MAIN_PARAM_LONG_LONG \ -include spu_intrinsics.h -SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc +SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc -lm SPU_AR = ppu-ar SPU_AR_FLAGS = -qcs diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index b57ad3f3b81..1adf9de0e8d 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -35,41 +35,61 @@ #include <string.h> #include <libmisc.h> -#include <cos8_v.h> -#include <sin8_v.h> +#include <math.h> +#include <cos14_v.h> +#include <sin14_v.h> #include "cell/common.h" #include "spu_main.h" #include "spu_funcs.h" -#define M_PI 3.1415926 - - static vector float spu_cos(vector float x) { -#if 0 - static const float scale = 1.0 / (2.0 * M_PI); - x = x * spu_splats(scale); /* normalize */ - return _cos8_v(x); -#else - /* just pass-through to avoid trashing caller's stack */ - return x; -#endif + return _cos14_v(x); } static vector float spu_sin(vector float x) { -#if 0 - static const float scale = 1.0 / (2.0 * M_PI); - x = x * spu_splats(scale); /* normalize */ - return _sin8_v(x); /* 8-bit accuracy enough?? */ -#else - /* just pass-through to avoid trashing caller's stack */ - return x; -#endif + return _sin14_v(x); +} + +static vector float +spu_pow(vector float x, vector float y) +{ + float z0 = powf(spu_extract(x,0), spu_extract(y,0)); + float z1 = powf(spu_extract(x,1), spu_extract(y,1)); + float z2 = powf(spu_extract(x,2), spu_extract(y,2)); + float z3 = powf(spu_extract(x,3), spu_extract(y,3)); + return (vector float) {z0, z1, z2, z3}; +} + +static vector float +spu_exp2(vector float x) +{ + float z0 = powf(2.0f, spu_extract(x,0)); + float z1 = powf(2.0f, spu_extract(x,1)); + float z2 = powf(2.0f, spu_extract(x,2)); + float z3 = powf(2.0f, spu_extract(x,3)); + return (vector float) {z0, z1, z2, z3}; +} + +static vector float +spu_log2(vector float x) +{ + /* + * log_base_2(x) = log(x) / log(2) + * 1.442695 = 1/log(2). + */ + static const vector float k = {1.442695F, 1.442695F, 1.442695F, 1.442695F}; + float z0 = logf(spu_extract(x,0)); + float z1 = logf(spu_extract(x,1)); + float z2 = logf(spu_extract(x,2)); + float z3 = logf(spu_extract(x,3)); + vector float v = (vector float) {z0, z1, z2, z3}; + return spu_mul(v, k); } @@ -101,6 +121,9 @@ return_function_info(void) funcs.num = 0; add_func(&funcs, "spu_cos", &spu_cos); add_func(&funcs, "spu_sin", &spu_sin); + add_func(&funcs, "spu_pow", &spu_pow); + add_func(&funcs, "spu_exp2", &spu_exp2); + add_func(&funcs, "spu_log2", &spu_log2); /* Send the function info back to the PPU / main memory */ mfc_put((void *) &funcs, /* src in local store */ -- cgit v1.2.3 From d48a92e88040470f93e2186f8eb23e4797a09860 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 8 Oct 2008 20:44:32 -0600 Subject: cell: implement function calls from shader code. fslight demo runs now. Used for SIN, COS, EXP2, LOG2, POW instructions. TEX next. Fixed some bugs in MIN, MAX, DP3, DP4, DPH instructions. In rtasm code: Special-case spe_lqd(), spe_stqd() functions so they take byte offsets but low-order 4 bits are shifted out. This makes things consistant with SPU assembly language conventions. Added spe_get_registers_used() function. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 76 ++++++++++-- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 11 +- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 141 +++++++++++++++-------- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 30 ++--- 4 files changed, 182 insertions(+), 76 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index c442b1f6aa2..9274bc5e3cd 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -174,9 +174,12 @@ reg_name(int reg) return "$lr"; default: { - static char buf[10]; - sprintf(buf, "$%d", reg); - return buf; + /* cycle through four buffers to handle multiple calls per printf */ + static char buf[4][10]; + static int b = 0; + b = (b + 1) % 4; + sprintf(buf[b], "$%d", reg); + return buf[b]; } } } @@ -269,15 +272,8 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - if (strcmp(name, "spe_lqd") == 0 || - strcmp(name, "spe_stqd") == 0) { - printf("%s\t%s, %d(%s)\n", - rem_prefix(name), reg_name(rT), imm, reg_name(rA)); - } - else { - printf("%s\t%s, %s, 0x%x\n", - rem_prefix(name), reg_name(rT), reg_name(rA), imm); - } + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); } } @@ -379,6 +375,7 @@ void _name (struct spe_function *p, int imm) \ #include "rtasm_ppc_spe.h" + /** * Initialize an spe_function. * \param code_size size of instruction buffer to allocate, in bytes. @@ -513,6 +510,20 @@ void spe_release_register_set(struct spe_function *p) } +unsigned +spe_get_registers_used(const struct spe_function *p, ubyte used[]) +{ + unsigned i, num = 0; + /* only count registers in the range available to callers */ + for (i = 2; i < 80; i++) { + if (p->regs[i]) { + used[num++] = i; + } + } + return num; +} + + void spe_print_code(struct spe_function *p, boolean enable) { @@ -539,6 +550,46 @@ spe_comment(struct spe_function *p, int rel_indent, const char *s) } +/** + * Load quad word. + * NOTE: imm is in bytes and the least significant 4 bits must be zero! + */ +void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) +{ + const boolean pSave = p->print; + + p->print = FALSE; + assert(offset % 4 == 0); + emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd"); + p->print = pSave; + + if (p->print) { + indent(p); + printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); + } +} + + +/** + * Store quad word. + * NOTE: imm is in bytes and the least significant 4 bits must be zero! + */ +void spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) +{ + const boolean pSave = p->print; + + p->print = FALSE; + assert(offset % 4 == 0); + emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd"); + p->print = pSave; + + if (p->print) { + indent(p); + printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); + } +} + + /** * For branch instructions: * \param d if 1, disable interupts if branch is taken @@ -764,6 +815,7 @@ spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) spe_release_register(p, tmp_reg); } + /** * This function is constructed identically to spe_and_uint() above. * Changes to one should be made in the other. diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index cd2e2454097..47dadb343c2 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -89,6 +89,9 @@ extern void spe_release_register(struct spe_function *p, int reg); extern void spe_allocate_register_set(struct spe_function *p); extern void spe_release_register_set(struct spe_function *p); +extern unsigned +spe_get_registers_used(const struct spe_function *p, ubyte used[]); + extern void spe_print_code(struct spe_function *p, boolean enable); extern void spe_indent(struct spe_function *p, int spaces); extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); @@ -128,11 +131,9 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); /* Memory load / store instructions */ -EMIT_RI10(spe_lqd, 0x034); EMIT_RR (spe_lqx, 0x1c4); EMIT_RI16(spe_lqa, 0x061); EMIT_RI16(spe_lqr, 0x067); -EMIT_RI10(spe_stqd, 0x024); EMIT_RR (spe_stqx, 0x144); EMIT_RI16(spe_stqa, 0x041); EMIT_RI16(spe_stqr, 0x047); @@ -290,6 +291,12 @@ EMIT_RI16(spe_brz, 0x040); EMIT_RI16(spe_brhnz, 0x046); EMIT_RI16(spe_brhz, 0x044); +extern void +spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); + +extern void +spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); + extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 3065869d043..640ebcadbb3 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -84,6 +84,8 @@ struct codegen /** Index of execution mask register */ int exec_mask_reg; + int frame_size; /**< Stack frame size, in words */ + struct spe_function *f; boolean error; }; @@ -208,7 +210,7 @@ get_src_reg(struct codegen *gen, reg = get_itemp(gen); reg_is_itemp = TRUE; /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->inputs_reg, offset); + spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); } break; case TGSI_FILE_IMMEDIATE: @@ -221,7 +223,7 @@ get_src_reg(struct codegen *gen, reg = get_itemp(gen); reg_is_itemp = TRUE; /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->constants_reg, offset); + spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); } break; default: @@ -325,6 +327,7 @@ store_dest_reg(struct codegen *gen, } else { /* we're not inside a condition or loop: do nothing special */ + } break; case TGSI_FILE_OUTPUT: @@ -337,17 +340,17 @@ store_dest_reg(struct codegen *gen, /* First read the current value from memory: * Load: curval = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset); + spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); /* Mix curval with newvalue according to exec mask: * d[i] = mask_reg[i] ? value_reg : d_reg */ spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); /* Store: memory[(machine_reg) + offset] = curval */ - spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset); + spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); } else { /* Store: memory[(machine_reg) + offset] = reg */ - spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); + spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); } } break; @@ -357,6 +360,41 @@ store_dest_reg(struct codegen *gen, } + +static void +emit_prologue(struct codegen *gen) +{ + gen->frame_size = 256+128; /* XXX temporary */ + + spe_comment(gen->f, -4, "Function prologue:"); + + /* save $lr on stack # stqd $lr,16($sp) */ + spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + /* save stack pointer # stqd $sp,-frameSize($sp) */ + spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + + /* adjust stack pointer # ai $sp,$sp,-frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); +} + + +static void +emit_epilogue(struct codegen *gen) +{ + spe_comment(gen->f, -4, "Function epilogue:"); + + /* restore stack pointer # ai $sp,$sp,frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); + + /* restore $lr # lqd $lr,16($sp) */ + spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + /* return from function call */ + spe_bi(gen->f, SPE_REG_RA, 0, 0); +} + + static boolean emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { @@ -588,6 +626,7 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); int tmp_reg = get_itemp(gen); + /* t = x0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); @@ -603,7 +642,9 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); } } @@ -623,6 +664,7 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); int tmp_reg = get_itemp(gen); + /* t = x0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); @@ -643,6 +685,8 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); } } @@ -683,6 +727,8 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); } } @@ -1112,9 +1158,6 @@ emit_function_call(struct codegen *gen, uint addr; int ch; - /* XXX temporary value */ - const int frameSize = 64; /* stack frame (activation record) size */ - assert(num_args <= 3); /* lookup function address */ @@ -1136,48 +1179,45 @@ emit_function_call(struct codegen *gen, for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - int s_regs[3]; - uint a; + int s_regs[3], d_reg; + ubyte usedRegs[SPE_NUM_REGS]; + uint a, i, numUsed; + for (a = 0; a < num_args; a++) { s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); } + d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - /* Basically: - * save registers on stack - * move parameters to registers 3, 4, 5... - * call function - * save return value (reg 3) - * restore registers from stack - */ + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 32); - /* XXX hack: load first function param */ - spe_move(gen->f, 3, s_regs[0]); - - /* save $lr on stack # stqd $lr,16($sp) */ - spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - /* save stack pointer # stqd $sp,-frameSize($sp) */ - spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize); - - /* XXX save registers to stack here */ + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } - /* adjust stack pointer # ai $sp,$sp,-frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize); + /* setup function arguments */ + for (a = 0; a < num_args; a++) { + spe_move(gen->f, 3 + a, s_regs[a]); + } /* branch to function, save return addr */ spe_brasl(gen->f, SPE_REG_RA, addr); - /* restore stack pointer # ai $sp,$sp,frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, frameSize); - - /* XXX restore registers from stack here */ - - /* restore $lr # lqd $lr,16($sp) */ - spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - - /* XXX hack: save function's return value */ + /* save function's return value */ spe_move(gen->f, d_reg, 3); + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_reg) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + } + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); } @@ -1202,10 +1242,11 @@ emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int tmp_reg = get_itemp(gen); /* d = (s1 > s2) ? s1 : s2 */ - spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); - spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg); + spe_fcgt(gen->f, tmp_reg, s1_reg, s2_reg); + spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -1230,10 +1271,11 @@ emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int tmp_reg = get_itemp(gen); /* d = (s2 > s1) ? s1 : s2 */ - spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); - spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg); + spe_fcgt(gen->f, tmp_reg, s2_reg, s1_reg); + spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -1346,8 +1388,7 @@ static boolean emit_END(struct codegen *gen) { spe_comment(gen->f, -4, "END:"); - /* return from function call */ - spe_bi(gen->f, SPE_REG_RA, 0, 0); + emit_epilogue(gen); return true; } @@ -1420,6 +1461,10 @@ emit_instruction(struct codegen *gen, return emit_function_call(gen, inst, "spu_sin", 1); case TGSI_OPCODE_POW: return emit_function_call(gen, inst, "spu_pow", 2); + case TGSI_OPCODE_EXPBASE2: + return emit_function_call(gen, inst, "spu_exp2", 1); + case TGSI_OPCODE_LOGBASE2: + return emit_function_call(gen, inst, "spu_log2", 1); case TGSI_OPCODE_IF: return emit_IF(gen, inst); @@ -1532,6 +1577,7 @@ emit_declaration(struct cell_context *cell, } + /** * Translate TGSI shader code to SPE instructions. This is done when * the state tracker gives us a new shader (via pipe->create_fs_state()). @@ -1571,12 +1617,14 @@ cell_gen_fragment_program(struct cell_context *cell, tgsi_parse_init(&parse, tokens); + emit_prologue(&gen); + while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { tgsi_parse_token(&parse); switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: - if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) + if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) gen.error = true; break; @@ -1595,7 +1643,6 @@ cell_gen_fragment_program(struct cell_context *cell, } } - if (gen.error) { /* terminate the SPE code */ return emit_END(&gen); diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 566df7f59e3..18969005b02 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -73,8 +73,8 @@ emit_matrix_transpose(struct spe_function *p, int col3; - spe_lqd(p, shuf_hi, shuf_ptr, 3); - spe_lqd(p, shuf_lo, shuf_ptr, 4); + spe_lqd(p, shuf_hi, shuf_ptr, 3*16); + spe_lqd(p, shuf_lo, shuf_ptr, 4*16); spe_shufb(p, t1, row0, row2, shuf_hi); spe_shufb(p, t2, row0, row2, shuf_lo); @@ -122,13 +122,13 @@ emit_matrix_transpose(struct spe_function *p, */ switch (count) { case 4: - spe_stqd(p, col3, dest_ptr, 3); + spe_stqd(p, col3, dest_ptr, 3 * 16); case 3: - spe_stqd(p, col2, dest_ptr, 2); + spe_stqd(p, col2, dest_ptr, 2 * 16); case 2: - spe_stqd(p, col1, dest_ptr, 1); + spe_stqd(p, col1, dest_ptr, 1 * 16); case 1: - spe_stqd(p, col0, dest_ptr, 0); + spe_stqd(p, col0, dest_ptr, 0 * 16); } @@ -166,17 +166,17 @@ emit_fetch(struct spe_function *p, float scale_signed = 0.0; float scale_unsigned = 0.0; - spe_lqd(p, v0, in_ptr, 0 + offset[0]); - spe_lqd(p, v1, in_ptr, 1 + offset[0]); - spe_lqd(p, v2, in_ptr, 2 + offset[0]); - spe_lqd(p, v3, in_ptr, 3 + offset[0]); + spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); + spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); + spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); + spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); offset[0] += 4; switch (bytes) { case 1: scale_signed = 1.0f / 127.0f; scale_unsigned = 1.0f / 255.0f; - spe_lqd(p, tmp, shuf_ptr, 1); + spe_lqd(p, tmp, shuf_ptr, 1 * 16); spe_shufb(p, v0, v0, v0, tmp); spe_shufb(p, v1, v1, v1, tmp); spe_shufb(p, v2, v2, v2, tmp); @@ -185,7 +185,7 @@ emit_fetch(struct spe_function *p, case 2: scale_signed = 1.0f / 32767.0f; scale_unsigned = 1.0f / 65535.0f; - spe_lqd(p, tmp, shuf_ptr, 2); + spe_lqd(p, tmp, shuf_ptr, 2 * 16); spe_shufb(p, v0, v0, v0, tmp); spe_shufb(p, v1, v1, v1, tmp); spe_shufb(p, v2, v2, v2, tmp); @@ -241,11 +241,11 @@ emit_fetch(struct spe_function *p, switch (count) { case 1: - spe_stqd(p, float_zero, out_ptr, 1); + spe_stqd(p, float_zero, out_ptr, 1 * 16); case 2: - spe_stqd(p, float_zero, out_ptr, 2); + spe_stqd(p, float_zero, out_ptr, 2 * 16); case 3: - spe_stqd(p, float_one, out_ptr, 3); + spe_stqd(p, float_one, out_ptr, 3 * 16); } if (float_zero != -1) { -- cgit v1.2.3 From 2a3fa97be3d10a6d4e36c6d232afb884efd69d55 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 9 Oct 2008 08:24:03 -0600 Subject: cell: more accurate comments --- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index f920ae13b42..de170d1036c 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -1841,7 +1841,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) ASSERT(TILE_SIZE == 32); - spe_comment(f, 0, "Computing tile location in memory"); + spe_comment(f, 0, "Compute quad offset within tile"); spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ @@ -1869,7 +1869,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) boolean fbS_reg_set = false, fbZ_reg_set = false; unsigned int fbS_reg, fbZ_reg = 0; - spe_comment(f, 0, "Loading Z/stencil tile"); + spe_comment(f, 0, "Fetch quad's Z/stencil values from tile"); /* fetch quad of depth/stencil values from tile at (x,y) */ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ @@ -1993,7 +1993,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. * fbS_reg has four 8-bit Z values in bits [7..0]. */ - spe_comment(f, 0, "Storing depth/stencil values"); + spe_comment(f, 0, "Store quad's depth/stencil values in tile"); if (zs_format == PIPE_FORMAT_S8Z24_UNORM || zs_format == PIPE_FORMAT_X8Z24_UNORM) { if (fbS_reg_set) { @@ -2038,6 +2038,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking * we could skip this load. */ + spe_comment(f, 0, "Fetch quad colors from tile"); spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); if (blend->blend_enable) { @@ -2055,7 +2056,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) int rgba_reg = spe_allocate_available_register(f); /* Pack four float colors as four 32-bit int colors */ - spe_comment(f, 0, "Convert fragment colors to framebuffer colors"); + spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors"); gen_pack_colors(f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, rgba_reg); @@ -2081,7 +2082,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) /* Store updated quad in tile: * memory[color_tile + quad_offset] = rgba_reg; */ - spe_comment(f, 0, "Store framebuffer colors"); + spe_comment(f, 0, "Store quad colors into color tile"); spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); spe_release_register(f, rgba_reg); -- cgit v1.2.3 From b9689791ddd1030f7cd25af21701f56d89e0f3b0 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 9 Oct 2008 08:52:31 -0600 Subject: cell: massage the emit functions to get better instruction scheduling --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 130 ++++++++++++++++------------- 1 file changed, 74 insertions(+), 56 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 640ebcadbb3..e6d994205cc 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -398,15 +398,19 @@ emit_epilogue(struct codegen *gen) static boolean emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, src_reg[4], dst_reg[4]; spe_comment(gen->f, -4, "MOV:"); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { /* XXX we don't always need to actually emit a mov instruction here */ - spe_move(gen->f, dst_reg, src_reg); - store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, dst_reg[ch], src_reg[ch]); + store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); free_itemps(gen); } } @@ -421,22 +425,25 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s1_reg[4], s2_reg[4], d_reg[4]; + spe_comment(gen->f, -4, "ADD:"); - /* Loop over Red/Green/Blue/Alpha channels */ + /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ for (ch = 0; ch < 4; ch++) { /* If the dest R, G, B or A writemask is enabled... */ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - /* get indexes of the two src, one dest SPE registers */ - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + } + /* Loop over Red/Green/Blue/Alpha channels, do the add, store results */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { /* Emit actual SPE instruction: d = s1 + s2 */ - spe_fa(gen->f, d_reg, s1_reg, s2_reg); - + spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); /* Free any intermediate temps we allocated */ free_itemps(gen); } @@ -450,23 +457,20 @@ emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s1_reg[4], s2_reg[4], d_reg[4]; spe_comment(gen->f, -4, "SUB:"); - /* Loop over Red/Green/Blue/Alpha channels */ for (ch = 0; ch < 4; ch++) { - /* If the dest R, G, B or A writemask is enabled... */ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - /* get indexes of the two src, one dest SPE registers */ - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - - /* Emit actual SPE instruction: d = s1 - s2 */ - spe_fs(gen->f, d_reg, s1_reg, s2_reg); - - /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - /* Free any intermediate temps we allocated */ + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + /* d = s1 - s2 */ + spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); free_itemps(gen); } } @@ -479,17 +483,21 @@ emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; spe_comment(gen->f, -4, "MAD:"); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { /* d = s1 * s2 + s3 */ - spe_fma(gen->f, d_reg, s1_reg, s2_reg, s3_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); free_itemps(gen); } } @@ -527,16 +535,20 @@ emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s1_reg[4], s2_reg[4], d_reg[4]; spe_comment(gen->f, -4, "MUL:"); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { /* d = s1 * s2 */ - spe_fm(gen->f, d_reg, s1_reg, s2_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); free_itemps(gen); } } @@ -621,29 +633,35 @@ static boolean emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; + int s1x_reg, s1y_reg, s1z_reg; + int s2x_reg, s2y_reg, s2z_reg; + int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + spe_comment(gen->f, -4, "DP3:"); - int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - int tmp_reg = get_itemp(gen); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); - /* t = x0 * x1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + /* t0 = x0 * x1 */ + spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - /* t = y0 * y1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + /* t1 = y0 * y1 */ + spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg); - s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); - /* t = z0 * z1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + /* t0 = z0 * z1 + t0 */ + spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg); + + /* t0 = t0 + t1 */ + spe_fa(gen->f, t0_reg, t0_reg, t1_reg); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - spe_move(gen->f, d_reg, tmp_reg); + spe_move(gen->f, d_reg, t0_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); } } -- cgit v1.2.3 From c201e357eb95f9b18b8d9b8a534ae2594a176904 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 9 Oct 2008 10:56:25 -0600 Subject: cell: better immediate value allocation, better comments --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 39 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index e6d994205cc..5647bb23e60 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1526,16 +1526,23 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) for (ch = 0; ch < 4; ch++) { float val = immed->u.ImmediateFloat32[ch].Float; - int reg = spe_allocate_available_register(gen->f); - if (reg < 0) - return false; + if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) { + /* re-use previous register */ + gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; + } + else { + int reg = spe_allocate_available_register(gen->f); - /* update immediate map */ - gen->imm_regs[gen->num_imm][ch] = reg; + if (reg < 0) + return false; - /* emit initializer instruction */ - spe_load_float(gen->f, reg, val); + /* update immediate map */ + gen->imm_regs[gen->num_imm][ch] = reg; + + /* emit initializer instruction */ + spe_load_float(gen->f, reg, val); + } } gen->num_imm++; @@ -1558,12 +1565,6 @@ emit_declaration(struct cell_context *cell, switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: - if (cell->debug_flags & CELL_DEBUG_ASM) { - printf("Declare temp reg %d .. %d\n", - decl->DeclarationRange.First, - decl->DeclarationRange.Last); - } - for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { @@ -1578,12 +1579,12 @@ emit_declaration(struct cell_context *cell, * to SPU memory. someday... */ - if (cell->debug_flags & CELL_DEBUG_ASM) { - printf(" SPE regs: %d %d %d %d\n", - gen->temp_regs[i][0], - gen->temp_regs[i][1], - gen->temp_regs[i][2], - gen->temp_regs[i][3]); + { + char buf[100]; + sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i, + gen->temp_regs[i][0], gen->temp_regs[i][1], + gen->temp_regs[i][2], gen->temp_regs[i][3]); + spe_comment(gen->f, -4, buf); } } break; -- cgit v1.2.3 From 583098e3cb602fd9810a7c65718155fd9b0b3fda Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Thu, 9 Oct 2008 19:48:53 -0600 Subject: cell: implement basic TXP instruction in fragment shaders Lots of restrictions for now (one 2D texture, no mipmaps, etc.) for now but basic texture demos work. TEX, TXD, TXP do the same thing for the time being. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 109 ++++++++++++++++++++++++----- src/gallium/drivers/cell/spu/spu_funcs.c | 51 ++++++++++++-- src/gallium/drivers/cell/spu/spu_tri.c | 2 +- 3 files changed, 138 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 5647bb23e60..c8125a8a052 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -226,6 +226,11 @@ get_src_reg(struct codegen *gen, spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); } break; + case TGSI_FILE_SAMPLER: + { + reg = 3; /* XXX total hack */ + } + break; default: assert(0); } @@ -1162,6 +1167,21 @@ print_functions(struct cell_context *cell) #endif +static uint +lookup_function(struct cell_context *cell, const char *funcname) +{ + const struct cell_spu_function_info *funcs = &cell->spu_functions; + uint i, addr = 0; + for (i = 0; i < funcs->num; i++) { + if (strcmp(funcs->names[i], funcname) == 0) { + addr = funcs->addrs[i]; + } + } + assert(addr && "spu function not found"); + return addr / 4; /* discard 2 least significant bits */ +} + + /** * Emit code to call a SPU function. * Used to implement instructions like SIN/COS/POW/TEX/etc. @@ -1171,27 +1191,12 @@ emit_function_call(struct codegen *gen, const struct tgsi_full_instruction *inst, char *funcname, uint num_args) { - const struct cell_spu_function_info *funcs = &gen->cell->spu_functions; + const uint addr = lookup_function(gen->cell, funcname); char comment[100]; - uint addr; int ch; assert(num_args <= 3); - /* lookup function address */ - { - uint i; - addr = 0; - for (i = 0; i < funcs->num; i++) { - if (strcmp(funcs->names[i], funcname) == 0) { - addr = funcs->addrs[i]; - } - } - assert(addr && "spu function not found"); - } - - addr /= 4; /* discard 2 least significant bits */ - snprintf(comment, sizeof(comment), "CALL %s:", funcname); spe_comment(gen->f, -4, comment); @@ -1245,6 +1250,72 @@ emit_function_call(struct codegen *gen, } +static boolean +emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const uint addr = lookup_function(gen->cell, "spu_txp"); + int ch; + int coord_regs[4], d_regs[4]; + + spe_comment(gen->f, -4, "CALL txp:"); + + /* get src/dst reg info */ + for (ch = 0; ch < 4; ch++) { + coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + { + ubyte usedRegs[SPE_NUM_REGS]; + uint i, numUsed; + + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 32); + + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + + /* setup function arguments */ + for (i = 0; i < 4; i++) { + spe_move(gen->f, 3 + i, coord_regs[i]); + } + + /* branch to function, save return addr */ + spe_brasl(gen->f, SPE_REG_RA, addr); + + /* save function's return values (four pixel's colors) */ + for (i = 0; i < 4; i++) { + spe_move(gen->f, d_regs[i], 3 + i); + } + + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_regs[0] && + reg != d_regs[1] && + reg != d_regs[2] && + reg != d_regs[3]) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + } + } + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return TRUE; +} + + /** * Emit max. See emit_SGT for comments. */ @@ -1483,6 +1554,12 @@ emit_instruction(struct codegen *gen, return emit_function_call(gen, inst, "spu_exp2", 1); case TGSI_OPCODE_LOGBASE2: return emit_function_call(gen, inst, "spu_log2", 1); + case TGSI_OPCODE_TEX: + /* fall-through for now */ + case TGSI_OPCODE_TXD: + /* fall-through for now */ + case TGSI_OPCODE_TXP: + return emit_TXP(gen, inst); case TGSI_OPCODE_IF: return emit_IF(gen, inst); diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 1adf9de0e8d..c7bcb3de9dd 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -38,12 +38,20 @@ #include <math.h> #include <cos14_v.h> #include <sin14_v.h> +#include <transpose_matrix4x4.h> #include "cell/common.h" #include "spu_main.h" #include "spu_funcs.h" +/** For "return"-ing four vectors */ +struct vec_4x4 +{ + vector float v[4]; +}; + + static vector float spu_cos(vector float x) { @@ -92,16 +100,44 @@ spu_log2(vector float x) return spu_mul(v, k); } +static struct vec_4x4 +spu_txp(vector float s, vector float t, vector float r, vector float q) +{ + const uint unit = 0; + struct vec_4x4 colors; + vector float coords[4]; + + coords[0] = s; + coords[1] = t; + coords[2] = r; + coords[3] = q; + _transpose_matrix4x4(coords, coords); + + /* get four texture samples */ + colors.v[0] = spu.sample_texture[unit](unit, coords[0]); + colors.v[1] = spu.sample_texture[unit](unit, coords[1]); + colors.v[2] = spu.sample_texture[unit](unit, coords[2]); + colors.v[3] = spu.sample_texture[unit](unit, coords[3]); + + _transpose_matrix4x4(colors.v, colors.v); + return colors; +} + +/** + * Add named function to list of "exported" functions that will be + * made available to the PPU-hosted code generator. + */ static void -add_func(struct cell_spu_function_info *spu_functions, - const char *name, void *addr) +export_func(struct cell_spu_function_info *spu_functions, + const char *name, void *addr) { uint n = spu_functions->num; ASSERT(strlen(name) < 16); strcpy(spu_functions->names[n], name); spu_functions->addrs[n] = (uint) addr; spu_functions->num++; + ASSERT(spu_functions->num <= 16); } @@ -119,11 +155,12 @@ return_function_info(void) ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ funcs.num = 0; - add_func(&funcs, "spu_cos", &spu_cos); - add_func(&funcs, "spu_sin", &spu_sin); - add_func(&funcs, "spu_pow", &spu_pow); - add_func(&funcs, "spu_exp2", &spu_exp2); - add_func(&funcs, "spu_log2", &spu_log2); + export_func(&funcs, "spu_cos", &spu_cos); + export_func(&funcs, "spu_sin", &spu_sin); + export_func(&funcs, "spu_pow", &spu_pow); + export_func(&funcs, "spu_exp2", &spu_exp2); + export_func(&funcs, "spu_log2", &spu_log2); + export_func(&funcs, "spu_txp", &spu_txp); /* Send the function info back to the PPU / main memory */ mfc_put((void *) &funcs, /* src in local store */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 6039cd80b24..87991c31368 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -286,7 +286,7 @@ emit_quad( int x, int y, mask_t mask) spu.cur_ctile_status = TILE_STATUS_DIRTY; spu.cur_ztile_status = TILE_STATUS_DIRTY; - if (spu.texture[0].start) { + if (0/*spu.texture[0].start*/) { /* * Temporary texture mapping path * This will go away when fragment programs support TEX inst. -- cgit v1.2.3 From 086a56134f334505ca9cd6f57194280c1ebf44dc Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 10 Oct 2008 08:44:29 -0600 Subject: cell: updates in response to draw's struct vertex_info changes --- src/gallium/drivers/cell/spu/spu_tri.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 87991c31368..a62d4f0f2ff 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -215,7 +215,7 @@ clip_emit_quad(struct setup_stage *setup) static INLINE void eval_coeff(uint slot, float x, float y, vector float result[4]) { - switch (spu.vertex_info.interp_mode[slot]) { + switch (spu.vertex_info.attrib[slot].interp_mode) { case INTERP_CONSTANT: result[QUAD_TOP_LEFT] = result[QUAD_TOP_RIGHT] = @@ -776,7 +776,7 @@ static void setup_tri_coefficients(void) uint i; for (i = 0; i < spu.vertex_info.num_attribs; i++) { - switch (spu.vertex_info.interp_mode[i]) { + switch (spu.vertex_info.attrib[i].interp_mode) { case INTERP_NONE: break; case INTERP_POS: -- cgit v1.2.3 From dc7d213c54b046ec03ddb1fcfb0d9d9e905ffedc Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 10 Oct 2008 11:52:55 -0600 Subject: cell: fix bug in emit_FRC() when src register == dst register. With this fix, the glsl/brick demo runs. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index c8125a8a052..dad74bbad86 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1119,7 +1119,7 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; - spe_comment(gen->f, -4, "FLR:"); + spe_comment(gen->f, -4, "FRC:"); int zero_reg = get_itemp(gen); spe_xor(gen->f, zero_reg, zero_reg, zero_reg); @@ -1131,18 +1131,18 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) int tmp_reg = get_itemp(gen); /* If negative, subtract 1.0 */ - spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); - spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), d_reg); - spe_fs(gen->f, d_reg, s1_reg, tmp_reg); + spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg); + spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg); + spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg); /* Convert float to int */ - spe_cflts(gen->f, d_reg, d_reg, 0); + spe_cflts(gen->f, tmp_reg, tmp_reg, 0); /* Convert int to float */ - spe_csflt(gen->f, d_reg, d_reg, 0); + spe_csflt(gen->f, tmp_reg, tmp_reg, 0); /* d = s1 - FLR(s1) */ - spe_fs(gen->f, d_reg, s1_reg, d_reg); + spe_fs(gen->f, d_reg, s1_reg, tmp_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); -- cgit v1.2.3 From e43af05311acd979f43a75f8ba4d9152b453408e Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 10 Oct 2008 11:56:03 -0600 Subject: cell: fix bug in emit_FLR() when src reg == dst reg --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index dad74bbad86..7a4e8d20ba2 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1092,15 +1092,15 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) int tmp_reg = get_itemp(gen); /* If negative, subtract 1.0 */ - spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); - spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), d_reg); - spe_fs(gen->f, d_reg, s1_reg, tmp_reg); + spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg); + spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg); + spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg); /* Convert float to int */ - spe_cflts(gen->f, d_reg, d_reg, 0); + spe_cflts(gen->f, tmp_reg, tmp_reg, 0); /* Convert int to float */ - spe_csflt(gen->f, d_reg, d_reg, 0); + spe_csflt(gen->f, d_reg, tmp_reg, 0); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -1111,8 +1111,7 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) } /** - * Emit frac. - * Input - FLR(Input) + * Compute frac = Input - FLR(Input) */ static boolean emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) -- cgit v1.2.3 From a45d293fd9a1432404a7e26f97cb20b2a0c43654 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 10 Oct 2008 12:04:19 -0600 Subject: cell: fix fm/fs copy & paste bug from a few commits ago --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 7a4e8d20ba2..ab71336754c 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -474,7 +474,7 @@ emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { /* d = s1 - s2 */ - spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); free_itemps(gen); } -- cgit v1.2.3 From a13f61d34d40475a6f12fb8696b6e7d58aaa78b7 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 10 Oct 2008 12:24:39 -0600 Subject: cell: fix LERP when dst reg is a src reg Also, bump up frame size and fix some assertions. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index ab71336754c..db54c7e57b5 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -369,7 +369,7 @@ store_dest_reg(struct codegen *gen, static void emit_prologue(struct codegen *gen) { - gen->frame_size = 256+128; /* XXX temporary */ + gen->frame_size = 1024; /* XXX temporary */ spe_comment(gen->f, -4, "Function prologue:"); @@ -517,6 +517,7 @@ static boolean emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; + int tmp_reg = get_itemp(gen); spe_comment(gen->f, -4, "LERP:"); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { @@ -524,9 +525,10 @@ emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* d = s3 + s1(s2 - s3) */ - spe_fs(gen->f, d_reg, s2_reg, s3_reg); - spe_fma(gen->f, d_reg, d_reg, s1_reg, s3_reg); + spe_fs(gen->f, tmp_reg, s2_reg, s3_reg); + spe_fma(gen->f, d_reg, tmp_reg, s1_reg, s3_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); } @@ -1211,7 +1213,7 @@ emit_function_call(struct codegen *gen, d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); numUsed = spe_get_registers_used(gen->f, usedRegs); - assert(numUsed < gen->frame_size / 16 - 32); + assert(numUsed < gen->frame_size / 16 - 2); /* save registers to stack */ for (i = 0; i < numUsed; i++) { @@ -1269,7 +1271,7 @@ emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) uint i, numUsed; numUsed = spe_get_registers_used(gen->f, usedRegs); - assert(numUsed < gen->frame_size / 16 - 32); + assert(numUsed < gen->frame_size / 16 - 2); /* save registers to stack */ for (i = 0; i < numUsed; i++) { -- cgit v1.2.3 From 02931db3117cd064175a07412b860e8051d9ed58 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 10 Oct 2008 12:38:27 -0600 Subject: cell: call cell_flush_int() at end of cell_create_context() Ensures that SPUs are initialized/ready before proceeding. This fixes a spurious assertion failure when the SPU-side shader function info hasn't been returned to the PPU before shader codegen. --- src/gallium/drivers/cell/ppu/cell_context.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 30ce6f97624..35cd6874a27 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -162,5 +162,8 @@ cell_create_context(struct pipe_screen *screen, cell_init_batch_buffers(cell); + /* make sure SPU initializations are done before proceeding */ + cell_flush_int(cell, CELL_FLUSH_WAIT); + return &cell->pipe; } -- cgit v1.2.3 From adeed0f90fdd46ea139d5c4b3b75d5dc79b2a0c7 Mon Sep 17 00:00:00 2001 From: Robert Ellison <papillo@tungstengraphics.com> Date: Fri, 10 Oct 2008 14:13:13 -0600 Subject: CELL: fixing stencil bugs These are the defects found and fixed so far. Several more have been observed; I'm working on them. - Fixed an error in spe_load_uint() that caused incorrect values to be loaded if the given unsigned value had the low 18 bits as 0, and that caused inefficient code to be emitted if the given value had the high 14 bits as 0. - Fixed a problem in stencil code generation where optional registers weren't tracked correctly. - Fixed a problem that the stencil function NEVER was acting as ALWAYS. - Fixed several problems that could occur if stenciling were enabled but depth was disabled. - Fixed a problem with two-sided stencil writemask handling that could cause a stencil writemask to not be applied. - Fixed several state permutations that were incorrectly flagged as not requiring stencil values to be calculated. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 4 +- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 88 +++++++++++++++++++----- 2 files changed, 72 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index cc35f0ba5b0..9bf3b9bf0ca 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -727,7 +727,7 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) * Bytes Immediate (fsmbi) to load the value in a single instruction. * Otherwise, in the general case, we have to use ilhu followed by iohl. */ - if ((ui & 0x3ffff) == ui) { + if ((ui & 0x0003ffff) == ui) { spe_ila(p, rT, ui); } else if ((ui >> 16) == (ui & 0xffff)) { @@ -764,7 +764,7 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) } /** - * This function is constructed identically to spe_sor_uint() below. + * This function is constructed identically to spe_xor_uint() below. * Changes to one should be made in the other. */ void diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index de170d1036c..4e1e53ecdc7 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -247,6 +247,7 @@ setup_optional_register(struct spe_function *f, boolean *is_already_set, unsigne { if (*is_already_set) return; *r = spe_allocate_available_register(f); + *is_already_set = true; } static inline void @@ -1157,7 +1158,6 @@ gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state, /* stencil_pass = mask & (s == reference) */ spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); - /* stencil_fail = mask & ~stencil_pass */ break; case PIPE_FUNC_NOTEQUAL: @@ -1207,7 +1207,6 @@ gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state, case PIPE_FUNC_NEVER: /* stencil_pass = mask & 0 = 0 */ spe_load_uint(f, stencil_pass_reg, 0); - spe_move(f, stencil_pass_reg, mask_reg); /* zmask = mask */ break; case PIPE_FUNC_ALWAYS: @@ -1483,6 +1482,10 @@ gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_a } /* End of calculations for back-facing stencil */ } +/* Note that fbZ_reg may *not* be set on entry, if in fact + * the depth test is not enabled. This function must not use + * the register if depth is not enabled. + */ static boolean gen_stencil_depth_test(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa, @@ -1522,6 +1525,7 @@ gen_stencil_depth_test(struct spe_function *f, * have to spend the complexity to track the more difficult variant * register usage scenarios. */ + spe_comment(f, 0, "Allocating stencil register set"); spe_allocate_register_set(f); /* Calculate the writemask. If the writemask is trivial (either @@ -1538,7 +1542,7 @@ gen_stencil_depth_test(struct spe_function *f, need_to_calculate_stencil_values = false; need_to_writemask_stencil_values = false; } - else if (dsa->stencil[0].write_mask == 0xff && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) { + else if (dsa->stencil[0].write_mask == 0xff && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0xff)) { /* Still trivial, but a little less so. We need to write the stencil * values, but we don't need to mask them. */ @@ -1556,10 +1560,12 @@ gen_stencil_depth_test(struct spe_function *f, * writemask, we'll have to generate code that merges the * two masks into a single effective mask based on fragment facing. */ + spe_comment(f, 0, "Computing stencil writemask"); stencil_writemask_reg = spe_allocate_available_register(f); spe_load_uint(f, stencil_writemask_reg, dsa->stencil[0].write_mask); if (dsa->stencil[1].enabled && dsa->stencil[0].write_mask != dsa->stencil[1].write_mask) { unsigned int back_write_mask_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Resolving two-sided stencil writemask"); spe_load_uint(f, back_write_mask_reg, dsa->stencil[1].write_mask); spe_selb(f, stencil_writemask_reg, stencil_writemask_reg, back_write_mask_reg, facing_reg); spe_release_register(f, back_write_mask_reg); @@ -1575,6 +1581,7 @@ gen_stencil_depth_test(struct spe_function *f, * This test will *not* change the value in mask_reg (because we don't * yet know whether to apply the two-sided stencil or one-sided stencil). */ + spe_comment(f, 0, "Running basic stencil test"); stencil_pass_reg = spe_allocate_available_register(f); gen_stencil_test(f, &dsa->stencil[0], mask_reg, fbS_reg, stencil_pass_reg); @@ -1584,6 +1591,7 @@ gen_stencil_depth_test(struct spe_function *f, */ if (dsa->stencil[1].enabled) { unsigned int temp_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Running backface stencil test"); gen_stencil_test(f, &dsa->stencil[1], mask_reg, fbS_reg, temp_reg); spe_selb(f, stencil_pass_reg, stencil_pass_reg, temp_reg, facing_reg); spe_release_register(f, temp_reg); @@ -1597,6 +1605,7 @@ gen_stencil_depth_test(struct spe_function *f, * stencil test, and because the depth test will update the * mask of valid fragments based on the results of the depth test). */ + spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask"); stencil_fail_reg = spe_allocate_available_register(f); spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); /* Now remove the stenciled-out pixels from the valid fragment mask, @@ -1623,6 +1632,7 @@ gen_stencil_depth_test(struct spe_function *f, * This function will allocate a variant number of registers that * will be released as part of the register set. */ + spe_comment(f, 0, "Computing stencil values"); gen_get_stencil_values(f, dsa, fbS_reg, &front_stencil_fail_values, &front_stencil_pass_depth_fail_values, &front_stencil_pass_depth_pass_values, &back_stencil_fail_values, @@ -1652,6 +1662,7 @@ gen_stencil_depth_test(struct spe_function *f, stencil_pass_depth_pass_values = front_stencil_pass_depth_pass_values; } else { /* two-sided stencil enabled */ + spe_comment(f, 0, "Resolving backface stencil values"); /* Allocate new registers for the needed merged values */ stencil_fail_values = spe_allocate_available_register(f); spe_selb(f, stencil_fail_values, front_stencil_fail_values, back_stencil_fail_values, facing_reg); @@ -1678,11 +1689,13 @@ gen_stencil_depth_test(struct spe_function *f, * on the results of the test. */ if (dsa->depth.enabled) { + spe_comment(f, 0, "Running stencil depth test"); zmask_reg = spe_allocate_available_register(f); modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); } if (need_to_calculate_stencil_values) { + /* If we need to writemask the stencil values before going into * the stencil buffer, we'll have to use a new register to * hold the new values. If not, we can just keep using the @@ -1690,8 +1703,8 @@ gen_stencil_depth_test(struct spe_function *f, */ if (need_to_writemask_stencil_values) { newS_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Saving current stencil values for writemasking"); spe_move(f, newS_reg, fbS_reg); - modified_buffers = true; } else { newS_reg = fbS_reg; @@ -1699,7 +1712,9 @@ gen_stencil_depth_test(struct spe_function *f, /* Merge in the selected stencil fail values */ if (stencil_fail_values != fbS_reg) { + spe_comment(f, 0, "Loading stencil fail values"); spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); + modified_buffers = true; } /* Same for the stencil pass/depth fail values. If this calculation @@ -1714,20 +1729,36 @@ gen_stencil_depth_test(struct spe_function *f, * set above if we're here. */ unsigned int stencil_pass_depth_fail_mask = spe_allocate_available_register(f); + spe_comment(f, 0, "Loading stencil pass/depth fail values"); spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, stencil_pass_depth_fail_mask); spe_release_register(f, stencil_pass_depth_fail_mask); + modified_buffers = true; } - /* Same for the stencil pass/depth pass mask */ + /* Same for the stencil pass/depth pass mask. Note that we + * *can* get here with zmask_reg being unset (if the depth + * test is off but the stencil test is on). In this case, + * we assume the depth test passes, and don't need to mask + * the stencil pass mask with the Z mask. + */ if (stencil_pass_depth_pass_values != fbS_reg) { - unsigned int stencil_pass_depth_pass_mask = spe_allocate_available_register(f); - spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); - - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); - spe_release_register(f, stencil_pass_depth_pass_mask); + if (dsa->depth.enabled) { + unsigned int stencil_pass_depth_pass_mask = spe_allocate_available_register(f); + /* We'll need a separate register */ + spe_comment(f, 0, "Loading stencil pass/depth pass values"); + spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); + spe_release_register(f, stencil_pass_depth_pass_mask); + } + else { + /* We can use the same stencil-pass register */ + spe_comment(f, 0, "Loading stencil pass values"); + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg); + } + modified_buffers = true; } /* Almost done. If we need to writemask, do it now, leaving the @@ -1736,14 +1767,16 @@ gen_stencil_depth_test(struct spe_function *f, * so there's nothing more to do. */ - if (need_to_writemask_stencil_values) { + if (need_to_writemask_stencil_values && modified_buffers) { /* The Select Bytes command makes a fine writemask. Where * the mask is 0, the first (original) values are retained, * effectively masking out changes. Where the mask is 1, the * second (new) values are retained, incorporating changes. */ + spe_comment(f, 0, "Writemasking new stencil values"); spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); } + } /* done calculating stencil values */ /* The stencil and/or depth values have been applied, and the @@ -1752,6 +1785,7 @@ gen_stencil_depth_test(struct spe_function *f, * of registers that we didn't bother tracking. Release all * those registers as part of the register set, and go home. */ + spe_comment(f, 0, "Releasing stencil register set"); spe_release_register_set(f); /* Return true if we could have modified the stencil and/or @@ -1869,7 +1903,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) boolean fbS_reg_set = false, fbZ_reg_set = false; unsigned int fbS_reg, fbZ_reg = 0; - spe_comment(f, 0, "Fetch quad's Z/stencil values from tile"); + spe_comment(f, 0, "Fetching Z/stencil quad from tile"); /* fetch quad of depth/stencil values from tile at (x,y) */ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ @@ -1973,13 +2007,18 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) * tests. */ ASSERT(fbS_reg_set); - ASSERT(fbZ_reg_set); spe_comment(f, 0, "Perform stencil test"); + /* Note that fbZ_reg may not be set on entry, if stenciling + * is enabled but there's no Z-buffer. The + * gen_stencil_depth_test() function must ignore the + * fbZ_reg register if depth is not enabled. + */ write_depth_stencil = gen_stencil_depth_test(f, dsa, facing_reg, mask_reg, fragZ_reg, fbZ_reg, fbS_reg); } else if (dsa->depth.enabled) { int zmask_reg = spe_allocate_available_register(f); + ASSERT(fbZ_reg_set); spe_comment(f, 0, "Perform depth test"); write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); spe_release_register(f, zmask_reg); @@ -1996,26 +2035,39 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_comment(f, 0, "Store quad's depth/stencil values in tile"); if (zs_format == PIPE_FORMAT_S8Z24_UNORM || zs_format == PIPE_FORMAT_X8Z24_UNORM) { - if (fbS_reg_set) { + if (fbS_reg_set && fbZ_reg_set) { spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ } + else if (fbS_reg_set) { + spe_shli(f, fbZS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + } else { spe_move(f, fbZS_reg, fbZ_reg); } } else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || zs_format == PIPE_FORMAT_Z24X8_UNORM) { - spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ - if (fbS_reg_set) { + if (fbS_reg_set && fbZ_reg_set) { + spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ } + else if (fbS_reg_set) { + spe_move(f, fbZS_reg, fbS_reg); + } + else { + spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ + } } else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + if (fbZ_reg_set) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + } } else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + if (fbZ_reg_set) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + } } else if (zs_format == PIPE_FORMAT_S8_UNORM) { ASSERT(0); /* XXX to do */ -- cgit v1.2.3 From 53ae243869a9e1ff0f2b1c559ec51adff867b970 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 10 Oct 2008 14:34:43 -0600 Subject: cell: fix function prologue/epilogue code for large stack frames The ai instruction is limited to a 10-bit signed immediate value. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 44 +++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index db54c7e57b5..3d0e7976dfb 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -369,18 +369,36 @@ store_dest_reg(struct codegen *gen, static void emit_prologue(struct codegen *gen) { - gen->frame_size = 1024; /* XXX temporary */ + gen->frame_size = 1024; /* XXX temporary, should be dynamic */ spe_comment(gen->f, -4, "Function prologue:"); /* save $lr on stack # stqd $lr,16($sp) */ spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - /* save stack pointer # stqd $sp,-frameSize($sp) */ - spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + if (gen->frame_size >= 512) { + /* offset is too large for ai instruction */ + int offset_reg = spe_allocate_available_register(gen->f); + int sp_reg = spe_allocate_available_register(gen->f); + /* offset = -framesize */ + spe_load_int(gen->f, offset_reg, -gen->frame_size); + /* sp = $sp */ + spe_move(gen->f, sp_reg, SPE_REG_SP); + /* $sp = $sp + offset_reg */ + spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); + /* save $sp in stack frame */ + spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0); + /* clean up */ + spe_release_register(gen->f, offset_reg); + spe_release_register(gen->f, sp_reg); + } + else { + /* save stack pointer # stqd $sp,-frameSize($sp) */ + spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); - /* adjust stack pointer # ai $sp,$sp,-frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + /* adjust stack pointer # ai $sp,$sp,-frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + } } @@ -389,8 +407,20 @@ emit_epilogue(struct codegen *gen) { spe_comment(gen->f, -4, "Function epilogue:"); - /* restore stack pointer # ai $sp,$sp,frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); + if (gen->frame_size >= 512) { + /* offset is too large for ai instruction */ + int offset_reg = spe_allocate_available_register(gen->f); + /* offset = framesize */ + spe_load_int(gen->f, offset_reg, gen->frame_size); + /* $sp = $sp + offset */ + spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); + /* clean up */ + spe_release_register(gen->f, offset_reg); + } + else { + /* restore stack pointer # ai $sp,$sp,frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); + } /* restore $lr # lqd $lr,16($sp) */ spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); -- cgit v1.2.3 From 01e312a73b68dc5ddffca0d1b1472fc5dcb6f59e Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 10 Oct 2008 16:36:40 -0600 Subject: cell: pass texture unit (sampler number) to txp() function The glsl/multitex demo runs now. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 4 ++++ src/gallium/drivers/cell/spu/spu_funcs.c | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 3d0e7976dfb..ef84059d8f5 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1285,9 +1285,12 @@ static boolean emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) { const uint addr = lookup_function(gen->cell, "spu_txp"); + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; int ch; int coord_regs[4], d_regs[4]; + assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); + spe_comment(gen->f, -4, "CALL txp:"); /* get src/dst reg info */ @@ -1314,6 +1317,7 @@ emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) for (i = 0; i < 4; i++) { spe_move(gen->f, 3 + i, coord_regs[i]); } + spe_load_uint(gen->f, 7, unit); /* sampler unit */ /* branch to function, save return addr */ spe_brasl(gen->f, SPE_REG_RA, addr); diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index c7bcb3de9dd..7dd7fcd253a 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -101,9 +101,10 @@ spu_log2(vector float x) } static struct vec_4x4 -spu_txp(vector float s, vector float t, vector float r, vector float q) +spu_txp(vector float s, vector float t, vector float r, vector float q, + unsigned unit) { - const uint unit = 0; + //const uint unit = 0; struct vec_4x4 colors; vector float coords[4]; -- cgit v1.2.3 From ecac7996d4c5a1e492ce97c5f5cac885941fc711 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Fri, 10 Oct 2008 17:48:16 -0600 Subject: cell: more instruction scheduling optimizations (MIN/MAX/LERP/etc) Also, optimize register->memory stores. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 201 ++++++++++++++++++++--------- 1 file changed, 140 insertions(+), 61 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index ef84059d8f5..68093d9e83d 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -77,7 +77,7 @@ struct codegen /** Per-instruction temps / intermediate temps */ int num_itemps; - int itemps[10]; + int itemps[12]; /** Current IF/ELSE/ENDIF nesting level */ int if_nesting; @@ -167,6 +167,37 @@ get_exec_mask_reg(struct codegen *gen) } +static boolean +is_register_src(struct codegen *gen, int channel, + const struct tgsi_full_src_register *src) +{ + int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); + + if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { + return FALSE; + } + if (src->SrcRegister.File == TGSI_FILE_TEMPORARY || + src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + return TRUE; + } + return FALSE; +} + + +static boolean +is_memory_dst(struct codegen *gen, int channel, + const struct tgsi_full_dst_register *dst) +{ + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + return TRUE; + } + else { + return FALSE; + } +} + + /** * Return the index of the SPU temporary containing the named TGSI * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we @@ -226,11 +257,6 @@ get_src_reg(struct codegen *gen, spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); } break; - case TGSI_FILE_SAMPLER: - { - reg = 3; /* XXX total hack */ - } - break; default: assert(0); } @@ -257,7 +283,7 @@ get_src_reg(struct codegen *gen, } /* mask with bit 31 set, the rest cleared */ - spe_load_int(gen->f, bit31mask_reg, (1 << 31)); + spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); if (sign_op == TGSI_UTIL_SIGN_CLEAR) { spe_andc(gen->f, result_reg, reg, bit31mask_reg); @@ -434,6 +460,7 @@ static boolean emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, src_reg[4], dst_reg[4]; + spe_comment(gen->f, -4, "MOV:"); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { @@ -441,11 +468,18 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); } } + for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - /* XXX we don't always need to actually emit a mov instruction here */ - spe_move(gen->f, dst_reg[ch], src_reg[ch]); - store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); + if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) && + is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) { + /* special-case: register to memory store */ + store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]); + } + else { + spe_move(gen->f, dst_reg[ch], src_reg[ch]); + store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); + } free_itemps(gen); } } @@ -546,23 +580,37 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; - int tmp_reg = get_itemp(gen); + int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; spe_comment(gen->f, -4, "LERP:"); + /* setup/get src/dst/temp regs */ for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + } - /* d = s3 + s1(s2 - s3) */ - spe_fs(gen->f, tmp_reg, s2_reg, s3_reg); - spe_fma(gen->f, d_reg, tmp_reg, s1_reg, s3_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); + /* d = s3 + s1(s2 - s3) */ + /* do all subtracts, then all fma, then all stores to better pipeline */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } } + free_itemps(gen); return true; } @@ -651,7 +699,7 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) const int bit31mask_reg = get_itemp(gen); /* mask with bit 31 set, the rest cleared */ - spe_load_int(gen->f, bit31mask_reg, (1 << 31)); + spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); /* d = sign bit cleared in s1 */ spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg); @@ -714,35 +762,41 @@ static boolean emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; + int s0x_reg, s0y_reg, s0z_reg, s0w_reg; + int s1x_reg, s1y_reg, s1z_reg, s1w_reg; + int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + spe_comment(gen->f, -4, "DP4:"); - int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); - int tmp_reg = get_itemp(gen); + s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); + s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); - /* t = x0 * x1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + /* t0 = x0 * x1 */ + spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); - s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); - /* t = y0 * y1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + /* t1 = y0 * y1 */ + spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg); - s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); - /* t = z0 * z1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + /* t0 = z0 * z1 + t0 */ + spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg); - s1_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); - s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); - /* t = w0 * w1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + /* t1 = w0 * w1 + t1 */ + spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg); + + /* t0 = t0 + t1 */ + spe_fa(gen->f, t0_reg, t0_reg, t1_reg); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - spe_move(gen->f, d_reg, tmp_reg); - store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, t0_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); } } @@ -756,6 +810,7 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) { + /* XXX rewrite this function to look more like DP3/DP4 */ int ch; spe_comment(gen->f, -4, "DPH:"); @@ -1357,26 +1412,38 @@ emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; spe_comment(gen->f, -4, "MAX:"); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - int tmp_reg = get_itemp(gen); + s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + } - /* d = (s1 > s2) ? s1 : s2 */ - spe_fcgt(gen->f, tmp_reg, s1_reg, s2_reg); - spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg); + /* d = (s0 > s1) ? s0 : s1 */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); + } + } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } } + free_itemps(gen); return true; } @@ -1386,26 +1453,38 @@ emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; spe_comment(gen->f, -4, "MIN:"); for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - int tmp_reg = get_itemp(gen); + s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + } - /* d = (s2 > s1) ? s1 : s2 */ - spe_fcgt(gen->f, tmp_reg, s2_reg, s1_reg); - spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg); + /* d = (s1 > s0) ? s0 : s1 */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); + } + } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } } + free_itemps(gen); return true; } -- cgit v1.2.3 From 734685549ca7dbee78845fdef1d65aceaa729845 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 10:54:11 -0600 Subject: cell: added spu_unpack_A8R8G8B8_transpose4() Plus, clearer shuffle masks in other funcs. --- src/gallium/drivers/cell/spu/spu_colorpack.h | 49 ++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h index fd8dc6ded3e..d7ce0055248 100644 --- a/src/gallium/drivers/cell/spu/spu_colorpack.h +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -31,6 +31,7 @@ #define SPU_COLORPACK_H +#include <transpose_matrix4x4.h> #include <spu_intrinsics.h> @@ -84,10 +85,10 @@ spu_unpack_B8G8R8A8(uint color) vector unsigned int color_u4 = spu_splats(color); color_u4 = spu_shuffle(color_u4, color_u4, ((vector unsigned char) { - 10, 10, 10, 10, - 5, 5, 5, 5, + 2, 2, 2, 2, + 1, 1, 1, 1, 0, 0, 0, 0, - 15, 15, 15, 15}) ); + 3, 3, 3, 3}) ); return spu_convtf(color_u4, 32); } @@ -98,13 +99,47 @@ spu_unpack_A8R8G8B8(uint color) vector unsigned int color_u4 = spu_splats(color); color_u4 = spu_shuffle(color_u4, color_u4, ((vector unsigned char) { - 5, 5, 5, 5, - 10, 10, 10, 10, - 15, 15, 15, 15, + 1, 1, 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3, 0, 0, 0, 0}) ); - return spu_convtf(color_u4, 32); } +/** + * \param color_in - array of 32-bit packed ARGB colors + * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order + */ +static INLINE void +spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4], + vector float color_out[4]) +{ + vector unsigned int c0; + + c0 = spu_shuffle(color_in[0], color_in[0], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[0] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[1], color_in[1], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[1] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[2], color_in[2], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[2] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[3], color_in[3], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[3] = spu_convtf(c0, 32); + + _transpose_matrix4x4(color_out, color_out); +} + + + #endif /* SPU_COLORPACK_H */ -- cgit v1.2.3 From 3b07c28dee74c7aa3be5efac8084d610675af291 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 10:55:08 -0600 Subject: cell: do texture sampling/filtering for four pixels at a time. --- src/gallium/drivers/cell/spu/spu_command.c | 11 ++- src/gallium/drivers/cell/spu/spu_funcs.c | 4 + src/gallium/drivers/cell/spu/spu_main.h | 19 ++++- src/gallium/drivers/cell/spu/spu_texture.c | 125 ++++++++++++++++++++++++++++- src/gallium/drivers/cell/spu/spu_texture.h | 12 +++ 5 files changed, 161 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 91a4c137e7c..c59be7defde 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -301,10 +301,14 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); spu.sampler[sampler->unit] = sampler->state; - if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) + if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { spu.sample_texture[sampler->unit] = sample_texture_bilinear; - else + spu.sample_texture4[sampler->unit] = sample_texture4_bilinear; + } + else { spu.sample_texture[sampler->unit] = sample_texture_nearest; + spu.sample_texture4[sampler->unit] = sample_texture4_nearest; + } } @@ -323,6 +327,9 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].width = width; spu.texture[unit].height = height; + spu.texture[unit].width4 = spu_splats((float) width); + spu.texture[unit].height4 = spu_splats((float) height); + spu.texture[unit].tiles_per_row = width / TILE_SIZE; spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 7dd7fcd253a..13c234ea2eb 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -106,6 +106,7 @@ spu_txp(vector float s, vector float t, vector float r, vector float q, { //const uint unit = 0; struct vec_4x4 colors; +#if 0 vector float coords[4]; coords[0] = s; @@ -121,6 +122,9 @@ spu_txp(vector float s, vector float t, vector float r, vector float q, colors.v[3] = spu.sample_texture[unit](unit, coords[3]); _transpose_matrix4x4(colors.v, colors.v); +#else + spu.sample_texture4[unit](s, t, r, q, unit, colors.v); +#endif return colors; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 82c9c69a3a8..5d14be51c25 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -67,6 +67,14 @@ typedef union { typedef vector float (*spu_sample_texture_func)(uint unit, vector float texcoord); +typedef void (*spu_sample_texture4_func)(vector float s, + vector float t, + vector float r, + vector float q, + uint unit, + vector float colors[4]); + + /** Function for performing per-fragment ops */ typedef void (*spu_fragment_ops_func)(uint x, uint y, tile_t *colorTile, @@ -107,10 +115,12 @@ struct spu_texture void *start; ushort width, height; ushort tiles_per_row; - vector float tex_size; - vector unsigned int tex_size_mask; /**< == int(size - 1) */ - vector unsigned int tex_size_x_mask; /**< == int(size - 1) */ - vector unsigned int tex_size_y_mask; /**< == int(size - 1) */ + vector float tex_size; /**< == {width, height, 0, 0} */ + vector float width4; /**< == {width, width, width, width} */ + vector float height4; /**< == {height, height, height, height} */ + vector unsigned int tex_size_mask; /**< == {width-1, height-1, 0, 0 } */ + vector unsigned int tex_size_x_mask; /**< splat(width-1) */ + vector unsigned int tex_size_y_mask; /**< splat(height-1) */ } ALIGN16_ATTRIB; @@ -159,6 +169,7 @@ struct spu_global /** Current texture sampler function */ spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; + spu_sample_texture4_func sample_texture4[CELL_MAX_SAMPLERS]; /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 117b8a36f80..12e6ed1ba1e 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -26,6 +26,8 @@ **************************************************************************/ +#include <transpose_matrix4x4.h> + #include "pipe/p_compiler.h" #include "spu_main.h" #include "spu_texture.h" @@ -91,10 +93,10 @@ static void get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) { const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; - vec_uint4 tile_x = spu_rlmask(x, -5); - vec_uint4 tile_y = spu_rlmask(y, -5); - const qword offset_x = si_andi((qword) x, 0x1f); - const qword offset_y = si_andi((qword) y, 0x1f); + vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ + vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ + const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ + const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); @@ -132,6 +134,31 @@ sample_texture_nearest(uint unit, vector float texcoord) } +/** + * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). + */ +void +sample_texture4_nearest(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]) +{ + vector float ss = spu_mul(s, spu.texture[unit].width4); + vector float tt = spu_mul(t, spu.texture[unit].height4); + vector unsigned int is = spu_convtu(ss, 0); + vector unsigned int it = spu_convtu(tt, 0); + vec_uint4 texels[4]; + + /* GL_REPEAT wrap mode: */ + is = spu_and(is, spu.texture[unit].tex_size_x_mask); + it = spu_and(it, spu.texture[unit].tex_size_y_mask); + + get_four_texels(unit, is, it, texels); + + /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ + spu_unpack_A8R8G8B8_transpose4(texels, colors); +} + + vector float sample_texture_bilinear(uint unit, vector float texcoord) { @@ -198,3 +225,93 @@ sample_texture_bilinear(uint unit, vector float texcoord) return texel_sum; } + + +void +sample_texture4_bilinear(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]) +{ + vector float ss = spu_madd(s, spu.texture[unit].width4, spu_splats(-0.5f)); + vector float tt = spu_madd(t, spu.texture[unit].height4, spu_splats(-0.5f)); + + vector unsigned int is0 = spu_convtu(ss, 0); + vector unsigned int it0 = spu_convtu(tt, 0); + + /* is + 1, it + 1 */ + vector unsigned int is1 = spu_add(is0, 1); + vector unsigned int it1 = spu_add(it0, 1); + + /* PIPE_TEX_WRAP_REPEAT */ + is0 = spu_and(is0, spu.texture[unit].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].tex_size_x_mask); + it1 = spu_and(it1, spu.texture[unit].tex_size_y_mask); + + /* get packed int texels */ + vector unsigned int texels[16]; + get_four_texels(unit, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, is1, it1, texels + 12); /* lower-right */ + + /* XXX possibly rework following code to compute the weighted sample + * colors with integer arithmetic for fewer int->float conversions. + */ + + /* convert packed int texels to float colors */ + vector float ftexels[16]; + spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0); + spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4); + spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8); + spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12); + + /* Compute weighting factors in [0,1] + * Multiply texcoord by 1024, AND with 1023, convert back to float. + */ + vector float ss1024 = spu_mul(ss, spu_splats(1024.0f)); + vector signed int iss1024 = spu_convts(ss1024, 0); + iss1024 = spu_and(iss1024, 1023); + vector float sWeights0 = spu_convtf(iss1024, 10); + + vector float tt1024 = spu_mul(tt, spu_splats(1024.0f)); + vector signed int itt1024 = spu_convts(tt1024, 0); + itt1024 = spu_and(itt1024, 1023); + vector float tWeights0 = spu_convtf(itt1024, 10); + + /* 1 - sWeight and 1 - tWeight */ + vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0); + vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0); + + /* reds, for four pixels */ + ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]), + spu_add(ftexels[8], ftexels[12])); + + /* greens, for four pixels */ + ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]), + spu_add(ftexels[9], ftexels[13])); + + /* blues, for four pixels */ + ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]), + spu_add(ftexels[10], ftexels[14])); + + /* alphas, for four pixels */ + ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]), + spu_add(ftexels[11], ftexels[15])); +} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index f7c9738be88..f019e7d8eff 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -40,8 +40,20 @@ extern vector float sample_texture_nearest(uint unit, vector float texcoord); +extern void +sample_texture4_nearest(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]); + + extern vector float sample_texture_bilinear(uint unit, vector float texcoord); +extern void +sample_texture4_bilinear(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]); + + #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From c8fb3682619ea49c5fefdf8b88cdb95eac7478ff Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 11:16:04 -0600 Subject: cell: remove old texture code --- src/gallium/drivers/cell/spu/spu_command.c | 2 - src/gallium/drivers/cell/spu/spu_funcs.c | 19 ------- src/gallium/drivers/cell/spu/spu_main.h | 4 -- src/gallium/drivers/cell/spu/spu_texture.c | 88 ++---------------------------- src/gallium/drivers/cell/spu/spu_texture.h | 8 --- src/gallium/drivers/cell/spu/spu_tri.c | 67 +---------------------- 6 files changed, 7 insertions(+), 181 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index c59be7defde..d4cc9a21467 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -302,11 +302,9 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) spu.sampler[sampler->unit] = sampler->state; if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { - spu.sample_texture[sampler->unit] = sample_texture_bilinear; spu.sample_texture4[sampler->unit] = sample_texture4_bilinear; } else { - spu.sample_texture[sampler->unit] = sample_texture_nearest; spu.sample_texture4[sampler->unit] = sample_texture4_nearest; } } diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 13c234ea2eb..4c90b701ee7 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -104,27 +104,8 @@ static struct vec_4x4 spu_txp(vector float s, vector float t, vector float r, vector float q, unsigned unit) { - //const uint unit = 0; struct vec_4x4 colors; -#if 0 - vector float coords[4]; - - coords[0] = s; - coords[1] = t; - coords[2] = r; - coords[3] = q; - _transpose_matrix4x4(coords, coords); - - /* get four texture samples */ - colors.v[0] = spu.sample_texture[unit](unit, coords[0]); - colors.v[1] = spu.sample_texture[unit](unit, coords[1]); - colors.v[2] = spu.sample_texture[unit](unit, coords[2]); - colors.v[3] = spu.sample_texture[unit](unit, coords[3]); - - _transpose_matrix4x4(colors.v, colors.v); -#else spu.sample_texture4[unit](s, t, r, q, unit, colors.v); -#endif return colors; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 5d14be51c25..2a8cb00f8df 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -64,9 +64,6 @@ typedef union { /** Function for sampling textures */ -typedef vector float (*spu_sample_texture_func)(uint unit, - vector float texcoord); - typedef void (*spu_sample_texture4_func)(vector float s, vector float t, vector float r, @@ -168,7 +165,6 @@ struct spu_global spu_fragment_program_func fragment_program; /** Current texture sampler function */ - spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; spu_sample_texture4_func sample_texture4[CELL_MAX_SAMPLERS]; /** Fragment program constants */ diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 12e6ed1ba1e..ba62ad27fd9 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -120,21 +120,9 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) } -/** - * Get texture sample at texcoord. - */ -vector float -sample_texture_nearest(uint unit, vector float texcoord) -{ - vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); - vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ - itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */ - uint texel = get_texel(unit, itc); - return spu_unpack_A8R8G8B8(texel); -} - /** + * Do nearest texture sampling for four pixels. * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). */ void @@ -148,7 +136,7 @@ sample_texture4_nearest(vector float s, vector float t, vector unsigned int it = spu_convtu(tt, 0); vec_uint4 texels[4]; - /* GL_REPEAT wrap mode: */ + /* PIPE_TEX_WRAP_REPEAT */ is = spu_and(is, spu.texture[unit].tex_size_x_mask); it = spu_and(it, spu.texture[unit].tex_size_y_mask); @@ -159,74 +147,10 @@ sample_texture4_nearest(vector float s, vector float t, } -vector float -sample_texture_bilinear(uint unit, vector float texcoord) -{ - static const vec_uint4 offset_x = {0, 0, 1, 1}; - static const vec_uint4 offset_y = {0, 1, 0, 1}; - - vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); - tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ - - /* integer texcoords S,T: */ - vec_uint4 itc = spu_convtu(tc, 0); /* convert to int */ - - vec_uint4 texels[4]; - - /* setup texcoords for quad: - * +-----+-----+ - * |x0,y0|x1,y1| - * +-----+-----+ - * |x2,y2|x3,y3| - * +-----+-----+ - */ - vec_uint4 x = spu_splats(spu_extract(itc, 0)); - vec_uint4 y = spu_splats(spu_extract(itc, 1)); - x = spu_add(x, offset_x); - y = spu_add(y, offset_y); - - /* GL_REPEAT wrap mode: */ - x = spu_and(x, spu.texture[unit].tex_size_x_mask); - y = spu_and(y, spu.texture[unit].tex_size_y_mask); - - get_four_texels(unit, x, y, texels); - - /* integer A8R8G8B8 to float texel conversion */ - vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0)); - vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0)); - vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0)); - vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0)); - - - /* Compute weighting factors in [0,1] - * Multiply texcoord by 1024, AND with 1023, convert back to float. - */ - vector float tc1024 = spu_mul(tc, spu_splats(1024.0f)); - vector signed int itc1024 = spu_convts(tc1024, 0); - itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); - vector float weight = spu_convtf(itc1024, 10); - - /* smeared frac and 1-frac */ - vector float sfrac = spu_splats(spu_extract(weight, 0)); - vector float tfrac = spu_splats(spu_extract(weight, 1)); - vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac); - vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac); - - /* multiply the samples (colors) by the S/T weights */ - texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1); - texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1); - texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac ); - texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac ); - - /* compute sum of weighted samples */ - vector float texel_sum = spu_add(texel00, texel01); - texel_sum = spu_add(texel_sum, texel10); - texel_sum = spu_add(texel_sum, texel11); - - return texel_sum; -} - - +/** + * Do bilinear texture sampling for four pixels. + * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). + */ void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index f019e7d8eff..d576aed7191 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -36,20 +36,12 @@ extern void invalidate_tex_cache(void); -extern vector float -sample_texture_nearest(uint unit, vector float texcoord); - - extern void sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, uint unit, vector float colors[4]); -extern vector float -sample_texture_bilinear(uint unit, vector float texcoord); - - extern void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index a62d4f0f2ff..022d21ba8f2 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -286,72 +286,7 @@ emit_quad( int x, int y, mask_t mask) spu.cur_ctile_status = TILE_STATUS_DIRTY; spu.cur_ztile_status = TILE_STATUS_DIRTY; - if (0/*spu.texture[0].start*/) { - /* - * Temporary texture mapping path - * This will go away when fragment programs support TEX inst. - */ - const uint unit = 0; - vector float colors[4]; - vector float texcoords[4]; - eval_coeff(2, (float) x, (float) y, texcoords); - - if (spu_extract(mask, 0)) - colors[0] = spu.sample_texture[unit](unit, texcoords[0]); - if (spu_extract(mask, 1)) - colors[1] = spu.sample_texture[unit](unit, texcoords[1]); - if (spu_extract(mask, 2)) - colors[2] = spu.sample_texture[unit](unit, texcoords[2]); - if (spu_extract(mask, 3)) - colors[3] = spu.sample_texture[unit](unit, texcoords[3]); - - - if (spu.texture[1].start) { - /* multi-texture mapping */ - const uint unit = 1; - vector float colors1[4]; - - eval_coeff(2, (float) x, (float) y, texcoords); - - if (spu_extract(mask, 0)) - colors1[0] = spu.sample_texture[unit](unit, texcoords[0]); - if (spu_extract(mask, 1)) - colors1[1] = spu.sample_texture[unit](unit, texcoords[1]); - if (spu_extract(mask, 2)) - colors1[2] = spu.sample_texture[unit](unit, texcoords[2]); - if (spu_extract(mask, 3)) - colors1[3] = spu.sample_texture[unit](unit, texcoords[3]); - - /* hack: modulate first texture by second */ - colors[0] = spu_mul(colors[0], colors1[0]); - colors[1] = spu_mul(colors[1], colors1[1]); - colors[2] = spu_mul(colors[2], colors1[2]); - colors[3] = spu_mul(colors[3], colors1[3]); - } - - { - /* Convert fragment data from AoS to SoA format. - * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) - * This is temporary! - */ - vector float soa_frag[4]; - _transpose_matrix4x4(soa_frag, colors); - - vector float fragZ = eval_z((float) x, (float) y); - - /* Do all per-fragment/quad operations here, including: - * alpha test, z test, stencil test, blend and framebuffer writing. - */ - spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, - fragZ, - soa_frag[0], soa_frag[1], - soa_frag[2], soa_frag[3], - mask, - setup.facing); - } - - } - else { + { /* * Run fragment shader, execute per-fragment ops, update fb/tile. */ -- cgit v1.2.3 From 67425aaa09df9cab76d7cc5c66e9e4595f0ccf40 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 14:09:54 -0600 Subject: cell: bilinear texture filtering using integer arithmetic Fewer float/int conversions involved. --- src/gallium/drivers/cell/spu/spu_texture.c | 144 +++++++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_texture.h | 5 + 2 files changed, 149 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index ba62ad27fd9..c10268131d5 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -239,3 +239,147 @@ sample_texture4_bilinear(vector float s, vector float t, colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]), spu_add(ftexels[11], ftexels[15])); } + + + +/** + * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h + */ +static INLINE void +transpose(vector unsigned int *mOut, vector unsigned int *mIn) +{ + vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */ + vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */ + vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */ + + vector unsigned char shufflehi = ((vector unsigned char) { + 0x00, 0x01, 0x02, 0x03, + 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, + 0x14, 0x15, 0x16, 0x17}); + vector unsigned char shufflelo = ((vector unsigned char) { + 0x08, 0x09, 0x0A, 0x0B, + 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x1C, 0x1D, 0x1E, 0x1F}); + abcd = *(mIn+0); + efgh = *(mIn+1); + ijkl = *(mIn+2); + mnop = *(mIn+3); + + aibj = spu_shuffle(abcd, ijkl, shufflehi); + ckdl = spu_shuffle(abcd, ijkl, shufflelo); + emfn = spu_shuffle(efgh, mnop, shufflehi); + gohp = spu_shuffle(efgh, mnop, shufflelo); + + aeim = spu_shuffle(aibj, emfn, shufflehi); + bfjn = spu_shuffle(aibj, emfn, shufflelo); + cgko = spu_shuffle(ckdl, gohp, shufflehi); + dhlp = spu_shuffle(ckdl, gohp, shufflelo); + + *(mOut+0) = aeim; + *(mOut+1) = bfjn; + *(mOut+2) = cgko; + *(mOut+3) = dhlp; +} + + +/** + * Bilinear filtering, using int intead of float arithmetic + */ +void +sample_texture4_bilinear_2(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]) +{ + static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; + /* Scale texcoords by size of texture, and add half pixel bias */ + vector float ss = spu_madd(s, spu.texture[unit].width4, half); + vector float tt = spu_madd(t, spu.texture[unit].height4, half); + + /* convert float coords to fixed-pt coords with 8 fraction bits */ + vector unsigned int is = spu_convtu(ss, 8); + vector unsigned int it = spu_convtu(tt, 8); + + /* compute integer texel weights in [0, 255] */ + vector signed int sWeights0 = spu_and((vector signed int) is, 255); + vector signed int tWeights0 = spu_and((vector signed int) it, 255); + vector signed int sWeights1 = spu_sub(255, sWeights0); + vector signed int tWeights1 = spu_sub(255, tWeights0); + + /* texel coords: is0 = is / 256, it0 = is / 256 */ + vector unsigned int is0 = spu_rlmask(is, -8); + vector unsigned int it0 = spu_rlmask(it, -8); + + /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */ + vector unsigned int is1 = spu_add(is0, 1); + vector unsigned int it1 = spu_add(it0, 1); + + /* PIPE_TEX_WRAP_REPEAT */ + is0 = spu_and(is0, spu.texture[unit].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].tex_size_x_mask); + it1 = spu_and(it1, spu.texture[unit].tex_size_y_mask); + + /* get packed int texels */ + vector unsigned int texels[16]; + get_four_texels(unit, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, is1, it1, texels + 12); /* lower-right */ + + /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ + { + static const unsigned char ZERO = 0x80; + int i; + for (i = 0; i < 16; i++) { + texels[i] = spu_shuffle(texels[i], texels[i], + ((vector unsigned char) { + ZERO, ZERO, ZERO, 1, + ZERO, ZERO, ZERO, 2, + ZERO, ZERO, ZERO, 3, + ZERO, ZERO, ZERO, 0})); + } + } + + /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */ + transpose(texels + 0, texels + 0); + transpose(texels + 4, texels + 4); + transpose(texels + 8, texels + 8); + transpose(texels + 12, texels + 12); + + /* computed weighted colors */ + vector unsigned int c0, c1, c2, c3, cSum; + + /* red */ + c0 = (vector unsigned int) si_mpyu((qword) texels[ 0], si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpyu((qword) texels[ 4], si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpyu((qword) texels[ 8], si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpyu((qword) texels[12], si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[0] = spu_convtf(cSum, 24); + + /* green */ + c0 = (vector unsigned int) si_mpyu((qword) texels[ 1], si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpyu((qword) texels[ 5], si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpyu((qword) texels[ 9], si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpyu((qword) texels[13], si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[1] = spu_convtf(cSum, 24); + + /* blue */ + c0 = (vector unsigned int) si_mpyu((qword) texels[ 2], si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpyu((qword) texels[ 6], si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpyu((qword) texels[10], si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpyu((qword) texels[14], si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[2] = spu_convtf(cSum, 24); + + /* alpha */ + c0 = (vector unsigned int) si_mpyu((qword) texels[ 3], si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpyu((qword) texels[ 7], si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpyu((qword) texels[11], si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpyu((qword) texels[15], si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[3] = spu_convtf(cSum, 24); +} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index d576aed7191..38a17deda25 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -47,5 +47,10 @@ sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, uint unit, vector float colors[4]); +extern void +sample_texture4_bilinear_2(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]); + #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From 420e8cdf25501dd82e1c178e6300d7b416798e25 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 14:10:36 -0600 Subject: cell: remove more old texture code --- src/gallium/drivers/cell/spu/spu_texture.c | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index c10268131d5..3f2280436cb 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -50,32 +50,6 @@ invalidate_tex_cache(void) } -/** - * XXX look into getting texels for all four pixels in a quad at once. - */ -static uint -get_texel(uint unit, vec_uint4 coordinate) -{ - /* - * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as - * SIMD since X and Y are already in a SIMD register. - */ - const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; - ushort x = spu_extract(coordinate, 0); - ushort y = spu_extract(coordinate, 1); - unsigned tile_offset = sizeof(tile_t) - * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE)); - ushort texel_offset = (ushort) 4 - * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE)); - vec_uint4 tmp; - - spu_dcache_fetch_unaligned((qword *) & tmp, - texture_ea + tile_offset + texel_offset, - 4); - return spu_extract(tmp, 0); -} - - /** * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... * -- cgit v1.2.3 From c05cabd646f1c7384b5187e3427064096aef4673 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 14:31:11 -0600 Subject: cell: use fewer memory references in sample_texture4_bilinear_2() --- src/gallium/drivers/cell/spu/spu_texture.c | 56 +++++++++++++++++------------- 1 file changed, 31 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 3f2280436cb..96ef88822aa 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -220,7 +220,11 @@ sample_texture4_bilinear(vector float s, vector float t, * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h */ static INLINE void -transpose(vector unsigned int *mOut, vector unsigned int *mIn) +transpose(vector unsigned int *mOut0, + vector unsigned int *mOut1, + vector unsigned int *mOut2, + vector unsigned int *mOut3, + vector unsigned int *mIn) { vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */ vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */ @@ -251,10 +255,10 @@ transpose(vector unsigned int *mOut, vector unsigned int *mIn) cgko = spu_shuffle(ckdl, gohp, shufflehi); dhlp = spu_shuffle(ckdl, gohp, shufflelo); - *(mOut+0) = aeim; - *(mOut+1) = bfjn; - *(mOut+2) = cgko; - *(mOut+3) = dhlp; + *mOut0 = aeim; + *mOut1 = bfjn; + *mOut2 = cgko; + *mOut3 = dhlp; } @@ -317,43 +321,45 @@ sample_texture4_bilinear_2(vector float s, vector float t, } /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */ - transpose(texels + 0, texels + 0); - transpose(texels + 4, texels + 4); - transpose(texels + 8, texels + 8); - transpose(texels + 12, texels + 12); + vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7, + texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15; + transpose(&texel0, &texel1, &texel2, &texel3, texels + 0); + transpose(&texel4, &texel5, &texel6, &texel7, texels + 4); + transpose(&texel8, &texel9, &texel10, &texel11, texels + 8); + transpose(&texel12, &texel13, &texel14, &texel15, texels + 12); /* computed weighted colors */ vector unsigned int c0, c1, c2, c3, cSum; /* red */ - c0 = (vector unsigned int) si_mpyu((qword) texels[ 0], si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpyu((qword) texels[ 4], si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpyu((qword) texels[ 8], si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpyu((qword) texels[12], si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + c0 = (vector unsigned int) si_mpyu((qword) texel0, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpyu((qword) texel4, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpyu((qword) texel8, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpyu((qword) texel12, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); colors[0] = spu_convtf(cSum, 24); /* green */ - c0 = (vector unsigned int) si_mpyu((qword) texels[ 1], si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpyu((qword) texels[ 5], si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpyu((qword) texels[ 9], si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpyu((qword) texels[13], si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + c0 = (vector unsigned int) si_mpyu((qword) texel1, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpyu((qword) texel5, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpyu((qword) texel9, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpyu((qword) texel13, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); colors[1] = spu_convtf(cSum, 24); /* blue */ - c0 = (vector unsigned int) si_mpyu((qword) texels[ 2], si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpyu((qword) texels[ 6], si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpyu((qword) texels[10], si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpyu((qword) texels[14], si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + c0 = (vector unsigned int) si_mpyu((qword) texel2, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpyu((qword) texel6, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpyu((qword) texel10, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpyu((qword) texel14, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); colors[2] = spu_convtf(cSum, 24); /* alpha */ - c0 = (vector unsigned int) si_mpyu((qword) texels[ 3], si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpyu((qword) texels[ 7], si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpyu((qword) texels[11], si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpyu((qword) texels[15], si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ + c0 = (vector unsigned int) si_mpyu((qword) texel3, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpyu((qword) texel7, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpyu((qword) texel11, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpyu((qword) texel15, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/ cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); colors[3] = spu_convtf(cSum, 24); } -- cgit v1.2.3 From b0c136cfb1fcbcea35e17dc699a96acbb24738f5 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 15:17:01 -0600 Subject: cell: remove old texture-related fields --- src/gallium/drivers/cell/spu/spu_command.c | 3 --- src/gallium/drivers/cell/spu/spu_main.h | 2 -- 2 files changed, 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index d4cc9a21467..64890f6dbd6 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -330,9 +330,6 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].tiles_per_row = width / TILE_SIZE; - spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; - spu.texture[unit].tex_size_mask = (vector unsigned int) - { width - 1, height - 1, 0, 0 }; spu.texture[unit].tex_size_x_mask = spu_splats(width - 1); spu.texture[unit].tex_size_y_mask = spu_splats(height - 1); } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 2a8cb00f8df..e3960dbe8b6 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -112,10 +112,8 @@ struct spu_texture void *start; ushort width, height; ushort tiles_per_row; - vector float tex_size; /**< == {width, height, 0, 0} */ vector float width4; /**< == {width, width, width, width} */ vector float height4; /**< == {height, height, height, height} */ - vector unsigned int tex_size_mask; /**< == {width-1, height-1, 0, 0 } */ vector unsigned int tex_size_x_mask; /**< splat(width-1) */ vector unsigned int tex_size_y_mask; /**< splat(height-1) */ } ALIGN16_ATTRIB; -- cgit v1.2.3 From 978799beb2a9c51550abb1f37bb6f63d06bc4717 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 16:43:11 -0600 Subject: cell: initial work for mipmap texture filtering --- src/gallium/drivers/cell/common.h | 6 +- src/gallium/drivers/cell/ppu/cell_screen.c | 4 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 18 ++-- src/gallium/drivers/cell/ppu/cell_texture.c | 48 ++++++---- src/gallium/drivers/cell/ppu/cell_texture.h | 6 +- src/gallium/drivers/cell/spu/spu_command.c | 37 +++++--- src/gallium/drivers/cell/spu/spu_funcs.c | 1 + src/gallium/drivers/cell/spu/spu_main.h | 7 +- src/gallium/drivers/cell/spu/spu_texture.c | 120 ++++++++++++++++++------- src/gallium/drivers/cell/spu/spu_texture.h | 6 ++ 10 files changed, 176 insertions(+), 77 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 5dc756023fb..e4de9a551d7 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -67,6 +67,7 @@ #define CELL_MAX_SPUS 6 #define CELL_MAX_SAMPLERS 4 +#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */ #define TILE_SIZE 32 @@ -251,8 +252,9 @@ struct cell_command_texture { uint64_t opcode; /**< CELL_CMD_STATE_TEXTURE */ uint unit; - void *start; /**< Address in main memory */ - ushort width, height; + void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ + ushort width[CELL_MAX_TEXTURE_LEVELS]; + ushort height[CELL_MAX_TEXTURE_LEVELS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 47ba6fa2909..d2235579507 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -76,11 +76,11 @@ cell_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 10; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 12; /* max 2Kx2K */ + return CELL_MAX_TEXTURE_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return 8; /* max 128x128x128 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 12; /* max 2Kx2K */ + return CELL_MAX_TEXTURE_LEVELS; default: return 10; } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index cbfa393cfba..7090b4c99f7 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -211,14 +211,20 @@ cell_emit_state(struct cell_context *cell) texture->opcode = CELL_CMD_STATE_TEXTURE; texture->unit = i; if (cell->texture[i]) { - texture->start = cell->texture[i]->tiled_data; - texture->width = cell->texture[i]->base.width[0]; - texture->height = cell->texture[i]->base.height[0]; + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + texture->start[level] = cell->texture[i]->tiled_data[level]; + texture->width[level] = cell->texture[i]->base.width[level]; + texture->height[level] = cell->texture[i]->base.height[level]; + } } else { - texture->start = NULL; - texture->width = 1; - texture->height = 1; + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + texture->start[level] = NULL; + texture->width[level] = 1; + texture->height[level] = 1; + } } } } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index b6590dfb86e..f5f81ac3cc4 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -66,6 +66,8 @@ cell_texture_layout(struct cell_texture * spt) unsigned size; unsigned w_tile, h_tile; + assert(level < CELL_MAX_TEXTURE_LEVELS); + /* width, height, rounded up to tile size */ w_tile = align(width, TILE_SIZE); h_tile = align(height, TILE_SIZE); @@ -249,33 +251,41 @@ cell_tile_texture(struct cell_context *cell, struct cell_texture *texture) { struct pipe_screen *screen = cell->pipe.screen; - uint face = 0, level = 0, zslice = 0; - struct pipe_surface *surf; - const uint w = texture->base.width[0], h = texture->base.height[0]; + uint face = 0, level, zslice = 0; const uint *src; - /* temporary restrictions: */ - assert(w >= TILE_SIZE); - assert(h >= TILE_SIZE); - assert(w % TILE_SIZE == 0); - assert(h % TILE_SIZE == 0); + for (level = 0; level <= texture->base.last_level; level++) { + if (!texture->tiled_data[level]) { + struct pipe_surface *surf; - surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice, - PIPE_BUFFER_USAGE_CPU_WRITE); - ASSERT(surf); + const uint w = texture->base.width[level], h = texture->base.height[level]; - src = (const uint *) pipe_surface_map(surf, PIPE_BUFFER_USAGE_CPU_WRITE); + if (w < 32 || h < 32) + continue; + /* temporary restrictions: */ + assert(w >= TILE_SIZE); + assert(h >= TILE_SIZE); + assert(w % TILE_SIZE == 0); + assert(h % TILE_SIZE == 0); - if (texture->tiled_data) { - align_free(texture->tiled_data); - } - texture->tiled_data = align_malloc(w * h * 4, 16); + surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); + ASSERT(surf); + + src = (const uint *) pipe_surface_map(surf, PIPE_BUFFER_USAGE_CPU_WRITE); - tile_copy_data(w, h, TILE_SIZE, texture->tiled_data, src); + if (texture->tiled_data[level]) { + align_free(texture->tiled_data[level]); + } + texture->tiled_data[level] = align_malloc(w * h * 4, 16); - pipe_surface_unmap(surf); + tile_copy_data(w, h, TILE_SIZE, texture->tiled_data[level], src); - pipe_surface_reference(&surf, NULL); + pipe_surface_unmap(surf); + + pipe_surface_reference(&surf, NULL); + } + } } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 6d37e95ebce..6d357369841 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -40,15 +40,15 @@ struct cell_texture { struct pipe_texture base; - unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS]; + unsigned long stride[CELL_MAX_TEXTURE_LEVELS]; /* The data is held here: */ struct pipe_buffer *buffer; unsigned long buffer_size; - void *tiled_data; /* XXX this may be temporary */ /*ALIGN16*/ + void *tiled_data[CELL_MAX_TEXTURE_LEVELS]; /* XXX this may be temporary */ /*ALIGN16*/ }; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 64890f6dbd6..089af224155 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -301,6 +301,12 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); spu.sampler[sampler->unit] = sampler->state; +#if 0 + if (spu.sampler[sampler->unit].min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + spu.sample_texture4[sampler->unit] = sample_texture4_lod; + } + else +#endif if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { spu.sample_texture4[sampler->unit] = sample_texture4_bilinear; } @@ -314,24 +320,29 @@ static void cmd_state_texture(const struct cell_command_texture *texture) { const uint unit = texture->unit; - const uint width = texture->width; - const uint height = texture->height; + uint i; - DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n", - texture->unit, texture->start, - texture->width, texture->height); + DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit); - spu.texture[unit].start = texture->start; - spu.texture[unit].width = width; - spu.texture[unit].height = height; + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + uint width = texture->width[i]; + uint height = texture->height[i]; - spu.texture[unit].width4 = spu_splats((float) width); - spu.texture[unit].height4 = spu_splats((float) height); + DEBUG_PRINTF(" LEVEL %u: at %p size[0] %u x %u\n", i, + texture->start[i], texture->width[i], texture->height[i]); - spu.texture[unit].tiles_per_row = width / TILE_SIZE; + spu.texture[unit].level[i].start = texture->start[i]; + spu.texture[unit].level[i].width = width; + spu.texture[unit].level[i].height = height; - spu.texture[unit].tex_size_x_mask = spu_splats(width - 1); - spu.texture[unit].tex_size_y_mask = spu_splats(height - 1); + spu.texture[unit].level[i].tiles_per_row = width / TILE_SIZE; + + spu.texture[unit].level[i].width4 = spu_splats((float) width); + spu.texture[unit].level[i].height4 = spu_splats((float) height); + + spu.texture[unit].level[i].tex_size_x_mask = spu_splats(width - 1); + spu.texture[unit].level[i].tex_size_y_mask = spu_splats(height - 1); + } } diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 4c90b701ee7..f2946010bda 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -100,6 +100,7 @@ spu_log2(vector float x) return spu_mul(v, k); } + static struct vec_4x4 spu_txp(vector float s, vector float t, vector float r, vector float q, unsigned unit) diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index e3960dbe8b6..9515543efe4 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -107,7 +107,7 @@ struct spu_framebuffer } ALIGN16_ATTRIB; -struct spu_texture +struct spu_texture_level { void *start; ushort width, height; @@ -118,6 +118,11 @@ struct spu_texture vector unsigned int tex_size_y_mask; /**< splat(height-1) */ } ALIGN16_ATTRIB; +struct spu_texture +{ + struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; +} ALIGN16_ATTRIB; + /** * All SPU global/context state will be in a singleton object of this type: diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 96ef88822aa..96c09e3ccbe 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -27,6 +27,7 @@ #include <transpose_matrix4x4.h> +#include <math.h> #include "pipe/p_compiler.h" #include "spu_main.h" @@ -42,11 +43,12 @@ void invalidate_tex_cache(void) { + uint lvl = 0; uint unit = 0; - uint bytes = 4 * spu.texture[unit].width - * spu.texture[unit].height; + uint bytes = 4 * spu.texture[unit].level[lvl].width + * spu.texture[unit].level[lvl].height; - spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes); + spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); } @@ -64,15 +66,17 @@ invalidate_tex_cache(void) * a time. */ static void -get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) +get_four_texels(uint unit, uint level, vec_uint4 x, vec_uint4 y, + vec_uint4 *texels) { - const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + const unsigned texture_ea = (uintptr_t) tlevel->start; vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ - const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); + const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row); const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); @@ -104,17 +108,18 @@ sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, uint unit, vector float colors[4]) { - vector float ss = spu_mul(s, spu.texture[unit].width4); - vector float tt = spu_mul(t, spu.texture[unit].height4); + const uint lvl = 0; + vector float ss = spu_mul(s, spu.texture[unit].level[lvl].width4); + vector float tt = spu_mul(t, spu.texture[unit].level[lvl].height4); vector unsigned int is = spu_convtu(ss, 0); vector unsigned int it = spu_convtu(tt, 0); vec_uint4 texels[4]; /* PIPE_TEX_WRAP_REPEAT */ - is = spu_and(is, spu.texture[unit].tex_size_x_mask); - it = spu_and(it, spu.texture[unit].tex_size_y_mask); + is = spu_and(is, spu.texture[unit].level[lvl].tex_size_x_mask); + it = spu_and(it, spu.texture[unit].level[lvl].tex_size_y_mask); - get_four_texels(unit, is, it, texels); + get_four_texels(unit, lvl, is, it, texels); /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ spu_unpack_A8R8G8B8_transpose4(texels, colors); @@ -130,8 +135,9 @@ sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, uint unit, vector float colors[4]) { - vector float ss = spu_madd(s, spu.texture[unit].width4, spu_splats(-0.5f)); - vector float tt = spu_madd(t, spu.texture[unit].height4, spu_splats(-0.5f)); + const uint lvl = 0; + vector float ss = spu_madd(s, spu.texture[unit].level[lvl].width4, spu_splats(-0.5f)); + vector float tt = spu_madd(t, spu.texture[unit].level[lvl].height4, spu_splats(-0.5f)); vector unsigned int is0 = spu_convtu(ss, 0); vector unsigned int it0 = spu_convtu(tt, 0); @@ -141,17 +147,17 @@ sample_texture4_bilinear(vector float s, vector float t, vector unsigned int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].tex_size_y_mask); + is0 = spu_and(is0, spu.texture[unit].level[lvl].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].level[lvl].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].level[lvl].tex_size_x_mask); + it1 = spu_and(it1, spu.texture[unit].level[lvl].tex_size_y_mask); /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, is1, it1, texels + 12); /* lower-right */ + get_four_texels(unit, lvl, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, lvl, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, lvl, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, lvl, is1, it1, texels + 12); /* lower-right */ /* XXX possibly rework following code to compute the weighted sample * colors with integer arithmetic for fewer int->float conversions. @@ -270,10 +276,11 @@ sample_texture4_bilinear_2(vector float s, vector float t, vector float r, vector float q, uint unit, vector float colors[4]) { + const uint lvl = 0; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; /* Scale texcoords by size of texture, and add half pixel bias */ - vector float ss = spu_madd(s, spu.texture[unit].width4, half); - vector float tt = spu_madd(t, spu.texture[unit].height4, half); + vector float ss = spu_madd(s, spu.texture[unit].level[lvl].width4, half); + vector float tt = spu_madd(t, spu.texture[unit].level[lvl].height4, half); /* convert float coords to fixed-pt coords with 8 fraction bits */ vector unsigned int is = spu_convtu(ss, 8); @@ -294,17 +301,17 @@ sample_texture4_bilinear_2(vector float s, vector float t, vector unsigned int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].tex_size_y_mask); + is0 = spu_and(is0, spu.texture[unit].level[lvl].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].level[lvl].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].level[lvl].tex_size_x_mask); + it1 = spu_and(it1, spu.texture[unit].level[lvl].tex_size_y_mask); /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, is1, it1, texels + 12); /* lower-right */ + get_four_texels(unit, lvl, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, lvl, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, lvl, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, lvl, is1, it1, texels + 12); /* lower-right */ /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ { @@ -363,3 +370,54 @@ sample_texture4_bilinear_2(vector float s, vector float t, cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); colors[3] = spu_convtf(cSum, 24); } + + + +/** + * Compute level of detail factor from texcoords. + */ +static float +compute_lambda(uint unit, vector float s, vector float t) +{ + uint lvl = 0; + float width = spu.texture[unit].level[lvl].width; + float height = spu.texture[unit].level[lvl].width; + float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0)); + float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0)); + float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0)); + float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0)); + float x = dsdx * dsdx + dtdx * dtdx; + float y = dsdy * dsdy + dtdy * dtdy; + float rho = x > y ? x : y; + rho = sqrtf(rho); + float lambda = logf(rho) * 1.442695f; + return lambda; +} + + + +/** + * Texture sampling with level of detail selection. + */ +void +sample_texture4_lod(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]) +{ + float lambda = compute_lambda(unit, s, t); + + if (lambda < spu.sampler[unit].min_lod) + lambda = spu.sampler[unit].min_lod; + else if (lambda > spu.sampler[unit].max_lod) + lambda = spu.sampler[unit].max_lod; + + /* hack for now */ + int level = (int) lambda; + if (level > 3) + level = 3; + + /* + sample_texture4_bilinear_2(s, t, r, q, unit, level, colors); + */ +} + diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index 38a17deda25..4802f7c47c1 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -53,4 +53,10 @@ sample_texture4_bilinear_2(vector float s, vector float t, uint unit, vector float colors[4]); +extern void +sample_texture4_lod(vector float s, vector float t, + vector float r, vector float q, + uint unit, vector float colors[4]); + + #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From dee18a147d3adaf2578d27837c8f18c92d796c9d Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 19:41:26 -0600 Subject: cell: finish-up perspective-corrected interpolation --- src/gallium/drivers/cell/spu/spu_tri.c | 127 +++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 45 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 022d21ba8f2..3f1fb4f7c9c 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -213,7 +213,7 @@ clip_emit_quad(struct setup_stage *setup) * Eg: four colors will be computed (in AoS format). */ static INLINE void -eval_coeff(uint slot, float x, float y, vector float result[4]) +eval_coeff(uint slot, float x, float y, vector float w, vector float result[4]) { switch (spu.vertex_info.attrib[slot].interp_mode) { case INTERP_CONSTANT: @@ -222,23 +222,43 @@ eval_coeff(uint slot, float x, float y, vector float result[4]) result[QUAD_BOTTOM_LEFT] = result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v; break; - case INTERP_LINEAR: - /* fall-through, for now */ - default: { - register vector float dadx = setup.coef[slot].dadx.v; - register vector float dady = setup.coef[slot].dady.v; - register vector float topLeft - = spu_add(setup.coef[slot].a0.v, - spu_add(spu_mul(spu_splats(x), dadx), - spu_mul(spu_splats(y), dady))); + vector float dadx = setup.coef[slot].dadx.v; + vector float dady = setup.coef[slot].dady.v; + vector float topLeft = + spu_add(setup.coef[slot].a0.v, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); result[QUAD_TOP_LEFT] = topLeft; result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); } + break; + case INTERP_PERSPECTIVE: + { + vector float dadx = setup.coef[slot].dadx.v; + vector float dady = setup.coef[slot].dady.v; + vector float topLeft = + spu_add(setup.coef[slot].a0.v, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); + + vector float wInv = spu_re(w); /* 1.0 / w */ + + result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv); + result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv); + result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv); + result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv); + } + break; + case INTERP_POS: + case INTERP_NONE: + break; + default: + ASSERT(0); } } @@ -248,14 +268,14 @@ eval_coeff(uint slot, float x, float y, vector float result[4]) * XXX this will all be re-written someday. */ static INLINE void -eval_coeff_soa(uint slot, float x, float y, vector float result[4]) +eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4]) { - eval_coeff(slot, x, y, result); + eval_coeff(slot, x, y, w, result); _transpose_matrix4x4(result, result); } - +/** Evalute coefficients to get Z for four pixels in a quad */ static INLINE vector float eval_z(float x, float y) { @@ -269,6 +289,20 @@ eval_z(float x, float y) } +/** Evalute coefficients to get W for four pixels in a quad */ +static INLINE vector float +eval_w(float x, float y) +{ + const uint slot = 0; + const float dwdx = setup.coef[slot].dadx.f[3]; + const float dwdy = setup.coef[slot].dady.f[3]; + const float topLeft = setup.coef[slot].a0.f[3] + x * dwdx + y * dwdy; + const vector float topLeftv = spu_splats(topLeft); + const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy }; + return spu_add(topLeftv, derivs); +} + + /** * Emit a quad (pass to next stage). No clipping is done. * Note: about 1/5 to 1/7 of the time, mask is zero and this function @@ -292,14 +326,15 @@ emit_quad( int x, int y, mask_t mask) */ vector float inputs[4*4], outputs[2*4]; vector float fragZ = eval_z((float) x, (float) y); + vector float fragW = eval_w((float) x, (float) y); /* setup inputs */ #if 0 - eval_coeff_soa(1, (float) x, (float) y, inputs); + eval_coeff_soa(1, (float) x, (float) y, fragW, inputs); #else uint i; for (i = 0; i < spu.vertex_info.num_attribs; i++) { - eval_coeff_soa(i+1, (float) x, (float) y, inputs + i * 4); + eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4); } #endif ASSERT(spu.fragment_program); @@ -658,7 +693,6 @@ tri_linear_coeff4(uint slot) -#if 0 /** * Compute a0, dadx and dady for a perspective-corrected interpolant, * for a triangle. @@ -667,38 +701,41 @@ tri_linear_coeff4(uint slot) * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void tri_persp_coeff( unsigned slot, - unsigned i ) +static void +tri_persp_coeff4(uint slot) { - /* premultiply by 1/w: - */ - float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3]; - float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3]; - float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3]; - - float botda = mida - mina; - float majda = maxa - mina; - float a = setup.ebot.dy * majda - botda * setup.emaj.dy; - float b = setup.emaj.dx * botda - majda * setup.ebot.dx; - - /* - printf("tri persp %d,%d: %f %f %f\n", slot, i, - setup.vmin->data[slot][i], - setup.vmid->data[slot][i], - setup.vmax->data[slot][i] - ); - */ + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3)); + const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3)); + const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3)); - assert(slot < PIPE_MAX_SHADER_INPUTS); - assert(i <= 3); + vector float vmin_d = setup.vmin->data[slot]; + vector float vmid_d = setup.vmid->data[slot]; + vector float vmax_d = setup.vmax->data[slot]; - setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; - setup.coef[slot].dady.f[i] = b * setup.oneoverarea; - setup.coef[slot].a0.f[i] = (mina - - (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + - setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); + vmin_d = spu_mul(vmin_d, vmin_w); + vmid_d = spu_mul(vmid_d, vmid_w); + vmax_d = spu_mul(vmax_d, vmax_w); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); + setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); + + setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); } -#endif + /** @@ -726,7 +763,7 @@ static void setup_tri_coefficients(void) tri_linear_coeff4(i); break; case INTERP_PERSPECTIVE: - tri_linear_coeff4(i); /* temporary */ + tri_persp_coeff4(i); break; default: ASSERT(0); -- cgit v1.2.3 From 5d7cc6176de09e683e5b40a69df250d1abfaf6f0 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 19:50:20 -0600 Subject: cell: remove dead code, clean-up, reformatting --- src/gallium/drivers/cell/spu/spu_tri.c | 114 +++++++-------------------------- 1 file changed, 24 insertions(+), 90 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 3f1fb4f7c9c..438fae84a8f 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -120,19 +120,11 @@ struct setup_stage { uint facing; - uint tx, ty; + uint tx, ty; /**< position of current tile (x, y) */ int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; -#if 0 - struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; -#else struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; -#endif - -#if 0 - struct quad_header quad; -#endif struct { int left[2]; /**< [0] = row0, [1] = row1 */ @@ -144,69 +136,9 @@ struct setup_stage { }; - static struct setup_stage setup; - - -#if 0 -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} -#endif - -#if 0 -/** - * Clip setup.quad against the scissor/surface bounds. - */ -static INLINE void -quad_clip(struct setup_stage *setup) -{ - const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (setup.quad.x0 >= maxx || - setup.quad.y0 >= maxy || - setup.quad.x0 + 1 < minx || - setup.quad.y0 + 1 < miny) { - /* totally clipped */ - setup.quad.mask = 0x0; - return; - } - if (setup.quad.x0 < minx) - setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup.quad.y0 < miny) - setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup.quad.x0 == maxx - 1) - setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup.quad.y0 == maxy - 1) - setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); -} -#endif - -#if 0 -/** - * Emit a quad (pass to next stage) with clipping. - */ -static INLINE void -clip_emit_quad(struct setup_stage *setup) -{ - quad_clip(setup); - if (setup.quad.mask) { - struct softpipe_context *sp = setup.softpipe; - sp->quad.first->run(sp->quad.first, &setup.quad); - } -} -#endif - /** * Evaluate attribute coefficients (plane equations) to compute * attribute values for the four fragments in a quad. @@ -363,7 +295,8 @@ emit_quad( int x, int y, mask_t mask) * Given an X or Y coordinate, return the block/quad coordinate that it * belongs to. */ -static INLINE int block( int x ) +static INLINE int +block(int x) { return x & ~1; } @@ -374,7 +307,8 @@ static INLINE int block( int x ) * the triangle's bounds. * The mask is a uint4 vector and each element will be 0 or 0xffffffff. */ -static INLINE mask_t calculate_mask( int x ) +static INLINE mask_t +calculate_mask(int x) { /* This is a little tricky. * Use & instead of && to avoid branches. @@ -392,7 +326,8 @@ static INLINE mask_t calculate_mask( int x ) /** * Render a horizontal span of quads */ -static void flush_spans( void ) +static void +flush_spans(void) { int minleft, maxright; int x; @@ -420,7 +355,6 @@ static void flush_spans( void ) return; } - /* OK, we're very likely to need the tile data now. * clear or finish waiting if needed. */ @@ -456,9 +390,7 @@ static void flush_spans( void ) * calculate_mask() could be simplified a bit... */ for (x = block(minleft); x <= block(maxright); x += 2) { -#if 1 emit_quad( x, setup.span.y, calculate_mask( x )); -#endif } setup.span.y = 0; @@ -467,8 +399,10 @@ static void flush_spans( void ) setup.span.right[1] = 0; } + #if DEBUG_VERTS -static void print_vertex(const struct vertex_header *v) +static void +print_vertex(const struct vertex_header *v) { int i; fprintf(stderr, "Vertex: (%p)\n", v); @@ -480,11 +414,11 @@ static void print_vertex(const struct vertex_header *v) #endif -static boolean setup_sort_vertices(const struct vertex_header *v0, - const struct vertex_header *v1, - const struct vertex_header *v2) +static boolean +setup_sort_vertices(const struct vertex_header *v0, + const struct vertex_header *v1, + const struct vertex_header *v2) { - #if DEBUG_VERTS fprintf(stderr, "Triangle:\n"); print_vertex(v0); @@ -692,7 +626,6 @@ tri_linear_coeff4(uint slot) } - /** * Compute a0, dadx and dady for a perspective-corrected interpolant, * for a triangle. @@ -742,7 +675,8 @@ tri_persp_coeff4(uint slot) * Compute the setup.coef[] array dadx, dady, a0 values. * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. */ -static void setup_tri_coefficients(void) +static void +setup_tri_coefficients(void) { #if 1 uint i; @@ -779,7 +713,8 @@ static void setup_tri_coefficients(void) } -static void setup_tri_edges(void) +static void +setup_tri_edges(void) { float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; @@ -809,9 +744,8 @@ static void setup_tri_edges(void) * Render the upper or lower half of a triangle. * Scissoring/cliprect is applied here too. */ -static void subtriangle( struct edge *eleft, - struct edge *eright, - unsigned lines ) +static void +subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) { const int minx = setup.cliprect_minx; const int maxx = setup.cliprect_maxx; @@ -878,10 +812,9 @@ static void subtriangle( struct edge *eleft, eright->sy += lines; } + static float -determinant( const float *v0, - const float *v1, - const float *v2 ) +determinant(const float *v0, const float *v1, const float *v2) { /* edge vectors e = v0 - v2, f = v1 - v2 */ const float ex = v0[0] - v2[0]; @@ -899,7 +832,8 @@ determinant( const float *v0, * The tile data should have already been fetched. */ boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding) +tri_draw(const float *v0, const float *v1, const float *v2, + uint tx, uint ty, uint front_winding) { setup.tx = tx; setup.ty = ty; -- cgit v1.2.3 From fc562a7acd86bee4853d38961e29c8da3d56e548 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Mon, 13 Oct 2008 20:19:51 -0600 Subject: cell: more clean-up in spu_tri.c --- src/gallium/drivers/cell/spu/spu_tri.c | 100 ++++++--------------------------- 1 file changed, 16 insertions(+), 84 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 438fae84a8f..03f094373df 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -116,7 +116,7 @@ struct setup_stage { struct edge etop; struct edge emaj; - float oneoverarea; + float oneOverArea; uint facing; @@ -507,13 +507,13 @@ setup_sort_vertices(const struct vertex_header *v0, * use the prim->det value because its sign is correct. */ { - const float area = (setup.emaj.dx * setup.ebot.dy - - setup.ebot.dx * setup.emaj.dy); + const float area = (setup.emaj.dx * setup.ebot.dy - + setup.ebot.dx * setup.emaj.dy); - setup.oneoverarea = 1.0f / area; + setup.oneOverArea = 1.0f / area; /* _mesa_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup.oneoverarea, area, prim->det ); + __FUNCTION__, setup.oneOverArea, area, prim->det ); */ } @@ -536,7 +536,7 @@ setup_sort_vertices(const struct vertex_header *v0, * \param slot which attribute slot */ static INLINE void -const_coeff(uint slot) +const_coeff4(uint slot) { setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0}; setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0}; @@ -544,58 +544,6 @@ const_coeff(uint slot) } -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static INLINE void -tri_linear_coeff(uint slot, uint firstComp, uint lastComp) -{ - uint i; - const float *vmin_d = (float *) &setup.vmin->data[slot]; - const float *vmid_d = (float *) &setup.vmid->data[slot]; - const float *vmax_d = (float *) &setup.vmax->data[slot]; - const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f; - const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f; - - for (i = firstComp; i < lastComp; i++) { - float botda = vmid_d[i] - vmin_d[i]; - float majda = vmax_d[i] - vmin_d[i]; - float a = setup.ebot.dy * majda - botda * setup.emaj.dy; - float b = setup.emaj.dx * botda - majda * setup.ebot.dx; - - ASSERT(slot < PIPE_MAX_SHADER_INPUTS); - - setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; - setup.coef[slot].dady.f[i] = b * setup.oneoverarea; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - setup.coef[slot].a0.f[i] = (vmin_d[i] - - (setup.coef[slot].dadx.f[i] * x + - setup.coef[slot].dady.f[i] * y)); - } - - /* - _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup.coef[slot].a0[i], - setup.coef[slot].dadx.f[i], - setup.coef[slot].dady.f[i]); - */ -} - - /** * As above, but interp setup all four vector components. */ @@ -616,8 +564,8 @@ tri_linear_coeff4(uint slot) vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), spu_mul(majda, spu_splats(setup.ebot.dx))); - setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); - setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); + setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneOverArea)); vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); @@ -660,8 +608,8 @@ tri_persp_coeff4(uint slot) vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), spu_mul(majda, spu_splats(setup.ebot.dx))); - setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); - setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); + setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneOverArea)); vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); @@ -678,21 +626,17 @@ tri_persp_coeff4(uint slot) static void setup_tri_coefficients(void) { -#if 1 uint i; for (i = 0; i < spu.vertex_info.num_attribs; i++) { switch (spu.vertex_info.attrib[i].interp_mode) { case INTERP_NONE: break; - case INTERP_POS: - /*tri_linear_coeff(i, 2, 3);*/ - /* XXX interp W if PERSPECTIVE... */ - tri_linear_coeff4(i); - break; case INTERP_CONSTANT: - const_coeff(i); + const_coeff4(i); break; + case INTERP_POS: + /* fall-through */ case INTERP_LINEAR: tri_linear_coeff4(i); break; @@ -703,13 +647,6 @@ setup_tri_coefficients(void) ASSERT(0); } } -#else - ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS); - ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR || - spu.vertex_info.interp_mode[1] == INTERP_CONSTANT); - tri_linear_coeff(0, 2, 3); /* slot 0, z */ - tri_linear_coeff(1, 0, 4); /* slot 1, color */ -#endif } @@ -863,19 +800,14 @@ tri_draw(const float *v0, const float *v1, const float *v2, setup.span.y_flags = 0; setup.span.right[0] = 0; setup.span.right[1] = 0; - /* setup.span.z_mode = tri_z_mode( setup.ctx ); */ - /* init_constant_attribs( setup ); */ - - if (setup.oneoverarea < 0.0) { - /* emaj on left: - */ + if (setup.oneOverArea < 0.0) { + /* emaj on left */ subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); } else { - /* emaj on right: - */ + /* emaj on right */ subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); } -- cgit v1.2.3 From b7609be0f1cc8d7a822a29a2ecc165cd848df2b3 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 10:05:23 -0600 Subject: cell: remove old code, clean-ups, etc. --- src/gallium/drivers/cell/ppu/cell_context.c | 1 - src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 +- src/gallium/drivers/cell/ppu/cell_texture.c | 72 ++++++++++++-------------- src/gallium/drivers/cell/ppu/cell_texture.h | 4 -- 4 files changed, 35 insertions(+), 44 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 35cd6874a27..b66aa9c9d99 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -128,7 +128,6 @@ cell_create_context(struct pipe_screen *screen, cell_init_state_functions(cell); cell_init_shader_functions(cell); cell_init_surface_functions(cell); - cell_init_texture_functions(cell); cell_init_vertex_functions(cell); cell->draw = cell_draw_create(cell); diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 8c55b8e0933..02721e8f385 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -258,7 +258,7 @@ cell_set_sampler_textures(struct pipe_context *pipe, } cell->num_textures = num; - cell_update_texture_mapping(cell); + cell_update_texture_mapping(cell); /* XXX temporary! */ cell->dirty |= CELL_NEW_TEXTURE; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index f5f81ac3cc4..ad3344aacd1 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -52,15 +52,15 @@ static unsigned minify( unsigned d ) static void -cell_texture_layout(struct cell_texture * spt) +cell_texture_layout(struct cell_texture *ct) { - struct pipe_texture *pt = &spt->base; + struct pipe_texture *pt = &ct->base; unsigned level; unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; - spt->buffer_size = 0; + ct->buffer_size = 0; for ( level = 0 ; level <= pt->last_level ; level++ ) { unsigned size; @@ -78,9 +78,9 @@ cell_texture_layout(struct cell_texture * spt) pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); - spt->stride[level] = pt->nblocksx[level] * pt->block.size; + ct->stride[level] = pt->nblocksx[level] * pt->block.size; - spt->level_offset[level] = spt->buffer_size; + ct->level_offset[level] = ct->buffer_size; size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; if (pt->target == PIPE_TEXTURE_CUBE) @@ -88,7 +88,7 @@ cell_texture_layout(struct cell_texture * spt) else size *= depth; - spt->buffer_size += size; + ct->buffer_size += size; width = minify(width); height = minify(height); @@ -102,26 +102,25 @@ cell_texture_create(struct pipe_screen *screen, const struct pipe_texture *templat) { struct pipe_winsys *ws = screen->winsys; - struct cell_texture *spt = CALLOC_STRUCT(cell_texture); - if (!spt) + struct cell_texture *ct = CALLOC_STRUCT(cell_texture); + if (!ct) return NULL; - spt->base = *templat; - spt->base.refcount = 1; - spt->base.screen = screen; + ct->base = *templat; + ct->base.refcount = 1; + ct->base.screen = screen; - cell_texture_layout(spt); + cell_texture_layout(ct); - spt->buffer = ws->buffer_create(ws, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); + ct->buffer = ws->buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL, + ct->buffer_size); - if (!spt->buffer) { - FREE(spt); + if (!ct->buffer) { + FREE(ct); return NULL; } - return &spt->base; + return &ct->base; } @@ -137,15 +136,15 @@ cell_texture_release(struct pipe_screen *screen, __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); */ if (--(*pt)->refcount <= 0) { - struct cell_texture *spt = cell_texture(*pt); + struct cell_texture *ct = cell_texture(*pt); /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); + DBG("%s deleting %p\n", __FUNCTION__, (void *) ct); */ - pipe_buffer_reference(screen, &spt->buffer, NULL); + pipe_buffer_reference(screen, &ct->buffer, NULL); - FREE(spt); + FREE(ct); } *pt = NULL; } @@ -169,22 +168,22 @@ cell_get_tex_surface(struct pipe_screen *screen, unsigned usage) { struct pipe_winsys *ws = screen->winsys; - struct cell_texture *spt = cell_texture(pt); + struct cell_texture *ct = cell_texture(pt); struct pipe_surface *ps; ps = ws->surface_alloc(ws); if (ps) { assert(ps->refcount); assert(ps->winsys); - winsys_buffer_reference(ws, &ps->buffer, spt->buffer); + winsys_buffer_reference(ws, &ps->buffer, ct->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; ps->height = pt->height[level]; ps->nblocksx = pt->nblocksx[level]; ps->nblocksy = pt->nblocksy[level]; - ps->stride = spt->stride[level]; - ps->offset = spt->level_offset[level]; + ps->stride = ct->stride[level]; + ps->offset = ct->level_offset[level]; ps->usage = usage; /* XXX may need to override usage flags (see sp_texture.c) */ @@ -268,11 +267,13 @@ cell_tile_texture(struct cell_context *cell, assert(w % TILE_SIZE == 0); assert(h % TILE_SIZE == 0); - surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice, + surf = screen->get_tex_surface(screen, &texture->base, face, + level, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); ASSERT(surf); - src = (const uint *) pipe_surface_map(surf, PIPE_BUFFER_USAGE_CPU_WRITE); + src = (const uint *) pipe_surface_map(surf, + PIPE_BUFFER_USAGE_CPU_WRITE); if (texture->tiled_data[level]) { align_free(texture->tiled_data[level]); @@ -289,6 +290,7 @@ cell_tile_texture(struct cell_context *cell, } +/** XXX temporary hack */ void cell_update_texture_mapping(struct cell_context *cell) { @@ -335,9 +337,9 @@ cell_tex_surface_release(struct pipe_screen *screen, static void * -cell_surface_map( struct pipe_screen *screen, - struct pipe_surface *surface, - unsigned flags ) +cell_surface_map(struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags) { ubyte *map; @@ -364,7 +366,7 @@ cell_surface_map( struct pipe_screen *screen, #endif } - return map + surface->offset; + return (void *) (map + surface->offset); } @@ -376,12 +378,6 @@ cell_surface_unmap(struct pipe_screen *screen, } -void -cell_init_texture_functions(struct cell_context *cell) -{ - /*cell->pipe.texture_update = cell_texture_update;*/ -} - void cell_init_screen_texture_funcs(struct pipe_screen *screen) diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 6d357369841..46475097433 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -65,10 +65,6 @@ extern void cell_update_texture_mapping(struct cell_context *cell); -extern void -cell_init_texture_functions(struct cell_context *cell); - - extern void cell_init_screen_texture_funcs(struct pipe_screen *screen); -- cgit v1.2.3 From 0bee156d8518419befb50ba57d22fed4037797ce Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 10:55:38 -0600 Subject: cell: now do texture twiddling in the right way, at the right time. Also handles images smaller than 32x32 now. --- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 - src/gallium/drivers/cell/ppu/cell_state_emit.c | 4 +- src/gallium/drivers/cell/ppu/cell_texture.c | 221 +++++++++---------------- src/gallium/drivers/cell/ppu/cell_texture.h | 4 - 4 files changed, 83 insertions(+), 148 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 02721e8f385..2e3086c4fae 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -258,8 +258,6 @@ cell_set_sampler_textures(struct pipe_context *pipe, } cell->num_textures = num; - cell_update_texture_mapping(cell); /* XXX temporary! */ - cell->dirty |= CELL_NEW_TEXTURE; } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 7090b4c99f7..cae546b7002 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -222,8 +222,8 @@ cell_emit_state(struct cell_context *cell) uint level; for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { texture->start[level] = NULL; - texture->width[level] = 1; - texture->height[level] = 1; + texture->width[level] = 0; + texture->height[level] = 0; } } } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index ad3344aacd1..4bd87590cbf 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -150,15 +150,77 @@ cell_texture_release(struct pipe_screen *screen, } -#if 0 + +/** + * Convert image from linear layout to tiled layout. 4-byte pixels. + */ +static void +swizzle_image_uint(uint w, uint h, uint tile_size, uint *dst, const uint *src) +{ + const uint tile_size2 = tile_size * tile_size; + const uint h_t = (h + tile_size - 1) / tile_size; + const uint w_t = (w + tile_size - 1) / tile_size; + + uint it, jt; /* tile counters */ + uint i, j; /* intra-tile counters */ + + /* loop over dest tiles */ + for (it = 0; it < h_t; it++) { + for (jt = 0; jt < w_t; jt++) { + /* start of dest tile: */ + uint *tdst = dst + (it * w_t + jt) * tile_size2; + /* loop over texels in the tile */ + uint tile_width = MIN2(tile_size, w); + uint tile_height = MIN2(tile_size, h); + for (i = 0; i < tile_height; i++) { + for (j = 0; j < tile_width; j++) { + const uint srci = it * tile_size + i; + const uint srcj = jt * tile_size + j; + ASSERT(srci < w); + ASSERT(srcj < h); + tdst[i * TILE_SIZE + j] = src[srci * w + srcj]; + } + } + } + } +} + + +/** + * Convert linear texture image data to tiled format for SPU usage. + */ static void -cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, - uint face, uint levelsMask) +cell_twiddle_texture(struct pipe_screen *screen, + struct pipe_surface *surface) { - /* XXX TO DO: re-tile the texture data ... */ + struct cell_texture *texture = cell_texture(surface->texture); + const uint level = surface->level; + const uint texWidth = texture->base.width[level]; + const uint texHeight = texture->base.height[level]; + const uint bufWidth = MAX2(texWidth, TILE_SIZE); + const uint bufHeight = MAX2(texHeight, TILE_SIZE); + const uint *src = + (const uint *) pipe_buffer_map(screen, surface->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + + switch (texture->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* free old tiled data */ + if (texture->tiled_data[level]) { + align_free(texture->tiled_data[level]); + } + /* alloc new tiled data */ + texture->tiled_data[level] = align_malloc(bufWidth * bufHeight * 4, 16); + swizzle_image_uint(texWidth, texHeight, TILE_SIZE, + texture->tiled_data[level], src); + break; + default: + printf("Unsupported texture format\n"); + ; + } + pipe_buffer_unmap(screen, surface->buffer); } -#endif static struct pipe_surface * @@ -207,129 +269,12 @@ cell_get_tex_surface(struct pipe_screen *screen, } - -/** - * Copy tile data from linear layout to tiled layout. - * XXX this should be rolled into the future surface-creation code. - * XXX also need "untile" code... - */ -static void -tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) -{ - const uint tile_size2 = tile_size * tile_size; - const uint h_t = h / tile_size, w_t = w / tile_size; - - uint it, jt; /* tile counters */ - uint i, j; /* intra-tile counters */ - - /* loop over dest tiles */ - for (it = 0; it < h_t; it++) { - for (jt = 0; jt < w_t; jt++) { - /* start of dest tile: */ - uint *tdst = dst + (it * w_t + jt) * tile_size2; - /* loop over texels in the tile */ - for (i = 0; i < tile_size; i++) { - for (j = 0; j < tile_size; j++) { - const uint srci = it * tile_size + i; - const uint srcj = jt * tile_size + j; - *tdst++ = src[srci * w + srcj]; - } - } - } - } -} - - - -/** - * Convert linear texture image data to tiled format for SPU usage. - * XXX recast this in terms of pipe_surfaces (aka texture views). - */ -static void -cell_tile_texture(struct cell_context *cell, - struct cell_texture *texture) -{ - struct pipe_screen *screen = cell->pipe.screen; - uint face = 0, level, zslice = 0; - const uint *src; - - for (level = 0; level <= texture->base.last_level; level++) { - if (!texture->tiled_data[level]) { - struct pipe_surface *surf; - - const uint w = texture->base.width[level], h = texture->base.height[level]; - - if (w < 32 || h < 32) - continue; - /* temporary restrictions: */ - assert(w >= TILE_SIZE); - assert(h >= TILE_SIZE); - assert(w % TILE_SIZE == 0); - assert(h % TILE_SIZE == 0); - - surf = screen->get_tex_surface(screen, &texture->base, face, - level, zslice, - PIPE_BUFFER_USAGE_CPU_WRITE); - ASSERT(surf); - - src = (const uint *) pipe_surface_map(surf, - PIPE_BUFFER_USAGE_CPU_WRITE); - - if (texture->tiled_data[level]) { - align_free(texture->tiled_data[level]); - } - texture->tiled_data[level] = align_malloc(w * h * 4, 16); - - tile_copy_data(w, h, TILE_SIZE, texture->tiled_data[level], src); - - pipe_surface_unmap(surf); - - pipe_surface_reference(&surf, NULL); - } - } -} - - -/** XXX temporary hack */ -void -cell_update_texture_mapping(struct cell_context *cell) -{ -#if 0 - uint face = 0, level = 0, zslice = 0; -#endif - uint i; - - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - if (cell->texture[i]) - cell_tile_texture(cell, cell->texture[i]); - } - -#if 0 - if (cell->tex_surf && cell->tex_map) { - pipe_surface_unmap(cell->tex_surf); - cell->tex_map = NULL; - } - - /* XXX free old surface */ - - cell->tex_surf = cell_get_tex_surface(&cell->pipe, - &cell->texture[0]->base, - face, level, zslice); - - cell->tex_map = pipe_surface_map(cell->tex_surf); -#endif -} - - static void cell_tex_surface_release(struct pipe_screen *screen, struct pipe_surface **s) { - /* Effectively do the texture_update work here - if texture images - * needed post-processing to put them into hardware layout, this is - * where it would happen. For softpipe, nothing to do. - */ - assert ((*s)->texture); + /* XXX if done rendering to teximage, re-tile */ + pipe_texture_reference(&(*s)->texture, NULL); screen->winsys->surface_release(screen->winsys, s); @@ -351,22 +296,8 @@ cell_surface_map(struct pipe_screen *screen, map = pipe_buffer_map( screen, surface->buffer, flags ); if (map == NULL) return NULL; - - /* May want to different things here depending on read/write nature - * of the map: - */ - if (surface->texture && - (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) - { - /* Do something to notify sharing contexts of a texture change. - * In softpipe, that would mean flushing the texture cache. - */ -#if 0 - cell_screen(screen)->timestamp++; -#endif - } - - return (void *) (map + surface->offset); + else + return (void *) (map + surface->offset); } @@ -374,6 +305,16 @@ static void cell_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { + struct cell_texture *ct = cell_texture(surface->texture); + + assert(ct); + + if ((ct->base.tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) && + (surface->usage & PIPE_BUFFER_USAGE_CPU_WRITE)) { + /* convert from linear to tiled layout */ + cell_twiddle_texture(screen, surface); + } + pipe_buffer_unmap( screen, surface->buffer ); } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 46475097433..a0757091b07 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -61,10 +61,6 @@ cell_texture(struct pipe_texture *pt) -extern void -cell_update_texture_mapping(struct cell_context *cell); - - extern void cell_init_screen_texture_funcs(struct pipe_screen *screen); -- cgit v1.2.3 From 3baf83db3c60be8185bc68a0aa3adbce80d9025e Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 12:10:27 -0600 Subject: cell: fix tex image stride bugs --- src/gallium/drivers/cell/ppu/cell_texture.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 4bd87590cbf..608bda35f76 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -155,7 +155,8 @@ cell_texture_release(struct pipe_screen *screen, * Convert image from linear layout to tiled layout. 4-byte pixels. */ static void -swizzle_image_uint(uint w, uint h, uint tile_size, uint *dst, const uint *src) +swizzle_image_uint(uint w, uint h, uint tile_size, uint *dst, + uint src_stride, const uint *src) { const uint tile_size2 = tile_size * tile_size; const uint h_t = (h + tile_size - 1) / tile_size; @@ -164,6 +165,8 @@ swizzle_image_uint(uint w, uint h, uint tile_size, uint *dst, const uint *src) uint it, jt; /* tile counters */ uint i, j; /* intra-tile counters */ + src_stride /= 4; /* convert from bytes to pixels */ + /* loop over dest tiles */ for (it = 0; it < h_t; it++) { for (jt = 0; jt < w_t; jt++) { @@ -178,7 +181,7 @@ swizzle_image_uint(uint w, uint h, uint tile_size, uint *dst, const uint *src) const uint srcj = jt * tile_size + j; ASSERT(srci < w); ASSERT(srcj < h); - tdst[i * TILE_SIZE + j] = src[srci * w + srcj]; + tdst[i * tile_size + j] = src[srci * src_stride + srcj]; } } } @@ -199,9 +202,9 @@ cell_twiddle_texture(struct pipe_screen *screen, const uint texHeight = texture->base.height[level]; const uint bufWidth = MAX2(texWidth, TILE_SIZE); const uint bufHeight = MAX2(texHeight, TILE_SIZE); - const uint *src = - (const uint *) pipe_buffer_map(screen, surface->buffer, + const void *map = pipe_buffer_map(screen, surface->buffer, PIPE_BUFFER_USAGE_CPU_READ); + const uint *src = (const uint *) ((const ubyte *) map + surface->offset); switch (texture->base.format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -212,7 +215,8 @@ cell_twiddle_texture(struct pipe_screen *screen, /* alloc new tiled data */ texture->tiled_data[level] = align_malloc(bufWidth * bufHeight * 4, 16); swizzle_image_uint(texWidth, texHeight, TILE_SIZE, - texture->tiled_data[level], src); + texture->tiled_data[level], + surface->stride, src); break; default: printf("Unsupported texture format\n"); -- cgit v1.2.3 From f8bddf698d523f597fea0f721b064daee81d8005 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 12:11:52 -0600 Subject: cell: basic mipmap filtering works now Though, only GL_MIPMAP_NEAREST / GL_LINEAR works right now. --- src/gallium/drivers/cell/spu/spu_command.c | 21 ++++-- src/gallium/drivers/cell/spu/spu_funcs.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 3 +- src/gallium/drivers/cell/spu/spu_texture.c | 106 +++++++++++++++-------------- src/gallium/drivers/cell/spu/spu_texture.h | 8 +-- 5 files changed, 79 insertions(+), 61 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 089af224155..4e98eea3389 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -301,16 +301,18 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); spu.sampler[sampler->unit] = sampler->state; -#if 0 + if (spu.sampler[sampler->unit].min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + /* use lambda/lod to determine min vs. mag filter */ spu.sample_texture4[sampler->unit] = sample_texture4_lod; } - else -#endif - if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { + else if (spu.sampler[sampler->unit].min_img_filter + == PIPE_TEX_FILTER_LINEAR) { + /* min = mag = bilinear */ spu.sample_texture4[sampler->unit] = sample_texture4_bilinear; } else { + /* min = mag = inearest */ spu.sample_texture4[sampler->unit] = sample_texture4_nearest; } } @@ -322,8 +324,12 @@ cmd_state_texture(const struct cell_command_texture *texture) const uint unit = texture->unit; uint i; + //if (spu.init.id==0) Debug=1; + DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit); + spu.texture[unit].max_level = 0; + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { uint width = texture->width[i]; uint height = texture->height[i]; @@ -335,14 +341,19 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].level[i].width = width; spu.texture[unit].level[i].height = height; - spu.texture[unit].level[i].tiles_per_row = width / TILE_SIZE; + spu.texture[unit].level[i].tiles_per_row = + (width + TILE_SIZE - 1) / TILE_SIZE; spu.texture[unit].level[i].width4 = spu_splats((float) width); spu.texture[unit].level[i].height4 = spu_splats((float) height); spu.texture[unit].level[i].tex_size_x_mask = spu_splats(width - 1); spu.texture[unit].level[i].tex_size_y_mask = spu_splats(height - 1); + + if (texture->start[i]) + spu.texture[unit].max_level = i; } + //Debug=0; } diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index f2946010bda..66b82f673d5 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -106,7 +106,7 @@ spu_txp(vector float s, vector float t, vector float r, vector float q, unsigned unit) { struct vec_4x4 colors; - spu.sample_texture4[unit](s, t, r, q, unit, colors.v); + spu.sample_texture4[unit](s, t, r, q, unit, 0, colors.v); return colors; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 9515543efe4..cfb645add02 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -68,7 +68,7 @@ typedef void (*spu_sample_texture4_func)(vector float s, vector float t, vector float r, vector float q, - uint unit, + uint unit, uint level, vector float colors[4]); @@ -121,6 +121,7 @@ struct spu_texture_level struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; + uint max_level; } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 96c09e3ccbe..10036330c6c 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -26,7 +26,6 @@ **************************************************************************/ -#include <transpose_matrix4x4.h> #include <math.h> #include "pipe/p_compiler.h" @@ -43,12 +42,14 @@ void invalidate_tex_cache(void) { - uint lvl = 0; - uint unit = 0; - uint bytes = 4 * spu.texture[unit].level[lvl].width - * spu.texture[unit].level[lvl].height; + uint lvl; + for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) { + uint unit = 0; + uint bytes = 4 * spu.texture[unit].level[lvl].width + * spu.texture[unit].level[lvl].height; - spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); + spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); + } } @@ -71,8 +72,8 @@ get_four_texels(uint unit, uint level, vec_uint4 x, vec_uint4 y, { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; const unsigned texture_ea = (uintptr_t) tlevel->start; - vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ - vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ + const vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ + const vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ @@ -106,20 +107,19 @@ get_four_texels(uint unit, uint level, vec_uint4 x, vec_uint4 y, void sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]) + uint unit, uint level, vector float colors[4]) { - const uint lvl = 0; - vector float ss = spu_mul(s, spu.texture[unit].level[lvl].width4); - vector float tt = spu_mul(t, spu.texture[unit].level[lvl].height4); + vector float ss = spu_mul(s, spu.texture[unit].level[level].width4); + vector float tt = spu_mul(t, spu.texture[unit].level[level].height4); vector unsigned int is = spu_convtu(ss, 0); vector unsigned int it = spu_convtu(tt, 0); vec_uint4 texels[4]; /* PIPE_TEX_WRAP_REPEAT */ - is = spu_and(is, spu.texture[unit].level[lvl].tex_size_x_mask); - it = spu_and(it, spu.texture[unit].level[lvl].tex_size_y_mask); + is = spu_and(is, spu.texture[unit].level[level].tex_size_x_mask); + it = spu_and(it, spu.texture[unit].level[level].tex_size_y_mask); - get_four_texels(unit, lvl, is, it, texels); + get_four_texels(unit, level, is, it, texels); /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ spu_unpack_A8R8G8B8_transpose4(texels, colors); @@ -133,11 +133,10 @@ sample_texture4_nearest(vector float s, vector float t, void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]) + uint unit, uint level, vector float colors[4]) { - const uint lvl = 0; - vector float ss = spu_madd(s, spu.texture[unit].level[lvl].width4, spu_splats(-0.5f)); - vector float tt = spu_madd(t, spu.texture[unit].level[lvl].height4, spu_splats(-0.5f)); + vector float ss = spu_madd(s, spu.texture[unit].level[level].width4, spu_splats(-0.5f)); + vector float tt = spu_madd(t, spu.texture[unit].level[level].height4, spu_splats(-0.5f)); vector unsigned int is0 = spu_convtu(ss, 0); vector unsigned int it0 = spu_convtu(tt, 0); @@ -147,17 +146,17 @@ sample_texture4_bilinear(vector float s, vector float t, vector unsigned int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].level[lvl].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].level[lvl].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].level[lvl].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].level[lvl].tex_size_y_mask); + is0 = spu_and(is0, spu.texture[unit].level[level].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].level[level].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].level[level].tex_size_x_mask); + it1 = spu_and(it1, spu.texture[unit].level[level].tex_size_y_mask); /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, lvl, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, lvl, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, lvl, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, lvl, is1, it1, texels + 12); /* lower-right */ + get_four_texels(unit, level, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, level, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, level, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, level, is1, it1, texels + 12); /* lower-right */ /* XXX possibly rework following code to compute the weighted sample * colors with integer arithmetic for fewer int->float conversions. @@ -273,14 +272,13 @@ transpose(vector unsigned int *mOut0, */ void sample_texture4_bilinear_2(vector float s, vector float t, - vector float r, vector float q, - uint unit, vector float colors[4]) + vector float r, vector float q, + uint unit, uint level, vector float colors[4]) { - const uint lvl = 0; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; /* Scale texcoords by size of texture, and add half pixel bias */ - vector float ss = spu_madd(s, spu.texture[unit].level[lvl].width4, half); - vector float tt = spu_madd(t, spu.texture[unit].level[lvl].height4, half); + vector float ss = spu_madd(s, spu.texture[unit].level[level].width4, half); + vector float tt = spu_madd(t, spu.texture[unit].level[level].height4, half); /* convert float coords to fixed-pt coords with 8 fraction bits */ vector unsigned int is = spu_convtu(ss, 8); @@ -301,17 +299,17 @@ sample_texture4_bilinear_2(vector float s, vector float t, vector unsigned int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].level[lvl].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].level[lvl].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].level[lvl].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].level[lvl].tex_size_y_mask); + is0 = spu_and(is0, spu.texture[unit].level[level].tex_size_x_mask); + it0 = spu_and(it0, spu.texture[unit].level[level].tex_size_y_mask); + is1 = spu_and(is1, spu.texture[unit].level[level].tex_size_x_mask); + it1 = spu_and(it1, spu.texture[unit].level[level].tex_size_y_mask); /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, lvl, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, lvl, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, lvl, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, lvl, is1, it1, texels + 12); /* lower-right */ + get_four_texels(unit, level, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, level, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, level, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, level, is1, it1, texels + 12); /* lower-right */ /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ { @@ -379,9 +377,9 @@ sample_texture4_bilinear_2(vector float s, vector float t, static float compute_lambda(uint unit, vector float s, vector float t) { - uint lvl = 0; - float width = spu.texture[unit].level[lvl].width; - float height = spu.texture[unit].level[lvl].width; + uint baseLevel = 0; + float width = spu.texture[unit].level[baseLevel].width; + float height = spu.texture[unit].level[baseLevel].width; float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0)); float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0)); float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0)); @@ -402,22 +400,30 @@ compute_lambda(uint unit, vector float s, vector float t) void sample_texture4_lod(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]) + uint unit, uint level, vector float colors[4]) { + /* + * Note that we're computing a lambda/lod here that's used for all + * four pixels in the quad. + */ float lambda = compute_lambda(unit, s, t); + /* apply lod bias */ + lambda += spu.sampler[unit].lod_bias; + + /* clamp */ if (lambda < spu.sampler[unit].min_lod) lambda = spu.sampler[unit].min_lod; else if (lambda > spu.sampler[unit].max_lod) lambda = spu.sampler[unit].max_lod; - /* hack for now */ - int level = (int) lambda; - if (level > 3) - level = 3; + /* convert to int level */ + level = (int) (lambda + 0.5f); + ASSERT(level >= 0); + + if (level > spu.texture[unit].max_level) + level = spu.texture[unit].max_level; - /* sample_texture4_bilinear_2(s, t, r, q, unit, level, colors); - */ } diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index 4802f7c47c1..ec06a50b4a1 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -39,24 +39,24 @@ invalidate_tex_cache(void); extern void sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]); + uint unit, uint level, vector float colors[4]); extern void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]); + uint unit, uint level, vector float colors[4]); extern void sample_texture4_bilinear_2(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]); + uint unit, uint level, vector float colors[4]); extern void sample_texture4_lod(vector float s, vector float t, vector float r, vector float q, - uint unit, vector float colors[4]); + uint unit, uint level, vector float colors[4]); #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From 58ea98dc68605130dda2538027f941df39ccd514 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 12:41:46 -0600 Subject: cell: fix assertions --- src/gallium/drivers/cell/ppu/cell_texture.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 608bda35f76..87f1598dae6 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -155,7 +155,7 @@ cell_texture_release(struct pipe_screen *screen, * Convert image from linear layout to tiled layout. 4-byte pixels. */ static void -swizzle_image_uint(uint w, uint h, uint tile_size, uint *dst, +twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, uint src_stride, const uint *src) { const uint tile_size2 = tile_size * tile_size; @@ -179,8 +179,8 @@ swizzle_image_uint(uint w, uint h, uint tile_size, uint *dst, for (j = 0; j < tile_width; j++) { const uint srci = it * tile_size + i; const uint srcj = jt * tile_size + j; - ASSERT(srci < w); - ASSERT(srcj < h); + ASSERT(srci < h); + ASSERT(srcj < w); tdst[i * tile_size + j] = src[srci * src_stride + srcj]; } } @@ -214,12 +214,12 @@ cell_twiddle_texture(struct pipe_screen *screen, } /* alloc new tiled data */ texture->tiled_data[level] = align_malloc(bufWidth * bufHeight * 4, 16); - swizzle_image_uint(texWidth, texHeight, TILE_SIZE, + twiddle_image_uint(texWidth, texHeight, TILE_SIZE, texture->tiled_data[level], surface->stride, src); break; default: - printf("Unsupported texture format\n"); + printf("Cell: twiddle unsupported texture format\n"); ; } -- cgit v1.2.3 From 6d2d5ceca21c87bea5e269e8099fb6f1d821b97a Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 12:42:21 -0600 Subject: cell: use minify vs magnify filters --- src/gallium/drivers/cell/spu/spu_command.c | 50 +++++++++++++++++++++++------- src/gallium/drivers/cell/spu/spu_main.h | 2 ++ src/gallium/drivers/cell/spu/spu_texture.c | 22 +++++++------ 3 files changed, 53 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 4e98eea3389..fa78377c663 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -298,22 +298,48 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) static void cmd_state_sampler(const struct cell_command_sampler *sampler) { - DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); + uint unit = sampler->unit; - spu.sampler[sampler->unit] = sampler->state; + DEBUG_PRINTF("SAMPLER [%u]\n", unit); - if (spu.sampler[sampler->unit].min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { - /* use lambda/lod to determine min vs. mag filter */ - spu.sample_texture4[sampler->unit] = sample_texture4_lod; + spu.sampler[unit] = sampler->state; + + switch (spu.sampler[unit].min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + spu.min_sample_texture4[unit] = sample_texture4_bilinear; + break; + case PIPE_TEX_FILTER_ANISO: + /* fall-through, for now */ + case PIPE_TEX_FILTER_NEAREST: + spu.min_sample_texture4[unit] = sample_texture4_nearest; + break; + default: + ASSERT(0); } - else if (spu.sampler[sampler->unit].min_img_filter - == PIPE_TEX_FILTER_LINEAR) { - /* min = mag = bilinear */ - spu.sample_texture4[sampler->unit] = sample_texture4_bilinear; + + switch (spu.sampler[sampler->unit].mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + spu.mag_sample_texture4[unit] = sample_texture4_bilinear; + break; + case PIPE_TEX_FILTER_ANISO: + /* fall-through, for now */ + case PIPE_TEX_FILTER_NEAREST: + spu.mag_sample_texture4[unit] = sample_texture4_nearest; + break; + default: + ASSERT(0); } - else { - /* min = mag = inearest */ - spu.sample_texture4[sampler->unit] = sample_texture4_nearest; + + switch (spu.sampler[sampler->unit].min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + case PIPE_TEX_MIPFILTER_LINEAR: + spu.sample_texture4[unit] = sample_texture4_lod; + break; + case PIPE_TEX_MIPFILTER_NONE: + spu.sample_texture4[unit] = spu.mag_sample_texture4[unit]; + break; + default: + ASSERT(0); } } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index cfb645add02..56aac655e90 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -170,6 +170,8 @@ struct spu_global /** Current texture sampler function */ spu_sample_texture4_func sample_texture4[CELL_MAX_SAMPLERS]; + spu_sample_texture4_func min_sample_texture4[CELL_MAX_SAMPLERS]; + spu_sample_texture4_func mag_sample_texture4[CELL_MAX_SAMPLERS]; /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 10036330c6c..267f2302f60 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -400,7 +400,7 @@ compute_lambda(uint unit, vector float s, vector float t) void sample_texture4_lod(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]) + uint unit, uint level_ignored, vector float colors[4]) { /* * Note that we're computing a lambda/lod here that's used for all @@ -417,13 +417,17 @@ sample_texture4_lod(vector float s, vector float t, else if (lambda > spu.sampler[unit].max_lod) lambda = spu.sampler[unit].max_lod; - /* convert to int level */ - level = (int) (lambda + 0.5f); - ASSERT(level >= 0); - - if (level > spu.texture[unit].max_level) - level = spu.texture[unit].max_level; - - sample_texture4_bilinear_2(s, t, r, q, unit, level, colors); + if (lambda <= 0.0f) { + /* magnify */ + spu.mag_sample_texture4[unit](s, t, r, q, unit, 0, colors); + } + else { + /* minify */ + int level = (int) (lambda + 0.5f); + if (level > (int) spu.texture[unit].max_level) + level = spu.texture[unit].max_level; + spu.min_sample_texture4[unit](s, t, r, q, unit, level, colors); + /* XXX to do: mipmap level interpolation */ + } } -- cgit v1.2.3 From 4f56d5bbf2e52c815c820138eaad6c0fb93d47ba Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 12:52:16 -0600 Subject: cell: fix broken negative texcoord conversion --- src/gallium/drivers/cell/spu/spu_texture.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 267f2302f60..83cf7dc3942 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -138,8 +138,8 @@ sample_texture4_bilinear(vector float s, vector float t, vector float ss = spu_madd(s, spu.texture[unit].level[level].width4, spu_splats(-0.5f)); vector float tt = spu_madd(t, spu.texture[unit].level[level].height4, spu_splats(-0.5f)); - vector unsigned int is0 = spu_convtu(ss, 0); - vector unsigned int it0 = spu_convtu(tt, 0); + vector unsigned int is0 = (vector unsigned int) spu_convts(ss, 0); + vector unsigned int it0 = (vector unsigned int) spu_convts(tt, 0); /* is + 1, it + 1 */ vector unsigned int is1 = spu_add(is0, 1); @@ -281,8 +281,8 @@ sample_texture4_bilinear_2(vector float s, vector float t, vector float tt = spu_madd(t, spu.texture[unit].level[level].height4, half); /* convert float coords to fixed-pt coords with 8 fraction bits */ - vector unsigned int is = spu_convtu(ss, 8); - vector unsigned int it = spu_convtu(tt, 8); + vector unsigned int is = (vector unsigned int) spu_convts(ss, 8); + vector unsigned int it = (vector unsigned int) spu_convts(tt, 8); /* compute integer texel weights in [0, 255] */ vector signed int sWeights0 = spu_and((vector signed int) is, 255); -- cgit v1.2.3 From 38d396e15aceaca299c5de571c4dd5b3d9b27242 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 14:02:07 -0600 Subject: cell: fix npot texture tiling bugs --- src/gallium/drivers/cell/ppu/cell_texture.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 87f1598dae6..4fd66bdea08 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -172,9 +172,17 @@ twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, for (jt = 0; jt < w_t; jt++) { /* start of dest tile: */ uint *tdst = dst + (it * w_t + jt) * tile_size2; + + /* compute size of this tile (may be smaller than tile_size) */ + /* XXX note: a compiler bug was found here. That's why the code + * looks as it does. + */ + uint tile_width = w - jt * tile_size; + tile_width = MIN2(tile_width, tile_size); + uint tile_height = h - it * tile_size; + tile_height = MIN2(tile_height, tile_size); + /* loop over texels in the tile */ - uint tile_width = MIN2(tile_size, w); - uint tile_height = MIN2(tile_size, h); for (i = 0; i < tile_height; i++) { for (j = 0; j < tile_width; j++) { const uint srci = it * tile_size + i; @@ -200,8 +208,8 @@ cell_twiddle_texture(struct pipe_screen *screen, const uint level = surface->level; const uint texWidth = texture->base.width[level]; const uint texHeight = texture->base.height[level]; - const uint bufWidth = MAX2(texWidth, TILE_SIZE); - const uint bufHeight = MAX2(texHeight, TILE_SIZE); + const uint bufWidth = align(texWidth, TILE_SIZE); + const uint bufHeight = align(texHeight, TILE_SIZE); const void *map = pipe_buffer_map(screen, surface->buffer, PIPE_BUFFER_USAGE_CPU_READ); const uint *src = (const uint *) ((const ubyte *) map + surface->offset); -- cgit v1.2.3 From 85dc1aec9c5fc63a01bb8db07215b84790d15d8f Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 15:19:01 -0600 Subject: cell: support NPOT textures, clamp/repeat mode, normalized/unorm texcoords glDrawPixels works now. --- src/gallium/drivers/cell/spu/spu_command.c | 48 +++++++++++++-- src/gallium/drivers/cell/spu/spu_main.h | 12 ++-- src/gallium/drivers/cell/spu/spu_texture.c | 99 ++++++++++++++++++++---------- 3 files changed, 117 insertions(+), 42 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index fa78377c663..b1efe97e76c 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -295,6 +295,42 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) } +/** + * Tex texture mask_s/t and scale_s/t fields depend on the texture size and + * sampler wrap modes. + */ +static void +update_tex_masks(struct spu_texture *texture, + const struct pipe_sampler_state *sampler) +{ + uint i; + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + int width = texture->level[i].width; + int height = texture->level[i].height; + + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) + texture->level[i].mask_s = spu_splats(width - 1); + else + texture->level[i].mask_s = spu_splats(~0); + + if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT) + texture->level[i].mask_t = spu_splats(height - 1); + else + texture->level[i].mask_t = spu_splats(~0); + + if (sampler->normalized_coords) { + texture->level[i].scale_s = spu_splats((float) width); + texture->level[i].scale_t = spu_splats((float) height); + } + else { + texture->level[i].scale_s = spu_splats(1.0f); + texture->level[i].scale_t = spu_splats(1.0f); + } + } +} + + static void cmd_state_sampler(const struct cell_command_sampler *sampler) { @@ -341,6 +377,8 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) default: ASSERT(0); } + + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); } @@ -370,15 +408,15 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].level[i].tiles_per_row = (width + TILE_SIZE - 1) / TILE_SIZE; - spu.texture[unit].level[i].width4 = spu_splats((float) width); - spu.texture[unit].level[i].height4 = spu_splats((float) height); - - spu.texture[unit].level[i].tex_size_x_mask = spu_splats(width - 1); - spu.texture[unit].level[i].tex_size_y_mask = spu_splats(height - 1); + spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); + spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); if (texture->start[i]) spu.texture[unit].max_level = i; } + + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); + //Debug=0; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 56aac655e90..45c6f4ced1d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -107,17 +107,21 @@ struct spu_framebuffer } ALIGN16_ATTRIB; +/** per-texture level info */ struct spu_texture_level { void *start; ushort width, height; ushort tiles_per_row; - vector float width4; /**< == {width, width, width, width} */ - vector float height4; /**< == {height, height, height, height} */ - vector unsigned int tex_size_x_mask; /**< splat(width-1) */ - vector unsigned int tex_size_y_mask; /**< splat(height-1) */ + /** texcoord scale factors */ + vector float scale_s, scale_t; + /** texcoord masks (if REPEAT then size-1, else ~0) */ + vector signed int mask_s, mask_t; + /** texcoord clamp limits */ + vector signed int max_s, max_t; } ALIGN16_ATTRIB; + struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 83cf7dc3942..b21c43a467a 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -67,13 +67,13 @@ invalidate_tex_cache(void) * a time. */ static void -get_four_texels(uint unit, uint level, vec_uint4 x, vec_uint4 y, +get_four_texels(uint unit, uint level, vec_int4 x, vec_int4 y, vec_uint4 *texels) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; const unsigned texture_ea = (uintptr_t) tlevel->start; - const vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ - const vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ + const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ + const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ @@ -99,6 +99,20 @@ get_four_texels(uint unit, uint level, vec_uint4 x, vec_uint4 y, } +/** clamp vec to [0, max] */ +static INLINE vector signed int +spu_clamp(vector signed int vec, vector signed int max) +{ + static const vector signed int zero = {0,0,0,0}; + vector unsigned int c; + c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */ + vec = spu_sel(zero, vec, c); + c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */ + vec = spu_sel(vec, max, c); + return vec; +} + + /** * Do nearest texture sampling for four pixels. @@ -109,15 +123,20 @@ sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, uint unit, uint level, vector float colors[4]) { - vector float ss = spu_mul(s, spu.texture[unit].level[level].width4); - vector float tt = spu_mul(t, spu.texture[unit].level[level].height4); - vector unsigned int is = spu_convtu(ss, 0); - vector unsigned int it = spu_convtu(tt, 0); + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + vector float ss = spu_mul(s, tlevel->scale_s); + vector float tt = spu_mul(t, tlevel->scale_t); + vector signed int is = spu_convts(ss, 0); + vector signed int it = spu_convts(tt, 0); vec_uint4 texels[4]; /* PIPE_TEX_WRAP_REPEAT */ - is = spu_and(is, spu.texture[unit].level[level].tex_size_x_mask); - it = spu_and(it, spu.texture[unit].level[level].tex_size_y_mask); + is = spu_and(is, tlevel->mask_s); + it = spu_and(it, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is = spu_clamp(is, tlevel->max_s); + it = spu_clamp(it, tlevel->max_t); get_four_texels(unit, level, is, it, texels); @@ -135,21 +154,28 @@ sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, uint unit, uint level, vector float colors[4]) { - vector float ss = spu_madd(s, spu.texture[unit].level[level].width4, spu_splats(-0.5f)); - vector float tt = spu_madd(t, spu.texture[unit].level[level].height4, spu_splats(-0.5f)); + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + vector float ss = spu_madd(s, tlevel->scale_s, spu_splats(-0.5f)); + vector float tt = spu_madd(t, tlevel->scale_t, spu_splats(-0.5f)); - vector unsigned int is0 = (vector unsigned int) spu_convts(ss, 0); - vector unsigned int it0 = (vector unsigned int) spu_convts(tt, 0); + vector signed int is0 = spu_convts(ss, 0); + vector signed int it0 = spu_convts(tt, 0); /* is + 1, it + 1 */ - vector unsigned int is1 = spu_add(is0, 1); - vector unsigned int it1 = spu_add(it0, 1); + vector signed int is1 = spu_add(is0, 1); + vector signed int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].level[level].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].level[level].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].level[level].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].level[level].tex_size_y_mask); + is0 = spu_and(is0, tlevel->mask_s); + it0 = spu_and(it0, tlevel->mask_t); + is1 = spu_and(is1, tlevel->mask_s); + it1 = spu_and(it1, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is0 = spu_clamp(is0, tlevel->max_s); + it0 = spu_clamp(it0, tlevel->max_t); + is1 = spu_clamp(is1, tlevel->max_s); + it1 = spu_clamp(it1, tlevel->max_t); /* get packed int texels */ vector unsigned int texels[16]; @@ -275,34 +301,41 @@ sample_texture4_bilinear_2(vector float s, vector float t, vector float r, vector float q, uint unit, uint level, vector float colors[4]) { + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; /* Scale texcoords by size of texture, and add half pixel bias */ - vector float ss = spu_madd(s, spu.texture[unit].level[level].width4, half); - vector float tt = spu_madd(t, spu.texture[unit].level[level].height4, half); + vector float ss = spu_madd(s, tlevel->scale_s, half); + vector float tt = spu_madd(t, tlevel->scale_t, half); /* convert float coords to fixed-pt coords with 8 fraction bits */ - vector unsigned int is = (vector unsigned int) spu_convts(ss, 8); - vector unsigned int it = (vector unsigned int) spu_convts(tt, 8); + vector signed int is = spu_convts(ss, 8); + vector signed int it = spu_convts(tt, 8); /* compute integer texel weights in [0, 255] */ - vector signed int sWeights0 = spu_and((vector signed int) is, 255); - vector signed int tWeights0 = spu_and((vector signed int) it, 255); + vector signed int sWeights0 = spu_and(is, 255); + vector signed int tWeights0 = spu_and(it, 255); vector signed int sWeights1 = spu_sub(255, sWeights0); vector signed int tWeights1 = spu_sub(255, tWeights0); /* texel coords: is0 = is / 256, it0 = is / 256 */ - vector unsigned int is0 = spu_rlmask(is, -8); - vector unsigned int it0 = spu_rlmask(it, -8); + vector signed int is0 = spu_rlmask(is, -8); + vector signed int it0 = spu_rlmask(it, -8); /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */ - vector unsigned int is1 = spu_add(is0, 1); - vector unsigned int it1 = spu_add(it0, 1); + vector signed int is1 = spu_add(is0, 1); + vector signed int it1 = spu_add(it0, 1); /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, spu.texture[unit].level[level].tex_size_x_mask); - it0 = spu_and(it0, spu.texture[unit].level[level].tex_size_y_mask); - is1 = spu_and(is1, spu.texture[unit].level[level].tex_size_x_mask); - it1 = spu_and(it1, spu.texture[unit].level[level].tex_size_y_mask); + is0 = spu_and(is0, tlevel->mask_s); + it0 = spu_and(it0, tlevel->mask_t); + is1 = spu_and(is1, tlevel->mask_s); + it1 = spu_and(it1, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is0 = spu_clamp(is0, tlevel->max_s); + it0 = spu_clamp(it0, tlevel->max_t); + is1 = spu_clamp(is1, tlevel->max_s); + it1 = spu_clamp(it1, tlevel->max_t); /* get packed int texels */ vector unsigned int texels[16]; -- cgit v1.2.3 From e0931e520a8d7cc5b4db8a4b887c5cf139b2647f Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 17:09:56 -0600 Subject: cell: fall-through case for TGSI_OPCODE_TXB --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 68093d9e83d..3dfd5f673dd 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -1672,6 +1672,8 @@ emit_instruction(struct codegen *gen, /* fall-through for now */ case TGSI_OPCODE_TXD: /* fall-through for now */ + case TGSI_OPCODE_TXB: + /* fall-through for now */ case TGSI_OPCODE_TXP: return emit_TXP(gen, inst); -- cgit v1.2.3 From 8f7c6b55ae962e30f32cfec9a14a652d3b5b5943 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 17:11:29 -0600 Subject: cell: support for cubemaps Though, progs/demos/cubemap.c doesn't quite work right... --- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_state_emit.c | 2 + src/gallium/drivers/cell/ppu/cell_texture.c | 37 ++++-- src/gallium/drivers/cell/spu/spu_command.c | 17 ++- src/gallium/drivers/cell/spu/spu_funcs.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 4 +- src/gallium/drivers/cell/spu/spu_texture.c | 171 ++++++++++++++++++++++--- src/gallium/drivers/cell/spu/spu_texture.h | 21 ++- 8 files changed, 214 insertions(+), 41 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index e4de9a551d7..c1e78f4db3d 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -251,6 +251,7 @@ struct cell_command_sampler struct cell_command_texture { uint64_t opcode; /**< CELL_CMD_STATE_TEXTURE */ + uint target; /**< PIPE_TEXTURE_x */ uint unit; void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ ushort width[CELL_MAX_TEXTURE_LEVELS]; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index cae546b7002..d4a867ffcf4 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -217,6 +217,7 @@ cell_emit_state(struct cell_context *cell) texture->width[level] = cell->texture[i]->base.width[level]; texture->height[level] = cell->texture[i]->base.height[level]; } + texture->target = cell->texture[i]->base.target; } else { uint level; @@ -225,6 +226,7 @@ cell_emit_state(struct cell_context *cell) texture->width[level] = 0; texture->height[level] = 0; } + texture->target = 0; } } } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 4fd66bdea08..4c92ef154fb 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -137,6 +137,7 @@ cell_texture_release(struct pipe_screen *screen, */ if (--(*pt)->refcount <= 0) { struct cell_texture *ct = cell_texture(*pt); + uint i; /* DBG("%s deleting %p\n", __FUNCTION__, (void *) ct); @@ -144,6 +145,12 @@ cell_texture_release(struct pipe_screen *screen, pipe_buffer_reference(screen, &ct->buffer, NULL); + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + if (ct->tiled_data[i]) { + FREE(ct->tiled_data[i]); + } + } + FREE(ct); } *pt = NULL; @@ -204,27 +211,33 @@ static void cell_twiddle_texture(struct pipe_screen *screen, struct pipe_surface *surface) { - struct cell_texture *texture = cell_texture(surface->texture); + struct cell_texture *ct = cell_texture(surface->texture); const uint level = surface->level; - const uint texWidth = texture->base.width[level]; - const uint texHeight = texture->base.height[level]; + const uint texWidth = ct->base.width[level]; + const uint texHeight = ct->base.height[level]; const uint bufWidth = align(texWidth, TILE_SIZE); const uint bufHeight = align(texHeight, TILE_SIZE); const void *map = pipe_buffer_map(screen, surface->buffer, PIPE_BUFFER_USAGE_CPU_READ); const uint *src = (const uint *) ((const ubyte *) map + surface->offset); - switch (texture->base.format) { + switch (ct->base.format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - /* free old tiled data */ - if (texture->tiled_data[level]) { - align_free(texture->tiled_data[level]); + { + int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1; + int offset = bufWidth * bufHeight * 4 * surface->face; + uint *dst; + + if (!ct->tiled_data[level]) { + ct->tiled_data[level] = + align_malloc(bufWidth * bufHeight * 4 * numFaces, 16); + } + + dst = (uint *) ((ubyte *) ct->tiled_data[level] + offset); + + twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, + surface->stride, src); } - /* alloc new tiled data */ - texture->tiled_data[level] = align_malloc(bufWidth * bufHeight * 4, 16); - twiddle_image_uint(texWidth, texHeight, TILE_SIZE, - texture->tiled_data[level], - surface->stride, src); break; default: printf("Cell: twiddle unsupported texture format\n"); diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index b1efe97e76c..c951fa6f317 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -301,7 +301,8 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) */ static void update_tex_masks(struct spu_texture *texture, - const struct pipe_sampler_state *sampler) + const struct pipe_sampler_state *sampler, + uint unit) { uint i; @@ -328,6 +329,11 @@ update_tex_masks(struct spu_texture *texture, texture->level[i].scale_t = spu_splats(1.0f); } } + + /* XXX temporary hack */ + if (texture->target == PIPE_TEXTURE_CUBE) { + spu.sample_texture4[unit] = sample_texture4_cube; + } } @@ -378,7 +384,7 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) ASSERT(0); } - update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); + update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); } @@ -393,6 +399,7 @@ cmd_state_texture(const struct cell_command_texture *texture) DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit); spu.texture[unit].max_level = 0; + spu.texture[unit].target = texture->target; for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { uint width = texture->width[i]; @@ -408,6 +415,10 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].level[i].tiles_per_row = (width + TILE_SIZE - 1) / TILE_SIZE; + spu.texture[unit].level[i].bytes_per_image = + 4 * ((width + TILE_SIZE - 1) & ~(TILE_SIZE-1)) + * ((height + TILE_SIZE - 1) & ~(TILE_SIZE-1)); + spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); @@ -415,7 +426,7 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].max_level = i; } - update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); + update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); //Debug=0; } diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index 66b82f673d5..5c3ee305d48 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -106,7 +106,7 @@ spu_txp(vector float s, vector float t, vector float r, vector float q, unsigned unit) { struct vec_4x4 colors; - spu.sample_texture4[unit](s, t, r, q, unit, 0, colors.v); + spu.sample_texture4[unit](s, t, r, q, unit, 0, 0, colors.v); return colors; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 45c6f4ced1d..8781041bff8 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -68,7 +68,7 @@ typedef void (*spu_sample_texture4_func)(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, + uint unit, uint level, uint face, vector float colors[4]); @@ -113,6 +113,7 @@ struct spu_texture_level void *start; ushort width, height; ushort tiles_per_row; + uint bytes_per_image; /** texcoord scale factors */ vector float scale_s, scale_t; /** texcoord masks (if REPEAT then size-1, else ~0) */ @@ -126,6 +127,7 @@ struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; + uint target; /**< PIPE_TEXTURE_x */ } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index b21c43a467a..2570f02c730 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -48,6 +48,9 @@ invalidate_tex_cache(void) uint bytes = 4 * spu.texture[unit].level[lvl].width * spu.texture[unit].level[lvl].height; + if (spu.texture[unit].target == PIPE_TEXTURE_CUBE) + bytes *= 6; + spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); } } @@ -67,11 +70,11 @@ invalidate_tex_cache(void) * a time. */ static void -get_four_texels(uint unit, uint level, vec_int4 x, vec_int4 y, +get_four_texels(uint unit, uint level, uint face, vec_int4 x, vec_int4 y, vec_uint4 *texels) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; - const unsigned texture_ea = (uintptr_t) tlevel->start; + unsigned texture_ea = (uintptr_t) tlevel->start; const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ @@ -88,6 +91,8 @@ get_four_texels(uint unit, uint level, vec_int4 x, vec_int4 y, vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); + texture_ea = texture_ea + face * tlevel->bytes_per_image; + spu_dcache_fetch_unaligned((qword *) & texels[0], texture_ea + spu_extract(offset, 0), 4); spu_dcache_fetch_unaligned((qword *) & texels[1], @@ -121,7 +126,8 @@ spu_clamp(vector signed int vec, vector signed int max) void sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]) + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; vector float ss = spu_mul(s, tlevel->scale_s); @@ -138,7 +144,7 @@ sample_texture4_nearest(vector float s, vector float t, is = spu_clamp(is, tlevel->max_s); it = spu_clamp(it, tlevel->max_t); - get_four_texels(unit, level, is, it, texels); + get_four_texels(unit, level, face, is, it, texels); /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ spu_unpack_A8R8G8B8_transpose4(texels, colors); @@ -152,11 +158,14 @@ sample_texture4_nearest(vector float s, vector float t, void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]) + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; - vector float ss = spu_madd(s, tlevel->scale_s, spu_splats(-0.5f)); - vector float tt = spu_madd(t, tlevel->scale_t, spu_splats(-0.5f)); + static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; + + vector float ss = spu_madd(s, tlevel->scale_s, half); + vector float tt = spu_madd(t, tlevel->scale_t, half); vector signed int is0 = spu_convts(ss, 0); vector signed int it0 = spu_convts(tt, 0); @@ -179,10 +188,10 @@ sample_texture4_bilinear(vector float s, vector float t, /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, level, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, level, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, level, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, level, is1, it1, texels + 12); /* lower-right */ + get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */ /* XXX possibly rework following code to compute the weighted sample * colors with integer arithmetic for fewer int->float conversions. @@ -299,10 +308,12 @@ transpose(vector unsigned int *mOut0, void sample_texture4_bilinear_2(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]) + uint unit, uint level, uint face, + vector float colors[4]) { const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; + /* Scale texcoords by size of texture, and add half pixel bias */ vector float ss = spu_madd(s, tlevel->scale_s, half); vector float tt = spu_madd(t, tlevel->scale_t, half); @@ -339,10 +350,10 @@ sample_texture4_bilinear_2(vector float s, vector float t, /* get packed int texels */ vector unsigned int texels[16]; - get_four_texels(unit, level, is0, it0, texels + 0); /* upper-left */ - get_four_texels(unit, level, is1, it0, texels + 4); /* upper-right */ - get_four_texels(unit, level, is0, it1, texels + 8); /* lower-left */ - get_four_texels(unit, level, is1, it1, texels + 12); /* lower-right */ + get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */ + get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */ + get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */ + get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */ /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ { @@ -433,7 +444,8 @@ compute_lambda(uint unit, vector float s, vector float t) void sample_texture4_lod(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level_ignored, vector float colors[4]) + uint unit, uint level_ignored, uint face, + vector float colors[4]) { /* * Note that we're computing a lambda/lod here that's used for all @@ -452,15 +464,136 @@ sample_texture4_lod(vector float s, vector float t, if (lambda <= 0.0f) { /* magnify */ - spu.mag_sample_texture4[unit](s, t, r, q, unit, 0, colors); + spu.mag_sample_texture4[unit](s, t, r, q, unit, 0, 0, colors); } else { /* minify */ int level = (int) (lambda + 0.5f); if (level > (int) spu.texture[unit].max_level) level = spu.texture[unit].max_level; - spu.min_sample_texture4[unit](s, t, r, q, unit, level, colors); + spu.min_sample_texture4[unit](s, t, r, q, unit, level, 0, colors); /* XXX to do: mipmap level interpolation */ } } + +/** XXX need a SIMD version of this */ +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + const float arx = fabsf(rx); + const float ary = fabsf(ry); + const float arz = fabsf(rz); + unsigned face; + float sc, tc, ma; + + if (arx > ary && arx > arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; + } + } + else if (ary > arx && ary > arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + + *newS = (sc / ma + 1.0F) * 0.5F; + *newT = (tc / ma + 1.0F) * 0.5F; + + return face; +} + + + +void +sample_texture4_cube(vector float s, vector float t, + vector float r, vector float q, + uint unit, uint level, int face_ignored, + vector float colors[4]) +{ + static const vector float zero = {0.0f, 0.0f, 0.0f, 0.0f}; + uint p, faces[4]; + float newS[4], newT[4]; + + /* Compute cube face referenced by the four sets of texcoords. + * XXX we should SIMD-ize this. + */ + for (p = 0; p < 4; p++) { + float rx = spu_extract(s, p); + float ry = spu_extract(t, p); + float rz = spu_extract(r, p); + faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]); + } + + if (faces[0] == faces[1] && + faces[0] == faces[2] && + faces[0] == faces[3]) { + /* GOOD! All four texcoords refer to the same cube face */ + s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; + t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; + sample_texture4_nearest(s, t, zero, zero, unit, level, faces[0], colors); + } + else { + /* BAD! The four texcoords refer to different faces */ + for (p = 0; p < 4; p++) { + vector float c[4]; + + sample_texture4_nearest(spu_splats(newS[p]), spu_splats(newT[p]), + zero, zero, unit, level, faces[p], c); + + float red = spu_extract(c[0], p); + float green = spu_extract(c[1], p); + float blue = spu_extract(c[2], p); + float alpha = spu_extract(c[3], p); + + colors[0] = spu_insert(red, colors[0], p); + colors[1] = spu_insert(green, colors[1], p); + colors[2] = spu_insert(blue, colors[2], p); + colors[3] = spu_insert(alpha, colors[3], p); + } + } +} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index ec06a50b4a1..08b891a4a86 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -39,24 +39,35 @@ invalidate_tex_cache(void); extern void sample_texture4_nearest(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]); + uint unit, uint level, uint face, + vector float colors[4]); extern void sample_texture4_bilinear(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]); + uint unit, uint level, uint face, + vector float colors[4]); extern void sample_texture4_bilinear_2(vector float s, vector float t, - vector float r, vector float q, - uint unit, uint level, vector float colors[4]); + vector float r, vector float q, + uint unit, uint level, uint face, + vector float colors[4]); extern void sample_texture4_lod(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, vector float colors[4]); + uint unit, uint level, uint face, + vector float colors[4]); + + +extern void +sample_texture4_cube(vector float s, vector float t, + vector float r, vector float q, + uint unit, uint level_ignored, int face_ignored, + vector float colors[4]); #endif /* SPU_TEXTURE_H */ -- cgit v1.2.3 From e42a394ed5ca00a9d0a51a0c26d4fef9959ba43c Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 17:19:57 -0600 Subject: cell: fix incorrect parameter type --- src/gallium/drivers/cell/spu/spu_texture.c | 2 +- src/gallium/drivers/cell/spu/spu_texture.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 2570f02c730..9e25094d13c 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -552,7 +552,7 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) void sample_texture4_cube(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level, int face_ignored, + uint unit, uint level, uint face_ignored, vector float colors[4]) { static const vector float zero = {0.0f, 0.0f, 0.0f, 0.0f}; diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index 08b891a4a86..387484c3ad3 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -66,7 +66,7 @@ sample_texture4_lod(vector float s, vector float t, extern void sample_texture4_cube(vector float s, vector float t, vector float r, vector float q, - uint unit, uint level_ignored, int face_ignored, + uint unit, uint level_ignored, uint face_ignored, vector float colors[4]); -- cgit v1.2.3 From 6c017c2c3c3649650cd0dc89a3b4946eab0e5a8c Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 17:22:06 -0600 Subject: cell: replace FREE() with align_free() --- src/gallium/drivers/cell/ppu/cell_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 4c92ef154fb..230e1925733 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -147,7 +147,7 @@ cell_texture_release(struct pipe_screen *screen, for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { if (ct->tiled_data[i]) { - FREE(ct->tiled_data[i]); + align_free(ct->tiled_data[i]); } } -- cgit v1.2.3 From 41ccdde767e7aba6e8e6a9a035eacd6338c03a95 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Tue, 14 Oct 2008 17:22:40 -0600 Subject: cell: initial bits for 3D texture support --- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_state_emit.c | 2 ++ src/gallium/drivers/cell/spu/spu_command.c | 13 +++++++++++-- src/gallium/drivers/cell/spu/spu_main.h | 8 ++++---- src/gallium/drivers/cell/spu/spu_texture.c | 2 ++ 5 files changed, 20 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index c1e78f4db3d..b0169b8e329 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -256,6 +256,7 @@ struct cell_command_texture void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ ushort width[CELL_MAX_TEXTURE_LEVELS]; ushort height[CELL_MAX_TEXTURE_LEVELS]; + ushort depth[CELL_MAX_TEXTURE_LEVELS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index d4a867ffcf4..bb694aa1073 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -216,6 +216,7 @@ cell_emit_state(struct cell_context *cell) texture->start[level] = cell->texture[i]->tiled_data[level]; texture->width[level] = cell->texture[i]->base.width[level]; texture->height[level] = cell->texture[i]->base.height[level]; + texture->depth[level] = cell->texture[i]->base.depth[level]; } texture->target = cell->texture[i]->base.target; } @@ -225,6 +226,7 @@ cell_emit_state(struct cell_context *cell) texture->start[level] = NULL; texture->width[level] = 0; texture->height[level] = 0; + texture->depth[level] = 0; } texture->target = 0; } diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index c951fa6f317..c28677ebf87 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -59,6 +59,14 @@ static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] +static INLINE int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + + + /** * Tell the PPU that this SPU has finished copying a buffer to * local store and that it may be reused by the PPU. @@ -404,6 +412,7 @@ cmd_state_texture(const struct cell_command_texture *texture) for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { uint width = texture->width[i]; uint height = texture->height[i]; + uint depth = texture->depth[i]; DEBUG_PRINTF(" LEVEL %u: at %p size[0] %u x %u\n", i, texture->start[i], texture->width[i], texture->height[i]); @@ -411,13 +420,13 @@ cmd_state_texture(const struct cell_command_texture *texture) spu.texture[unit].level[i].start = texture->start[i]; spu.texture[unit].level[i].width = width; spu.texture[unit].level[i].height = height; + spu.texture[unit].level[i].depth = depth; spu.texture[unit].level[i].tiles_per_row = (width + TILE_SIZE - 1) / TILE_SIZE; spu.texture[unit].level[i].bytes_per_image = - 4 * ((width + TILE_SIZE - 1) & ~(TILE_SIZE-1)) - * ((height + TILE_SIZE - 1) & ~(TILE_SIZE-1)); + 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth; spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 8781041bff8..eff43b870ca 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -111,15 +111,15 @@ struct spu_framebuffer struct spu_texture_level { void *start; - ushort width, height; + ushort width, height, depth; ushort tiles_per_row; uint bytes_per_image; /** texcoord scale factors */ - vector float scale_s, scale_t; + vector float scale_s, scale_t, scale_r; /** texcoord masks (if REPEAT then size-1, else ~0) */ - vector signed int mask_s, mask_t; + vector signed int mask_s, mask_t, mask_r; /** texcoord clamp limits */ - vector signed int max_s, max_t; + vector signed int max_s, max_t, max_r; } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 9e25094d13c..42eb06a3625 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -50,6 +50,8 @@ invalidate_tex_cache(void) if (spu.texture[unit].target == PIPE_TEXTURE_CUBE) bytes *= 6; + else if (spu.texture[unit].target == PIPE_TEXTURE_3D) + bytes *= spu.texture[unit].level[lvl].depth; spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); } -- cgit v1.2.3 From abcd28b0b3fb77d3f99da957faa94e21ed54cae6 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 15 Oct 2008 10:43:53 -0600 Subject: cell: need to flush draw module when constants change --- src/gallium/drivers/cell/ppu/cell_state_shader.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 54a17eaf2b7..cda39f8d592 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -191,6 +191,8 @@ cell_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + draw_flush(cell->draw); + /* note: reference counting */ winsys_buffer_reference(ws, &cell->constants[shader].buffer, -- cgit v1.2.3 From 4e506f422a13b20fcc95edb6c7048a9de6e32efa Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 15 Oct 2008 10:53:48 -0600 Subject: cell: fix/add some fallback blend cases --- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 49 ++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index d252fa6dc18..9404704abf8 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -40,6 +40,24 @@ #define LINEAR_QUAD_LAYOUT 1 +static INLINE vector float +spu_min(vector float a, vector float b) +{ + vector unsigned int m; + m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ + return spu_sel(a, b, m); +} + + +static INLINE vector float +spu_max(vector float a, vector float b) +{ + vector unsigned int m; + m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ + return spu_sel(b, a, m); +} + + /** * Called by rasterizer for each quad after the shader has run. Do * all the per-fragment operations including alpha test, z test, @@ -293,9 +311,9 @@ spu_fallback_fragment_ops(uint x, uint y, */ switch (spu.blend.rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: - term2r = fragR; - term2g = fragG; - term2b = fragB; + term2r = fbRGBA[0]; + term2g = fbRGBA[1]; + term2b = fbRGBA[2]; break; case PIPE_BLENDFACTOR_ZERO: term2r = @@ -361,8 +379,24 @@ spu_fallback_fragment_ops(uint x, uint y, fragG = spu_sub(term1g, term2g); fragB = spu_sub(term1b, term2b); break; + case PIPE_BLEND_REVERSE_SUBTRACT: + fragR = spu_sub(term2r, term1r); + fragG = spu_sub(term2g, term1g); + fragB = spu_sub(term2b, term1b); + break; + case PIPE_BLEND_MIN: + fragR = spu_min(term1r, term2r); + fragG = spu_min(term1g, term2g); + fragB = spu_min(term1b, term2b); + break; + case PIPE_BLEND_MAX: + fragR = spu_max(term1r, term2r); + fragG = spu_max(term1g, term2g); + fragB = spu_max(term1b, term2b); + break; /* XXX more cases */ default: + printf("unsup 0x%x\n", spu.blend.rgb_func); ASSERT(0); } @@ -376,6 +410,15 @@ spu_fallback_fragment_ops(uint x, uint y, case PIPE_BLEND_SUBTRACT: fragA = spu_sub(term1a, term2a); break; + case PIPE_BLEND_REVERSE_SUBTRACT: + fragA = spu_sub(term2a, term1a); + break; + case PIPE_BLEND_MIN: + fragA = spu_min(term1a, term2a); + break; + case PIPE_BLEND_MAX: + fragA = spu_max(term1a, term2a); + break; /* XXX more cases */ default: ASSERT(0); -- cgit v1.2.3 From f60c756ed14f25731ff2a52d6b695ceb5b7a6f6b Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 15 Oct 2008 10:54:06 -0600 Subject: cell: additional debug --- src/gallium/drivers/cell/spu/spu_command.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index c28677ebf87..a07b312111e 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -250,6 +250,7 @@ cmd_state_fs_constants(const uint64_t *buffer, uint pos) /* Expand each float to float[4] for SOA execution */ for (i = 0; i < num_const; i++) { + DEBUG_PRINTF(" const[%u] = %f\n", i, constants[i]); spu.constants[i] = spu_splats(constants[i]); } -- cgit v1.2.3 From 9382a7100fd6de6e615dc661ed813bf43e24ec15 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 15 Oct 2008 10:54:36 -0600 Subject: cell: updated vertex dump/debug code --- src/gallium/drivers/cell/spu/spu_tri.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 03f094373df..2417db89604 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -404,11 +404,14 @@ flush_spans(void) static void print_vertex(const struct vertex_header *v) { - int i; - fprintf(stderr, "Vertex: (%p)\n", v); - for (i = 0; i < setup.quad.nr_attrs; i++) { - fprintf(stderr, " %d: %f %f %f %f\n", i, - v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); + uint i; + fprintf(stderr, " Vertex: (%p)\n", v); + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + fprintf(stderr, " %d: %f %f %f %f\n", i, + spu_extract(v->data[i], 0), + spu_extract(v->data[i], 1), + spu_extract(v->data[i], 2), + spu_extract(v->data[i], 3)); } } #endif @@ -420,10 +423,12 @@ setup_sort_vertices(const struct vertex_header *v0, const struct vertex_header *v2) { #if DEBUG_VERTS - fprintf(stderr, "Triangle:\n"); - print_vertex(v0); - print_vertex(v1); - print_vertex(v2); + if (spu.init.id==0) { + fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); + print_vertex(v0); + print_vertex(v1); + print_vertex(v2); + } #endif setup.vprovoke = v2; -- cgit v1.2.3 From a7f06dae20c173a0edbb1d310b5f6b06068a61b0 Mon Sep 17 00:00:00 2001 From: Brian Paul <brian.paul@tungstengraphics.com> Date: Wed, 15 Oct 2008 10:37:49 -0600 Subject: gallium: temporariliy revert softpipe shader optimization --- src/gallium/drivers/softpipe/sp_fs_exec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 4fea71c3140..89429c100e1 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -102,7 +102,8 @@ exec_prepare( const struct sp_fragment_shader *base, * Bind tokens/shader to the interpreter's machine state. * Avoid redundant binding. */ - if (spefs->machine_tokens != base->shader.tokens) { + /* XXX revisit this */ + if (1 /* spefs->machine_tokens != base->